From cfa1eaa904b9ad7276c5b1b20aeddbf5a9457fc9 Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Sat, 23 May 2026 09:18:02 +0800 Subject: [PATCH] Complete batch import v2 runtime and host capability recovery --- cmd/cli/main.go | 24 +- cmd/cli/main_test.go | 5 +- .../2026-05-18-PRODUCTION_READINESS_REVIEW.md | 7 +- ...CH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md | 12 +- docs/EXECUTION_BOARD.md | 18 +- docs/KNOWN_LIMITATIONS.md | 28 +- docs/PRODUCTION_CLOSURE_BOARD.md | 4 +- docs/openapi.yaml | 102 +++ ...atch-auto-import-v2-implementation-plan.md | 784 ++++++++++++++++++ internal/access/closure.go | 154 ---- internal/access/closure_test.go | 68 +- internal/access/gateway_validation.go | 80 ++ internal/access/openai_responses_repair.go | 69 ++ internal/access/planner.go | 18 + internal/access/self_service.go | 7 + internal/access/service.go | 23 + internal/access/subscription.go | 36 + internal/access/types.go | 49 ++ internal/access/validation.go | 25 + internal/app/app_test.go | 13 +- internal/app/batch_runtime.go | 35 +- internal/app/batch_runtime_background.go | 88 ++ internal/app/batch_runtime_background_test.go | 111 +++ internal/app/bootstrap.go | 25 +- internal/app/bootstrap_test.go | 59 ++ internal/app/http_api.go | 23 +- internal/app/http_batch_import.go | 69 +- internal/app/http_batch_runs.go | 40 +- internal/app/http_batch_runs_test.go | 29 +- internal/app/reconcile_background.go | 312 +++++++ internal/app/reconcile_background_test.go | 171 ++++ internal/batch/confirmation.go | 22 +- internal/batch/confirmation_test.go | 77 ++ internal/batch/service.go | 99 ++- internal/batch/service_test.go | 69 ++ internal/config/config.go | 62 +- internal/config/config_test.go | 28 + .../host/sub2api/account_capability_repair.go | 36 + internal/host/sub2api/client.go | 1 + internal/host/sub2api/sub2api_test.go | 35 + internal/provision/batch_detail_service.go | 57 ++ .../provision/batch_detail_service_test.go | 53 -- internal/provision/import_service.go | 34 +- internal/provision/import_service_test.go | 114 ++- internal/provision/reconcile_compat_test.go | 26 + internal/provision/reconcile_service_test.go | 49 ++ internal/provision/runtime_import_service.go | 26 +- .../provision/runtime_import_service_test.go | 109 +++ .../service.go} | 403 ++++++--- internal/reconcile/service_test.go | 104 +++ .../0009_batch_import_run_request_context.sql | 6 + internal/store/sqlite/import_batches_repo.go | 30 + .../store/sqlite/import_run_items_repo.go | 42 + internal/store/sqlite/import_runs_repo.go | 58 +- .../store/sqlite/import_runs_repo_test.go | 26 +- internal/store/sqlite/reconcile_runs_repo.go | 9 +- .../store/sqlite/reconcile_runs_repo_test.go | 56 ++ internal/worker/runner.go | 65 ++ internal/worker/runner_test.go | 53 ++ tests/integration/store_init_test.go | 11 + 60 files changed, 3718 insertions(+), 530 deletions(-) create mode 100644 docs/plans/2026-05-22-batch-auto-import-v2-implementation-plan.md delete mode 100644 internal/access/closure.go create mode 100644 internal/access/gateway_validation.go create mode 100644 internal/access/openai_responses_repair.go create mode 100644 internal/access/planner.go create mode 100644 internal/access/self_service.go create mode 100644 internal/access/service.go create mode 100644 internal/access/subscription.go create mode 100644 internal/access/types.go create mode 100644 internal/access/validation.go create mode 100644 internal/app/batch_runtime_background.go create mode 100644 internal/app/batch_runtime_background_test.go create mode 100644 internal/app/bootstrap_test.go create mode 100644 internal/app/reconcile_background.go create mode 100644 internal/app/reconcile_background_test.go create mode 100644 internal/host/sub2api/account_capability_repair.go create mode 100644 internal/provision/batch_detail_service.go create mode 100644 internal/provision/reconcile_compat_test.go rename internal/{provision/batch_detail_and_reconcile_service.go => reconcile/service.go} (51%) create mode 100644 internal/reconcile/service_test.go create mode 100644 internal/store/migrations/0009_batch_import_run_request_context.sql create mode 100644 internal/store/sqlite/reconcile_runs_repo_test.go create mode 100644 internal/worker/runner.go create mode 100644 internal/worker/runner_test.go diff --git a/cmd/cli/main.go b/cmd/cli/main.go index 73863fb5..f0779328 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -13,6 +13,7 @@ import ( "sub2api-cn-relay-manager/internal/host/sub2api" "sub2api-cn-relay-manager/internal/pack" "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/reconcile" "sub2api-cn-relay-manager/internal/store/sqlite" ) @@ -20,7 +21,7 @@ type installPackFunc func(context.Context, installPackCLIRequest) (provision.Pac type importProviderFunc func(context.Context, importCLIRequest) (provision.ImportReport, error) type previewProviderFunc func(context.Context, previewCLIRequest) (provision.PreviewReport, error) type rollbackProviderFunc func(context.Context, rollbackCLIRequest) (rollbackSummary, error) -type reconcileProviderFunc func(context.Context, reconcileCLIRequest) (provision.ReconcileResult, error) +type reconcileProviderFunc func(context.Context, reconcileCLIRequest) (reconcile.Result, error) type installPackCLIRequest struct { HostBaseURL string @@ -445,34 +446,39 @@ func runRollbackProvider(ctx context.Context, req rollbackCLIRequest) (rollbackS }, nil } -func runReconcileProvider(ctx context.Context, req reconcileCLIRequest) (provision.ReconcileResult, error) { +func runReconcileProvider(ctx context.Context, req reconcileCLIRequest) (reconcile.Result, error) { loadedPack, err := pack.LoadDir(req.PackDir) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } providerManifest, err := findProvider(loadedPack, req.ProviderID) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } client, err := sub2api.NewClient(req.HostBaseURL, sub2api.WithAPIKey(req.HostAPIKey), sub2api.WithBearerToken(req.HostBearerToken)) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } startupConfig, err := config.LoadStartupFromEnv() if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } store, err := sqlite.Open(ctx, startupConfig.Database.SQLiteDSN) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } defer store.Close() - service := provision.NewReconcileService(store, client) - return service.Reconcile(ctx, provision.ReconcileRequest{HostBaseURL: req.HostBaseURL, AccessProbeAPIKey: req.AccessAPIKey, Pack: loadedPack, Provider: providerManifest}) + hostRow, err := store.Hosts().GetByBaseURL(ctx, req.HostBaseURL) + if err != nil { + return reconcile.Result{}, err + } + + service := reconcile.NewService(store, client) + return service.Reconcile(ctx, reconcile.Request{HostID: hostRow.HostID, HostBaseURL: req.HostBaseURL, AccessProbeAPIKey: req.AccessAPIKey, Pack: loadedPack, Provider: providerManifest}) } func findProvider(loaded pack.LoadedPack, providerID string) (pack.ProviderManifest, error) { diff --git a/cmd/cli/main_test.go b/cmd/cli/main_test.go index 3a9ece85..91845e2d 100644 --- a/cmd/cli/main_test.go +++ b/cmd/cli/main_test.go @@ -9,6 +9,7 @@ import ( "sub2api-cn-relay-manager/internal/config" "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/reconcile" "sub2api-cn-relay-manager/internal/store/sqlite" ) @@ -221,12 +222,12 @@ func TestExecuteReconcileProviderWritesSummary(t *testing.T) { "--pack-dir", "/tmp/pack", "--provider-id", "deepseek", "--access-api-key", "user-key", - }, nil, nil, nil, nil, nil, func(_ context.Context, req reconcileCLIRequest) (provision.ReconcileResult, error) { + }, nil, nil, nil, nil, nil, func(_ context.Context, req reconcileCLIRequest) (reconcile.Result, error) { reconcileCalled = true if req.ProviderID != "deepseek" || req.AccessAPIKey != "user-key" { t.Fatalf("unexpected reconcile request: %+v", req) } - return provision.ReconcileResult{Status: "drifted", MissingCount: 1, ExtraCount: 2, ProbeFailureCount: 1, AccessStatus: provision.AccessStatusBroken}, nil + return reconcile.Result{Status: "drifted", MissingCount: 1, ExtraCount: 2, ProbeFailureCount: 1, AccessStatus: provision.AccessStatusBroken}, nil }, nil) if err != nil { t.Fatalf("execute() reconcile error = %v", err) diff --git a/docs/2026-05-18-PRODUCTION_READINESS_REVIEW.md b/docs/2026-05-18-PRODUCTION_READINESS_REVIEW.md index 0ae6453e..e2de1b0a 100644 --- a/docs/2026-05-18-PRODUCTION_READINESS_REVIEW.md +++ b/docs/2026-05-18-PRODUCTION_READINESS_REVIEW.md @@ -58,7 +58,8 @@ - 能力探测:[capability_probe.go](/home/long/project/sub2api-cn-relay-manager/internal/host/sub2api/capability_probe.go:1) - 导入运行时:[runtime_import_service.go](/home/long/project/sub2api-cn-relay-manager/internal/provision/runtime_import_service.go:1) - 回滚:[rollback_service.go](/home/long/project/sub2api-cn-relay-manager/internal/provision/rollback_service.go:1) - - 对账:[batch_detail_and_reconcile_service.go](/home/long/project/sub2api-cn-relay-manager/internal/provision/batch_detail_and_reconcile_service.go:1) + - 对账:[service.go](/home/long/project/sub2api-cn-relay-manager/internal/reconcile/service.go:1) + - batch detail:[batch_detail_service.go](/home/long/project/sub2api-cn-relay-manager/internal/provision/batch_detail_service.go:1) - 状态库:[db.go](/home/long/project/sub2api-cn-relay-manager/internal/store/sqlite/db.go:1) - 资源记录:[managed_resources_repo.go](/home/long/project/sub2api-cn-relay-manager/internal/store/sqlite/managed_resources_repo.go:1) @@ -217,8 +218,8 @@ 证据: -- implementation plan 里期望的 `internal/reconcile/*`、`access/planner.go`、`worker/scheduler.go` 等结构仍未落地,[implementation-plan.md](/home/long/project/sub2api-cn-relay-manager/docs/plans/2026-05-12-sub2api-cn-relay-manager-implementation-plan.md:69) -- 当前逻辑主要仍集中在 `internal/provision/*` 与 `internal/access/closure.go`。 +- 该评审形成时,implementation plan 里期望的 `internal/reconcile/*`、`access/planner.go`、`worker/scheduler.go` 等结构仍未落地,[implementation-plan.md](/home/long/project/sub2api-cn-relay-manager/docs/plans/2026-05-12-sub2api-cn-relay-manager-implementation-plan.md:69) +- 截至 2026-05-22,这些结构项已分别落到 `internal/reconcile/*`、`internal/access/{planner,subscription,self_service,validation}.go` 与 `internal/worker/*`。 影响: diff --git a/docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md b/docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md index 152bf4d7..d8ce7a3f 100644 --- a/docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md +++ b/docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md @@ -30,6 +30,7 @@ - [x] 单一状态源为 `import_runs / import_run_items / import_run_item_events` - [x] migration 已落地并受集成测试保护 +- [x] run 级请求上下文(`host_id / subscription_users / subscription_days / probe_api_key`)已持久化,支持重启后恢复 validate - [x] `/api/batch-import/runs*` 已接到 V2 projection - [x] CLI `batch-import` 已通过 `ActionSet.CreateBatchImportRun` 进入真实 pipeline - [x] 结果页/结果 API 不回退 legacy 表结构 @@ -54,14 +55,14 @@ - Probe / alias / capability:`internal/probe/models.go`、`internal/probe/aliases.go`、`internal/probe/capability.go`、`internal/probe/completion.go` - Reuse / orchestration / worker / validation:`internal/batch/provider_id.go`、`internal/batch/reuse_policy.go`、`internal/batch/service.go`、`internal/batch/confirmation.go`、`internal/batch/validation.go` -- 状态库存储:`internal/store/sqlite/import_runs_repo.go`、`internal/store/sqlite/import_run_items_repo.go`、`internal/store/sqlite/import_run_item_events_repo.go` -- Projection / API / CLI:`internal/batch/status_projection.go`、`internal/app/http_batch_import.go`、`internal/app/batch_runtime.go`、`internal/app/http_batch_runs.go`、`cmd/cli/batch_import.go` +- 状态库存储:`internal/store/sqlite/import_runs_repo.go`、`internal/store/sqlite/import_run_items_repo.go`、`internal/store/sqlite/import_run_item_events_repo.go`、`internal/store/migrations/0009_batch_import_run_request_context.sql` +- Projection / API / CLI:`internal/batch/status_projection.go`、`internal/app/http_batch_import.go`、`internal/app/batch_runtime.go`、`internal/app/batch_runtime_background.go`、`internal/app/http_batch_runs.go`、`cmd/cli/batch_import.go` ### 测试文件映射 - 单测:`internal/batch/types_test.go`、`internal/probe/models_test.go`、`internal/probe/aliases_test.go`、`internal/probe/capability_test.go`、`internal/probe/completion_test.go` - 状态机:`internal/batch/provider_id_test.go`、`internal/batch/reuse_policy_test.go`、`internal/batch/service_test.go`、`internal/batch/confirmation_test.go`、`internal/batch/validation_test.go`、`internal/batch/status_projection_test.go` -- API / CLI:`internal/app/http_batch_import_test.go`、`internal/app/http_batch_runs_test.go`、`cmd/cli/batch_import_test.go` +- API / CLI:`internal/app/http_batch_import_test.go`、`internal/app/http_batch_runs_test.go`、`internal/app/batch_runtime_background_test.go`、`cmd/cli/batch_import_test.go` - 集成:`tests/integration/batch_import_v2_test.go` ### API 路由映射 @@ -78,7 +79,7 @@ - `go test ./tests/integration/... -count=1`:PASS - `go test -cover ./internal/... -count=1`:PASS - `internal/access` 76.7% - - `internal/batch` 75.4% + - `internal/batch` 72.9% - `internal/probe` 78.2% - `internal/provision` 76.4% - `internal/pack` 73.9% @@ -89,6 +90,9 @@ - `internal/app/http_batch_import.go` 的 `buildCreateBatchImportRunAction` 已改为先解析已注册 host,再委托 `batchImportRuntimeRunner.execute` - `internal/app/batch_runtime.go` 已把 `BatchImportService + ConfirmationWorker + ValidationService` 串成 create-run 的同步入口驱动链 +- `internal/app/batch_runtime_background.go` 已补后台 runtime scheduler;`running` run 在控制面重启后会继续被拾取并推进 +- `internal/store/sqlite/import_run_items_repo.go` 已补原子 lease 获取;不会再在 lease 落库前并发双发 confirmer +- `internal/app/http_batch_import.go` / `internal/app/http_batch_runs.go` 已补 `cursor/next_cursor`,且 run 列表 `q` 可命中 `run_id / provider_id / base_url` - `cmd/cli/batch_import.go` 继续复用 `ActionSet.CreateBatchImportRun`,因此 CLI create-run 也随入口修复自动进入真实 pipeline - `internal/app/http_batch_import_test.go` 已新增真实 stub 回归,直接验证 create-run 最终把 item 推进到 `current_stage=done` 且 `access_status=active` diff --git a/docs/EXECUTION_BOARD.md b/docs/EXECUTION_BOARD.md index 9e6b361e..dbdb2b2c 100644 --- a/docs/EXECUTION_BOARD.md +++ b/docs/EXECUTION_BOARD.md @@ -15,6 +15,9 @@ - `self_service` 主链路已通过 latest-head 标准 fresh-host 复验: - `artifacts/real-host-acceptance/20260521_210403/05-import.json` - `artifacts/real-host-acceptance/20260521_210403/07-access-status.json` +- latest-head relay-manager 已新增宿主 capability 自愈: + - 当第三方 OpenAI-compatible upstream 因宿主把 `openai_responses_supported` 误判成 `true` 而导致 host `/v1/chat/completions` 返回 `502 upstream_error` 时,access closure 与后台 reconcile 会自动把相关 account 修正到 raw `/chat/completions` 路径后再重试 + - 该修正现在不再依赖宿主长期保留补丁,宿主升级后只要下次 import/access/reconcile 触发,就能重新收敛到正确 capability - 官方 provider 验证矩阵当前仍保留一条非阻塞事实: - `artifacts/real-host-acceptance/20260521_222212_remote43_minimax-m2-7-official_key_import/21-summary.json` 已证明 official MiniMax 模板链路是通的,但该验证 key 当前命中 upstream `429` - `reconcile=drifted` 仍可能在 shared fresh-host 上出现,但当前解释是“历史残留资源噪音”,不阻塞 PRD 首版放行 @@ -69,6 +72,9 @@ - account test 首次 `403 Forbidden` 已降级为 advisory warning;只要 `/models` 已命中 `smoke_test_model`,不会再把 batch 误判为 blocking failure - access closure 对导入后瞬时 `503 / no available accounts` 增加短暂 completion retry,避免宿主异步 probe / account warm-up 窗口把真实可用链路误记成 `broken` - `20260522_122706_local_v0129_kimi_a7m_subscription_freshhost` 已证明:在修复后的 relay-manager + patched host 组合下,`kimi-a7m / kimi-k2.6` 可落到 `batch_status=succeeded`、`provider_status=active`、`latest_access_status=subscription_ready` +14. relay-manager latest-head 已补宿主升级后的 capability 自愈 + - 对 `API returned 403: Forbidden` 这类 `/responses` 误判 advisory,控制面现在会在 access closure 与 reconcile rerun 中把目标 account 的 `openai_responses_supported` 修正为 `false`,随后重试 gateway `/v1/chat/completions` + - 这样即使宿主升级或异步 probe 把 capability 标记覆写错,控制面也能在“安装后确认”与“后台持续对账”两个环节重新拉回可用状态 ## 已验证门禁 @@ -122,16 +128,13 @@ - 真实宿主初始化不会自动创建普通用户;上线前必须显式创建普通用户并留存可复用凭据 - `self_service` 需要普通用户 key 绑定目标标准 group,且通常还需要可用余额 - `subscription` 需要 subscription 类型 group + 普通用户订阅分配 + key/group 绑定 + - 若启用持续后台 reconcile,SQLite 状态库将持久化最新 access probe 元数据,部署时必须按 secret 级别保护数据库文件 -2. 结构债务 - - access / reconcile 仍未完全按 implementation plan 拆到独立子模块 - - 当前仍无内置 scheduler/jobs - -3. 部署与环境限制 +2. 部署与环境限制 - 标准多阶段 Dockerfile 在受限网络环境下仍不稳 - 当前推荐 `scripts/build_local_image.sh` + `Dockerfile.local` -4. official provider 验证矩阵 +3. official provider 验证矩阵 - official MiniMax 当前 live 样本已证明模板链路可用,但验证 key 命中 upstream `429` - Qwen / GLM / Kimi / Step 等官方 provider 是否通过 live 验收,仍取决于后续官方 key 与 quota @@ -168,7 +171,9 @@ - 其余 review 问题也已同步收口: - capability 从 upstream 总画像升级为 transport + model profiles - 结果页字段、状态库存储字段、retry/event trail 已统一 + - run 级请求上下文已持久化到 `import_runs`,控制面重启后 validate 能继续使用 `host_id / subscription_users / subscription_days / probe_api_key` - OpenAPI 已补齐 `/api/batch-import/runs*`,legacy `/api/import-batches/*` 降级为 v1/legacy + - run/item 列表 API 已补齐 `cursor/next_cursor`;run 列表 `q` 可命中 `run_id / provider_id / base_url` - 已补充重复导入自动复用策略:按 `provider_id + api_key_fingerprint + canonical_model_family` 判断 `reused / patch_only / replace` - 已补充同模型别名归一化契约:例如 `kimi 2.6 / kimi-2.6 / kimi-k2.6` 可归并到同一模型家族并快速复用 - 已补充多账号重复导入与弃用账号再启用策略:active 账号提示“重复已启用”,disabled/deprecated 账号显示原状态并走 `reactivated` 快速启用路径 @@ -188,6 +193,7 @@ - `docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md` 已完成 - latest-head 已补齐 `internal/app/http_batch_import.go` -> `internal/app/batch_runtime.go` 的 create-run 入口 wiring - API 与 CLI create-run 现在都会真实驱动 `BatchImportService + ConfirmationWorker + ValidationService` +- 控制面 server 启动后会自动运行 batch-import background scheduler,`running` run 在重启后可继续推进 - 最新一轮验证结果保持全绿:`go test ./... -count=1`、`go test ./tests/integration/... -count=1`、`go test -cover ./internal/... -count=1`、`go vet ./...`、`gofmt -l .` **真实 Gate**:✅ 文档、状态机、投影、测试、审计与 create-run 入口已经对齐,**V2 设计已按基线计划交付** diff --git a/docs/KNOWN_LIMITATIONS.md b/docs/KNOWN_LIMITATIONS.md index 3bf7c7d3..9cc58ef2 100644 --- a/docs/KNOWN_LIMITATIONS.md +++ b/docs/KNOWN_LIMITATIONS.md @@ -4,10 +4,11 @@ This document covers known limitations that operators should be aware of before ## Core Limitations -### 1. No Automated Reconcile Scheduler (P2) -- Reconcilation must be triggered manually via `POST /api/providers/{providerID}/reconcile` or CLI. -- No cron/scheduler service is bundled. -- Workaround: set up a cron job on the host OS calling the HTTP API periodically. +### 1. Automated Reconcile Is Available, but Disabled by Default (P2) +- A built-in background reconcile worker is now available in the control plane server. +- It is gated by `SUB2API_CRM_RECONCILE_WORKER_ENABLED=true` and uses `SUB2API_CRM_RECONCILE_POLL_INTERVAL` for cadence. +- The current scheduler model is still a simple polling runner rather than a full generic jobs platform. +- Manual `POST /api/providers/{providerID}/reconcile` and CLI reconcile remain supported. ### 2. Real sub2api Compatibility Is Verified on a Fresh Host, but Requires Explicit Operator Preparation - Real-host validation has now been executed against a fresh redeployed sub2api host. @@ -16,28 +17,19 @@ This document covers known limitations that operators should be aware of before - However, host initialization alone is not enough: operators must explicitly create ordinary users, keep reusable credentials, bind keys to the correct group, and satisfy the billing/subscription prerequisites documented in `docs/REAL_HOST_ACCEPTANCE_RUNBOOK.md`. - This is therefore no longer a code-compatibility blocker; it is an explicit operational prerequisite. -### 3. Access Module Not Fully Structured per Implementation Plan -- The `access` package contains only `closure.go` (the combined close/validate logic). -- `planner.go`, `subscription_service.go`, `self_service_checker.go` are not separately extracted. -- All access logic is functional in `closure.go` but not split per the planned directory structure. - -### 4. Reconcile Logic Inline in Provision Package -- Reconcile lives in `internal/provision/batch_detail_and_reconcile_service.go` rather than a separate `internal/reconcile/*` package. -- Functionally complete but structural gap vs implementation plan. - -### 5. Standard Multi-stage Docker Build Still Depends on Outbound Module Download +### 3. Standard Multi-stage Docker Build Still Depends on Outbound Module Download - `Dockerfile.local` has been validated as the recommended proxy-safe build path. - `scripts/build_local_image.sh` now prebuilds the Linux binary on the host and produces `sub2api-cn-relay-manager:local` reliably in this environment. - The standard multi-stage `Dockerfile` still requires outbound Go module download from inside the container build context; in restricted networks, prefer the local-image path. ## Accepted Design Trade-offs -### 6. CLI Run Functions Not Unit-Tested +### 4. CLI Run Functions Not Unit-Tested - `runInstallPack`, `runImportProvider`, `runPreviewProvider`, `runRollbackProvider`, `runReconcileProvider`, `findProvider` connect to real SQLite/sub2api — these are 0% covered in unit tests. - The `execute()` dispatch and all `parse*` functions are fully tested. - In an integration/E2E context these functions are exercised through the host stub. -### 7. No Web UI +### 5. No Web UI - Administration is through CLI and HTTP API only. - Consistent with MVP scope defined in PRD. @@ -45,7 +37,9 @@ This document covers known limitations that operators should be aware of before ### Token Security - `SUB2API_CRM_ADMIN_TOKEN` must be at least 20 characters, rotated outside source control. -- API keys imported via `--access-api-key` are used for gateway probe calls — they are not stored in control-plane state (only key fingerprint/hash is stored). +- To support continuous background reconcile, the latest access closure now persists probe metadata in control-plane state: + `self_service` stores the probe API key, and `subscription` stores the subscription user selector metadata. +- Operators should therefore treat the SQLite database as secret-bearing state and protect it accordingly. ### Database - SQLite is the only supported database backend for v0.1. diff --git a/docs/PRODUCTION_CLOSURE_BOARD.md b/docs/PRODUCTION_CLOSURE_BOARD.md index 7a34a748..20d179ca 100644 --- a/docs/PRODUCTION_CLOSURE_BOARD.md +++ b/docs/PRODUCTION_CLOSURE_BOARD.md @@ -86,9 +86,7 @@ - `subscription` 需要 subscription 类型 group + 普通用户订阅分配 + key/group 绑定 ### P2 已接受技术债务 -- access 模块仍未按 implementation plan 拆到 `planner.go / subscription_service.go / self_service_checker.go` -- reconcile 仍内联在 `internal/provision/`,未拆到 `internal/reconcile/*` -- 无内置 scheduler/jobs;当前通过手动 reconcile + 外部 cron 补偿 +- `internal/worker` 已抽出通用 polling runner,当前 batch-import runtime 与后台 reconcile 都已接入;调度模型仍是固定间隔 polling,而不是完整 jobs/reconcile 平台 - CLI `run*` 真实链路函数未做系统性 mock 单测 - 标准多阶段 `Dockerfile` 在受限网络下仍依赖容器内联网拉取 Go modules;本地部署默认走 `scripts/build_local_image.sh` - `subscription` 这条 provider matrix 已通过;剩余待补的是 latest-head `self_service` fresh-host 复验,而不是继续替换 provider key diff --git a/docs/openapi.yaml b/docs/openapi.yaml index 062b7a8f..a660671c 100644 --- a/docs/openapi.yaml +++ b/docs/openapi.yaml @@ -173,6 +173,12 @@ paths: get: security: - bearerAuth: [] + parameters: + - $ref: '#/components/parameters/BatchImportRunStateQuery' + - $ref: '#/components/parameters/BatchImportAccessModeQuery' + - $ref: '#/components/parameters/BatchImportQuery' + - $ref: '#/components/parameters/CursorQuery' + - $ref: '#/components/parameters/LimitQuery' responses: '200': description: list batch import runs @@ -205,6 +211,16 @@ paths: - bearerAuth: [] parameters: - $ref: '#/components/parameters/RunID' + - $ref: '#/components/parameters/BatchImportCurrentStageQuery' + - $ref: '#/components/parameters/BatchImportConfirmationStatusQuery' + - $ref: '#/components/parameters/BatchImportAccessStatusQuery' + - $ref: '#/components/parameters/BatchImportHasWarningQuery' + - $ref: '#/components/parameters/BatchImportProviderIDQuery' + - $ref: '#/components/parameters/BatchImportMatchedAccountStateQuery' + - $ref: '#/components/parameters/BatchImportAccountResolutionQuery' + - $ref: '#/components/parameters/BatchImportQuery' + - $ref: '#/components/parameters/CursorQuery' + - $ref: '#/components/parameters/LimitQuery' responses: '200': description: batch import run items @@ -471,6 +487,86 @@ components: required: false schema: type: string + BatchImportRunStateQuery: + name: state + in: query + required: false + schema: + type: string + enum: [running, completed, completed_with_warnings, failed, cancelled] + BatchImportAccessModeQuery: + name: access_mode + in: query + required: false + schema: + type: string + enum: [subscription, self_service] + BatchImportQuery: + name: q + in: query + required: false + schema: + type: string + CursorQuery: + name: cursor + in: query + required: false + schema: + type: string + LimitQuery: + name: limit + in: query + required: false + schema: + type: integer + minimum: 1 + BatchImportCurrentStageQuery: + name: current_stage + in: query + required: false + schema: + type: string + enum: [probe, provision, confirm, validate, done] + BatchImportConfirmationStatusQuery: + name: confirmation_status + in: query + required: false + schema: + type: string + enum: [pending, confirmed, advisory, failed] + BatchImportAccessStatusQuery: + name: access_status + in: query + required: false + schema: + type: string + enum: [unknown, active, degraded, broken] + BatchImportHasWarningQuery: + name: has_warning + in: query + required: false + schema: + type: boolean + BatchImportProviderIDQuery: + name: provider_id + in: query + required: false + schema: + type: string + BatchImportMatchedAccountStateQuery: + name: matched_account_state + in: query + required: false + schema: + type: string + enum: [none, active, disabled, deprecated, broken] + BatchImportAccountResolutionQuery: + name: account_resolution + in: query + required: false + schema: + type: string + enum: [created, reused, reactivated, replaced] responses: Unauthorized: description: missing or invalid admin token @@ -710,6 +806,9 @@ components: type: array items: $ref: '#/components/schemas/BatchImportRunSummary' + next_cursor: + type: string + nullable: true BatchImportCapabilityTransportProfile: type: object properties: @@ -886,6 +985,9 @@ components: type: array items: $ref: '#/components/schemas/BatchImportRunItemSummary' + next_cursor: + type: string + nullable: true ImportBatchInfo: type: object properties: diff --git a/docs/plans/2026-05-22-batch-auto-import-v2-implementation-plan.md b/docs/plans/2026-05-22-batch-auto-import-v2-implementation-plan.md new file mode 100644 index 00000000..81587984 --- /dev/null +++ b/docs/plans/2026-05-22-batch-auto-import-v2-implementation-plan.md @@ -0,0 +1,784 @@ +# Batch Auto-Import V2 Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** 实现 V2 的 URL + key 批量导入能力,覆盖模型发现、同模型别名归并、重复导入复用、异步确认、最终 gateway 验证、结果 API 与结果页所需状态投影。 + +**Architecture:** 采用 `BatchImportService + ConfirmationWorker + ValidationService + RunStateStore + ResultProjection` 分层架构。V2 只以 `import_runs / import_run_items / import_run_item_events` 作为运行态真相,旧 `import_batches/*` 仅保留 legacy linkage。重复导入决策基于 `provider_id + api_key_fingerprint + canonical_model_family`,最终可用性只认宿主真实 `/v1/chat/completions`。 + +**Tech Stack:** Go 1.22.2、`database/sql` + SQLite、Chi、OpenAPI 3.1、Go `testing`、`httptest`、现有 `internal/host/sub2api` 适配层与 `tests/integration` 集成测试套件。 + +--- + +## 0. 实施约束 + +- 只通过宿主 HTTP API 工作,不直写宿主数据库。 +- 所有状态枚举、字段名、API 路由必须遵循当前 canonical contract。 +- 每个任务都先写失败测试,再做最小实现,再跑验证。 +- 每个任务独立提交,避免大而混杂的 commit。 +- 任何 UI/API 展示都只能读 V2 canonical state,不得回退到日志拼接。 + +## 1. 任务总览 + +```text +T1 Canonical types and enums +T2 Probe models + alias normalization + canonical family +T3 Capability profile + smoke completion routing +T4 Provider ID + reuse policy +T5 Run/item/event state store repositories +T6 BatchImportService: Stage 0~2 +T7 ConfirmationWorker + retry + lease +T8 ValidationService + access status +T9 ResultProjection +T10 HTTP API: runs/items +T11 CLI: batch-import +T12 Integration + contract verification +T13 Design restoration audit +``` + +## 2. 设计还原验证矩阵 + +### 2.1 目标覆盖矩阵 + +| 设计目标 | 对应任务 | 验证方式 | +|---|---|---| +| URL + key 自动发现模型 | T2, T6, T12 | `/v1/models` 拉取、集成测试 | +| 模型纠错与别名归一化 | T2, T4, T9, T12 | unit + item detail projection | +| 同模型跨中转快速识别 | T2, T4, T12 | `canonical_model_family` 测试 | +| 重复导入自动复用 | T4, T6, T9, T12 | reuse decision + projection | +| 已启用重复账号直接复用 | T4, T6, T9, T12 | `matched_account_state=active` | +| 已停用/已弃用账号快速启用 | T4, T6, T7, T9, T12 | `account_resolution=reactivated` | +| transport + model capability profile | T3, T9, T10, T12 | profile persistence + API schema | +| channel/account 演化 | T6, T12 | patch contract + host stub | +| 异步确认与重试 | T7, T12 | lease/retry/event trail | +| gateway completion 最终判定 | T8, T12 | `access_status` 唯一写入 | +| 结果 API 与结果页数据源 | T5, T9, T10, T12 | run/item/event projection | +| 单一状态源 | T5, T7, T8, T9 | 只读 `import_runs/*` | + +### 2.2 契约覆盖矩阵 + +| 契约 | 对应任务 | +|---|---| +| `run_id / item_id / provider_id` | T1, T4, T5 | +| `run.state` | T1, T5, T9 | +| `current_stage / confirmation_status / access_status` | T1, T5, T7, T8 | +| `matched_account_state / account_resolution` | T4, T5, T6, T9, T10 | +| `api_key_fingerprint` | T4, T5, T6 | +| `canonical_model_families` | T2, T4, T5, T9, T10 | +| `provision_reused / reused_from_*` | T4, T5, T6, T9, T10 | +| `/api/batch-import/runs*` | T10, T12 | + +如果 T1~T12 全部完成并通过验证,T13 必须能证明上述矩阵全部为“已覆盖”,否则不得宣称 V2 可按设计实现。 + +## 3. 实施任务 + +### Task 1: Canonical Types And Enums + +**Files:** +- Create: `internal/batch/types.go` +- Test: `internal/batch/types_test.go` +- Reference: `docs/2026-05-21-BATCH_AUTO_IMPORT_SPEC.md` + +**Step 1: Write the failing test** + +为以下枚举写失败测试: +- `RunState` +- `ItemStage` +- `ConfirmationStatus` +- `AccessStatus` +- `MatchedAccountState` +- `AccountResolution` + +至少覆盖: +- 常量值是否与文档一致 +- 非法字符串是否会在后续解析层被拒绝 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestRunStateConstants|TestItemStateConstants' -count=1 +``` + +Expected: FAIL,提示类型或常量不存在。 + +**Step 3: Write minimal implementation** + +在 `internal/batch/types.go` 中定义上述类型与常量,不提前引入不需要的 helper。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestRunStateConstants|TestItemStateConstants' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/types.go internal/batch/types_test.go +git commit -m "feat(batch): add canonical v2 state enums" +``` + +### Task 2: Probe Models, Alias Normalization, Canonical Family + +**Files:** +- Create: `internal/probe/models.go` +- Create: `internal/probe/aliases.go` +- Test: `internal/probe/models_test.go` +- Test: `internal/probe/aliases_test.go` +- Reference: `docs/2026-05-21-BATCH_AUTO_IMPORT_TDD_PLAN.md` + +**Step 1: Write the failing test** + +覆盖: +- `/v1/models` OpenAI 格式解析 +- 空模型列表 +- 鉴权失败 +- `kimi 2.6 / kimi-2.6 / kimi-k2.6` 归并到同一 `canonical_model_family` +- `deepseek-ai/DeepSeek-V4-Pro` vendor 前缀归一化 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/probe -run 'TestProviderModels|TestCanonicalModelFamily' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `ProviderModels` +- `NormalizeModelID` +- `CanonicalModelID` +- `CanonicalModelFamily` +- `BuildAliasTable` +- `ResolveRequestedModel` +- `RecommendModels` + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/probe -run 'TestProviderModels|TestCanonicalModelFamily' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/probe/models.go internal/probe/aliases.go internal/probe/models_test.go internal/probe/aliases_test.go +git commit -m "feat(probe): add model discovery and canonical family normalization" +``` + +### Task 3: Capability Profile And Smoke Completion Routing + +**Files:** +- Create: `internal/probe/capability.go` +- Create: `internal/probe/completion.go` +- Test: `internal/probe/capability_test.go` +- Test: `internal/probe/completion_test.go` +- Reference: `docs/2026-05-22-BATCH_AUTO_IMPORT_V2_ARCHITECTURE.md` + +**Step 1: Write the failing test** + +覆盖: +- `responses` 不支持但 `chat/completions` 可用 +- transport profile 的 advisory 记录 +- per-model profile 记录 +- `ResolveSmokeModel` 基于别名与能力选择 smoke model + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/probe -run 'TestProbeCapabilities|TestResolveSmokeModel|TestSmokeCompletion' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `TransportProfile` +- `ModelCapabilityProfile` +- `CapabilityProfile` +- `ProbeCapabilities` +- `CompletionResult` +- `ResolveSmokeModel` +- `SmokeCompletion` + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/probe -run 'TestProbeCapabilities|TestResolveSmokeModel|TestSmokeCompletion' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/probe/capability.go internal/probe/completion.go internal/probe/capability_test.go internal/probe/completion_test.go +git commit -m "feat(probe): add capability profile and smoke completion routing" +``` + +### Task 4: Provider ID And Reuse Policy + +**Files:** +- Create: `internal/batch/provider_id.go` +- Create: `internal/batch/reuse_policy.go` +- Test: `internal/batch/provider_id_test.go` +- Test: `internal/batch/reuse_policy_test.go` +- Reference: `docs/2026-05-21-BATCH_AUTO_IMPORT_SPEC.md:336` + +**Step 1: Write the failing test** + +覆盖: +- 同 host 不同 path 生成不同 `provider_id` +- 已存在 active provider 且 family 已覆盖 -> `reused` +- 已存在 active account -> `matched_account_state=active`, `account_resolution=reused` +- `disabled/deprecated` 账号 -> `reactivated` +- `broken` provider/account -> `replace` +- 同 family 不同 alias -> 视为已覆盖 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestNormalizeProviderID|TestDecideReuse' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `NormalizeProviderID` +- `ReuseDecision` +- `DecideReuse` + +不要在这一步直接改 service。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestNormalizeProviderID|TestDecideReuse' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/provider_id.go internal/batch/reuse_policy.go internal/batch/provider_id_test.go internal/batch/reuse_policy_test.go +git commit -m "feat(batch): add provider id and reuse policy" +``` + +### Task 5: Run/Item/Event State Store Repositories + +**Files:** +- Modify: `internal/store/migrations/0007_batch_import_runs.sql` +- Modify: `internal/store/migrations/0008_batch_import_run_events.sql` +- Modify: `internal/store/sqlite/import_runs_repo.go` +- Create: `internal/store/sqlite/import_run_items_repo.go` +- Create: `internal/store/sqlite/import_run_item_events_repo.go` +- Modify: `internal/store/sqlite/db.go` +- Test: `internal/store/sqlite/import_runs_repo_test.go` +- Test: `tests/integration/store_init_test.go` + +**Step 1: Write the failing test** + +覆盖: +- run 创建/更新 +- item upsert 持久化 `api_key_fingerprint / canonical_model_families / matched_account_state / account_resolution / provision_reused` +- event append/list +- lease 字段持久化 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/store/sqlite/... ./tests/integration/... -run 'TestRunStateStore|TestStoreAppliesLatestMigration' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +补足 repo 与 migration,确保 schema 与文档完全一致。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/store/sqlite/... ./tests/integration/... -run 'TestRunStateStore|TestStoreAppliesLatestMigration' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/store/migrations/0007_batch_import_runs.sql internal/store/migrations/0008_batch_import_run_events.sql internal/store/sqlite/import_runs_repo.go internal/store/sqlite/import_run_items_repo.go internal/store/sqlite/import_run_item_events_repo.go internal/store/sqlite/db.go internal/store/sqlite/import_runs_repo_test.go tests/integration/store_init_test.go +git commit -m "feat(store): complete v2 runtime state repositories" +``` + +### Task 6: BatchImportService Stage 0~2 + +**Files:** +- Create: `internal/batch/service.go` +- Create: `internal/batch/capability_profile.go` +- Create: `internal/batch/channel_evolution.go` +- Test: `internal/batch/service_test.go` +- Test: `internal/batch/channel_evolution_test.go` +- Reference: `internal/provision/import_service.go` + +**Step 1: Write the failing test** + +覆盖: +- 创建 run + items +- reuse preflight 跳过重复 provision +- active 账号重复导入 -> reused +- deprecated 账号重复导入 -> reactivated +- patch-only 新 alias +- legacy batch/provider link 回写 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestBatchImport_StartRun|TestModelMappingDelta' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `BatchImportService.StartRun` +- `ImportRoutingStrategy` +- `BuildImportRoutingStrategy` +- `ChannelPatchContract` +- `ModelMappingDelta` + +先接现有 `provision.ImportService`,不要提前扩展 UI/API。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestBatchImport_StartRun|TestModelMappingDelta' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/service.go internal/batch/capability_profile.go internal/batch/channel_evolution.go internal/batch/service_test.go internal/batch/channel_evolution_test.go +git commit -m "feat(batch): implement v2 run setup and provision stages" +``` + +### Task 7: ConfirmationWorker, Lease And Retry + +**Files:** +- Create: `internal/batch/confirmation.go` +- Test: `internal/batch/confirmation_test.go` +- Reference: `docs/2026-05-22-BATCH_AUTO_IMPORT_V2_ARCHITECTURE.md:398` + +**Step 1: Write the failing test** + +覆盖: +- 只捞 `confirm + pending + retry_due + lease_expired` +- `403` probe race -> advisory +- 初次 `503 no available accounts` -> retry -> success +- 多 worker lease 互斥 +- `disabled/deprecated` 命中后 reactivated 投影正确 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestConfirmationWorker' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `ConfirmationWorker.Tick` +- `ConfirmationWorker.ConfirmItem` +- retry 计划 +- lease 生命周期 +- advisory event 写入 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestConfirmationWorker' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/confirmation.go internal/batch/confirmation_test.go +git commit -m "feat(batch): add confirmation worker and retry handling" +``` + +### Task 8: ValidationService And Final Access Status + +**Files:** +- Create: `internal/batch/validation.go` +- Test: `internal/batch/validation_test.go` +- Reference: `internal/access/closure.go` + +**Step 1: Write the failing test** + +覆盖: +- `confirmed/advisory + chat 200 -> active` +- exhausted transient -> `degraded` +- definitive invalid path -> `broken` +- 只有 ValidationService 可以写 `access_status` + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestValidationService' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- `ValidationService.ValidateItem` +- `access_status` 映射 +- 对 run summary 的最小更新 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestValidationService' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/validation.go internal/batch/validation_test.go +git commit -m "feat(batch): add validation service for final access status" +``` + +### Task 9: ResultProjection + +**Files:** +- Create: `internal/batch/status_projection.go` +- Test: `internal/batch/status_projection_test.go` +- Reference: `docs/2026-05-22-BATCH_AUTO_IMPORT_V2_API_SCHEMAS.md` + +**Step 1: Write the failing test** + +覆盖: +- run summary 聚合 +- item summary/detail projection +- warning 文案模板 +- `provision_reused` badge +- `matched_account_state / account_resolution` 文案与 badge + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/batch -run 'TestStatusProjection' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现: +- run list projection +- item list projection +- item detail projection +- warning/badge mapping + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/batch -run 'TestStatusProjection' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/batch/status_projection.go internal/batch/status_projection_test.go +git commit -m "feat(batch): add result projection for v2 runs and items" +``` + +### Task 10: HTTP API For Runs And Items + +**Files:** +- Create: `internal/app/http_batch_import.go` +- Create: `internal/app/http_batch_runs.go` +- Modify: `internal/app/http_api.go` +- Test: `internal/app/http_batch_import_test.go` +- Test: `internal/app/http_batch_runs_test.go` +- Reference: `docs/openapi.yaml` + +**Step 1: Write the failing test** + +覆盖: +- `POST /api/batch-import/runs` +- `GET /api/batch-import/runs` +- `GET /api/batch-import/runs/{run_id}` +- `GET /api/batch-import/runs/{run_id}/items` +- `GET /api/batch-import/runs/{run_id}/items/{item_id}` +- `subscription/self_service` 条件必填 +- 列表过滤 `matched_account_state / account_resolution` + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./internal/app -run 'TestBatchImportHTTP|TestBatchRunsHTTP' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +按 OpenAPI 只输出 projection,不泄漏 legacy 表结构。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./internal/app -run 'TestBatchImportHTTP|TestBatchRunsHTTP' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add internal/app/http_batch_import.go internal/app/http_batch_runs.go internal/app/http_api.go internal/app/http_batch_import_test.go internal/app/http_batch_runs_test.go +git commit -m "feat(api): add batch import v2 endpoints" +``` + +### Task 11: CLI Entry For Batch Import + +**Files:** +- Modify: `cmd/cli/main.go` +- Create: `cmd/cli/batch_import.go` +- Test: `cmd/cli/batch_import_test.go` + +**Step 1: Write the failing test** + +覆盖: +- 参数解析 +- `subscription` 必填订阅参数 +- `self_service` 必填 `probe_api_key` +- `--confirm-timeout` +- 结果输出 `run_id/result_page` + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./cmd/cli -run 'TestBatchImportCLI' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +实现 CLI 到 V2 API/service 的入口,不在 CLI 层重复业务逻辑。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./cmd/cli -run 'TestBatchImportCLI' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add cmd/cli/main.go cmd/cli/batch_import.go cmd/cli/batch_import_test.go +git commit -m "feat(cli): add v2 batch import command" +``` + +### Task 12: Integration And End-To-End Verification + +**Files:** +- Create: `tests/integration/batch_import_v2_test.go` +- Modify: `tests/integration/host_stub_test.go`(如需 stub 扩展) + +**Step 1: Write the failing test** + +至少覆盖 6 条真实业务链: +- 发现模型并归一化 +- 重复导入 active 账号 -> reused +- deprecated 账号 -> reactivated +- 同 family 不同 alias -> patch_only +- probe race + warmup retry -> advisory + active +- run/item/event 详情可从 V2 新表完全读出 + +**Step 2: Run test to verify it fails** + +Run: +```bash +go test ./tests/integration/... -run 'TestBatchImportV2' -count=1 +``` + +Expected: FAIL + +**Step 3: Write minimal implementation** + +补齐 host stub、fake adapter、seed data,确保每条链路都可复现。 + +**Step 4: Run test to verify it passes** + +Run: +```bash +go test ./tests/integration/... -run 'TestBatchImportV2' -count=1 +``` + +Expected: PASS + +**Step 5: Commit** + +```bash +git add tests/integration/batch_import_v2_test.go tests/integration/host_stub_test.go +git commit -m "test(integration): cover batch import v2 flows" +``` + +### Task 13: Design Restoration Audit + +**Files:** +- Create: `docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md` +- Modify: `docs/EXECUTION_BOARD.md` + +**Step 1: Write the failing audit checklist** + +列出必须逐项勾选的设计恢复项: +- 8 项 Objective +- canonical contract +- 结果 API +- migration +- worker/retry/lease +- reuse/reactivation + +**Step 2: Run verification to identify gaps** + +Run: +```bash +go test ./... -count=1 +go test ./tests/integration/... -count=1 +go test -cover ./internal/... -count=1 +go vet ./... +gofmt -l . +``` + +Expected: 在实现完成前,这一步用来发现剩余设计缺口;在最终完成时必须全绿。 + +**Step 3: Write the audit artifact** + +将每一项设计要求映射到: +- 代码文件 +- 测试文件 +- API 路由 +- 状态字段 + +**Step 4: Update board with true gate** + +在执行板中明确: +- 哪些任务完成 +- 哪些设计要求已还原 +- 是否可宣称“V2 设计已被完整实现” + +**Step 5: Commit** + +```bash +git add docs/2026-05-22-BATCH_AUTO_IMPORT_V2_RESTORATION_CHECKLIST.md docs/EXECUTION_BOARD.md +git commit -m "docs(v2): add restoration checklist and completion gate" +``` + +## 4. 全局验证门禁 + +完成 T1~T13 后,必须一次性通过: + +```bash +gofmt -l . +go vet ./... +go test ./... -count=1 +go test ./tests/integration/... -count=1 +go test -cover ./internal/... -count=1 +``` + +额外检查: + +- `docs/openapi.yaml` 与 handler 响应字段一致 +- `import_runs/*` 足以支撑结果页,不依赖 legacy 表拼接 +- `matched_account_state / account_resolution / provision_reused` 能在 item detail 里直接读到 +- `canonical_model_family` 能把同模型别名判定为同一族 + +## 5. 计划完整性结论 + +这份计划只有在满足以下条件时,才算“任务可以完全还原规划设计”: + +1. T1~T12 实现完成并全部通过验证 +2. T13 的还原清单中不存在未映射设计项 +3. 任一 Objective 都能指向至少一条: + - 实现任务 + - 自动化测试 + - API 或状态字段证据 +4. 结果页/API 不需要额外新增未规划字段才能解释最终状态 + +如果 T13 审核时发现任何一项设计要求无法映射到任务或测试,这份计划必须回退修改,不能直接进入实现。 + +## 6. 推荐提交顺序 + +建议按以下小步提交: + +1. `feat(batch): add canonical v2 state enums` +2. `feat(probe): add model discovery and canonical family normalization` +3. `feat(probe): add capability profile and smoke completion routing` +4. `feat(batch): add provider id and reuse policy` +5. `feat(store): complete v2 runtime state repositories` +6. `feat(batch): implement v2 run setup and provision stages` +7. `feat(batch): add confirmation worker and retry handling` +8. `feat(batch): add validation service for final access status` +9. `feat(batch): add result projection for v2 runs and items` +10. `feat(api): add batch import v2 endpoints` +11. `feat(cli): add v2 batch import command` +12. `test(integration): cover batch import v2 flows` +13. `docs(v2): add restoration checklist and completion gate` + +Plan complete and saved to `docs/plans/2026-05-22-batch-auto-import-v2-implementation-plan.md`. Two execution options: + +**1. Subagent-Driven (this session)** - I dispatch fresh subagent per task, review between tasks, fast iteration + +**2. Parallel Session (separate)** - Open new session with executing-plans, batch execution with checkpoints + +Which approach? diff --git a/internal/access/closure.go b/internal/access/closure.go deleted file mode 100644 index be469515..00000000 --- a/internal/access/closure.go +++ /dev/null @@ -1,154 +0,0 @@ -package access - -import ( - "context" - "fmt" - "strings" - "time" - - "sub2api-cn-relay-manager/internal/host/sub2api" -) - -const ( - ModeSubscription = "subscription" - ModeSelfService = "self_service" - - gatewayCompletionRetryAttempts = 3 - gatewayCompletionRetryDelay = 300 * time.Millisecond -) - -type SubscriptionTarget struct { - UserID string - DurationDays int -} - -type ClosureRequest struct { - Mode string - ProbeAPIKey string - Subscriptions []SubscriptionTarget - GroupID string - ExpectedModel string - Prompt string - MaxTokens int -} - -type Host interface { - EnsureSubscriptionAccess(ctx context.Context, req sub2api.EnsureSubscriptionAccessRequest) (sub2api.SubscriptionAccessRef, error) - AssignSubscription(ctx context.Context, req sub2api.AssignSubscriptionRequest) (sub2api.SubscriptionRef, error) - CheckGatewayAccess(ctx context.Context, req sub2api.GatewayAccessCheckRequest) (sub2api.GatewayAccessResult, error) - CheckGatewayCompletion(ctx context.Context, req sub2api.GatewayCompletionCheckRequest) (sub2api.GatewayCompletionResult, error) -} - -type Service struct { - host Host -} - -func NewService(host Host) *Service { - return &Service{host: host} -} - -func Validate(req ClosureRequest) error { - switch strings.TrimSpace(req.Mode) { - case ModeSubscription: - if len(req.Subscriptions) == 0 { - return fmt.Errorf("subscription access requires at least one subscription target") - } - case ModeSelfService: - if strings.TrimSpace(req.ProbeAPIKey) == "" { - return fmt.Errorf("self_service access requires probe api key") - } - default: - return fmt.Errorf("unsupported access mode %q", req.Mode) - } - if strings.TrimSpace(req.Mode) != ModeSubscription && strings.TrimSpace(req.ProbeAPIKey) == "" { - return fmt.Errorf("access probe api key is required to verify gateway closure") - } - return nil -} - -func (s *Service) Close(ctx context.Context, req ClosureRequest) (sub2api.GatewayAccessResult, error) { - if s == nil || s.host == nil { - return sub2api.GatewayAccessResult{}, fmt.Errorf("access host is required") - } - if err := Validate(req); err != nil { - return sub2api.GatewayAccessResult{}, err - } - probeAPIKey := strings.TrimSpace(req.ProbeAPIKey) - if strings.TrimSpace(req.Mode) == ModeSubscription { - for _, target := range req.Subscriptions { - resolvedTarget := target.UserID - accessRef, err := s.host.EnsureSubscriptionAccess(ctx, sub2api.EnsureSubscriptionAccessRequest{UserSelector: target.UserID, GroupID: req.GroupID}) - if err != nil { - return sub2api.GatewayAccessResult{}, fmt.Errorf("ensure subscription access for %s: %w", target.UserID, err) - } - if strings.TrimSpace(accessRef.UserID) != "" { - resolvedTarget = accessRef.UserID - } - if strings.TrimSpace(accessRef.APIKey) != "" { - probeAPIKey = strings.TrimSpace(accessRef.APIKey) - } - if _, err := s.host.AssignSubscription(ctx, sub2api.AssignSubscriptionRequest{UserID: resolvedTarget, GroupID: req.GroupID, DurationDays: target.DurationDays}); err != nil { - return sub2api.GatewayAccessResult{}, fmt.Errorf("assign subscription for %s: %w", target.UserID, err) - } - } - } - if probeAPIKey == "" { - return sub2api.GatewayAccessResult{}, fmt.Errorf("access probe api key is required to verify gateway closure") - } - result, err := s.host.CheckGatewayAccess(ctx, sub2api.GatewayAccessCheckRequest{APIKey: probeAPIKey, ExpectedModel: req.ExpectedModel}) - if err != nil { - return sub2api.GatewayAccessResult{}, fmt.Errorf("check gateway access: %w", err) - } - if result.OK && result.HasExpectedModel && strings.TrimSpace(req.ExpectedModel) != "" { - completion, err := s.checkGatewayCompletionWithRetry(ctx, sub2api.GatewayCompletionCheckRequest{ - APIKey: probeAPIKey, - Model: req.ExpectedModel, - Prompt: req.Prompt, - MaxTokens: req.MaxTokens, - }) - if err != nil { - return sub2api.GatewayAccessResult{}, fmt.Errorf("check gateway completion: %w", err) - } - result.CompletionOK = completion.OK - result.CompletionStatus = completion.StatusCode - result.CompletionType = completion.ContentType - result.CompletionBody = completion.BodyPreview - } - return result, nil -} - -func (s *Service) checkGatewayCompletionWithRetry(ctx context.Context, req sub2api.GatewayCompletionCheckRequest) (sub2api.GatewayCompletionResult, error) { - var last sub2api.GatewayCompletionResult - for attempt := 1; attempt <= gatewayCompletionRetryAttempts; attempt++ { - completion, err := s.host.CheckGatewayCompletion(ctx, req) - if err != nil { - return sub2api.GatewayCompletionResult{}, err - } - last = completion - if completion.OK || !isTransientGatewayCompletionFailure(completion) || attempt == gatewayCompletionRetryAttempts { - return completion, nil - } - timer := time.NewTimer(gatewayCompletionRetryDelay) - select { - case <-ctx.Done(): - timer.Stop() - return last, ctx.Err() - case <-timer.C: - } - } - return last, nil -} - -func isTransientGatewayCompletionFailure(result sub2api.GatewayCompletionResult) bool { - if result.OK { - return false - } - if result.StatusCode != 0 && result.StatusCode != 429 && result.StatusCode != 502 && result.StatusCode != 503 && result.StatusCode != 504 { - return false - } - body := strings.ToLower(strings.TrimSpace(result.BodyPreview)) - return strings.Contains(body, "service temporarily unavailable") || - strings.Contains(body, "no available accounts") || - strings.Contains(body, "temporar") || - strings.Contains(body, "try again") -} diff --git a/internal/access/closure_test.go b/internal/access/closure_test.go index ed0c2c8a..538bb4f6 100644 --- a/internal/access/closure_test.go +++ b/internal/access/closure_test.go @@ -143,18 +143,55 @@ func TestServiceCloseRetriesTransientGatewayCompletionFailure(t *testing.T) { } } +func TestServiceCloseRepairsOpenAIResponsesCapabilityMismatch(t *testing.T) { + host := &fakeClosureHost{ + gatewayResult: sub2api.GatewayAccessResult{OK: true, StatusCode: 200, HasExpectedModel: true, Models: []string{"kimi-k2.6"}}, + completionResults: []sub2api.GatewayCompletionResult{ + {OK: false, StatusCode: 502, ContentType: "application/json", BodyPreview: `{"error":{"message":"Upstream service temporarily unavailable","type":"upstream_error"}}`}, + }, + completionAfterRepair: &sub2api.GatewayCompletionResult{OK: true, StatusCode: 200, ContentType: "application/json"}, + managedAccess: map[string]sub2api.SubscriptionAccessRef{ + "user-1": {UserID: "host-user-1", APIKey: "managed-user-key"}, + }, + } + + result, err := NewService(host).Close(context.Background(), ClosureRequest{ + Mode: "subscription", + GroupID: "group-1", + AccountIDs: []string{"account-1", "account-1"}, + ExpectedModel: "kimi-k2.6", + ResponsesCapabilitySuspect: true, + Subscriptions: []SubscriptionTarget{{UserID: "user-1", DurationDays: 30}}, + }) + if err != nil { + t.Fatalf("Close() error = %v", err) + } + if !result.CompletionOK || result.CompletionStatus != 200 { + t.Fatalf("completion result = %+v, want repaired success", result) + } + if host.disableResponsesCalls != 1 { + t.Fatalf("disable responses calls = %d, want 1", host.disableResponsesCalls) + } + if len(host.disabledResponsesAccountIDs) != 1 || host.disabledResponsesAccountIDs[0] != "account-1" { + t.Fatalf("disabled responses account ids = %v, want [account-1]", host.disabledResponsesAccountIDs) + } +} + type fakeClosureHost struct { - assigned []sub2api.AssignSubscriptionRequest - managedAccess map[string]sub2api.SubscriptionAccessRef - assignErr error - gatewayProbe sub2api.GatewayAccessCheckRequest - gatewayResult sub2api.GatewayAccessResult - gatewayErr error - completionProbe sub2api.GatewayCompletionCheckRequest - completionCalls int - completionResults []sub2api.GatewayCompletionResult - completionResult sub2api.GatewayCompletionResult - completionErr error + assigned []sub2api.AssignSubscriptionRequest + managedAccess map[string]sub2api.SubscriptionAccessRef + assignErr error + gatewayProbe sub2api.GatewayAccessCheckRequest + gatewayResult sub2api.GatewayAccessResult + gatewayErr error + completionProbe sub2api.GatewayCompletionCheckRequest + completionCalls int + completionResults []sub2api.GatewayCompletionResult + completionResult sub2api.GatewayCompletionResult + completionAfterRepair *sub2api.GatewayCompletionResult + completionErr error + disableResponsesCalls int + disabledResponsesAccountIDs []string } func (f *fakeClosureHost) EnsureSubscriptionAccess(_ context.Context, req sub2api.EnsureSubscriptionAccessRequest) (sub2api.SubscriptionAccessRef, error) { @@ -186,6 +223,9 @@ func (f *fakeClosureHost) CheckGatewayCompletion(_ context.Context, req sub2api. if f.completionErr != nil { return sub2api.GatewayCompletionResult{}, f.completionErr } + if f.disableResponsesCalls > 0 && f.completionAfterRepair != nil { + return *f.completionAfterRepair, nil + } if len(f.completionResults) > 0 { idx := f.completionCalls - 1 if idx >= len(f.completionResults) { @@ -195,3 +235,9 @@ func (f *fakeClosureHost) CheckGatewayCompletion(_ context.Context, req sub2api. } return f.completionResult, nil } + +func (f *fakeClosureHost) DisableOpenAIResponsesAPI(_ context.Context, accountIDs []string) error { + f.disableResponsesCalls++ + f.disabledResponsesAccountIDs = append([]string(nil), accountIDs...) + return nil +} diff --git a/internal/access/gateway_validation.go b/internal/access/gateway_validation.go new file mode 100644 index 00000000..79a125f4 --- /dev/null +++ b/internal/access/gateway_validation.go @@ -0,0 +1,80 @@ +package access + +import ( + "context" + "fmt" + "strings" + "time" + + "sub2api-cn-relay-manager/internal/host/sub2api" +) + +func (s *Service) verifyGatewayClosure(ctx context.Context, req ClosureRequest, plan closurePlan) (sub2api.GatewayAccessResult, error) { + if plan.probeAPIKey == "" { + return sub2api.GatewayAccessResult{}, fmt.Errorf("access probe api key is required to verify gateway closure") + } + result, err := s.host.CheckGatewayAccess(ctx, sub2api.GatewayAccessCheckRequest{ + APIKey: plan.probeAPIKey, + ExpectedModel: req.ExpectedModel, + }) + if err != nil { + return sub2api.GatewayAccessResult{}, fmt.Errorf("check gateway access: %w", err) + } + if result.OK && result.HasExpectedModel && strings.TrimSpace(req.ExpectedModel) != "" { + completionReq := sub2api.GatewayCompletionCheckRequest{ + APIKey: plan.probeAPIKey, + Model: req.ExpectedModel, + Prompt: req.Prompt, + MaxTokens: req.MaxTokens, + } + completion, err := s.checkGatewayCompletionWithRetry(ctx, completionReq) + if err != nil { + return sub2api.GatewayAccessResult{}, fmt.Errorf("check gateway completion: %w", err) + } + completion, err = s.maybeRepairOpenAIResponsesCapability(ctx, req, completionReq, completion) + if err != nil { + return sub2api.GatewayAccessResult{}, fmt.Errorf("re-check gateway completion after capability repair: %w", err) + } + result.CompletionOK = completion.OK + result.CompletionStatus = completion.StatusCode + result.CompletionType = completion.ContentType + result.CompletionBody = completion.BodyPreview + } + return result, nil +} + +func (s *Service) checkGatewayCompletionWithRetry(ctx context.Context, req sub2api.GatewayCompletionCheckRequest) (sub2api.GatewayCompletionResult, error) { + var last sub2api.GatewayCompletionResult + for attempt := 1; attempt <= gatewayCompletionRetryAttempts; attempt++ { + completion, err := s.host.CheckGatewayCompletion(ctx, req) + if err != nil { + return sub2api.GatewayCompletionResult{}, err + } + last = completion + if completion.OK || !isTransientGatewayCompletionFailure(completion) || attempt == gatewayCompletionRetryAttempts { + return completion, nil + } + timer := time.NewTimer(gatewayCompletionRetryDelay) + select { + case <-ctx.Done(): + timer.Stop() + return last, ctx.Err() + case <-timer.C: + } + } + return last, nil +} + +func isTransientGatewayCompletionFailure(result sub2api.GatewayCompletionResult) bool { + if result.OK { + return false + } + if result.StatusCode != 0 && result.StatusCode != 429 && result.StatusCode != 502 && result.StatusCode != 503 && result.StatusCode != 504 { + return false + } + body := strings.ToLower(strings.TrimSpace(result.BodyPreview)) + return strings.Contains(body, "service temporarily unavailable") || + strings.Contains(body, "no available accounts") || + strings.Contains(body, "temporar") || + strings.Contains(body, "try again") +} diff --git a/internal/access/openai_responses_repair.go b/internal/access/openai_responses_repair.go new file mode 100644 index 00000000..44f73ad9 --- /dev/null +++ b/internal/access/openai_responses_repair.go @@ -0,0 +1,69 @@ +package access + +import ( + "context" + "strings" + + "sub2api-cn-relay-manager/internal/host/sub2api" +) + +func SuspectsOpenAIResponsesCapabilityMismatch(probe sub2api.ProbeResult) bool { + if probe.OK { + return false + } + message := strings.ToLower(strings.TrimSpace(probe.Message)) + if message == "" { + return false + } + if strings.Contains(message, "api returned 403: forbidden") { + return true + } + return strings.Contains(message, "responses api") && + (strings.Contains(message, "当前测试接口仅支持") || + strings.Contains(message, "账号本身可正常使用") || + strings.Contains(message, "please directly") || + strings.Contains(message, "actual api")) +} + +func ShouldAttemptOpenAIResponsesCapabilityRepair(suspect bool, completion sub2api.GatewayCompletionResult) bool { + if !suspect || completion.OK { + return false + } + if completion.StatusCode != 502 && completion.StatusCode != 503 { + return false + } + body := strings.ToLower(strings.TrimSpace(completion.BodyPreview)) + return strings.Contains(body, "service temporarily unavailable") || + strings.Contains(body, "no available accounts") +} + +func (s *Service) maybeRepairOpenAIResponsesCapability(ctx context.Context, req ClosureRequest, completionReq sub2api.GatewayCompletionCheckRequest, completion sub2api.GatewayCompletionResult) (sub2api.GatewayCompletionResult, error) { + if !ShouldAttemptOpenAIResponsesCapabilityRepair(req.ResponsesCapabilitySuspect, completion) { + return completion, nil + } + accountIDs := normalizedAccountIDs(req.AccountIDs) + if len(accountIDs) == 0 { + return completion, nil + } + if err := s.host.DisableOpenAIResponsesAPI(ctx, accountIDs); err != nil { + return completion, nil + } + return s.checkGatewayCompletionWithRetry(ctx, completionReq) +} + +func normalizedAccountIDs(accountIDs []string) []string { + seen := map[string]struct{}{} + values := make([]string, 0, len(accountIDs)) + for _, rawID := range accountIDs { + accountID := strings.TrimSpace(rawID) + if accountID == "" { + continue + } + if _, ok := seen[accountID]; ok { + continue + } + seen[accountID] = struct{}{} + values = append(values, accountID) + } + return values +} diff --git a/internal/access/planner.go b/internal/access/planner.go new file mode 100644 index 00000000..519f7d80 --- /dev/null +++ b/internal/access/planner.go @@ -0,0 +1,18 @@ +package access + +import ( + "context" + "strings" +) + +type closurePlan struct { + probeAPIKey string +} + +func (s *Service) prepareClosurePlan(ctx context.Context, req ClosureRequest) (closurePlan, error) { + plan := closurePlan{probeAPIKey: strings.TrimSpace(req.ProbeAPIKey)} + if strings.TrimSpace(req.Mode) != ModeSubscription { + return plan, nil + } + return s.prepareSubscriptionPlan(ctx, req, plan) +} diff --git a/internal/access/self_service.go b/internal/access/self_service.go new file mode 100644 index 00000000..063f86ed --- /dev/null +++ b/internal/access/self_service.go @@ -0,0 +1,7 @@ +package access + +import "strings" + +func selfServiceProbeAPIKey(req ClosureRequest) string { + return strings.TrimSpace(req.ProbeAPIKey) +} diff --git a/internal/access/service.go b/internal/access/service.go new file mode 100644 index 00000000..ff64b019 --- /dev/null +++ b/internal/access/service.go @@ -0,0 +1,23 @@ +package access + +import ( + "context" + "fmt" + + "sub2api-cn-relay-manager/internal/host/sub2api" +) + +func (s *Service) Close(ctx context.Context, req ClosureRequest) (sub2api.GatewayAccessResult, error) { + if s == nil || s.host == nil { + return sub2api.GatewayAccessResult{}, fmt.Errorf("access host is required") + } + if err := Validate(req); err != nil { + return sub2api.GatewayAccessResult{}, err + } + + plan, err := s.prepareClosurePlan(ctx, req) + if err != nil { + return sub2api.GatewayAccessResult{}, err + } + return s.verifyGatewayClosure(ctx, req, plan) +} diff --git a/internal/access/subscription.go b/internal/access/subscription.go new file mode 100644 index 00000000..cbf1c9d6 --- /dev/null +++ b/internal/access/subscription.go @@ -0,0 +1,36 @@ +package access + +import ( + "context" + "fmt" + "strings" + + "sub2api-cn-relay-manager/internal/host/sub2api" +) + +func (s *Service) prepareSubscriptionPlan(ctx context.Context, req ClosureRequest, plan closurePlan) (closurePlan, error) { + for _, target := range req.Subscriptions { + resolvedTarget := target.UserID + accessRef, err := s.host.EnsureSubscriptionAccess(ctx, sub2api.EnsureSubscriptionAccessRequest{ + UserSelector: target.UserID, + GroupID: req.GroupID, + }) + if err != nil { + return closurePlan{}, fmt.Errorf("ensure subscription access for %s: %w", target.UserID, err) + } + if strings.TrimSpace(accessRef.UserID) != "" { + resolvedTarget = accessRef.UserID + } + if strings.TrimSpace(accessRef.APIKey) != "" { + plan.probeAPIKey = strings.TrimSpace(accessRef.APIKey) + } + if _, err := s.host.AssignSubscription(ctx, sub2api.AssignSubscriptionRequest{ + UserID: resolvedTarget, + GroupID: req.GroupID, + DurationDays: target.DurationDays, + }); err != nil { + return closurePlan{}, fmt.Errorf("assign subscription for %s: %w", target.UserID, err) + } + } + return plan, nil +} diff --git a/internal/access/types.go b/internal/access/types.go new file mode 100644 index 00000000..ef15a2ca --- /dev/null +++ b/internal/access/types.go @@ -0,0 +1,49 @@ +package access + +import ( + "context" + "time" + + "sub2api-cn-relay-manager/internal/host/sub2api" +) + +const ( + ModeSubscription = "subscription" + ModeSelfService = "self_service" + + gatewayCompletionRetryAttempts = 3 + gatewayCompletionRetryDelay = 300 * time.Millisecond +) + +type SubscriptionTarget struct { + UserID string + DurationDays int +} + +type ClosureRequest struct { + Mode string + ProbeAPIKey string + Subscriptions []SubscriptionTarget + GroupID string + AccountIDs []string + ExpectedModel string + Prompt string + MaxTokens int + ResponsesCapabilitySuspect bool +} + +type Host interface { + EnsureSubscriptionAccess(ctx context.Context, req sub2api.EnsureSubscriptionAccessRequest) (sub2api.SubscriptionAccessRef, error) + AssignSubscription(ctx context.Context, req sub2api.AssignSubscriptionRequest) (sub2api.SubscriptionRef, error) + CheckGatewayAccess(ctx context.Context, req sub2api.GatewayAccessCheckRequest) (sub2api.GatewayAccessResult, error) + CheckGatewayCompletion(ctx context.Context, req sub2api.GatewayCompletionCheckRequest) (sub2api.GatewayCompletionResult, error) + DisableOpenAIResponsesAPI(ctx context.Context, accountIDs []string) error +} + +type Service struct { + host Host +} + +func NewService(host Host) *Service { + return &Service{host: host} +} diff --git a/internal/access/validation.go b/internal/access/validation.go new file mode 100644 index 00000000..ccf3b231 --- /dev/null +++ b/internal/access/validation.go @@ -0,0 +1,25 @@ +package access + +import ( + "fmt" + "strings" +) + +func Validate(req ClosureRequest) error { + switch strings.TrimSpace(req.Mode) { + case ModeSubscription: + if len(req.Subscriptions) == 0 { + return fmt.Errorf("subscription access requires at least one subscription target") + } + case ModeSelfService: + if selfServiceProbeAPIKey(req) == "" { + return fmt.Errorf("self_service access requires probe api key") + } + default: + return fmt.Errorf("unsupported access mode %q", req.Mode) + } + if strings.TrimSpace(req.Mode) != ModeSubscription && selfServiceProbeAPIKey(req) == "" { + return fmt.Errorf("access probe api key is required to verify gateway closure") + } + return nil +} diff --git a/internal/app/app_test.go b/internal/app/app_test.go index 8fff8e6b..04409495 100644 --- a/internal/app/app_test.go +++ b/internal/app/app_test.go @@ -18,6 +18,7 @@ import ( "sub2api-cn-relay-manager/internal/host/sub2api" "sub2api-cn-relay-manager/internal/pack" "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/reconcile" "sub2api-cn-relay-manager/internal/store/sqlite" ) @@ -303,11 +304,11 @@ func TestAPIRollbackProviderReturnsSummary(t *testing.T) { func TestAPIReconcileProviderReturnsSummary(t *testing.T) { handler := NewAPIHandler("secret-token", ActionSet{ - ReconcileProvider: func(_ context.Context, req ReconcileProviderRequest) (provision.ReconcileResult, error) { + ReconcileProvider: func(_ context.Context, req ReconcileProviderRequest) (reconcile.Result, error) { if req.AccessAPIKey != "user-key" { t.Fatalf("AccessAPIKey = %q, want user-key", req.AccessAPIKey) } - return provision.ReconcileResult{BatchID: 7, Status: "drifted", MissingCount: 1, ExtraCount: 2, StaleNoiseCount: 3, ProbeFailureCount: 1, AccessStatus: provision.AccessStatusBroken, Summary: map[string]any{"probe_failures": 1, "stale_noise_count": 3}}, nil + return reconcile.Result{BatchID: 7, Status: "drifted", MissingCount: 1, ExtraCount: 2, StaleNoiseCount: 3, ProbeFailureCount: 1, AccessStatus: provision.AccessStatusBroken, Summary: map[string]any{"probe_failures": 1, "stale_noise_count": 3}}, nil }, }) request := httptestRequest(t, http.MethodPost, "/api/providers/deepseek/reconcile", map[string]any{"host_base_url": "https://sub2api.example.com", "pack_path": "/tmp/openai-pack.zip", "access_api_key": "user-key"}, "secret-token") @@ -610,9 +611,9 @@ func TestAPIRollbackProviderReturnsConfiguredError(t *testing.T) { } func TestAPIReconcileProviderRejectsTrailingNonObjectPayload(t *testing.T) { - handler := NewAPIHandler("secret-token", ActionSet{ReconcileProvider: func(context.Context, ReconcileProviderRequest) (provision.ReconcileResult, error) { + handler := NewAPIHandler("secret-token", ActionSet{ReconcileProvider: func(context.Context, ReconcileProviderRequest) (reconcile.Result, error) { t.Fatal("ReconcileProvider should not be called for invalid JSON") - return provision.ReconcileResult{}, nil + return reconcile.Result{}, nil }}) request, err := http.NewRequest(http.MethodPost, "/api/providers/deepseek/reconcile", strings.NewReader(`{"host_base_url":"https://sub2api.example.com"}[]`)) if err != nil { @@ -789,8 +790,8 @@ func TestHandlerErrorPaths(t *testing.T) { path: "/api/providers/x/reconcile", body: `{}`, actionSet: ActionSet{ - ReconcileProvider: func(context.Context, ReconcileProviderRequest) (provision.ReconcileResult, error) { - return provision.ReconcileResult{}, errors.New("boom") + ReconcileProvider: func(context.Context, ReconcileProviderRequest) (reconcile.Result, error) { + return reconcile.Result{}, errors.New("boom") }, }, wantStatus: http.StatusInternalServerError, diff --git a/internal/app/batch_runtime.go b/internal/app/batch_runtime.go index 72633d34..15f18614 100644 --- a/internal/app/batch_runtime.go +++ b/internal/app/batch_runtime.go @@ -55,11 +55,14 @@ func (r batchImportRuntimeRunner) execute(ctx context.Context) (BatchImportRunCr } if _, err := service.StartRun(ctx, batch.BatchImportRunRequest{ - RunID: runID, - Mode: r.request.Mode, - AccessMode: r.request.AccessMode, - HostID: r.hostRow.HostID, - Entries: entries, + RunID: runID, + Mode: r.request.Mode, + AccessMode: r.request.AccessMode, + HostID: r.hostRow.HostID, + SubscriptionUsers: append([]string(nil), r.request.SubscriptionUsers...), + SubscriptionDays: r.request.SubscriptionDays, + ProbeAPIKey: r.request.ProbeAPIKey, + Entries: entries, }); err != nil { return BatchImportRunCreateResponse{}, err } @@ -89,7 +92,14 @@ func (r batchImportRuntimeRunner) advanceRun(ctx context.Context, runID string) if timeout <= 0 { timeout = time.Second } - deadline := time.Now().Add(timeout) + return r.driveRun(ctx, runID, timeout) +} + +func (r batchImportRuntimeRunner) driveRun(ctx context.Context, runID string, waitBudget time.Duration) error { + deadline := time.Now() + if waitBudget > 0 { + deadline = deadline.Add(waitBudget) + } worker := batch.ConfirmationWorker{ WorkerID: "batch-import-api", @@ -137,7 +147,7 @@ func (r batchImportRuntimeRunner) advanceRun(ctx context.Context, runID string) if run.TotalItems > 0 && run.CompletedItems >= run.TotalItems { return nil } - if !pendingWork || !time.Now().Before(deadline) { + if !pendingWork || waitBudget <= 0 || !time.Now().Before(deadline) { return nil } if err := sleepWithContext(ctx, batchImportRetryDelay); err != nil { @@ -274,6 +284,17 @@ func (s batchImportRunItemStore) Upsert(ctx context.Context, item sqlite.ImportR return s.store.ImportRunItems().Upsert(ctx, item) } +func (s batchImportRunItemStore) TryAcquireLease(ctx context.Context, itemID, workerID string, now time.Time, leaseDuration time.Duration) (sqlite.ImportRunItem, bool, error) { + item, claimed, err := s.store.ImportRunItems().TryAcquireConfirmationLease(ctx, itemID, workerID, now, leaseDuration) + if err != nil || !claimed { + return item, claimed, err + } + if item.RunID != s.runID { + return sqlite.ImportRunItem{}, false, nil + } + return item, true, nil +} + func generatedBatchImportPack(providerManifest pack.ProviderManifest) pack.LoadedPack { return pack.LoadedPack{ Manifest: pack.Manifest{ diff --git a/internal/app/batch_runtime_background.go b/internal/app/batch_runtime_background.go new file mode 100644 index 00000000..bc0ca53a --- /dev/null +++ b/internal/app/batch_runtime_background.go @@ -0,0 +1,88 @@ +package app + +import ( + "context" + "encoding/json" + "log" + "strings" + "time" + + "sub2api-cn-relay-manager/internal/store/sqlite" + "sub2api-cn-relay-manager/internal/worker" +) + +const batchImportBackgroundPollInterval = time.Second + +func runBatchImportBackgroundScheduler(ctx context.Context, sqliteDSN string) { + worker.NewRunner( + []worker.Job{batchImportResumeJob{sqliteDSN: sqliteDSN}}, + batchImportBackgroundPollInterval, + log.Printf, + ).Start(ctx) +} + +type batchImportResumeJob struct { + sqliteDSN string +} + +func (j batchImportResumeJob) Name() string { + return "batch import runtime scheduler" +} + +func (j batchImportResumeJob) Run(ctx context.Context) error { + return resumePendingBatchImportRuns(ctx, j.sqliteDSN) +} + +func resumePendingBatchImportRuns(ctx context.Context, sqliteDSN string) error { + store, err := sqlite.Open(ctx, sqliteDSN) + if err != nil { + return err + } + defer store.Close() + + runs, err := store.ImportRuns().List(ctx, 1000) + if err != nil { + return err + } + for _, run := range runs { + if strings.TrimSpace(run.State) != "running" { + continue + } + runner, err := newBatchImportRuntimeRunnerFromStoredRun(ctx, store, run) + if err != nil { + return err + } + if err := runner.driveRun(ctx, run.RunID, 0); err != nil { + return err + } + } + return nil +} + +func newBatchImportRuntimeRunnerFromStoredRun(ctx context.Context, store *sqlite.DB, run sqlite.ImportRun) (batchImportRuntimeRunner, error) { + hostRow, client, err := resolveManagedHost(ctx, store, run.HostID, "", CreateHostAuth{}) + if err != nil { + return batchImportRuntimeRunner{}, err + } + return batchImportRuntimeRunner{ + store: store, + hostRow: hostRow, + hostClient: client, + request: CreateBatchImportRunRequest{ + HostID: run.HostID, + Mode: run.Mode, + AccessMode: run.AccessMode, + SubscriptionUsers: parseJSONStringList(run.SubscriptionUsersJSON), + SubscriptionDays: run.SubscriptionDays, + ProbeAPIKey: run.ProbeAPIKey, + }, + }, nil +} + +func parseJSONStringList(raw string) []string { + values := []string{} + if err := json.Unmarshal([]byte(strings.TrimSpace(raw)), &values); err != nil { + return []string{} + } + return values +} diff --git a/internal/app/batch_runtime_background_test.go b/internal/app/batch_runtime_background_test.go new file mode 100644 index 00000000..2869d74a --- /dev/null +++ b/internal/app/batch_runtime_background_test.go @@ -0,0 +1,111 @@ +package app + +import ( + "context" + "fmt" + "net/http/httptest" + "path/filepath" + "testing" + + "sub2api-cn-relay-manager/internal/store/sqlite" +) + +func TestResumePendingBatchImportRunsCompletesStoredRun(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(newBatchImportActionStubServer(t)) + defer server.Close() + + dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db"))) + store, err := sqlite.Open(context.Background(), dsn) + if err != nil { + t.Fatalf("sqlite.Open() error = %v", err) + } + defer closeAppTestStore(t, store) + if _, err := store.SQLDB().Exec("PRAGMA foreign_keys = OFF"); err != nil { + t.Fatalf("disable foreign keys pragma error = %v", err) + } + + hostPK, err := store.Hosts().Create(context.Background(), sqlite.Host{ + HostID: "host-1", + BaseURL: server.URL, + HostVersion: "0.1.126", + CapabilityProbeJSON: "{}", + AuthType: "apikey", + AuthToken: "host-token", + }) + if err != nil { + t.Fatalf("Hosts().Create() error = %v", err) + } + + if err := store.ImportRuns().Create(context.Background(), sqlite.ImportRun{ + RunID: "run-resume-1", + HostID: "host-1", + Mode: "strict", + AccessMode: "self_service", + SubscriptionUsersJSON: `[]`, + SubscriptionDays: 0, + ProbeAPIKey: "gateway-key", + State: "running", + TotalItems: 1, + }); err != nil { + t.Fatalf("ImportRuns().Create() error = %v", err) + } + + legacyBatchID := int64(81) + if err := store.ImportRunItems().Upsert(context.Background(), sqlite.ImportRunItem{ + ItemID: "item-resume-1", + RunID: "run-resume-1", + BaseURL: server.URL, + ProviderID: "provider-resume-1", + APIKeyFingerprint: "sha256:test", + RequestedModelsJSON: `["kimi-k2.6"]`, + RawModelsJSON: `["kimi-k2.6"]`, + NormalizedModelsJSON: `["kimi-k2.6"]`, + CanonicalFamiliesJSON: `["kimi-2.6"]`, + RecommendedModelsJSON: `["kimi-k2.6"]`, + ResolvedSmokeModel: "kimi-k2.6", + CapabilityProfileJSON: `{"transport_profile":{"supports_openai_chat_completions":true}}`, + CurrentStage: "confirm", + ConfirmationStatus: "pending", + AccessStatus: "unknown", + MatchedAccountState: "none", + AccountResolution: "created", + LegacyBatchID: &legacyBatchID, + }); err != nil { + t.Fatalf("ImportRunItems().Upsert() error = %v", err) + } + + if _, err := store.ManagedResources().Create(context.Background(), sqlite.ManagedResource{ + BatchID: legacyBatchID, + HostID: hostPK, + ResourceType: "account", + HostResourceID: "account_1", + ResourceName: "batch-import-account", + }); err != nil { + t.Fatalf("ManagedResources().Create(account) error = %v", err) + } + + if err := resumePendingBatchImportRuns(context.Background(), dsn); err != nil { + t.Fatalf("resumePendingBatchImportRuns() error = %v", err) + } + + run, err := store.ImportRuns().GetByRunID(context.Background(), "run-resume-1") + if err != nil { + t.Fatalf("ImportRuns().GetByRunID() error = %v", err) + } + if run.State != "completed" { + t.Fatalf("run.State = %q, want completed", run.State) + } + if run.CompletedItems != 1 || run.ActiveItems != 1 { + t.Fatalf("run counters = %+v, want completed_items=1 active_items=1", run) + } + + item, err := store.ImportRunItems().GetByItemID(context.Background(), "item-resume-1") + if err != nil { + t.Fatalf("ImportRunItems().GetByItemID() error = %v", err) + } + if item.CurrentStage != "done" || item.ConfirmationStatus != "confirmed" || item.AccessStatus != "active" { + t.Fatalf("item = %+v, want done/confirmed/active", item) + } +} diff --git a/internal/app/bootstrap.go b/internal/app/bootstrap.go index 7f20a123..b2833002 100644 --- a/internal/app/bootstrap.go +++ b/internal/app/bootstrap.go @@ -2,11 +2,12 @@ package app import ( "context" + "time" "sub2api-cn-relay-manager/internal/config" ) -func Bootstrap(_ context.Context) (*Server, error) { +func Bootstrap(ctx context.Context) (*Server, error) { cfg, err := config.LoadStartupFromEnv() if err != nil { return nil, err @@ -15,6 +16,28 @@ func Bootstrap(_ context.Context) (*Server, error) { if err != nil { return nil, err } + startBackgroundSchedulers(ctx, cfg, defaultBackgroundSchedulers()) handler := NewAPIHandler(adminToken, NewActionSet(cfg.Database.SQLiteDSN)) return NewServer(cfg.Server.ListenAddr, handler, nil), nil } + +type backgroundSchedulers struct { + runBatchImport func(context.Context, string) + runReconcile func(context.Context, string, time.Duration) +} + +func defaultBackgroundSchedulers() backgroundSchedulers { + return backgroundSchedulers{ + runBatchImport: runBatchImportBackgroundScheduler, + runReconcile: runReconcileBackgroundScheduler, + } +} + +func startBackgroundSchedulers(ctx context.Context, cfg config.StartupConfig, schedulers backgroundSchedulers) { + if schedulers.runBatchImport != nil { + schedulers.runBatchImport(ctx, cfg.Database.SQLiteDSN) + } + if cfg.Reconcile.WorkerEnabled && schedulers.runReconcile != nil { + schedulers.runReconcile(ctx, cfg.Database.SQLiteDSN, cfg.Reconcile.PollInterval) + } +} diff --git a/internal/app/bootstrap_test.go b/internal/app/bootstrap_test.go new file mode 100644 index 00000000..a74d90c9 --- /dev/null +++ b/internal/app/bootstrap_test.go @@ -0,0 +1,59 @@ +package app + +import ( + "context" + "testing" + "time" + + "sub2api-cn-relay-manager/internal/config" +) + +func TestStartBackgroundSchedulersHonorsReconcileWorkerFlag(t *testing.T) { + t.Parallel() + + batchCalls := 0 + reconcileCalls := 0 + startBackgroundSchedulers(context.Background(), config.StartupConfig{ + Database: config.DatabaseConfig{SQLiteDSN: "file:test.db"}, + Reconcile: config.ReconcileConfig{ + WorkerEnabled: true, + PollInterval: 15 * time.Minute, + }, + }, backgroundSchedulers{ + runBatchImport: func(context.Context, string) { + batchCalls++ + }, + runReconcile: func(context.Context, string, time.Duration) { + reconcileCalls++ + }, + }) + + if batchCalls != 1 { + t.Fatalf("batchCalls = %d, want 1", batchCalls) + } + if reconcileCalls != 1 { + t.Fatalf("reconcileCalls = %d, want 1", reconcileCalls) + } +} + +func TestStartBackgroundSchedulersSkipsDisabledReconcileWorker(t *testing.T) { + t.Parallel() + + reconcileCalls := 0 + startBackgroundSchedulers(context.Background(), config.StartupConfig{ + Database: config.DatabaseConfig{SQLiteDSN: "file:test.db"}, + Reconcile: config.ReconcileConfig{ + WorkerEnabled: false, + PollInterval: 15 * time.Minute, + }, + }, backgroundSchedulers{ + runBatchImport: func(context.Context, string) {}, + runReconcile: func(context.Context, string, time.Duration) { + reconcileCalls++ + }, + }) + + if reconcileCalls != 0 { + t.Fatalf("reconcileCalls = %d, want 0", reconcileCalls) + } +} diff --git a/internal/app/http_api.go b/internal/app/http_api.go index 07a6074e..af84ce4f 100644 --- a/internal/app/http_api.go +++ b/internal/app/http_api.go @@ -14,6 +14,7 @@ import ( "sub2api-cn-relay-manager/internal/host/sub2api" "sub2api-cn-relay-manager/internal/pack" "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/reconcile" "sub2api-cn-relay-manager/internal/store/sqlite" "sub2api-cn-relay-manager/internal/access" @@ -21,9 +22,9 @@ import ( type ActionSet struct { CreateBatchImportRun func(context.Context, CreateBatchImportRunRequest) (BatchImportRunCreateResponse, error) - ListBatchImportRuns func(context.Context, ListBatchImportRunsRequest) ([]batch.RunSummaryProjection, error) + ListBatchImportRuns func(context.Context, ListBatchImportRunsRequest) (ListBatchImportRunsResponse, error) GetBatchImportRun func(context.Context, string) (batch.RunSummaryProjection, error) - ListBatchImportRunItems func(context.Context, ListBatchImportRunItemsRequest) ([]batch.ItemSummaryProjection, error) + ListBatchImportRunItems func(context.Context, ListBatchImportRunItemsRequest) (ListBatchImportRunItemsResponse, error) GetBatchImportRunItem func(context.Context, GetBatchImportRunItemRequest) (batch.ItemDetailProjection, error) InstallPack func(context.Context, InstallPackRequest) (provision.PackInstallResult, error) BatchDetail func(context.Context, BatchDetailRequest) (provision.BatchDetailResult, error) @@ -35,7 +36,7 @@ type ActionSet struct { ImportProvider func(context.Context, ImportProviderRequest) (provision.RuntimeImportResult, error) RollbackProvider func(context.Context, RollbackProviderRequest) (provision.RollbackReport, error) RollbackBatch func(context.Context, RollbackBatchRequest) (provision.RollbackReport, error) - ReconcileProvider func(context.Context, ReconcileProviderRequest) (provision.ReconcileResult, error) + ReconcileProvider func(context.Context, ReconcileProviderRequest) (reconcile.Result, error) CreateHost func(context.Context, CreateHostRequest) (HostInfo, error) ProbeHost func(context.Context, ProbeHostRequest) (HostInfo, error) ListHosts func(context.Context) ([]HostInfo, error) @@ -713,7 +714,7 @@ func handleRollbackBatch(w http.ResponseWriter, r *http.Request, fn func(context }) } -func handleReconcileProvider(w http.ResponseWriter, r *http.Request, fn func(context.Context, ReconcileProviderRequest) (provision.ReconcileResult, error)) { +func handleReconcileProvider(w http.ResponseWriter, r *http.Request, fn func(context.Context, ReconcileProviderRequest) (reconcile.Result, error)) { if fn == nil { writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "reconcile-provider action is not configured"}) return @@ -1124,26 +1125,26 @@ func NewActionSet(sqliteDSN string) ActionSet { } return report, nil }, - ReconcileProvider: func(ctx context.Context, req ReconcileProviderRequest) (provision.ReconcileResult, error) { + ReconcileProvider: func(ctx context.Context, req ReconcileProviderRequest) (reconcile.Result, error) { loadedPack, err := pack.LoadPath(req.PackPath) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } providerManifest, err := findProvider(loadedPack, req.ProviderID) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } store, err := sqlite.Open(ctx, sqliteDSN) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } defer store.Close() hostRow, client, err := resolveManagedHost(ctx, store, req.HostID, req.HostBaseURL, createHostAuthFromLegacyFields(req.HostAPIKey, req.HostBearerToken)) if err != nil { - return provision.ReconcileResult{}, err + return reconcile.Result{}, err } - service := provision.NewReconcileService(store, client) - return service.Reconcile(ctx, provision.ReconcileRequest{HostID: hostRow.HostID, HostBaseURL: hostRow.BaseURL, AccessProbeAPIKey: req.AccessAPIKey, Pack: loadedPack, Provider: providerManifest}) + service := reconcile.NewService(store, client) + return service.Reconcile(ctx, reconcile.Request{HostID: hostRow.HostID, HostBaseURL: hostRow.BaseURL, AccessProbeAPIKey: req.AccessAPIKey, Pack: loadedPack, Provider: providerManifest}) }, CreateHost: func(ctx context.Context, req CreateHostRequest) (HostInfo, error) { if strings.TrimSpace(req.BaseURL) == "" { diff --git a/internal/app/http_batch_import.go b/internal/app/http_batch_import.go index 9e319b25..ae64d36a 100644 --- a/internal/app/http_batch_import.go +++ b/internal/app/http_batch_import.go @@ -42,9 +42,15 @@ type ListBatchImportRunsRequest struct { State string AccessMode string Query string + Cursor string Limit int } +type ListBatchImportRunsResponse struct { + Runs []batch.RunSummaryProjection `json:"runs"` + NextCursor *string `json:"next_cursor"` +} + type ListBatchImportRunItemsRequest struct { RunID string CurrentStage string @@ -55,9 +61,15 @@ type ListBatchImportRunItemsRequest struct { MatchedAccountState string AccountResolution string Query string + Cursor string Limit int } +type ListBatchImportRunItemsResponse struct { + Items []batch.ItemSummaryProjection `json:"items"` + NextCursor *string `json:"next_cursor"` +} + type GetBatchImportRunItemRequest struct { RunID string ItemID string @@ -85,7 +97,7 @@ func handleCreateBatchImportRun(w http.ResponseWriter, r *http.Request, fn func( writeJSON(w, http.StatusOK, result) } -func handleListBatchImportRuns(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListBatchImportRunsRequest) ([]batch.RunSummaryProjection, error)) { +func handleListBatchImportRuns(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListBatchImportRunsRequest) (ListBatchImportRunsResponse, error)) { if fn == nil { writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "list-batch-import-runs action is not configured"}) return @@ -94,16 +106,17 @@ func handleListBatchImportRuns(w http.ResponseWriter, r *http.Request, fn func(c State: strings.TrimSpace(r.URL.Query().Get("state")), AccessMode: strings.TrimSpace(r.URL.Query().Get("access_mode")), Query: strings.TrimSpace(r.URL.Query().Get("q")), + Cursor: strings.TrimSpace(r.URL.Query().Get("cursor")), Limit: parsePositiveInt(r.URL.Query().Get("limit")), }) if err != nil { writeHTTPError(w, classifyError(err)) return } - if result == nil { - result = []batch.RunSummaryProjection{} + if result.Runs == nil { + result.Runs = []batch.RunSummaryProjection{} } - writeJSON(w, http.StatusOK, map[string]any{"runs": result}) + writeJSON(w, http.StatusOK, result) } func buildCreateBatchImportRunAction(sqliteDSN string) func(context.Context, CreateBatchImportRunRequest) (BatchImportRunCreateResponse, error) { @@ -128,33 +141,63 @@ func buildCreateBatchImportRunAction(sqliteDSN string) func(context.Context, Cre } } -func buildListBatchImportRunsAction(sqliteDSN string) func(context.Context, ListBatchImportRunsRequest) ([]batch.RunSummaryProjection, error) { - return func(ctx context.Context, req ListBatchImportRunsRequest) ([]batch.RunSummaryProjection, error) { +func buildListBatchImportRunsAction(sqliteDSN string) func(context.Context, ListBatchImportRunsRequest) (ListBatchImportRunsResponse, error) { + return func(ctx context.Context, req ListBatchImportRunsRequest) (ListBatchImportRunsResponse, error) { store, err := sqlite.Open(ctx, sqliteDSN) if err != nil { - return nil, err + return ListBatchImportRunsResponse{}, err } defer store.Close() - runs, err := store.ImportRuns().List(ctx, defaultPositiveInt(req.Limit, 50)) + runs, err := store.ImportRuns().List(ctx, 1000) if err != nil { - return nil, err + return ListBatchImportRunsResponse{}, err } - result := make([]batch.RunSummaryProjection, 0, len(runs)) + limit := defaultPositiveInt(req.Limit, 50) + result := make([]batch.RunSummaryProjection, 0, limit) + nextCursor := (*string)(nil) + started := strings.TrimSpace(req.Cursor) == "" for _, run := range runs { + if !started { + if run.RunID == strings.TrimSpace(req.Cursor) { + started = true + } + continue + } if req.State != "" && run.State != req.State { continue } if req.AccessMode != "" && run.AccessMode != req.AccessMode { continue } - if req.Query != "" && !strings.Contains(strings.ToLower(run.RunID), strings.ToLower(req.Query)) { - continue + if req.Query != "" { + query := strings.ToLower(req.Query) + if !strings.Contains(strings.ToLower(run.RunID), query) { + items, err := store.ImportRunItems().ListByRunID(ctx, run.RunID) + if err != nil { + return ListBatchImportRunsResponse{}, err + } + matched := false + for _, item := range items { + if strings.Contains(strings.ToLower(item.ProviderID), query) || strings.Contains(strings.ToLower(item.BaseURL), query) { + matched = true + break + } + } + if !matched { + continue + } + } + } + if len(result) >= limit { + cursor := run.RunID + nextCursor = &cursor + break } result = append(result, batch.ProjectRunSummary(run)) } - return result, nil + return ListBatchImportRunsResponse{Runs: result, NextCursor: nextCursor}, nil } } diff --git a/internal/app/http_batch_runs.go b/internal/app/http_batch_runs.go index 321dda08..c5c63075 100644 --- a/internal/app/http_batch_runs.go +++ b/internal/app/http_batch_runs.go @@ -32,7 +32,7 @@ func handleGetBatchImportRun(w http.ResponseWriter, r *http.Request, fn func(con }) } -func handleListBatchImportRunItems(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListBatchImportRunItemsRequest) ([]batch.ItemSummaryProjection, error)) { +func handleListBatchImportRunItems(w http.ResponseWriter, r *http.Request, fn func(context.Context, ListBatchImportRunItemsRequest) (ListBatchImportRunItemsResponse, error)) { if fn == nil { writeHTTPError(w, &httpError{StatusCode: http.StatusInternalServerError, Code: "server_misconfigured", Message: "list-batch-import-run-items action is not configured"}) return @@ -51,6 +51,7 @@ func handleListBatchImportRunItems(w http.ResponseWriter, r *http.Request, fn fu MatchedAccountState: strings.TrimSpace(r.URL.Query().Get("matched_account_state")), AccountResolution: strings.TrimSpace(r.URL.Query().Get("account_resolution")), Query: strings.TrimSpace(r.URL.Query().Get("q")), + Cursor: strings.TrimSpace(r.URL.Query().Get("cursor")), Limit: parsePositiveInt(r.URL.Query().Get("limit")), } if hasWarningRaw := strings.TrimSpace(r.URL.Query().Get("has_warning")); hasWarningRaw != "" { @@ -62,10 +63,10 @@ func handleListBatchImportRunItems(w http.ResponseWriter, r *http.Request, fn fu writeHTTPError(w, classifyError(err)) return } - if items == nil { - items = []batch.ItemSummaryProjection{} + if items.Items == nil { + items.Items = []batch.ItemSummaryProjection{} } - writeJSON(w, http.StatusOK, map[string]any{"items": items}) + writeJSON(w, http.StatusOK, items) } func handleGetBatchImportRunItem(w http.ResponseWriter, r *http.Request, fn func(context.Context, GetBatchImportRunItemRequest) (batch.ItemDetailProjection, error)) { @@ -108,37 +109,48 @@ func buildGetBatchImportRunAction(sqliteDSN string) func(context.Context, string } } -func buildListBatchImportRunItemsAction(sqliteDSN string) func(context.Context, ListBatchImportRunItemsRequest) ([]batch.ItemSummaryProjection, error) { - return func(ctx context.Context, req ListBatchImportRunItemsRequest) ([]batch.ItemSummaryProjection, error) { +func buildListBatchImportRunItemsAction(sqliteDSN string) func(context.Context, ListBatchImportRunItemsRequest) (ListBatchImportRunItemsResponse, error) { + return func(ctx context.Context, req ListBatchImportRunItemsRequest) (ListBatchImportRunItemsResponse, error) { store, err := sqlite.Open(ctx, sqliteDSN) if err != nil { - return nil, err + return ListBatchImportRunItemsResponse{}, err } defer store.Close() if _, err := store.ImportRuns().GetByRunID(ctx, req.RunID); err != nil { if err == sql.ErrNoRows { - return nil, fmt.Errorf("run not found: %s", req.RunID) + return ListBatchImportRunItemsResponse{}, fmt.Errorf("run not found: %s", req.RunID) } - return nil, err + return ListBatchImportRunItemsResponse{}, err } items, err := store.ImportRunItems().ListByRunID(ctx, req.RunID) if err != nil { - return nil, err + return ListBatchImportRunItemsResponse{}, err } - result := make([]batch.ItemSummaryProjection, 0, len(items)) + limit := defaultPositiveInt(req.Limit, 50) + result := make([]batch.ItemSummaryProjection, 0, limit) + nextCursor := (*string)(nil) + started := strings.TrimSpace(req.Cursor) == "" for _, item := range items { + if !started { + if item.ItemID == strings.TrimSpace(req.Cursor) { + started = true + } + continue + } view := batch.ProjectItemSummary(item) if !matchesItemFilters(view, req) { continue } - result = append(result, view) - if req.Limit > 0 && len(result) >= req.Limit { + if len(result) >= limit { + cursor := item.ItemID + nextCursor = &cursor break } + result = append(result, view) } - return result, nil + return ListBatchImportRunItemsResponse{Items: result, NextCursor: nextCursor}, nil } } diff --git a/internal/app/http_batch_runs_test.go b/internal/app/http_batch_runs_test.go index 9bfb8a85..a9aff0b3 100644 --- a/internal/app/http_batch_runs_test.go +++ b/internal/app/http_batch_runs_test.go @@ -17,25 +17,32 @@ func TestBatchRunsHTTP(t *testing.T) { t.Parallel() handler := NewAPIHandler("secret-token", ActionSet{ - ListBatchImportRuns: func(_ context.Context, req ListBatchImportRunsRequest) ([]batch.RunSummaryProjection, error) { + ListBatchImportRuns: func(_ context.Context, req ListBatchImportRunsRequest) (ListBatchImportRunsResponse, error) { if req.State != "completed_with_warnings" { t.Fatalf("State = %q, want completed_with_warnings", req.State) } if req.AccessMode != "subscription" { t.Fatalf("AccessMode = %q, want subscription", req.AccessMode) } - return []batch.RunSummaryProjection{{ + if req.Query != "kimi" { + t.Fatalf("Query = %q, want kimi", req.Query) + } + if req.Cursor != "cursor-1" { + t.Fatalf("Cursor = %q, want cursor-1", req.Cursor) + } + nextCursor := "cursor-2" + return ListBatchImportRunsResponse{Runs: []batch.RunSummaryProjection{{ RunID: "run-1", State: "completed_with_warnings", Mode: "partial", AccessMode: "subscription", TotalItems: 2, WarningItems: 1, - }}, nil + }}, NextCursor: &nextCursor}, nil }, }) - req := httptestRequest(t, http.MethodGet, "/api/batch-import/runs?state=completed_with_warnings&access_mode=subscription", nil, "secret-token") + req := httptestRequest(t, http.MethodGet, "/api/batch-import/runs?state=completed_with_warnings&access_mode=subscription&q=kimi&cursor=cursor-1", nil, "secret-token") res := httptestRecorder(handler, req) assertStatusCode(t, res, http.StatusOK) run := decodeJSONArrayObjectAt(t, res.Body().Bytes(), "runs", 0) @@ -43,6 +50,7 @@ func TestBatchRunsHTTP(t *testing.T) { assertJSONObjectValue(t, run, "state", "completed_with_warnings") assertJSONObjectValue(t, run, "access_mode", "subscription") assertJSONObjectValue(t, run, "warning_items", float64(1)) + assertJSONContains(t, res.Body().Bytes(), "next_cursor", "cursor-2") }) t.Run("GET run detail returns wrapped projection", func(t *testing.T) { @@ -79,7 +87,7 @@ func TestBatchRunsHTTP(t *testing.T) { t.Parallel() handler := NewAPIHandler("secret-token", ActionSet{ - ListBatchImportRunItems: func(_ context.Context, req ListBatchImportRunItemsRequest) ([]batch.ItemSummaryProjection, error) { + ListBatchImportRunItems: func(_ context.Context, req ListBatchImportRunItemsRequest) (ListBatchImportRunItemsResponse, error) { if req.RunID != "run-1" { t.Fatalf("RunID = %q, want run-1", req.RunID) } @@ -89,7 +97,11 @@ func TestBatchRunsHTTP(t *testing.T) { if req.AccountResolution != "reused" { t.Fatalf("AccountResolution = %q, want reused", req.AccountResolution) } - return []batch.ItemSummaryProjection{{ + if req.Cursor != "item-cursor-1" { + t.Fatalf("Cursor = %q, want item-cursor-1", req.Cursor) + } + nextCursor := "item-cursor-2" + return ListBatchImportRunItemsResponse{Items: []batch.ItemSummaryProjection{{ ItemID: "item-1", BaseURL: "https://kimi.example.com/v1", ProviderID: "kimi-a7m-1", @@ -100,11 +112,11 @@ func TestBatchRunsHTTP(t *testing.T) { MatchedAccountState: "active", AccountResolution: "reused", ProvisionReused: true, - }}, nil + }}, NextCursor: &nextCursor}, nil }, }) - req := httptestRequest(t, http.MethodGet, "/api/batch-import/runs/run-1/items?matched_account_state=active&account_resolution=reused", nil, "secret-token") + req := httptestRequest(t, http.MethodGet, "/api/batch-import/runs/run-1/items?matched_account_state=active&account_resolution=reused&cursor=item-cursor-1", nil, "secret-token") res := httptestRecorder(handler, req) assertStatusCode(t, res, http.StatusOK) item := decodeJSONArrayObjectAt(t, res.Body().Bytes(), "items", 0) @@ -112,6 +124,7 @@ func TestBatchRunsHTTP(t *testing.T) { assertJSONObjectValue(t, item, "matched_account_state", "active") assertJSONObjectValue(t, item, "account_resolution", "reused") assertJSONObjectValue(t, item, "provision_reused", true) + assertJSONContains(t, res.Body().Bytes(), "next_cursor", "item-cursor-2") }) t.Run("GET item detail returns capability profile and events", func(t *testing.T) { diff --git a/internal/app/reconcile_background.go b/internal/app/reconcile_background.go new file mode 100644 index 00000000..a8723936 --- /dev/null +++ b/internal/app/reconcile_background.go @@ -0,0 +1,312 @@ +package app + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "log" + "strings" + "time" + + "sub2api-cn-relay-manager/internal/host/sub2api" + "sub2api-cn-relay-manager/internal/pack" + "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/reconcile" + "sub2api-cn-relay-manager/internal/store/sqlite" + "sub2api-cn-relay-manager/internal/worker" +) + +const sqliteTimestampLayout = "2006-01-02 15:04:05" + +func runReconcileBackgroundScheduler(ctx context.Context, sqliteDSN string, interval time.Duration) { + if interval <= 0 { + return + } + worker.NewRunner( + []worker.Job{reconcileSweepJob{sqliteDSN: sqliteDSN, interval: interval}}, + interval, + log.Printf, + ).Start(ctx) +} + +type reconcileSweepJob struct { + sqliteDSN string + interval time.Duration +} + +func (j reconcileSweepJob) Name() string { + return "reconcile background scheduler" +} + +func (j reconcileSweepJob) Run(ctx context.Context) error { + store, err := sqlite.Open(ctx, j.sqliteDSN) + if err != nil { + return err + } + defer store.Close() + return runReconcileBackgroundSweep(ctx, store, j.interval, time.Now()) +} + +func runReconcileBackgroundSweep(ctx context.Context, store *sqlite.DB, interval time.Duration, now time.Time) error { + if store == nil { + return fmt.Errorf("store is required") + } + candidates, err := store.ImportBatches().ListLatestReconcilable(ctx) + if err != nil { + return err + } + + var errs []error + for _, batch := range candidates { + if ctx.Err() != nil { + return ctx.Err() + } + lastRun, err := latestReconcileRunForBatch(ctx, store, batch.ProviderID, batch.HostID) + if err != nil { + errs = append(errs, fmt.Errorf("load latest reconcile run for batch %d: %w", batch.ID, err)) + continue + } + if !reconcileRunDue(now, lastRun, interval) { + continue + } + if err := runReconcileCandidate(ctx, store, batch); err != nil { + errs = append(errs, fmt.Errorf("run reconcile for batch %d: %w", batch.ID, err)) + } + } + return errors.Join(errs...) +} + +func latestReconcileRunForBatch(ctx context.Context, store *sqlite.DB, providerID, hostID int64) (*sqlite.ReconcileRun, error) { + runs, err := store.ReconcileRuns().GetByProviderIDAndHostID(ctx, providerID, hostID) + if err != nil { + return nil, err + } + if len(runs) == 0 { + return nil, nil + } + return &runs[0], nil +} + +func reconcileRunDue(now time.Time, run *sqlite.ReconcileRun, interval time.Duration) bool { + if run == nil || interval <= 0 { + return true + } + lastRunAt, err := time.ParseInLocation(sqliteTimestampLayout, strings.TrimSpace(run.CreatedAt), time.UTC) + if err != nil { + return true + } + return now.Sub(lastRunAt) >= interval +} + +func runReconcileCandidate(ctx context.Context, store *sqlite.DB, batch sqlite.ImportBatch) error { + hostRow, err := store.Hosts().GetByID(ctx, batch.HostID) + if err != nil { + return err + } + packRow, err := store.Packs().GetByID(ctx, batch.PackID) + if err != nil { + return err + } + providerRow, err := store.Providers().GetByID(ctx, batch.ProviderID) + if err != nil { + return err + } + accessClosures, err := store.AccessClosures().GetByBatchID(ctx, batch.ID) + if err != nil { + return err + } + accessProbeAPIKey, err := reconcileProbeAPIKey(ctx, store, hostRow, batch, accessClosures) + if err != nil { + return err + } + + loadedPack, err := storedLoadedPack(packRow) + if err != nil { + return err + } + providerManifest, err := storedProviderManifest(providerRow) + if err != nil { + return err + } + + client, err := newSub2APIClient(hostRow.BaseURL, authFromStoredHost(hostRow)) + if err != nil { + return err + } + _, err = reconcile.NewService(store, client).Reconcile(ctx, reconcile.Request{ + HostID: hostRow.HostID, + HostBaseURL: hostRow.BaseURL, + AccessProbeAPIKey: accessProbeAPIKey, + Pack: loadedPack, + Provider: providerManifest, + }) + return err +} + +func reconcileProbeAPIKey(ctx context.Context, store *sqlite.DB, hostRow sqlite.Host, batch sqlite.ImportBatch, accessClosures []sqlite.AccessClosureRecord) (string, error) { + if len(accessClosures) == 0 { + return "", fmt.Errorf("access closure not found for batch %d", batch.ID) + } + latestClosure := accessClosures[len(accessClosures)-1] + switch strings.TrimSpace(latestClosure.ClosureType) { + case provision.AccessModeSelfService: + details := parseAccessClosureDetails(latestClosure.DetailsJSON) + apiKey, _ := details["access_api_key"].(string) + if strings.TrimSpace(apiKey) == "" { + apiKey, _ = details["probe_api_key"].(string) + } + if strings.TrimSpace(apiKey) == "" { + return "", fmt.Errorf("self_service access closure missing probe api key") + } + return strings.TrimSpace(apiKey), nil + case provision.AccessModeSubscription: + details := parseAccessClosureDetails(latestClosure.DetailsJSON) + subscriptionUsers := parseJSONStringArray(details["subscription_users"]) + if len(subscriptionUsers) == 0 { + return "", fmt.Errorf("subscription access closure missing subscription_users") + } + subscriptionDays := parseJSONInt(details["subscription_days"]) + groupID, err := resolveManagedResourceHostIDByBatch(ctx, store, batch.ID, "group") + if err != nil { + return "", err + } + client, err := newSub2APIClient(hostRow.BaseURL, authFromStoredHost(hostRow)) + if err != nil { + return "", err + } + accessRef, err := client.EnsureSubscriptionAccess(ctx, sub2api.EnsureSubscriptionAccessRequest{ + UserSelector: subscriptionUsers[0], + GroupID: groupID, + }) + if err != nil { + return "", err + } + userID := strings.TrimSpace(accessRef.UserID) + if userID == "" { + userID = subscriptionUsers[0] + } + if subscriptionDays > 0 { + if _, err := client.AssignSubscription(ctx, sub2api.AssignSubscriptionRequest{ + UserID: userID, + GroupID: groupID, + DurationDays: subscriptionDays, + }); err != nil { + return "", err + } + } + if strings.TrimSpace(accessRef.APIKey) == "" { + return "", fmt.Errorf("subscription access api key is empty") + } + return strings.TrimSpace(accessRef.APIKey), nil + default: + return "", fmt.Errorf("unsupported access closure type %q", latestClosure.ClosureType) + } +} + +func parseAccessClosureDetails(raw string) map[string]any { + payload := map[string]any{} + if err := json.Unmarshal([]byte(strings.TrimSpace(raw)), &payload); err != nil { + return map[string]any{} + } + return payload +} + +func parseJSONStringArray(raw any) []string { + values, ok := raw.([]any) + if !ok { + return nil + } + result := make([]string, 0, len(values)) + for _, value := range values { + text, ok := value.(string) + if !ok { + continue + } + if trimmed := strings.TrimSpace(text); trimmed != "" { + result = append(result, trimmed) + } + } + return result +} + +func parseJSONInt(raw any) int { + switch value := raw.(type) { + case float64: + return int(value) + case int: + return value + default: + return 0 + } +} + +func storedLoadedPack(packRow sqlite.Pack) (pack.LoadedPack, error) { + manifest := pack.Manifest{} + if trimmed := strings.TrimSpace(packRow.ManifestJSON); trimmed != "" && trimmed != "{}" { + if err := json.Unmarshal([]byte(trimmed), &manifest); err != nil { + return pack.LoadedPack{}, fmt.Errorf("decode stored pack manifest: %w", err) + } + } + if strings.TrimSpace(manifest.PackID) == "" { + manifest.PackID = strings.TrimSpace(packRow.PackID) + } + if strings.TrimSpace(manifest.Version) == "" { + manifest.Version = strings.TrimSpace(packRow.Version) + } + if strings.TrimSpace(manifest.Vendor) == "" { + manifest.Vendor = strings.TrimSpace(packRow.Vendor) + } + if strings.TrimSpace(manifest.TargetHost) == "" { + manifest.TargetHost = strings.TrimSpace(packRow.TargetHost) + } + if strings.TrimSpace(manifest.MinHostVersion) == "" { + manifest.MinHostVersion = strings.TrimSpace(packRow.MinHostVersion) + } + if strings.TrimSpace(manifest.MaxHostVersion) == "" { + manifest.MaxHostVersion = strings.TrimSpace(packRow.MaxHostVersion) + } + return pack.LoadedPack{Manifest: manifest, Checksum: strings.TrimSpace(packRow.Checksum)}, nil +} + +func storedProviderManifest(providerRow sqlite.Provider) (pack.ProviderManifest, error) { + provider := pack.ProviderManifest{} + if trimmed := strings.TrimSpace(providerRow.ManifestJSON); trimmed != "" && trimmed != "{}" { + if err := json.Unmarshal([]byte(trimmed), &provider); err != nil { + return pack.ProviderManifest{}, fmt.Errorf("decode stored provider manifest: %w", err) + } + } + if strings.TrimSpace(provider.ProviderID) == "" { + provider.ProviderID = strings.TrimSpace(providerRow.ProviderID) + } + if strings.TrimSpace(provider.DisplayName) == "" { + provider.DisplayName = strings.TrimSpace(providerRow.DisplayName) + } + if strings.TrimSpace(provider.BaseURL) == "" { + provider.BaseURL = strings.TrimSpace(providerRow.BaseURL) + } + if strings.TrimSpace(provider.Platform) == "" { + provider.Platform = strings.TrimSpace(providerRow.Platform) + } + if strings.TrimSpace(provider.AccountType) == "" { + provider.AccountType = strings.TrimSpace(providerRow.AccountType) + } + if strings.TrimSpace(provider.SmokeTestModel) == "" { + provider.SmokeTestModel = strings.TrimSpace(providerRow.SmokeTestModel) + } + return provider, nil +} + +func resolveManagedResourceHostIDByBatch(ctx context.Context, store *sqlite.DB, batchID int64, resourceType string) (string, error) { + resources, err := store.ManagedResources().GetByBatchID(ctx, batchID) + if err != nil { + return "", err + } + resourceType = strings.TrimSpace(resourceType) + for _, resource := range resources { + if strings.TrimSpace(resource.ResourceType) == resourceType && strings.TrimSpace(resource.HostResourceID) != "" { + return strings.TrimSpace(resource.HostResourceID), nil + } + } + return "", fmt.Errorf("managed resource %q not found for batch %d", resourceType, batchID) +} diff --git a/internal/app/reconcile_background_test.go b/internal/app/reconcile_background_test.go new file mode 100644 index 00000000..7fc3cce0 --- /dev/null +++ b/internal/app/reconcile_background_test.go @@ -0,0 +1,171 @@ +package app + +import ( + "context" + "fmt" + "net/http/httptest" + "path/filepath" + "testing" + "time" + + "sub2api-cn-relay-manager/internal/pack" + "sub2api-cn-relay-manager/internal/provision" + "sub2api-cn-relay-manager/internal/store/sqlite" +) + +func TestRunReconcileBackgroundSweepCreatesReconcileRunForLatestSuccessfulBatch(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(newBatchImportActionStubServer(t)) + defer server.Close() + + store := openReconcileBackgroundTestStore(t) + defer closeAppTestStore(t, store) + + batchID, hostPK, _ := seedReconcileBackgroundRuntimeImport(t, store, server.URL) + + if err := runReconcileBackgroundSweep(context.Background(), store, 10*time.Minute, time.Now()); err != nil { + t.Fatalf("runReconcileBackgroundSweep() error = %v", err) + } + + providers, err := store.Providers().ListByProviderID(context.Background(), "deepseek") + if err != nil { + t.Fatalf("Providers().ListByProviderID() error = %v", err) + } + runs, err := store.ReconcileRuns().GetByProviderIDAndHostID(context.Background(), providers[0].ID, hostPK) + if err != nil { + t.Fatalf("ReconcileRuns().GetByProviderIDAndHostID() error = %v", err) + } + if len(runs) != 1 { + t.Fatalf("reconcile runs = %d, want 1", len(runs)) + } + if runs[0].BatchID != batchID { + t.Fatalf("reconcile batch_id = %d, want %d", runs[0].BatchID, batchID) + } + if runs[0].Status == "" { + t.Fatal("reconcile status = empty, want persisted result") + } +} + +func TestRunReconcileBackgroundSweepSkipsRecentReconcileRun(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(newBatchImportActionStubServer(t)) + defer server.Close() + + store := openReconcileBackgroundTestStore(t) + defer closeAppTestStore(t, store) + + batchID, hostPK, providerPK := seedReconcileBackgroundRuntimeImport(t, store, server.URL) + if _, err := store.ReconcileRuns().Create(context.Background(), sqlite.ReconcileRun{ + BatchID: batchID, + HostID: hostPK, + ProviderID: providerPK, + Status: "active", + SummaryJSON: `{"seed":true}`, + }); err != nil { + t.Fatalf("ReconcileRuns().Create() error = %v", err) + } + + if err := runReconcileBackgroundSweep(context.Background(), store, 10*time.Minute, time.Now()); err != nil { + t.Fatalf("runReconcileBackgroundSweep() error = %v", err) + } + + runs, err := store.ReconcileRuns().GetByProviderIDAndHostID(context.Background(), providerPK, hostPK) + if err != nil { + t.Fatalf("ReconcileRuns().GetByProviderIDAndHostID() error = %v", err) + } + if len(runs) != 1 { + t.Fatalf("reconcile runs = %d, want 1 recent run only", len(runs)) + } +} + +func openReconcileBackgroundTestStore(t *testing.T) *sqlite.DB { + t.Helper() + + dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db"))) + store, err := sqlite.Open(context.Background(), dsn) + if err != nil { + t.Fatalf("sqlite.Open() error = %v", err) + } + if _, err := store.SQLDB().Exec("PRAGMA foreign_keys = OFF"); err != nil { + t.Fatalf("disable foreign keys pragma error = %v", err) + } + return store +} + +func seedReconcileBackgroundRuntimeImport(t *testing.T, store *sqlite.DB, baseURL string) (int64, int64, int64) { + t.Helper() + + hostPK, err := store.Hosts().Create(context.Background(), sqlite.Host{ + HostID: "host-1", + BaseURL: baseURL, + HostVersion: "0.1.126", + CapabilityProbeJSON: "{}", + AuthType: "apikey", + AuthToken: "host-token", + }) + if err != nil { + t.Fatalf("Hosts().Create() error = %v", err) + } + + client, err := newSub2APIClient(baseURL, CreateHostAuth{Type: "apikey", Token: "host-token"}) + if err != nil { + t.Fatalf("newSub2APIClient() error = %v", err) + } + + loadedPack := pack.LoadedPack{ + Manifest: pack.Manifest{ + PackID: "openai-cn-pack", + Version: "1.0.0", + Vendor: "OpenAI CN", + TargetHost: "sub2api", + MinHostVersion: "0.1.126", + MaxHostVersion: "0.2.x", + }, + Providers: []pack.ProviderManifest{{ + ProviderID: "deepseek", + DisplayName: "DeepSeek", + BaseURL: "https://api.deepseek.example", + Platform: "openai", + AccountType: "openai", + DefaultModels: []string{"kimi-k2.6"}, + SmokeTestModel: "kimi-k2.6", + GroupTemplate: pack.GroupTemplate{Name: "DeepSeek 默认分组", RateMultiplier: 1}, + ChannelTemplate: pack.ChannelTemplate{ + Name: "DeepSeek 默认渠道", + ModelMapping: map[string]string{"kimi-k2.6": "kimi-k2.6"}, + }, + PlanTemplate: pack.PlanTemplate{Name: "DeepSeek 默认套餐", Price: 0, ValidityDays: 30, ValidityUnit: "day"}, + Import: pack.ImportOptions{SupportsMultiKey: true, SupportsStrict: true, SupportsPartial: true}, + }}, + Checksum: "checksum-1", + } + + result, err := provision.NewRuntimeImportService(store, client).Import(context.Background(), provision.RuntimeImportRequest{ + HostID: "host-1", + HostBaseURL: baseURL, + Pack: loadedPack, + Provider: loadedPack.Providers[0], + Mode: provision.ImportModePartial, + Keys: []string{"entry-key"}, + Access: provision.AccessRequest{ + Mode: provision.AccessModeSelfService, + ProbeAPIKey: "gateway-key", + }, + }) + if err != nil { + t.Fatalf("RuntimeImportService.Import() error = %v", err) + } + + packRow, err := store.Packs().GetByPackID(context.Background(), loadedPack.Manifest.PackID) + if err != nil { + t.Fatalf("Packs().GetByPackID() error = %v", err) + } + providerRow, err := store.Providers().GetByPackIDAndProviderID(context.Background(), packRow.ID, loadedPack.Providers[0].ProviderID) + if err != nil { + t.Fatalf("Providers().GetByPackIDAndProviderID() error = %v", err) + } + + return result.BatchID, hostPK, providerRow.ID +} diff --git a/internal/batch/confirmation.go b/internal/batch/confirmation.go index b0d37b68..a5516e69 100644 --- a/internal/batch/confirmation.go +++ b/internal/batch/confirmation.go @@ -20,6 +20,10 @@ type ConfirmationItemStore interface { Upsert(ctx context.Context, item sqlite.ImportRunItem) error } +type ConfirmationLeaseClaimer interface { + TryAcquireLease(ctx context.Context, itemID, workerID string, now time.Time, leaseDuration time.Duration) (sqlite.ImportRunItem, bool, error) +} + type ConfirmationEventStore interface { Append(ctx context.Context, event sqlite.ImportRunItemEvent) error } @@ -53,6 +57,16 @@ func (w ConfirmationWorker) Tick(ctx context.Context, now time.Time) error { if !isConfirmationCandidate(item, now) { continue } + if claimer, ok := w.ItemStore.(ConfirmationLeaseClaimer); ok { + claimedItem, claimed, err := claimer.TryAcquireLease(ctx, item.ItemID, w.WorkerID, now, w.LeaseDuration) + if err != nil { + return err + } + if !claimed { + continue + } + item = claimedItem + } if err := w.ConfirmItem(ctx, item, now); err != nil { return err } @@ -66,9 +80,11 @@ func (w ConfirmationWorker) ConfirmItem(ctx context.Context, item sqlite.ImportR return err } - item.ConfirmationAttempts++ - item.LeaseOwner = strings.TrimSpace(w.WorkerID) - item.LeaseUntil = now.Add(defaultDuration(w.LeaseDuration, time.Minute)).Format(time.RFC3339) + if strings.TrimSpace(item.LeaseOwner) == "" { + item.ConfirmationAttempts++ + item.LeaseOwner = strings.TrimSpace(w.WorkerID) + item.LeaseUntil = now.Add(defaultDuration(w.LeaseDuration, time.Minute)).Format(time.RFC3339) + } switch { case result.StatusCode >= 200 && result.StatusCode < 300: diff --git a/internal/batch/confirmation_test.go b/internal/batch/confirmation_test.go index 3e0be126..6fa05570 100644 --- a/internal/batch/confirmation_test.go +++ b/internal/batch/confirmation_test.go @@ -3,6 +3,8 @@ package batch import ( "context" "strings" + "sync" + "sync/atomic" "testing" "time" @@ -160,6 +162,50 @@ func TestConfirmationWorker(t *testing.T) { } }) + t.Run("concurrent workers do not both call confirmer before lease is persisted", func(t *testing.T) { + t.Parallel() + + now := time.Date(2026, 5, 22, 13, 3, 30, 0, time.UTC) + store := newFakeConfirmationStore([]sqlite.ImportRunItem{ + {ItemID: "shared", RunID: "run-1", CurrentStage: "confirm", ConfirmationStatus: "pending"}, + }) + + started := make(chan struct{}, 2) + release := make(chan struct{}) + var calls atomic.Int32 + + confirmer := func(ctx context.Context, item sqlite.ImportRunItem) (ConfirmationResult, error) { + calls.Add(1) + started <- struct{}{} + <-release + return ConfirmationResult{StatusCode: 200}, nil + } + + workerA := ConfirmationWorker{WorkerID: "worker-a", ItemStore: store, EventStore: store, LeaseDuration: time.Minute, RetryDelay: time.Second, Confirmer: confirmer} + workerB := ConfirmationWorker{WorkerID: "worker-b", ItemStore: store, EventStore: store, LeaseDuration: time.Minute, RetryDelay: time.Second, Confirmer: confirmer} + + errCh := make(chan error, 2) + go func() { errCh <- workerA.Tick(context.Background(), now) }() + go func() { errCh <- workerB.Tick(context.Background(), now) }() + + <-started + select { + case <-started: + t.Fatal("second worker reached confirmer before lease was acquired") + case <-time.After(50 * time.Millisecond): + } + + close(release) + for range 2 { + if err := <-errCh; err != nil { + t.Fatalf("Tick() error = %v", err) + } + } + if got := calls.Load(); got != 1 { + t.Fatalf("confirmer calls = %d, want 1", got) + } + }) + t.Run("reactivated account metadata is preserved", func(t *testing.T) { t.Parallel() @@ -200,6 +246,7 @@ func TestConfirmationWorker(t *testing.T) { } type fakeConfirmationStore struct { + mu sync.Mutex items map[string]sqlite.ImportRunItem processed []string events []sqlite.ImportRunItemEvent @@ -217,6 +264,9 @@ func newFakeConfirmationStore(items []sqlite.ImportRunItem) *fakeConfirmationSto } func (f *fakeConfirmationStore) List(ctx context.Context) ([]sqlite.ImportRunItem, error) { + f.mu.Lock() + defer f.mu.Unlock() + items := make([]sqlite.ImportRunItem, 0, len(f.items)) for _, item := range f.items { items = append(items, item) @@ -225,12 +275,36 @@ func (f *fakeConfirmationStore) List(ctx context.Context) ([]sqlite.ImportRunIte } func (f *fakeConfirmationStore) Upsert(ctx context.Context, item sqlite.ImportRunItem) error { + f.mu.Lock() + defer f.mu.Unlock() + f.items[item.ItemID] = item f.processed = append(f.processed, item.ItemID) return nil } +func (f *fakeConfirmationStore) TryAcquireLease(ctx context.Context, itemID, workerID string, now time.Time, leaseDuration time.Duration) (sqlite.ImportRunItem, bool, error) { + f.mu.Lock() + defer f.mu.Unlock() + + item, ok := f.items[itemID] + if !ok { + return sqlite.ImportRunItem{}, false, nil + } + if !isConfirmationCandidate(item, now) { + return sqlite.ImportRunItem{}, false, nil + } + item.ConfirmationAttempts++ + item.LeaseOwner = workerID + item.LeaseUntil = now.Add(leaseDuration).Format(time.RFC3339) + f.items[itemID] = item + return item, true, nil +} + func (f *fakeConfirmationStore) Append(ctx context.Context, event sqlite.ImportRunItemEvent) error { + f.mu.Lock() + defer f.mu.Unlock() + f.events = append(f.events, event) return nil } @@ -238,6 +312,9 @@ func (f *fakeConfirmationStore) Append(ctx context.Context, event sqlite.ImportR func (f *fakeConfirmationStore) mustItem(t *testing.T, itemID string) sqlite.ImportRunItem { t.Helper() + f.mu.Lock() + defer f.mu.Unlock() + item, ok := f.items[itemID] if !ok { t.Fatalf("item %q not found", itemID) diff --git a/internal/batch/service.go b/internal/batch/service.go index 96a2d862..1008aa11 100644 --- a/internal/batch/service.go +++ b/internal/batch/service.go @@ -19,12 +19,15 @@ type BatchImportEntry struct { } type BatchImportRunRequest struct { - RunID string - Mode string - AccessMode string - HostID string - HostBaseURL string - Entries []BatchImportEntry + RunID string + Mode string + AccessMode string + HostID string + HostBaseURL string + SubscriptionUsers []string + SubscriptionDays int + ProbeAPIKey string + Entries []BatchImportEntry } type BatchImportRunResult struct { @@ -34,6 +37,7 @@ type BatchImportRunResult struct { type RunStateStore interface { Create(ctx context.Context, run sqlite.ImportRun) error + Update(ctx context.Context, run sqlite.ImportRun) error } type ItemStateStore interface { @@ -114,11 +118,15 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ } if err := s.RunStore.Create(ctx, sqlite.ImportRun{ - RunID: runID, - Mode: strings.TrimSpace(req.Mode), - AccessMode: strings.TrimSpace(req.AccessMode), - State: string(RunStateRunning), - TotalItems: len(req.Entries), + RunID: runID, + HostID: strings.TrimSpace(req.HostID), + Mode: strings.TrimSpace(req.Mode), + AccessMode: strings.TrimSpace(req.AccessMode), + SubscriptionUsersJSON: mustMarshalJSON(req.SubscriptionUsers, "[]"), + SubscriptionDays: req.SubscriptionDays, + ProbeAPIKey: strings.TrimSpace(req.ProbeAPIKey), + State: string(RunStateRunning), + TotalItems: len(req.Entries), }); err != nil { return BatchImportRunResult{}, err } @@ -152,17 +160,26 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ modelsResult, err := s.ProbeModels(ctx, entry.BaseURL, entry.APIKey) if err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProbe, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } rawModels := append([]string(nil), modelsResult.RawModels...) capabilityProfile, err := s.ProbeCapabilities(ctx, entry.BaseURL, entry.APIKey, rawModels) if err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProbe, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } routingStrategy := BuildImportRoutingStrategy(capabilityProfile) resolvedSmokeModel, recommendedModels, err := probe.ResolveSmokeModel(entry.RequestedModels, rawModels, capabilityProfile) if err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProbe, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } canonicalFamilies := uniqueCanonicalFamilies(rawModels) @@ -176,7 +193,10 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ CanonicalModelFamilies: canonicalFamilies, }) if err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProbe, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } } @@ -217,18 +237,27 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ patchContract := ModelMappingDelta(reuseLookup.ExistingModelMapping, probe.BuildAliasTable(rawModels)) if shouldPatchAliases(reuseLookup.ExistingModelMapping, patchContract.ModelMapping) { if s.Provisioner == nil { - return BatchImportRunResult{}, fmt.Errorf("provisioner is required for patch-only flow") + if failErr := s.failRun(ctx, req, initialItem, ItemStageProvision, fmt.Errorf("provisioner is required for patch-only flow")); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } if err := s.Provisioner.Patch(ctx, PatchProvisionRequest{ ProviderID: reuseDecision.ReusedFromProviderID, Contract: patchContract, }); err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProvision, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } } } else { if s.Provisioner == nil { - return BatchImportRunResult{}, fmt.Errorf("provisioner is required") + if failErr := s.failRun(ctx, req, initialItem, ItemStageProvision, fmt.Errorf("provisioner is required")); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } provisionResult, err := s.Provisioner.Provision(ctx, ProvisionRequest{ RunID: runID, @@ -240,7 +269,10 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ CapabilityProfile: capabilityProfile, }) if err != nil { - return BatchImportRunResult{}, err + if failErr := s.failRun(ctx, req, initialItem, ItemStageProvision, err); failErr != nil { + return BatchImportRunResult{}, failErr + } + return result, nil } finalItem.LegacyBatchID = provisionResult.LegacyBatchID finalItem.LegacyProviderID = strings.TrimSpace(provisionResult.LegacyProviderID) @@ -254,6 +286,35 @@ func (s BatchImportService) StartRun(ctx context.Context, req BatchImportRunRequ return result, nil } +func (s BatchImportService) failRun(ctx context.Context, req BatchImportRunRequest, item sqlite.ImportRunItem, stage ItemStage, cause error) error { + item.CurrentStage = string(ItemStageDone) + item.ConfirmationStatus = string(ConfirmationFailed) + item.AccessStatus = string(AccessStatusBroken) + item.LastErrorStage = string(stage) + item.LastError = strings.TrimSpace(cause.Error()) + item.LeaseOwner = "" + item.LeaseUntil = "" + item.NextRetryAt = "" + if err := s.ItemStore.Upsert(ctx, item); err != nil { + return err + } + + return s.RunStore.Update(ctx, sqlite.ImportRun{ + RunID: strings.TrimSpace(req.RunID), + HostID: strings.TrimSpace(req.HostID), + Mode: strings.TrimSpace(req.Mode), + AccessMode: strings.TrimSpace(req.AccessMode), + SubscriptionUsersJSON: mustMarshalJSON(req.SubscriptionUsers, "[]"), + SubscriptionDays: req.SubscriptionDays, + ProbeAPIKey: strings.TrimSpace(req.ProbeAPIKey), + State: string(RunStateFailed), + TotalItems: len(req.Entries), + CompletedItems: 1, + BrokenItems: 1, + FinishedAt: time.Now().UTC().Format(time.RFC3339), + }) +} + func uniqueCanonicalFamilies(rawModels []string) []string { seen := make(map[string]struct{}, len(rawModels)) families := make([]string, 0, len(rawModels)) diff --git a/internal/batch/service_test.go b/internal/batch/service_test.go index 4402dec8..5cd33c45 100644 --- a/internal/batch/service_test.go +++ b/internal/batch/service_test.go @@ -115,6 +115,7 @@ func TestBatchImport_StartRun(t *testing.T) { RunID: "run-2", Mode: "strict", AccessMode: "subscription", + HostID: "host-1", Entries: []BatchImportEntry{ {BaseURL: "https://api.kimi.com/v1", APIKey: "sk-live", RequestedModels: []string{"kimi 2.6"}}, }, @@ -170,6 +171,7 @@ func TestBatchImport_StartRun(t *testing.T) { RunID: "run-3", Mode: "strict", AccessMode: "subscription", + HostID: "host-1", Entries: []BatchImportEntry{ {BaseURL: "https://api.kimi.com/v1", APIKey: "sk-live", RequestedModels: []string{"kimi 2.6"}}, }, @@ -221,6 +223,7 @@ func TestBatchImport_StartRun(t *testing.T) { RunID: "run-4", Mode: "strict", AccessMode: "subscription", + HostID: "host-1", Entries: []BatchImportEntry{ {BaseURL: "https://api.kimi.com/v1", APIKey: "sk-live", RequestedModels: []string{"kimi 2.6"}}, }, @@ -243,10 +246,71 @@ func TestBatchImport_StartRun(t *testing.T) { t.Fatal("ProvisionReused = false, want true for patch-only flow") } }) + + t.Run("probe failure marks run failed instead of leaving running half state", func(t *testing.T) { + t.Parallel() + + runStore := &fakeRunStore{} + itemStore := &fakeItemStore{} + service := BatchImportService{ + RunStore: runStore, + ItemStore: itemStore, + ProbeModels: func(context.Context, string, string) (*probe.ModelsResult, error) { + return nil, context.DeadlineExceeded + }, + ProbeCapabilities: func(context.Context, string, string, []string) (*probe.CapabilityProfile, error) { + t.Fatal("ProbeCapabilities should not be called after probe failure") + return nil, nil + }, + } + + result, err := service.StartRun(context.Background(), BatchImportRunRequest{ + RunID: "run-probe-fail", + Mode: "strict", + AccessMode: "self_service", + HostID: "host-1", + Entries: []BatchImportEntry{ + {BaseURL: "https://api.deepseek.com/v1", APIKey: "sk-live", RequestedModels: []string{"DeepSeek V4 Pro"}}, + }, + }) + if err != nil { + t.Fatalf("StartRun() error = %v, want persisted failed run without transport error", err) + } + if result.RunID != "run-probe-fail" { + t.Fatalf("result.RunID = %q, want run-probe-fail", result.RunID) + } + if len(runStore.updated) == 0 { + t.Fatal("run store was not updated to failed state") + } + gotRun := runStore.updated[len(runStore.updated)-1] + if gotRun.State != string(RunStateFailed) { + t.Fatalf("run.State = %q, want failed", gotRun.State) + } + if gotRun.CompletedItems != 1 || gotRun.BrokenItems != 1 { + t.Fatalf("run counters = %+v, want completed_items=1 broken_items=1", gotRun) + } + if len(itemStore.upserts) < 2 { + t.Fatalf("item upserts = %d, want initial + failed terminal state", len(itemStore.upserts)) + } + gotItem := itemStore.upserts[len(itemStore.upserts)-1] + if gotItem.CurrentStage != string(ItemStageDone) { + t.Fatalf("item.CurrentStage = %q, want done", gotItem.CurrentStage) + } + if gotItem.ConfirmationStatus != string(ConfirmationFailed) { + t.Fatalf("item.ConfirmationStatus = %q, want failed", gotItem.ConfirmationStatus) + } + if gotItem.AccessStatus != string(AccessStatusBroken) { + t.Fatalf("item.AccessStatus = %q, want broken", gotItem.AccessStatus) + } + if gotItem.LastErrorStage != string(ItemStageProbe) { + t.Fatalf("item.LastErrorStage = %q, want probe", gotItem.LastErrorStage) + } + }) } type fakeRunStore struct { created []sqlite.ImportRun + updated []sqlite.ImportRun } func (f *fakeRunStore) Create(ctx context.Context, run sqlite.ImportRun) error { @@ -254,6 +318,11 @@ func (f *fakeRunStore) Create(ctx context.Context, run sqlite.ImportRun) error { return nil } +func (f *fakeRunStore) Update(ctx context.Context, run sqlite.ImportRun) error { + f.updated = append(f.updated, run) + return nil +} + type fakeItemStore struct { upserts []sqlite.ImportRunItem } diff --git a/internal/config/config.go b/internal/config/config.go index 411fd76b..1b7b7227 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -4,15 +4,19 @@ import ( "fmt" "os" "strings" + "time" ) const ( - EnvListenAddr = "SUB2API_CRM_LISTEN_ADDR" - EnvSQLiteDSN = "SUB2API_CRM_SQLITE_DSN" - EnvAdminToken = "SUB2API_CRM_ADMIN_TOKEN" + EnvListenAddr = "SUB2API_CRM_LISTEN_ADDR" + EnvSQLiteDSN = "SUB2API_CRM_SQLITE_DSN" + EnvAdminToken = "SUB2API_CRM_ADMIN_TOKEN" + EnvReconcileWorkerEnabled = "SUB2API_CRM_RECONCILE_WORKER_ENABLED" + EnvReconcilePollInterval = "SUB2API_CRM_RECONCILE_POLL_INTERVAL" - DefaultListenAddr = ":8080" - DefaultSQLiteDSN = "file:sub2api-cn-relay-manager.db?_foreign_keys=on&_busy_timeout=5000" + DefaultListenAddr = ":8080" + DefaultSQLiteDSN = "file:sub2api-cn-relay-manager.db?_foreign_keys=on&_busy_timeout=5000" + DefaultReconcilePollInterval = 10 * time.Minute ) type ServerConfig struct { @@ -23,9 +27,15 @@ type DatabaseConfig struct { SQLiteDSN string } +type ReconcileConfig struct { + WorkerEnabled bool + PollInterval time.Duration +} + type StartupConfig struct { - Server ServerConfig - Database DatabaseConfig + Server ServerConfig + Database DatabaseConfig + Reconcile ReconcileConfig } func LoadStartupFromEnv() (StartupConfig, error) { @@ -33,6 +43,10 @@ func LoadStartupFromEnv() (StartupConfig, error) { } func loadStartupFromLookupEnv(lookup func(string) (string, bool)) (StartupConfig, error) { + reconcilePollInterval, err := readOptionalDurationEnv(lookup, EnvReconcilePollInterval, DefaultReconcilePollInterval) + if err != nil { + return StartupConfig{}, err + } cfg := StartupConfig{ Server: ServerConfig{ ListenAddr: readOptionalEnv(lookup, EnvListenAddr, DefaultListenAddr), @@ -40,6 +54,10 @@ func loadStartupFromLookupEnv(lookup func(string) (string, bool)) (StartupConfig Database: DatabaseConfig{ SQLiteDSN: readOptionalEnv(lookup, EnvSQLiteDSN, DefaultSQLiteDSN), }, + Reconcile: ReconcileConfig{ + WorkerEnabled: readOptionalBoolEnv(lookup, EnvReconcileWorkerEnabled, false), + PollInterval: reconcilePollInterval, + }, } return cfg, nil @@ -80,3 +98,33 @@ func readRequiredEnv(lookup func(string) (string, bool), key string) string { return value } + +func readOptionalBoolEnv(lookup func(string) (string, bool), key string, defaultValue bool) bool { + value, ok := lookup(key) + if !ok { + return defaultValue + } + switch strings.ToLower(strings.TrimSpace(value)) { + case "1", "true", "yes", "on": + return true + case "0", "false", "no", "off", "": + return false + default: + return defaultValue + } +} + +func readOptionalDurationEnv(lookup func(string) (string, bool), key string, defaultValue time.Duration) (time.Duration, error) { + value, ok := lookup(key) + if !ok || strings.TrimSpace(value) == "" { + return defaultValue, nil + } + duration, err := time.ParseDuration(strings.TrimSpace(value)) + if err != nil { + return 0, fmt.Errorf("%s: parse duration: %w", key, err) + } + if duration <= 0 { + return 0, fmt.Errorf("%s: duration must be positive", key) + } + return duration, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index c5382f6e..d93af95e 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -3,6 +3,7 @@ package config import ( "errors" "testing" + "time" ) func TestReadOptionalEnv(t *testing.T) { @@ -62,6 +63,10 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { return ":9090", true case EnvSQLiteDSN: return "/data/db.sqlite", true + case EnvReconcileWorkerEnabled: + return "true", true + case EnvReconcilePollInterval: + return "15m", true default: return "", false } @@ -76,6 +81,12 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { if cfg.Database.SQLiteDSN != "/data/db.sqlite" { t.Fatalf("SQLiteDSN = %q, want %q", cfg.Database.SQLiteDSN, "/data/db.sqlite") } + if !cfg.Reconcile.WorkerEnabled { + t.Fatal("WorkerEnabled = false, want true") + } + if cfg.Reconcile.PollInterval != 15*time.Minute { + t.Fatalf("PollInterval = %s, want 15m", cfg.Reconcile.PollInterval) + } }) t.Run("default values", func(t *testing.T) { lookup := func(k string) (string, bool) { @@ -91,6 +102,23 @@ func TestLoadStartupFromLookupEnv(t *testing.T) { if cfg.Database.SQLiteDSN != DefaultSQLiteDSN { t.Fatalf("SQLiteDSN = %q, want %q", cfg.Database.SQLiteDSN, DefaultSQLiteDSN) } + if cfg.Reconcile.WorkerEnabled { + t.Fatal("WorkerEnabled = true, want false by default") + } + if cfg.Reconcile.PollInterval != DefaultReconcilePollInterval { + t.Fatalf("PollInterval = %s, want %s", cfg.Reconcile.PollInterval, DefaultReconcilePollInterval) + } + }) + t.Run("invalid reconcile interval", func(t *testing.T) { + lookup := func(k string) (string, bool) { + if k == EnvReconcilePollInterval { + return "not-a-duration", true + } + return "", false + } + if _, err := loadStartupFromLookupEnv(lookup); err == nil { + t.Fatal("loadStartupFromLookupEnv() error = nil, want invalid interval") + } }) } diff --git a/internal/host/sub2api/account_capability_repair.go b/internal/host/sub2api/account_capability_repair.go new file mode 100644 index 00000000..df5b8520 --- /dev/null +++ b/internal/host/sub2api/account_capability_repair.go @@ -0,0 +1,36 @@ +package sub2api + +import ( + "context" + "net/http" + "strings" +) + +func (c *Client) DisableOpenAIResponsesAPI(ctx context.Context, accountIDs []string) error { + seen := map[string]struct{}{} + for _, rawID := range accountIDs { + accountID := strings.TrimSpace(rawID) + if accountID == "" { + continue + } + if _, ok := seen[accountID]; ok { + continue + } + seen[accountID] = struct{}{} + + path := "/api/v1/admin/accounts/" + accountID + payload := map[string]any{ + "extra": map[string]any{ + "openai_responses_supported": false, + }, + } + statusCode, _, body, err := c.perform(ctx, http.MethodPut, path, payload) + if err != nil { + return err + } + if statusCode < http.StatusOK || statusCode >= http.StatusMultipleChoices { + return newHTTPError(http.MethodPut, path, statusCode, body) + } + } + return nil +} diff --git a/internal/host/sub2api/client.go b/internal/host/sub2api/client.go index 21b4fd47..add5a226 100644 --- a/internal/host/sub2api/client.go +++ b/internal/host/sub2api/client.go @@ -31,6 +31,7 @@ type HostAdapter interface { AssignSubscription(ctx context.Context, req AssignSubscriptionRequest) (SubscriptionRef, error) CheckGatewayAccess(ctx context.Context, req GatewayAccessCheckRequest) (GatewayAccessResult, error) CheckGatewayCompletion(ctx context.Context, req GatewayCompletionCheckRequest) (GatewayCompletionResult, error) + DisableOpenAIResponsesAPI(ctx context.Context, accountIDs []string) error ListManagedResources(ctx context.Context, req ListManagedResourcesRequest) (ManagedResourceSnapshot, error) } diff --git a/internal/host/sub2api/sub2api_test.go b/internal/host/sub2api/sub2api_test.go index bb7bb12c..26705256 100644 --- a/internal/host/sub2api/sub2api_test.go +++ b/internal/host/sub2api/sub2api_test.go @@ -979,6 +979,41 @@ func TestCheckGatewayCompletionWithMock(t *testing.T) { } } +func TestDisableOpenAIResponsesAPIWithMock(t *testing.T) { + var calls []string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + calls = append(calls, r.Method+" "+r.URL.Path) + if r.Method != http.MethodPut { + t.Fatalf("method = %q, want PUT", r.Method) + } + var payload struct { + Extra map[string]any `json:"extra"` + } + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + t.Fatalf("decode request: %v", err) + } + if got, ok := payload.Extra["openai_responses_supported"].(bool); !ok || got { + t.Fatalf("openai_responses_supported = %+v, want false", payload.Extra["openai_responses_supported"]) + } + w.Write([]byte(`{"data":{"id":1}}`)) + })) + defer srv.Close() + + client, _ := NewClient(srv.URL, WithAPIKey("k")) + if err := client.DisableOpenAIResponsesAPI(context.Background(), []string{"101", "101", " ", "102"}); err != nil { + t.Fatalf("DisableOpenAIResponsesAPI() error = %v", err) + } + if len(calls) != 2 { + t.Fatalf("calls = %v, want 2 unique account updates", calls) + } + if calls[0] != "PUT /api/v1/admin/accounts/101" { + t.Fatalf("first call = %q, want PUT /api/v1/admin/accounts/101", calls[0]) + } + if calls[1] != "PUT /api/v1/admin/accounts/102" { + t.Fatalf("second call = %q, want PUT /api/v1/admin/accounts/102", calls[1]) + } +} + func TestBatchCreateAccountsWithMock(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { var req struct { diff --git a/internal/provision/batch_detail_service.go b/internal/provision/batch_detail_service.go new file mode 100644 index 00000000..dff9e2b3 --- /dev/null +++ b/internal/provision/batch_detail_service.go @@ -0,0 +1,57 @@ +package provision + +import ( + "context" + "fmt" + + "sub2api-cn-relay-manager/internal/store/sqlite" +) + +type BatchDetailResult struct { + Batch sqlite.ImportBatch + Items []sqlite.ImportBatchItem + ManagedResources []sqlite.ManagedResource + AccessClosures []sqlite.AccessClosureRecord + ReconcileRuns []sqlite.ReconcileRun +} + +type BatchDetailService struct { + store *sqlite.DB +} + +func NewBatchDetailService(store *sqlite.DB) *BatchDetailService { + return &BatchDetailService{store: store} +} + +func (s *BatchDetailService) Get(ctx context.Context, batchID int64) (BatchDetailResult, error) { + if s == nil || s.store == nil { + return BatchDetailResult{}, fmt.Errorf("store is required") + } + batch, err := s.store.ImportBatches().GetByID(ctx, batchID) + if err != nil { + return BatchDetailResult{}, err + } + items, err := s.store.ImportBatchItems().GetByBatchID(ctx, batchID) + if err != nil { + return BatchDetailResult{}, err + } + managedResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchID) + if err != nil { + return BatchDetailResult{}, err + } + accessClosures, err := s.store.AccessClosures().GetByBatchID(ctx, batchID) + if err != nil { + return BatchDetailResult{}, err + } + reconcileRuns, err := s.store.ReconcileRuns().GetByBatchID(ctx, batchID) + if err != nil { + return BatchDetailResult{}, err + } + return BatchDetailResult{ + Batch: batch, + Items: items, + ManagedResources: managedResources, + AccessClosures: accessClosures, + ReconcileRuns: reconcileRuns, + }, nil +} diff --git a/internal/provision/batch_detail_service_test.go b/internal/provision/batch_detail_service_test.go index 9b96707e..fe8938b3 100644 --- a/internal/provision/batch_detail_service_test.go +++ b/internal/provision/batch_detail_service_test.go @@ -142,59 +142,6 @@ func TestAccountIDFromProbeSummary(t *testing.T) { } } -func TestReconcileServiceRerunAccessClosureWithoutProbeKeyUsesLatestStatus(t *testing.T) { - store := openProvisionTestStore(t) - defer closeProvisionTestStore(t, store) - - status, checked, err := NewReconcileService(store, &fakeHostAdapter{}).rerunAccessClosure(context.Background(), 1, []sqlite.AccessClosureRecord{{ClosureType: AccessModeSubscription, Status: AccessStatusSubscriptionReady}}, "", "deepseek-chat") - if err != nil { - t.Fatalf("rerunAccessClosure() error = %v", err) - } - if checked { - t.Fatal("checked = true, want false without probe key") - } - if status != AccessStatusSubscriptionReady { - t.Fatalf("status = %q, want %q", status, AccessStatusSubscriptionReady) - } -} - -func TestReconcileServiceRerunAccessClosureMarksBrokenWhenGatewayCheckFails(t *testing.T) { - store := openProvisionTestStore(t) - defer closeProvisionTestStore(t, store) - - hostSeed := &fakeHostAdapter{ - batchAccounts: []sub2api.AccountRef{{ID: "account_1", Name: "deepseek-01"}, {ID: "account_2", Name: "deepseek-02"}}, - testResults: map[string]sub2api.ProbeResult{ - "account_1": {OK: true, Status: "passed"}, - "account_2": {OK: true, Status: "passed"}, - }, - models: map[string][]sub2api.AccountModel{ - "account_1": {{ID: "deepseek-chat"}}, - "account_2": {{ID: "deepseek-chat"}}, - }, - gatewayResult: sub2api.GatewayAccessResult{OK: true, StatusCode: 200, HasExpectedModel: true, Models: []string{"deepseek-chat"}}, - } - batchID := seedRuntimeImportForReconcile(t, store, hostSeed) - - host := &fakeHostAdapter{gatewayResult: sub2api.GatewayAccessResult{OK: false, StatusCode: 403, HasExpectedModel: false}} - status, checked, err := NewReconcileService(store, host).rerunAccessClosure(context.Background(), batchID, []sqlite.AccessClosureRecord{{ClosureType: AccessModeSelfService, Status: AccessStatusSelfServiceReady}}, "user-key", "deepseek-chat") - if err != nil { - t.Fatalf("rerunAccessClosure() error = %v", err) - } - if !checked { - t.Fatal("checked = false, want true") - } - if status != AccessStatusBroken { - t.Fatalf("status = %q, want %q", status, AccessStatusBroken) - } - if got := queryCount(t, store.SQLDB(), "access_closure_records"); got != 2 { - t.Fatalf("access_closure_records row count = %d, want 2 after rerun", got) - } - if host.gatewayProbe.ExpectedModel != "deepseek-chat" { - t.Fatalf("ExpectedModel = %q, want deepseek-chat", host.gatewayProbe.ExpectedModel) - } -} - func TestDiffManagedResourcesCountsMissingAndExtra(t *testing.T) { missing, extra := diffManagedResources( []sqlite.ManagedResource{ diff --git a/internal/provision/import_service.go b/internal/provision/import_service.go index c28e85e7..8009469d 100644 --- a/internal/provision/import_service.go +++ b/internal/provision/import_service.go @@ -258,11 +258,13 @@ func (s *ImportService) Import(ctx context.Context, req ImportRequest) (report I closureService := access.NewService(s.host) gateway, err := closureService.Close(ctx, access.ClosureRequest{ - Mode: req.Access.Mode, - ProbeAPIKey: req.Access.ProbeAPIKey, - Subscriptions: toAccessSubscriptionTargets(req.Access.Subscriptions), - GroupID: resources.Group.ID, - ExpectedModel: req.Provider.SmokeTestModel, + Mode: req.Access.Mode, + ProbeAPIKey: req.Access.ProbeAPIKey, + Subscriptions: toAccessSubscriptionTargets(req.Access.Subscriptions), + GroupID: resources.Group.ID, + AccountIDs: importedAccountIDs(report.Accounts), + ExpectedModel: req.Provider.SmokeTestModel, + ResponsesCapabilitySuspect: importedAccountsSuspectResponsesCapabilityMismatch(report.Accounts), }) if err != nil { return failOrDegrade(report, req.Mode, err) @@ -326,6 +328,28 @@ func (s *ImportService) ensureManagedResources(ctx context.Context, provider pac return result, nil } +func importedAccountIDs(accounts []AccountImportResult) []string { + ids := make([]string, 0, len(accounts)) + for _, account := range accounts { + if trimmed := strings.TrimSpace(account.Ref.ID); trimmed != "" { + ids = append(ids, trimmed) + } + } + return ids +} + +func importedAccountsSuspectResponsesCapabilityMismatch(accounts []AccountImportResult) bool { + for _, account := range accounts { + if !account.SmokeModelSeen { + continue + } + if access.SuspectsOpenAIResponsesCapabilityMismatch(account.Probe) { + return true + } + } + return false +} + func ensureGroup(ctx context.Context, host hostAdapter, existing []sub2api.NamedResource, provider pack.ProviderManifest, accessMode, groupName string) (sub2api.GroupRef, bool, error) { switch len(existing) { case 0: diff --git a/internal/provision/import_service_test.go b/internal/provision/import_service_test.go index e71a61d5..c71b8f6b 100644 --- a/internal/provision/import_service_test.go +++ b/internal/provision/import_service_test.go @@ -342,6 +342,51 @@ func TestImportServiceRetriesTransientGatewayCompletionFailure(t *testing.T) { } } +func TestImportServiceRepairsOpenAIResponsesCapabilityMismatchAfterInstall(t *testing.T) { + host := &fakeHostAdapter{ + batchAccounts: []sub2api.AccountRef{{ID: "account_1", Name: "kimi-a7m-01"}}, + testResults: map[string]sub2api.ProbeResult{ + "account_1": { + OK: false, + Status: "failed", + Message: "API returned 403: Forbidden", + }, + }, + models: map[string][]sub2api.AccountModel{ + "account_1": {{ID: "deepseek-chat"}}, + }, + gatewayResult: sub2api.GatewayAccessResult{ + OK: true, + StatusCode: 200, + HasExpectedModel: true, + Models: []string{"deepseek-chat"}, + }, + completionResults: []sub2api.GatewayCompletionResult{ + {OK: false, StatusCode: 502, ContentType: "application/json", BodyPreview: `{"error":{"message":"Upstream service temporarily unavailable","type":"upstream_error"}}`}, + }, + completionAfterRepair: &sub2api.GatewayCompletionResult{OK: true, StatusCode: 200, ContentType: "application/json"}, + } + + report, err := NewImportService(host).Import(context.Background(), ImportRequest{ + Provider: sampleProviderManifest(), + Mode: ImportModePartial, + Access: AccessRequest{Mode: AccessModeSelfService, ProbeAPIKey: "user-key"}, + Keys: []string{"key-1"}, + }) + if err != nil { + t.Fatalf("Import() error = %v", err) + } + if !report.Gateway.CompletionOK || report.Gateway.CompletionStatus != 200 { + t.Fatalf("Gateway completion = %+v, want repaired success", report.Gateway) + } + if host.disableResponsesCalls != 1 { + t.Fatalf("disable responses calls = %d, want 1", host.disableResponsesCalls) + } + if len(host.disabledResponsesAccountIDs) != 1 || host.disabledResponsesAccountIDs[0] != "account_1" { + t.Fatalf("disabled responses account ids = %v, want [account_1]", host.disabledResponsesAccountIDs) + } +} + func TestImportServiceStrictModeRollsBackCreatedResources(t *testing.T) { host := &fakeHostAdapter{ batchAccounts: []sub2api.AccountRef{{ID: "account_1"}, {ID: "account_2"}}, @@ -594,35 +639,38 @@ func TestImportKeepsExistingAccountsWhenReplacementValidationFails(t *testing.T) } type fakeHostAdapter struct { - batchAccounts []sub2api.AccountRef - batchCreateReq sub2api.BatchCreateAccountsRequest - testResults map[string]sub2api.ProbeResult - models map[string][]sub2api.AccountModel - gatewayResult sub2api.GatewayAccessResult - batchCreateErr error - assignErr error - gatewayErr error - hostVersion string - assignedSubscriptions []sub2api.AssignSubscriptionRequest - gatewayProbe sub2api.GatewayAccessCheckRequest - completionProbe sub2api.GatewayCompletionCheckRequest - deletedResources []string - managedSnapshot sub2api.ManagedResourceSnapshot - listManagedReq sub2api.ListManagedResourcesRequest - createGroupCalls int - createChannelCalls int - updateChannelCalls int - createPlanCalls int - createGroupReq sub2api.CreateGroupRequest - createChannelReq sub2api.CreateChannelRequest - updateChannelID string - updateChannelReq sub2api.CreateChannelRequest - callSequence []string - completionCalls int - completionResults []sub2api.GatewayCompletionResult - completionResult sub2api.GatewayCompletionResult - completionErr error - testedModels map[string]string + batchAccounts []sub2api.AccountRef + batchCreateReq sub2api.BatchCreateAccountsRequest + testResults map[string]sub2api.ProbeResult + models map[string][]sub2api.AccountModel + gatewayResult sub2api.GatewayAccessResult + batchCreateErr error + assignErr error + gatewayErr error + hostVersion string + assignedSubscriptions []sub2api.AssignSubscriptionRequest + gatewayProbe sub2api.GatewayAccessCheckRequest + completionProbe sub2api.GatewayCompletionCheckRequest + deletedResources []string + managedSnapshot sub2api.ManagedResourceSnapshot + listManagedReq sub2api.ListManagedResourcesRequest + createGroupCalls int + createChannelCalls int + updateChannelCalls int + createPlanCalls int + createGroupReq sub2api.CreateGroupRequest + createChannelReq sub2api.CreateChannelRequest + updateChannelID string + updateChannelReq sub2api.CreateChannelRequest + callSequence []string + completionCalls int + completionResults []sub2api.GatewayCompletionResult + completionResult sub2api.GatewayCompletionResult + completionAfterRepair *sub2api.GatewayCompletionResult + completionErr error + testedModels map[string]string + disableResponsesCalls int + disabledResponsesAccountIDs []string } func (f *fakeHostAdapter) GetHostVersion(context.Context) (string, error) { @@ -724,6 +772,9 @@ func (f *fakeHostAdapter) CheckGatewayCompletion(_ context.Context, req sub2api. if f.completionErr != nil { return sub2api.GatewayCompletionResult{}, f.completionErr } + if f.disableResponsesCalls > 0 && f.completionAfterRepair != nil { + return *f.completionAfterRepair, nil + } if len(f.completionResults) > 0 { idx := f.completionCalls - 1 if idx >= len(f.completionResults) { @@ -736,6 +787,11 @@ func (f *fakeHostAdapter) CheckGatewayCompletion(_ context.Context, req sub2api. } return f.completionResult, nil } +func (f *fakeHostAdapter) DisableOpenAIResponsesAPI(_ context.Context, accountIDs []string) error { + f.disableResponsesCalls++ + f.disabledResponsesAccountIDs = append([]string(nil), accountIDs...) + return nil +} func (f *fakeHostAdapter) ListManagedResources(_ context.Context, req sub2api.ListManagedResourcesRequest) (sub2api.ManagedResourceSnapshot, error) { f.listManagedReq = req return sub2api.ManagedResourceSnapshot{ diff --git a/internal/provision/reconcile_compat_test.go b/internal/provision/reconcile_compat_test.go new file mode 100644 index 00000000..a1573435 --- /dev/null +++ b/internal/provision/reconcile_compat_test.go @@ -0,0 +1,26 @@ +package provision + +import ( + "sub2api-cn-relay-manager/internal/host/sub2api" + "sub2api-cn-relay-manager/internal/reconcile" + "sub2api-cn-relay-manager/internal/store/sqlite" +) + +type ReconcileRequest = reconcile.Request +type ReconcileResult = reconcile.Result + +func NewReconcileService(store *sqlite.DB, host sub2api.HostAdapter) *reconcile.Service { + return reconcile.NewService(store, host) +} + +func accountIDFromProbeSummary(summaryJSON string) (string, error) { + return reconcile.AccountIDFromProbeSummary(summaryJSON) +} + +func diffManagedResources(stored []sqlite.ManagedResource, snapshot sub2api.ManagedResourceSnapshot) (int, int) { + return reconcile.DiffManagedResources(stored, snapshot) +} + +func deriveHealthyAccessStatus(closureType string) string { + return reconcile.DeriveHealthyAccessStatus(closureType) +} diff --git a/internal/provision/reconcile_service_test.go b/internal/provision/reconcile_service_test.go index cc5b7e14..cf14fd23 100644 --- a/internal/provision/reconcile_service_test.go +++ b/internal/provision/reconcile_service_test.go @@ -162,6 +162,55 @@ func TestReconcileServiceIgnoresAdvisoryProbeFailureWhenModelsAndGatewayAreHealt } } +func TestReconcileServiceRepairsOpenAIResponsesCapabilityMismatch(t *testing.T) { + store := openProvisionTestStore(t) + defer closeProvisionTestStore(t, store) + + host := &fakeHostAdapter{ + batchAccounts: []sub2api.AccountRef{{ID: "account_1", Name: "deepseek-01"}, {ID: "account_2", Name: "deepseek-02"}}, + testResults: map[string]sub2api.ProbeResult{ + "account_1": {OK: false, Status: "failed", Message: "API returned 403: Forbidden"}, + "account_2": {OK: false, Status: "failed", Message: "API returned 403: Forbidden"}, + }, + models: map[string][]sub2api.AccountModel{ + "account_1": {{ID: "deepseek-chat"}}, + "account_2": {{ID: "deepseek-chat"}}, + }, + gatewayResult: sub2api.GatewayAccessResult{OK: true, StatusCode: 200, HasExpectedModel: true, Models: []string{"deepseek-chat"}}, + completionResults: []sub2api.GatewayCompletionResult{ + {OK: false, StatusCode: 502, ContentType: "application/json", BodyPreview: `{"error":{"message":"Upstream service temporarily unavailable","type":"upstream_error"}}`}, + }, + completionAfterRepair: &sub2api.GatewayCompletionResult{OK: true, StatusCode: 200, ContentType: "application/json"}, + } + + seedRuntimeImportForReconcile(t, store, host) + host.managedSnapshot = sub2api.ManagedResourceSnapshot{ + Groups: []sub2api.NamedResource{{ID: "group_1", Name: "DeepSeek 默认分组-self-service"}}, + Channels: []sub2api.NamedResource{{ID: "channel_1", Name: "DeepSeek 默认渠道-self-service"}}, + Accounts: []sub2api.NamedResource{{ID: "account_1", Name: "deepseek-01"}, {ID: "account_2", Name: "deepseek-02"}}, + } + + result, err := NewReconcileService(store, host).Reconcile(context.Background(), ReconcileRequest{ + HostID: "host-1", + HostBaseURL: "https://sub2api.example.com", + AccessProbeAPIKey: "user-key", + Pack: pack.LoadedPack{Manifest: pack.Manifest{PackID: "openai-cn-pack", Version: "1.0.0", TargetHost: "sub2api", MinHostVersion: "0.1.126", MaxHostVersion: "0.2.x"}}, + Provider: sampleProviderManifest(), + }) + if err != nil { + t.Fatalf("Reconcile() error = %v", err) + } + if result.Status != "active" { + t.Fatalf("Status = %q, want active after repair", result.Status) + } + if host.disableResponsesCalls != 1 { + t.Fatalf("disable responses calls = %d, want 1", host.disableResponsesCalls) + } + if len(host.disabledResponsesAccountIDs) != 2 { + t.Fatalf("disabled responses account ids = %v, want both accounts", host.disabledResponsesAccountIDs) + } +} + func TestReconcileServiceReturnsDriftedWhenManagedResourceMissing(t *testing.T) { store := openProvisionTestStore(t) defer closeProvisionTestStore(t, store) diff --git a/internal/provision/runtime_import_service.go b/internal/provision/runtime_import_service.go index 3194b5a2..7aad020b 100644 --- a/internal/provision/runtime_import_service.go +++ b/internal/provision/runtime_import_service.go @@ -110,7 +110,7 @@ func (s *RuntimeImportService) Import(ctx context.Context, req RuntimeImportRequ } includeManagedResources := importErr == nil || req.Mode != ImportModeStrict - if persistErr := s.persistRuntimeArtifacts(ctx, batchID, hostRow.ID, req.Access.Mode, report, includeManagedResources); persistErr != nil { + if persistErr := s.persistRuntimeArtifacts(ctx, batchID, hostRow.ID, req.Access, report, includeManagedResources); persistErr != nil { return RuntimeImportResult{}, persistErr } if err := s.store.ImportBatches().UpdateStatus(ctx, batchID, report.BatchStatus, report.AccessStatus); err != nil { @@ -167,7 +167,7 @@ func (s *RuntimeImportService) ensureProvider(ctx context.Context, packID int64, return s.store.Providers().GetByPackIDAndProviderID(ctx, packID, provider.ProviderID) } -func (s *RuntimeImportService) persistRuntimeArtifacts(ctx context.Context, batchID, hostID int64, accessMode string, report ImportReport, includeManagedResources bool) error { +func (s *RuntimeImportService) persistRuntimeArtifacts(ctx context.Context, batchID, hostID int64, access AccessRequest, report ImportReport, includeManagedResources bool) error { for i, account := range report.Accounts { validationStatus := account.ValidationStatus() payload, err := json.Marshal(map[string]any{ @@ -222,6 +222,9 @@ func (s *RuntimeImportService) persistRuntimeArtifacts(ctx context.Context, batc } accessPayload, err := json.Marshal(map[string]any{ + "probe_api_key": strings.TrimSpace(access.ProbeAPIKey), + "subscription_users": subscriptionUserIDs(access.Subscriptions), + "subscription_days": subscriptionDurationDays(access.Subscriptions), "status_code": report.Gateway.StatusCode, "ok": report.Gateway.OK, "has_expected_model": report.Gateway.HasExpectedModel, @@ -236,7 +239,7 @@ func (s *RuntimeImportService) persistRuntimeArtifacts(ctx context.Context, batc } if _, err := s.store.AccessClosures().Create(ctx, sqlite.AccessClosureRecord{ BatchID: batchID, - ClosureType: firstNonEmpty(strings.TrimSpace(accessMode), "unknown"), + ClosureType: firstNonEmpty(strings.TrimSpace(access.Mode), "unknown"), Status: firstNonEmpty(report.AccessStatus, AccessStatusBroken), DetailsJSON: string(accessPayload), }); err != nil { @@ -245,6 +248,23 @@ func (s *RuntimeImportService) persistRuntimeArtifacts(ctx context.Context, batc return nil } +func subscriptionUserIDs(targets []SubscriptionTarget) []string { + values := make([]string, 0, len(targets)) + for _, target := range targets { + if trimmed := strings.TrimSpace(target.UserID); trimmed != "" { + values = append(values, trimmed) + } + } + return values +} + +func subscriptionDurationDays(targets []SubscriptionTarget) int { + if len(targets) == 0 { + return 0 + } + return targets[0].DurationDays +} + func (s *RuntimeImportService) persistManagedResourceIfAbsent(ctx context.Context, batchID, hostID int64, resourceType, hostResourceID, resourceName string) error { resourceType = strings.TrimSpace(resourceType) hostResourceID = strings.TrimSpace(hostResourceID) diff --git a/internal/provision/runtime_import_service_test.go b/internal/provision/runtime_import_service_test.go index cc17c14c..b1079fc2 100644 --- a/internal/provision/runtime_import_service_test.go +++ b/internal/provision/runtime_import_service_test.go @@ -3,6 +3,7 @@ package provision import ( "context" "database/sql" + "encoding/json" "fmt" "path/filepath" "strings" @@ -352,6 +353,114 @@ func TestRuntimeImportServicePersistsPartialManagedResourcesOnAccessFailure(t *t } } +func TestRuntimeImportServicePersistsSelfServiceProbeKeyInAccessClosure(t *testing.T) { + store := openProvisionTestStore(t) + defer closeProvisionTestStore(t, store) + + seedProvisionHost(t, store, "host-1", "https://sub2api.example.com") + + host := &fakeHostAdapter{ + batchAccounts: []sub2api.AccountRef{{ID: "account_1", Name: "deepseek-01"}}, + testResults: map[string]sub2api.ProbeResult{ + "account_1": {OK: true, Status: "passed"}, + }, + models: map[string][]sub2api.AccountModel{ + "account_1": {{ID: "deepseek-chat"}}, + }, + gatewayResult: sub2api.GatewayAccessResult{OK: true, StatusCode: 200, HasExpectedModel: true, Models: []string{"deepseek-chat"}, CompletionOK: true, CompletionStatus: 200}, + } + + result, err := NewRuntimeImportService(store, host).Import(context.Background(), RuntimeImportRequest{ + HostID: "host-1", + HostBaseURL: "https://sub2api.example.com", + Pack: pack.LoadedPack{ + Manifest: pack.Manifest{PackID: "openai-cn-pack", Version: "1.0.0", TargetHost: "sub2api", MinHostVersion: "0.1.126", MaxHostVersion: "0.2.x"}, + Checksum: "checksum-1", + }, + Provider: sampleProviderManifest(), + Mode: ImportModePartial, + Keys: []string{"key-1"}, + Access: AccessRequest{ + Mode: AccessModeSelfService, + ProbeAPIKey: "user-probe-key", + }, + }) + if err != nil { + t.Fatalf("RuntimeImportService.Import() error = %v", err) + } + + closures, err := store.AccessClosures().GetByBatchID(context.Background(), result.BatchID) + if err != nil { + t.Fatalf("AccessClosures().GetByBatchID() error = %v", err) + } + if len(closures) != 1 { + t.Fatalf("access closures = %d, want 1", len(closures)) + } + var payload map[string]any + if err := json.Unmarshal([]byte(closures[0].DetailsJSON), &payload); err != nil { + t.Fatalf("decode access closure details: %v", err) + } + if got, _ := payload["probe_api_key"].(string); got != "user-probe-key" { + t.Fatalf("probe_api_key = %q, want user-probe-key", got) + } +} + +func TestRuntimeImportServicePersistsSubscriptionMetadataInAccessClosure(t *testing.T) { + store := openProvisionTestStore(t) + defer closeProvisionTestStore(t, store) + + seedProvisionHost(t, store, "host-1", "https://sub2api.example.com") + + host := &fakeHostAdapter{ + batchAccounts: []sub2api.AccountRef{{ID: "account_1", Name: "deepseek-01"}}, + testResults: map[string]sub2api.ProbeResult{ + "account_1": {OK: true, Status: "passed"}, + }, + models: map[string][]sub2api.AccountModel{ + "account_1": {{ID: "deepseek-chat"}}, + }, + gatewayResult: sub2api.GatewayAccessResult{OK: true, StatusCode: 200, HasExpectedModel: true, Models: []string{"deepseek-chat"}, CompletionOK: true, CompletionStatus: 200}, + } + + result, err := NewRuntimeImportService(store, host).Import(context.Background(), RuntimeImportRequest{ + HostID: "host-1", + HostBaseURL: "https://sub2api.example.com", + Pack: pack.LoadedPack{ + Manifest: pack.Manifest{PackID: "openai-cn-pack", Version: "1.0.0", TargetHost: "sub2api", MinHostVersion: "0.1.126", MaxHostVersion: "0.2.x"}, + Checksum: "checksum-1", + }, + Provider: sampleProviderManifest(), + Mode: ImportModePartial, + Keys: []string{"key-1"}, + Access: AccessRequest{ + Mode: AccessModeSubscription, + Subscriptions: []SubscriptionTarget{{UserID: "user-42", DurationDays: 30}}, + }, + }) + if err != nil { + t.Fatalf("RuntimeImportService.Import() error = %v", err) + } + + closures, err := store.AccessClosures().GetByBatchID(context.Background(), result.BatchID) + if err != nil { + t.Fatalf("AccessClosures().GetByBatchID() error = %v", err) + } + if len(closures) != 1 { + t.Fatalf("access closures = %d, want 1", len(closures)) + } + var payload map[string]any + if err := json.Unmarshal([]byte(closures[0].DetailsJSON), &payload); err != nil { + t.Fatalf("decode access closure details: %v", err) + } + users, _ := payload["subscription_users"].([]any) + if len(users) != 1 || users[0] != "user-42" { + t.Fatalf("subscription_users = %#v, want [user-42]", users) + } + if got, _ := payload["subscription_days"].(float64); int(got) != 30 { + t.Fatalf("subscription_days = %v, want 30", got) + } +} + func TestRuntimeImportServiceRepeatedImportReusesManagedResources(t *testing.T) { store := openProvisionTestStore(t) defer closeProvisionTestStore(t, store) diff --git a/internal/provision/batch_detail_and_reconcile_service.go b/internal/reconcile/service.go similarity index 51% rename from internal/provision/batch_detail_and_reconcile_service.go rename to internal/reconcile/service.go index 1c8b329f..8b5c5410 100644 --- a/internal/provision/batch_detail_and_reconcile_service.go +++ b/internal/reconcile/service.go @@ -1,66 +1,37 @@ -package provision +package reconcile import ( "context" "encoding/json" "fmt" + "regexp" "strings" + "sub2api-cn-relay-manager/internal/access" "sub2api-cn-relay-manager/internal/host/sub2api" "sub2api-cn-relay-manager/internal/pack" "sub2api-cn-relay-manager/internal/store/sqlite" ) -type BatchDetailResult struct { - Batch sqlite.ImportBatch - Items []sqlite.ImportBatchItem - ManagedResources []sqlite.ManagedResource - AccessClosures []sqlite.AccessClosureRecord - ReconcileRuns []sqlite.ReconcileRun -} +const ( + accessModeSubscription = "subscription" + accessModeSelfService = "self_service" -type BatchDetailService struct { - store *sqlite.DB -} + accessStatusSubscriptionReady = "subscription_ready" + accessStatusSelfServiceReady = "self_service_ready" + accessStatusBroken = "broken" -func NewBatchDetailService(store *sqlite.DB) *BatchDetailService { - return &BatchDetailService{store: store} -} + batchStatusSucceeded = "succeeded" + batchStatusPartial = "partially_succeeded" -func (s *BatchDetailService) Get(ctx context.Context, batchID int64) (BatchDetailResult, error) { - if s == nil || s.store == nil { - return BatchDetailResult{}, fmt.Errorf("store is required") - } - batch, err := s.store.ImportBatches().GetByID(ctx, batchID) - if err != nil { - return BatchDetailResult{}, err - } - items, err := s.store.ImportBatchItems().GetByBatchID(ctx, batchID) - if err != nil { - return BatchDetailResult{}, err - } - managedResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchID) - if err != nil { - return BatchDetailResult{}, err - } - accessClosures, err := s.store.AccessClosures().GetByBatchID(ctx, batchID) - if err != nil { - return BatchDetailResult{}, err - } - reconcileRuns, err := s.store.ReconcileRuns().GetByBatchID(ctx, batchID) - if err != nil { - return BatchDetailResult{}, err - } - return BatchDetailResult{ - Batch: batch, - Items: items, - ManagedResources: managedResources, - AccessClosures: accessClosures, - ReconcileRuns: reconcileRuns, - }, nil -} + accountStatusPassed = "passed" + accountStatusWarning = "warning" + accountStatusFailed = "failed" +) -type ReconcileRequest struct { +var nonSlugPattern = regexp.MustCompile(`[^a-z0-9]+`) + +type Request struct { HostID string HostBaseURL string AccessProbeAPIKey string @@ -68,7 +39,7 @@ type ReconcileRequest struct { Provider pack.ProviderManifest } -type ReconcileResult struct { +type Result struct { BatchID int64 Status string MissingCount int @@ -79,97 +50,103 @@ type ReconcileResult struct { Summary map[string]any } -type ReconcileService struct { +type accountProbeRerunSummary struct { + Failures int + AccountIDs []string + ResponsesCapabilitySuspect bool +} + +type Service struct { store *sqlite.DB host sub2api.HostAdapter } -func NewReconcileService(store *sqlite.DB, host sub2api.HostAdapter) *ReconcileService { - return &ReconcileService{store: store, host: host} +func NewService(store *sqlite.DB, host sub2api.HostAdapter) *Service { + return &Service{store: store, host: host} } -func (s *ReconcileService) Reconcile(ctx context.Context, req ReconcileRequest) (ReconcileResult, error) { +func (s *Service) Reconcile(ctx context.Context, req Request) (Result, error) { if s == nil || s.store == nil { - return ReconcileResult{}, fmt.Errorf("store is required") + return Result{}, fmt.Errorf("store is required") } if s.host == nil { - return ReconcileResult{}, fmt.Errorf("host adapter is required") + return Result{}, fmt.Errorf("host adapter is required") } if strings.TrimSpace(req.HostID) == "" { - return ReconcileResult{}, fmt.Errorf("host_id is required") + return Result{}, fmt.Errorf("host_id is required") } if strings.TrimSpace(req.HostBaseURL) == "" { - return ReconcileResult{}, fmt.Errorf("host_base_url is required") + return Result{}, fmt.Errorf("host_base_url is required") } hostVersion, err := s.host.GetHostVersion(ctx) if err != nil { - return ReconcileResult{}, fmt.Errorf("get host version: %w", err) + return Result{}, fmt.Errorf("get host version: %w", err) } if err := pack.CheckHostCompatibility(req.Pack.Manifest, hostVersion); err != nil { - return ReconcileResult{}, err + return Result{}, err } packRow, err := s.store.Packs().GetByPackID(ctx, req.Pack.Manifest.PackID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } providerRow, err := s.store.Providers().GetByPackIDAndProviderID(ctx, packRow.ID, req.Provider.ProviderID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } hostRow, err := s.store.Hosts().GetByHostID(ctx, req.HostID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } batchRow, err := s.store.ImportBatches().GetLatestByProviderIDAndHostID(ctx, providerRow.ID, hostRow.ID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } switch strings.TrimSpace(batchRow.BatchStatus) { - case BatchStatusSucceeded, BatchStatusPartial: + case batchStatusSucceeded, batchStatusPartial: default: - return ReconcileResult{}, fmt.Errorf("latest import batch is %s; run import again before reconcile", batchRow.BatchStatus) + return Result{}, fmt.Errorf("latest import batch is %s; run import again before reconcile", batchRow.BatchStatus) } storedResources, err := s.storedResourcesForReconcile(ctx, providerRow.ID, hostRow.ID, batchRow.ID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } currentBatchResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchRow.ID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } batchItems, err := s.store.ImportBatchItems().GetByBatchID(ctx, batchRow.ID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } accessClosures, err := s.store.AccessClosures().GetByBatchID(ctx, batchRow.ID) if err != nil { - return ReconcileResult{}, err + return Result{}, err } snapshot, err := s.host.ListManagedResources(ctx, buildManagedResourceListRequest(req.Provider, accessClosureType(accessClosures))) if err != nil { - return ReconcileResult{}, fmt.Errorf("list managed resources: %w", err) + return Result{}, fmt.Errorf("list managed resources: %w", err) } missing, extra := diffManagedResources(storedResources, snapshot) rawExtra := extra - staleNoiseAccounts := classifyHistoricalAccountNoise(currentBatchResources, snapshot.Accounts, SuggestAccountNamePrefix(req.Provider)) + staleNoiseAccounts := classifyHistoricalAccountNoise(currentBatchResources, snapshot.Accounts, suggestAccountNamePrefix(req.Provider)) if len(staleNoiseAccounts) > 0 { extra -= len(staleNoiseAccounts) if extra < 0 { extra = 0 } } - probeFailures, err := s.rerunAccountProbes(ctx, batchItems, req.Provider.SmokeTestModel) + probeSummary, err := s.rerunAccountProbes(ctx, batchItems, req.Provider.SmokeTestModel) if err != nil { - return ReconcileResult{}, err + return Result{}, err } - accessStatus, accessChecked, err := s.rerunAccessClosure(ctx, batchRow.ID, accessClosures, req.AccessProbeAPIKey, req.Provider.SmokeTestModel) + accessStatus, accessChecked, err := s.rerunAccessClosure(ctx, batchRow.ID, accessClosures, req.AccessProbeAPIKey, req.Provider.SmokeTestModel, probeSummary.AccountIDs, probeSummary.ResponsesCapabilitySuspect) if err != nil { - return ReconcileResult{}, err + return Result{}, err } status := "active" if missing > 0 || extra > 0 { status = "drifted" - } else if probeFailures > 0 || (accessChecked && accessStatus == AccessStatusBroken) { + } else if probeSummary.Failures > 0 || (accessChecked && accessStatus == accessStatusBroken) { status = "degraded" } summary := map[string]any{ @@ -179,48 +156,56 @@ func (s *ReconcileService) Reconcile(ctx context.Context, req ReconcileRequest) "stale_noise_count": len(staleNoiseAccounts), "stale_noise_accounts": staleNoiseAccounts, "host_version": hostVersion, - "probe_failures": probeFailures, + "probe_failures": probeSummary.Failures, "access_status": accessStatus, "access_rechecked": accessChecked, } summaryJSON, err := json.Marshal(summary) if err != nil { - return ReconcileResult{}, fmt.Errorf("marshal reconcile summary: %w", err) + return Result{}, fmt.Errorf("marshal reconcile summary: %w", err) } if _, err := s.store.ReconcileRuns().Create(ctx, sqlite.ReconcileRun{BatchID: batchRow.ID, HostID: hostRow.ID, ProviderID: providerRow.ID, Status: status, SummaryJSON: string(summaryJSON)}); err != nil { - return ReconcileResult{}, err + return Result{}, err } - return ReconcileResult{BatchID: batchRow.ID, Status: status, MissingCount: missing, ExtraCount: extra, StaleNoiseCount: len(staleNoiseAccounts), ProbeFailureCount: probeFailures, AccessStatus: accessStatus, Summary: summary}, nil + return Result{ + BatchID: batchRow.ID, + Status: status, + MissingCount: missing, + ExtraCount: extra, + StaleNoiseCount: len(staleNoiseAccounts), + ProbeFailureCount: probeSummary.Failures, + AccessStatus: accessStatus, + Summary: summary, + }, nil } -func (s *ReconcileService) rerunAccountProbes(ctx context.Context, items []sqlite.ImportBatchItem, expectedModel string) (int, error) { +func (s *Service) rerunAccountProbes(ctx context.Context, items []sqlite.ImportBatchItem, expectedModel string) (accountProbeRerunSummary, error) { if len(items) == 0 { - return 0, nil + return accountProbeRerunSummary{}, nil } - failures := 0 + summary := accountProbeRerunSummary{} for _, item := range items { accountID, err := accountIDFromProbeSummary(item.ProbeSummaryJSON) if err != nil { - return 0, fmt.Errorf("decode import batch item %d probe summary: %w", item.ID, err) + return accountProbeRerunSummary{}, fmt.Errorf("decode import batch item %d probe summary: %w", item.ID, err) } if strings.TrimSpace(accountID) == "" { - return 0, fmt.Errorf("import batch item %d missing account_id in probe summary", item.ID) + return accountProbeRerunSummary{}, fmt.Errorf("import batch item %d missing account_id in probe summary", item.ID) } + summary.AccountIDs = append(summary.AccountIDs, accountID) probe, err := s.host.TestAccount(ctx, accountID, expectedModel) if err != nil { - return 0, fmt.Errorf("re-test account %s: %w", accountID, err) + return accountProbeRerunSummary{}, fmt.Errorf("re-test account %s: %w", accountID, err) } models, err := s.host.GetAccountModels(ctx, accountID) if err != nil { - return 0, fmt.Errorf("reload account models %s: %w", accountID, err) + return accountProbeRerunSummary{}, fmt.Errorf("reload account models %s: %w", accountID, err) } smokeModelSeen := hasModel(models, expectedModel) - result := AccountImportResult{ - Probe: probe, - Models: models, - SmokeModelSeen: smokeModelSeen, + status := accountValidationStatus(probe, smokeModelSeen) + if smokeModelSeen && access.SuspectsOpenAIResponsesCapabilityMismatch(probe) { + summary.ResponsesCapabilitySuspect = true } - status := result.ValidationStatus() payload, err := json.Marshal(map[string]any{ "account_id": accountID, "probe_ok": probe.OK, @@ -228,27 +213,28 @@ func (s *ReconcileService) rerunAccountProbes(ctx context.Context, items []sqlit "probe_message": probe.Message, "models": models, "smoke_model_seen": smokeModelSeen, - "probe_advisory": result.HasAdvisoryWarning(), + "probe_advisory": status == accountStatusWarning, "validation_status": status, "reconcile_rerun": true, }) if err != nil { - return 0, fmt.Errorf("marshal probe rerun summary for %s: %w", accountID, err) + return accountProbeRerunSummary{}, fmt.Errorf("marshal probe rerun summary for %s: %w", accountID, err) } if err := s.store.ImportBatchItems().UpdateResult(ctx, item.ID, status, string(payload)); err != nil { - return 0, err + return accountProbeRerunSummary{}, err } if _, err := s.store.ProbeResults().Create(ctx, sqlite.ProbeResult{BatchItemID: item.ID, ProbeType: "account_smoke_rerun", Status: status, SummaryJSON: string(payload)}); err != nil { - return 0, err + return accountProbeRerunSummary{}, err } - if result.HasBlockingFailure() { - failures++ + if status == accountStatusFailed { + summary.Failures++ } } - return failures, nil + summary.AccountIDs = normalizedUniqueAccountIDs(summary.AccountIDs) + return summary, nil } -func (s *ReconcileService) rerunAccessClosure(ctx context.Context, batchID int64, accessClosures []sqlite.AccessClosureRecord, probeAPIKey, expectedModel string) (string, bool, error) { +func (s *Service) rerunAccessClosure(ctx context.Context, batchID int64, accessClosures []sqlite.AccessClosureRecord, probeAPIKey, expectedModel string, accountIDs []string, suspectResponsesCapabilityMismatch bool) (string, bool, error) { if len(accessClosures) == 0 { return "not_configured", false, nil } @@ -262,24 +248,33 @@ func (s *ReconcileService) rerunAccessClosure(ctx context.Context, batchID int64 return "", false, fmt.Errorf("re-check gateway access: %w", err) } if result.OK && result.HasExpectedModel { - completion, err := s.host.CheckGatewayCompletion(ctx, sub2api.GatewayCompletionCheckRequest{ + completionReq := sub2api.GatewayCompletionCheckRequest{ APIKey: probeAPIKey, Model: expectedModel, Prompt: "ping", MaxTokens: 8, - }) + } + completion, err := s.host.CheckGatewayCompletion(ctx, completionReq) if err != nil { return "", false, fmt.Errorf("re-check gateway completion: %w", err) } + if access.ShouldAttemptOpenAIResponsesCapabilityRepair(suspectResponsesCapabilityMismatch, completion) { + if err := s.host.DisableOpenAIResponsesAPI(ctx, accountIDs); err == nil { + completion, err = s.host.CheckGatewayCompletion(ctx, completionReq) + if err != nil { + return "", false, fmt.Errorf("re-check gateway completion after capability repair: %w", err) + } + } + } result.CompletionOK = completion.OK result.CompletionStatus = completion.StatusCode result.CompletionType = completion.ContentType result.CompletionBody = completion.BodyPreview } - if GatewayAccessReady(result) { + if gatewayAccessReady(result) { status = deriveHealthyAccessStatus(latest.ClosureType) } else { - status = AccessStatusBroken + status = accessStatusBroken } payload, err := json.Marshal(map[string]any{ "status_code": result.StatusCode, @@ -301,17 +296,38 @@ func (s *ReconcileService) rerunAccessClosure(ctx context.Context, batchID int64 return status, true, nil } +func normalizedUniqueAccountIDs(accountIDs []string) []string { + seen := map[string]struct{}{} + values := make([]string, 0, len(accountIDs)) + for _, rawID := range accountIDs { + accountID := strings.TrimSpace(rawID) + if accountID == "" { + continue + } + if _, ok := seen[accountID]; ok { + continue + } + seen[accountID] = struct{}{} + values = append(values, accountID) + } + return values +} + func deriveHealthyAccessStatus(closureType string) string { switch strings.TrimSpace(closureType) { - case AccessModeSubscription: - return AccessStatusSubscriptionReady - case AccessModeSelfService: - return AccessStatusSelfServiceReady + case accessModeSubscription: + return accessStatusSubscriptionReady + case accessModeSelfService: + return accessStatusSelfServiceReady default: return "unknown" } } +func DeriveHealthyAccessStatus(closureType string) string { + return deriveHealthyAccessStatus(closureType) +} + func accessClosureType(accessClosures []sqlite.AccessClosureRecord) string { if len(accessClosures) == 0 { return "" @@ -320,13 +336,13 @@ func accessClosureType(accessClosures []sqlite.AccessClosureRecord) string { } func buildManagedResourceListRequest(provider pack.ProviderManifest, accessMode string) sub2api.ListManagedResourcesRequest { - names := SuggestResourceNamesForMode(provider, accessMode) + names := suggestResourceNamesForMode(provider, accessMode) req := sub2api.ListManagedResourcesRequest{ GroupName: names.Group, ChannelName: names.Channel, - AccountNamePrefix: SuggestAccountNamePrefix(provider), + AccountNamePrefix: suggestAccountNamePrefix(provider), } - if strings.TrimSpace(accessMode) == AccessModeSubscription { + if strings.TrimSpace(accessMode) == accessModeSubscription { req.PlanName = names.Plan } return req @@ -344,7 +360,11 @@ func accountIDFromProbeSummary(summaryJSON string) (string, error) { return strings.TrimSpace(accountID), nil } -func (s *ReconcileService) storedResourcesForReconcile(ctx context.Context, providerID, hostID, batchID int64) ([]sqlite.ManagedResource, error) { +func AccountIDFromProbeSummary(summaryJSON string) (string, error) { + return accountIDFromProbeSummary(summaryJSON) +} + +func (s *Service) storedResourcesForReconcile(ctx context.Context, providerID, hostID, batchID int64) ([]sqlite.ManagedResource, error) { storedResources, err := s.store.ManagedResources().GetByBatchID(ctx, batchID) if err != nil { return nil, err @@ -427,6 +447,10 @@ func diffManagedResources(stored []sqlite.ManagedResource, snapshot sub2api.Mana return missing, extra } +func DiffManagedResources(stored []sqlite.ManagedResource, snapshot sub2api.ManagedResourceSnapshot) (int, int) { + return diffManagedResources(stored, snapshot) +} + func classifyHistoricalAccountNoise(currentBatchResources []sqlite.ManagedResource, snapshotAccounts []sub2api.NamedResource, accountNamePrefix string) []sub2api.NamedResource { currentAccountIDs := make(map[string]struct{}) for _, resource := range currentBatchResources { @@ -455,3 +479,160 @@ func classifyHistoricalAccountNoise(currentBatchResources []sqlite.ManagedResour } return staleNoise } + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if trimmed := strings.TrimSpace(value); trimmed != "" { + return trimmed + } + } + return "" +} + +func hasModel(models []sub2api.AccountModel, target string) bool { + for _, model := range models { + if strings.TrimSpace(model.ID) == strings.TrimSpace(target) { + return true + } + } + return false +} + +func gatewayAccessReady(result sub2api.GatewayAccessResult) bool { + return result.OK && result.HasExpectedModel && result.CompletionOK +} + +func accountValidationStatus(probe sub2api.ProbeResult, smokeModelSeen bool) string { + if !smokeModelSeen { + return accountStatusFailed + } + if probe.OK { + return accountStatusPassed + } + if isAdvisoryAccountProbeFailure(probe) { + return accountStatusWarning + } + return accountStatusFailed +} + +func isAdvisoryAccountProbeFailure(probe sub2api.ProbeResult) bool { + if probe.OK { + return false + } + + message := strings.ToLower(strings.TrimSpace(probe.Message)) + if message == "" { + return false + } + + if isTransientAccountProbeFailure(message) { + return true + } + + if strings.Contains(message, "api returned 403: forbidden") { + return true + } + + if !strings.Contains(message, "responses api") { + return false + } + + return strings.Contains(message, "当前测试接口仅支持") || + strings.Contains(message, "账号本身可正常使用") || + strings.Contains(message, "please directly") || + strings.Contains(message, "actual api") +} + +func isTransientAccountProbeFailure(message string) bool { + if !(strings.Contains(message, "429") || + strings.Contains(message, "rate limit") || + strings.Contains(message, "too many requests") || + strings.Contains(message, "502") || + strings.Contains(message, "503") || + strings.Contains(message, "504") || + strings.Contains(message, "bad gateway") || + strings.Contains(message, "service unavailable") || + strings.Contains(message, "timeout")) { + return false + } + + return strings.Contains(message, "api returned") || + strings.Contains(message, "rate_limit") || + strings.Contains(message, "upstream") || + strings.Contains(message, "temporar") || + strings.Contains(message, "retry") +} + +type resourceNames struct { + Group string + Channel string + Plan string +} + +func suggestAccountNamePrefix(provider pack.ProviderManifest) string { + return fmt.Sprintf("%s-", resourceSlug(provider.ProviderID)) +} + +func suggestResourceNames(provider pack.ProviderManifest) resourceNames { + slug := resourceSlug(provider.ProviderID) + return resourceNames{ + Group: fallbackString(strings.TrimSpace(provider.GroupTemplate.Name), fmt.Sprintf("crm-%s-group", slug)), + Channel: fallbackString(strings.TrimSpace(provider.ChannelTemplate.Name), fmt.Sprintf("crm-%s-channel", slug)), + Plan: fallbackString(strings.TrimSpace(provider.PlanTemplate.Name), fmt.Sprintf("crm-%s-plan", slug)), + } +} + +func suggestResourceNamesForMode(provider pack.ProviderManifest, accessMode string) resourceNames { + base := suggestResourceNames(provider) + suffix := accessModeResourceSuffix(accessMode) + if suffix == "" { + return base + } + return resourceNames{ + Group: appendResourceNameSuffix(base.Group, suffix), + Channel: appendResourceNameSuffix(base.Channel, suffix), + Plan: appendResourceNameSuffix(base.Plan, suffix), + } +} + +func accessModeResourceSuffix(accessMode string) string { + switch strings.TrimSpace(accessMode) { + case accessModeSubscription: + return "subscription" + case accessModeSelfService: + return "self-service" + default: + return "" + } +} + +func appendResourceNameSuffix(name, suffix string) string { + name = strings.TrimSpace(name) + suffix = strings.TrimSpace(suffix) + if name == "" || suffix == "" { + return name + } + if strings.HasSuffix(name, "-"+suffix) { + return name + } + return name + "-" + suffix +} + +func resourceSlug(raw string) string { + slug := strings.ToLower(strings.TrimSpace(raw)) + slug = nonSlugPattern.ReplaceAllString(slug, "-") + slug = strings.Trim(slug, "-") + if slug == "" { + return "provider" + } + return slug +} + +func fallbackString(values ...string) string { + for _, value := range values { + if trimmed := strings.TrimSpace(value); trimmed != "" { + return trimmed + } + } + return "" +} diff --git a/internal/reconcile/service_test.go b/internal/reconcile/service_test.go new file mode 100644 index 00000000..c6193000 --- /dev/null +++ b/internal/reconcile/service_test.go @@ -0,0 +1,104 @@ +package reconcile + +import ( + "testing" + + "sub2api-cn-relay-manager/internal/host/sub2api" + "sub2api-cn-relay-manager/internal/pack" + "sub2api-cn-relay-manager/internal/store/sqlite" +) + +func TestAccountIDFromProbeSummary(t *testing.T) { + t.Parallel() + + accountID, err := AccountIDFromProbeSummary(`{"account_id":" account_1 "}`) + if err != nil { + t.Fatalf("AccountIDFromProbeSummary() error = %v", err) + } + if accountID != "account_1" { + t.Fatalf("accountID = %q, want account_1", accountID) + } + if _, err := AccountIDFromProbeSummary(`{`); err == nil { + t.Fatal("AccountIDFromProbeSummary() error = nil, want JSON decode error") + } + blank, err := AccountIDFromProbeSummary("") + if err != nil { + t.Fatalf("AccountIDFromProbeSummary(blank) error = %v", err) + } + if blank != "" { + t.Fatalf("blank accountID = %q, want empty", blank) + } +} + +func TestDiffManagedResourcesCountsMissingAndExtra(t *testing.T) { + t.Parallel() + + missing, extra := DiffManagedResources( + []sqlite.ManagedResource{ + {ResourceType: "group", HostResourceID: "group_1"}, + {ResourceType: "account", HostResourceID: "account_1"}, + }, + sub2api.ManagedResourceSnapshot{ + Groups: []sub2api.NamedResource{{ID: "group_1"}}, + Accounts: []sub2api.NamedResource{{ID: "account_2"}}, + }, + ) + if missing != 1 || extra != 1 { + t.Fatalf("DiffManagedResources() = (%d, %d), want (1, 1)", missing, extra) + } +} + +func TestDeriveHealthyAccessStatus(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + closureType string + want string + }{ + {name: "subscription", closureType: accessModeSubscription, want: accessStatusSubscriptionReady}, + {name: "self-service", closureType: accessModeSelfService, want: accessStatusSelfServiceReady}, + {name: "unknown", closureType: "other", want: "unknown"}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + if got := DeriveHealthyAccessStatus(tc.closureType); got != tc.want { + t.Fatalf("DeriveHealthyAccessStatus(%q) = %q, want %q", tc.closureType, got, tc.want) + } + }) + } +} + +func TestBuildManagedResourceListRequestUsesModeSpecificNames(t *testing.T) { + t.Parallel() + + provider := pack.ProviderManifest{ProviderID: "deepseek"} + subscriptionReq := buildManagedResourceListRequest(provider, accessModeSubscription) + if subscriptionReq.PlanName == "" { + t.Fatal("subscription PlanName = empty, want mode-specific plan") + } + if subscriptionReq.AccountNamePrefix != "deepseek-" { + t.Fatalf("subscription AccountNamePrefix = %q, want deepseek-", subscriptionReq.AccountNamePrefix) + } + + selfServiceReq := buildManagedResourceListRequest(provider, accessModeSelfService) + if selfServiceReq.PlanName != "" { + t.Fatalf("selfService PlanName = %q, want empty", selfServiceReq.PlanName) + } + if selfServiceReq.GroupName == subscriptionReq.GroupName { + t.Fatalf("GroupName = %q, want different names per access mode", selfServiceReq.GroupName) + } +} + +func TestAccountValidationStatusTreatsResponsesRaceAsWarning(t *testing.T) { + t.Parallel() + + status := accountValidationStatus(sub2api.ProbeResult{ + OK: false, + Status: "failed", + Message: "账号本身可正常使用,但当前测试接口仅支持 Responses API 路径。请直接通过实际 API 调用验证。", + }, true) + if status != accountStatusWarning { + t.Fatalf("accountValidationStatus() = %q, want %q", status, accountStatusWarning) + } +} diff --git a/internal/store/migrations/0009_batch_import_run_request_context.sql b/internal/store/migrations/0009_batch_import_run_request_context.sql new file mode 100644 index 00000000..bd9753ab --- /dev/null +++ b/internal/store/migrations/0009_batch_import_run_request_context.sql @@ -0,0 +1,6 @@ +ALTER TABLE import_runs ADD COLUMN host_id TEXT NOT NULL DEFAULT ''; +ALTER TABLE import_runs ADD COLUMN subscription_users_json TEXT NOT NULL DEFAULT '[]'; +ALTER TABLE import_runs ADD COLUMN subscription_days INTEGER NOT NULL DEFAULT 0; +ALTER TABLE import_runs ADD COLUMN probe_api_key TEXT NOT NULL DEFAULT ''; + +CREATE INDEX idx_import_runs_host_id ON import_runs(host_id); diff --git a/internal/store/sqlite/import_batches_repo.go b/internal/store/sqlite/import_batches_repo.go index 8799074d..7f9394e7 100644 --- a/internal/store/sqlite/import_batches_repo.go +++ b/internal/store/sqlite/import_batches_repo.go @@ -182,6 +182,36 @@ func (r *ImportBatchesRepo) ListByProviderIDAndHostID(ctx context.Context, provi return batches, nil } +func (r *ImportBatchesRepo) ListLatestReconcilable(ctx context.Context) ([]ImportBatch, error) { + rows, err := r.db.QueryContext(ctx, ` + SELECT ib.id, ib.host_id, ib.pack_id, ib.provider_id, ib.mode, ib.batch_status, ib.access_status + FROM import_batches ib + INNER JOIN ( + SELECT provider_id, host_id, MAX(id) AS latest_id + FROM import_batches + GROUP BY provider_id, host_id + ) latest ON latest.latest_id = ib.id + WHERE ib.batch_status IN ('succeeded', 'partially_succeeded') + ORDER BY ib.id DESC`) + if err != nil { + return nil, fmt.Errorf("query latest reconcilable import batches: %w", err) + } + defer rows.Close() + + batches := make([]ImportBatch, 0) + for rows.Next() { + var batch ImportBatch + if err := rows.Scan(&batch.ID, &batch.HostID, &batch.PackID, &batch.ProviderID, &batch.Mode, &batch.BatchStatus, &batch.AccessStatus); err != nil { + return nil, fmt.Errorf("scan latest reconcilable import batch: %w", err) + } + batches = append(batches, batch) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("iterate latest reconcilable import batches: %w", err) + } + return batches, nil +} + func (r *ImportBatchItemsRepo) GetByBatchID(ctx context.Context, batchID int64) ([]ImportBatchItem, error) { if batchID <= 0 { return nil, fmt.Errorf("batch_id is required") diff --git a/internal/store/sqlite/import_run_items_repo.go b/internal/store/sqlite/import_run_items_repo.go index d3ed5528..0a4d44a1 100644 --- a/internal/store/sqlite/import_run_items_repo.go +++ b/internal/store/sqlite/import_run_items_repo.go @@ -5,6 +5,7 @@ import ( "database/sql" "fmt" "strings" + "time" ) type ImportRunItem struct { @@ -236,6 +237,47 @@ func (r *ImportRunItemsRepo) ListByRunID(ctx context.Context, runID string) ([]I return items, nil } +func (r *ImportRunItemsRepo) TryAcquireConfirmationLease(ctx context.Context, itemID, workerID string, now time.Time, leaseDuration time.Duration) (ImportRunItem, bool, error) { + itemID = strings.TrimSpace(itemID) + workerID = strings.TrimSpace(workerID) + if itemID == "" { + return ImportRunItem{}, false, fmt.Errorf("item_id is required") + } + if workerID == "" { + return ImportRunItem{}, false, fmt.Errorf("worker_id is required") + } + if leaseDuration <= 0 { + leaseDuration = time.Minute + } + + nowText := now.UTC().Format(time.RFC3339) + leaseUntil := now.UTC().Add(leaseDuration).Format(time.RFC3339) + result, err := r.db.ExecContext(ctx, `UPDATE import_run_items + SET lease_owner = ?, lease_until = ?, confirmation_attempts = confirmation_attempts + 1, updated_at = CURRENT_TIMESTAMP + WHERE item_id = ? + AND current_stage = 'confirm' + AND confirmation_status = 'pending' + AND (next_retry_at IS NULL OR next_retry_at = '' OR next_retry_at <= ?) + AND (lease_until IS NULL OR lease_until = '' OR lease_until < ?)`, + workerID, leaseUntil, itemID, nowText, nowText) + if err != nil { + return ImportRunItem{}, false, fmt.Errorf("acquire confirmation lease for %q: %w", itemID, err) + } + rows, err := result.RowsAffected() + if err != nil { + return ImportRunItem{}, false, err + } + if rows == 0 { + return ImportRunItem{}, false, nil + } + + item, err := r.GetByItemID(ctx, itemID) + if err != nil { + return ImportRunItem{}, false, err + } + return item, true, nil +} + func boolToInt(value bool) int { if value { return 1 diff --git a/internal/store/sqlite/import_runs_repo.go b/internal/store/sqlite/import_runs_repo.go index 14c8b135..214d8bad 100644 --- a/internal/store/sqlite/import_runs_repo.go +++ b/internal/store/sqlite/import_runs_repo.go @@ -7,19 +7,23 @@ import ( ) type ImportRun struct { - RunID string - Mode string - AccessMode string - State string - TotalItems int - CompletedItems int - ActiveItems int - DegradedItems int - BrokenItems int - WarningItems int - StartedAt string - UpdatedAt string - FinishedAt string + RunID string + HostID string + Mode string + AccessMode string + SubscriptionUsersJSON string + SubscriptionDays int + ProbeAPIKey string + State string + TotalItems int + CompletedItems int + ActiveItems int + DegradedItems int + BrokenItems int + WarningItems int + StartedAt string + UpdatedAt string + FinishedAt string } type ImportRunsRepo struct { @@ -32,13 +36,18 @@ func newImportRunsRepo(db execQuerier) *ImportRunsRepo { func (r *ImportRunsRepo) Create(ctx context.Context, run ImportRun) error { runID := strings.TrimSpace(run.RunID) + hostID := strings.TrimSpace(run.HostID) mode := strings.TrimSpace(run.Mode) accessMode := strings.TrimSpace(run.AccessMode) + subscriptionUsersJSON := defaultJSON(run.SubscriptionUsersJSON, "[]") + probeAPIKey := strings.TrimSpace(run.ProbeAPIKey) state := strings.TrimSpace(run.State) switch { case runID == "": return fmt.Errorf("run_id is required") + case hostID == "": + return fmt.Errorf("host_id is required") case mode == "": return fmt.Errorf("mode is required") case accessMode == "": @@ -47,8 +56,8 @@ func (r *ImportRunsRepo) Create(ctx context.Context, run ImportRun) error { return fmt.Errorf("state is required") } - if _, err := r.db.ExecContext(ctx, `INSERT INTO import_runs (run_id, mode, access_mode, state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, - runID, mode, accessMode, state, run.TotalItems, run.CompletedItems, run.ActiveItems, run.DegradedItems, run.BrokenItems, run.WarningItems); err != nil { + if _, err := r.db.ExecContext(ctx, `INSERT INTO import_runs (run_id, host_id, mode, access_mode, subscription_users_json, subscription_days, probe_api_key, state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + runID, hostID, mode, accessMode, subscriptionUsersJSON, run.SubscriptionDays, probeAPIKey, state, run.TotalItems, run.CompletedItems, run.ActiveItems, run.DegradedItems, run.BrokenItems, run.WarningItems); err != nil { return fmt.Errorf("insert import run %q: %w", runID, err) } return nil @@ -56,14 +65,19 @@ func (r *ImportRunsRepo) Create(ctx context.Context, run ImportRun) error { func (r *ImportRunsRepo) Update(ctx context.Context, run ImportRun) error { runID := strings.TrimSpace(run.RunID) + hostID := strings.TrimSpace(run.HostID) mode := strings.TrimSpace(run.Mode) accessMode := strings.TrimSpace(run.AccessMode) + subscriptionUsersJSON := defaultJSON(run.SubscriptionUsersJSON, "[]") + probeAPIKey := strings.TrimSpace(run.ProbeAPIKey) state := strings.TrimSpace(run.State) finishedAt := strings.TrimSpace(run.FinishedAt) switch { case runID == "": return fmt.Errorf("run_id is required") + case hostID == "": + return fmt.Errorf("host_id is required") case mode == "": return fmt.Errorf("mode is required") case accessMode == "": @@ -73,9 +87,9 @@ func (r *ImportRunsRepo) Update(ctx context.Context, run ImportRun) error { } if _, err := r.db.ExecContext(ctx, `UPDATE import_runs - SET mode = ?, access_mode = ?, state = ?, total_items = ?, completed_items = ?, active_items = ?, degraded_items = ?, broken_items = ?, warning_items = ?, finished_at = ?, updated_at = CURRENT_TIMESTAMP - WHERE run_id = ?`, - mode, accessMode, state, run.TotalItems, run.CompletedItems, run.ActiveItems, run.DegradedItems, run.BrokenItems, run.WarningItems, nullableString(finishedAt), runID); err != nil { + SET host_id = ?, mode = ?, access_mode = ?, subscription_users_json = ?, subscription_days = ?, probe_api_key = ?, state = ?, total_items = ?, completed_items = ?, active_items = ?, degraded_items = ?, broken_items = ?, warning_items = ?, finished_at = ?, updated_at = CURRENT_TIMESTAMP + WHERE run_id = ?`, + hostID, mode, accessMode, subscriptionUsersJSON, run.SubscriptionDays, probeAPIKey, state, run.TotalItems, run.CompletedItems, run.ActiveItems, run.DegradedItems, run.BrokenItems, run.WarningItems, nullableString(finishedAt), runID); err != nil { return fmt.Errorf("update import run %q: %w", runID, err) } return nil @@ -88,8 +102,8 @@ func (r *ImportRunsRepo) GetByRunID(ctx context.Context, runID string) (ImportRu } var run ImportRun - if err := r.db.QueryRowContext(ctx, `SELECT run_id, mode, access_mode, state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items, started_at, updated_at, COALESCE(finished_at, '') FROM import_runs WHERE run_id = ?`, runID). - Scan(&run.RunID, &run.Mode, &run.AccessMode, &run.State, &run.TotalItems, &run.CompletedItems, &run.ActiveItems, &run.DegradedItems, &run.BrokenItems, &run.WarningItems, &run.StartedAt, &run.UpdatedAt, &run.FinishedAt); err != nil { + if err := r.db.QueryRowContext(ctx, `SELECT run_id, host_id, mode, access_mode, subscription_users_json, subscription_days, COALESCE(probe_api_key, ''), state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items, started_at, updated_at, COALESCE(finished_at, '') FROM import_runs WHERE run_id = ?`, runID). + Scan(&run.RunID, &run.HostID, &run.Mode, &run.AccessMode, &run.SubscriptionUsersJSON, &run.SubscriptionDays, &run.ProbeAPIKey, &run.State, &run.TotalItems, &run.CompletedItems, &run.ActiveItems, &run.DegradedItems, &run.BrokenItems, &run.WarningItems, &run.StartedAt, &run.UpdatedAt, &run.FinishedAt); err != nil { return ImportRun{}, err } return run, nil @@ -100,7 +114,7 @@ func (r *ImportRunsRepo) List(ctx context.Context, limit int) ([]ImportRun, erro limit = 50 } - rows, err := r.db.QueryContext(ctx, `SELECT run_id, mode, access_mode, state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items, started_at, updated_at, COALESCE(finished_at, '') FROM import_runs ORDER BY started_at DESC LIMIT ?`, limit) + rows, err := r.db.QueryContext(ctx, `SELECT run_id, host_id, mode, access_mode, subscription_users_json, subscription_days, COALESCE(probe_api_key, ''), state, total_items, completed_items, active_items, degraded_items, broken_items, warning_items, started_at, updated_at, COALESCE(finished_at, '') FROM import_runs ORDER BY started_at DESC LIMIT ?`, limit) if err != nil { return nil, fmt.Errorf("list import runs: %w", err) } @@ -109,7 +123,7 @@ func (r *ImportRunsRepo) List(ctx context.Context, limit int) ([]ImportRun, erro runs := make([]ImportRun, 0) for rows.Next() { var run ImportRun - if err := rows.Scan(&run.RunID, &run.Mode, &run.AccessMode, &run.State, &run.TotalItems, &run.CompletedItems, &run.ActiveItems, &run.DegradedItems, &run.BrokenItems, &run.WarningItems, &run.StartedAt, &run.UpdatedAt, &run.FinishedAt); err != nil { + if err := rows.Scan(&run.RunID, &run.HostID, &run.Mode, &run.AccessMode, &run.SubscriptionUsersJSON, &run.SubscriptionDays, &run.ProbeAPIKey, &run.State, &run.TotalItems, &run.CompletedItems, &run.ActiveItems, &run.DegradedItems, &run.BrokenItems, &run.WarningItems, &run.StartedAt, &run.UpdatedAt, &run.FinishedAt); err != nil { return nil, fmt.Errorf("scan import run: %w", err) } runs = append(runs, run) diff --git a/internal/store/sqlite/import_runs_repo_test.go b/internal/store/sqlite/import_runs_repo_test.go index 41173413..62c67907 100644 --- a/internal/store/sqlite/import_runs_repo_test.go +++ b/internal/store/sqlite/import_runs_repo_test.go @@ -13,11 +13,15 @@ func TestRunStateStore(t *testing.T) { store := openTestDB(t) run := ImportRun{ - RunID: "run-1", - Mode: "strict", - AccessMode: "subscription", - State: "running", - TotalItems: 1, + RunID: "run-1", + HostID: "host-1", + Mode: "strict", + AccessMode: "subscription", + SubscriptionUsersJSON: `["user-1"]`, + SubscriptionDays: 30, + ProbeAPIKey: "probe-key", + State: "running", + TotalItems: 1, } if err := store.ImportRuns().Create(ctx, run); err != nil { t.Fatalf("ImportRuns().Create() error = %v", err) @@ -42,6 +46,18 @@ func TestRunStateStore(t *testing.T) { if gotRun.WarningItems != 1 { t.Fatalf("run.WarningItems = %d, want 1", gotRun.WarningItems) } + if gotRun.HostID != "host-1" { + t.Fatalf("run.HostID = %q, want host-1", gotRun.HostID) + } + if gotRun.SubscriptionUsersJSON != `["user-1"]` { + t.Fatalf("run.SubscriptionUsersJSON = %q, want persisted subscription users", gotRun.SubscriptionUsersJSON) + } + if gotRun.SubscriptionDays != 30 { + t.Fatalf("run.SubscriptionDays = %d, want 30", gotRun.SubscriptionDays) + } + if gotRun.ProbeAPIKey != "probe-key" { + t.Fatalf("run.ProbeAPIKey = %q, want probe-key", gotRun.ProbeAPIKey) + } legacyBatchID := int64(88) reusedAccountID := int64(321) diff --git a/internal/store/sqlite/reconcile_runs_repo.go b/internal/store/sqlite/reconcile_runs_repo.go index 69ff9195..9037c1b0 100644 --- a/internal/store/sqlite/reconcile_runs_repo.go +++ b/internal/store/sqlite/reconcile_runs_repo.go @@ -13,6 +13,7 @@ type ReconcileRun struct { ProviderID int64 Status string SummaryJSON string + CreatedAt string } type ReconcileRunsRepo struct { @@ -58,7 +59,7 @@ func (r *ReconcileRunsRepo) GetByBatchID(ctx context.Context, batchID int64) ([] return nil, fmt.Errorf("batch_id is required") } - rows, err := r.db.QueryContext(ctx, `SELECT id, batch_id, host_id, provider_id, status, summary_json FROM reconcile_runs WHERE batch_id = ? ORDER BY id DESC`, batchID) + rows, err := r.db.QueryContext(ctx, `SELECT id, batch_id, host_id, provider_id, status, summary_json, created_at FROM reconcile_runs WHERE batch_id = ? ORDER BY id DESC`, batchID) if err != nil { return nil, fmt.Errorf("query reconcile runs by batch_id: %w", err) } @@ -67,7 +68,7 @@ func (r *ReconcileRunsRepo) GetByBatchID(ctx context.Context, batchID int64) ([] runs := make([]ReconcileRun, 0) for rows.Next() { var run ReconcileRun - if err := rows.Scan(&run.ID, &run.BatchID, &run.HostID, &run.ProviderID, &run.Status, &run.SummaryJSON); err != nil { + if err := rows.Scan(&run.ID, &run.BatchID, &run.HostID, &run.ProviderID, &run.Status, &run.SummaryJSON, &run.CreatedAt); err != nil { return nil, fmt.Errorf("scan reconcile run by batch_id: %w", err) } runs = append(runs, run) @@ -86,7 +87,7 @@ func (r *ReconcileRunsRepo) GetByProviderIDAndHostID(ctx context.Context, provid return nil, fmt.Errorf("host_id is required") } - rows, err := r.db.QueryContext(ctx, `SELECT id, batch_id, host_id, provider_id, status, summary_json FROM reconcile_runs WHERE provider_id = ? AND host_id = ? ORDER BY id DESC`, providerID, hostID) + rows, err := r.db.QueryContext(ctx, `SELECT id, batch_id, host_id, provider_id, status, summary_json, created_at FROM reconcile_runs WHERE provider_id = ? AND host_id = ? ORDER BY id DESC`, providerID, hostID) if err != nil { return nil, fmt.Errorf("query reconcile runs: %w", err) } @@ -95,7 +96,7 @@ func (r *ReconcileRunsRepo) GetByProviderIDAndHostID(ctx context.Context, provid runs := make([]ReconcileRun, 0) for rows.Next() { var run ReconcileRun - if err := rows.Scan(&run.ID, &run.BatchID, &run.HostID, &run.ProviderID, &run.Status, &run.SummaryJSON); err != nil { + if err := rows.Scan(&run.ID, &run.BatchID, &run.HostID, &run.ProviderID, &run.Status, &run.SummaryJSON, &run.CreatedAt); err != nil { return nil, fmt.Errorf("scan reconcile run: %w", err) } runs = append(runs, run) diff --git a/internal/store/sqlite/reconcile_runs_repo_test.go b/internal/store/sqlite/reconcile_runs_repo_test.go new file mode 100644 index 00000000..80c500e3 --- /dev/null +++ b/internal/store/sqlite/reconcile_runs_repo_test.go @@ -0,0 +1,56 @@ +package sqlite + +import ( + "context" + "testing" +) + +func TestReconcileRunsRepoPersistsCreatedAt(t *testing.T) { + t.Parallel() + + store := openTestDB(t) + hostID := createTestHostWithBaseURL(t, store, "host-1", "https://host.example") + packID := createTestPack(t, store) + providerID, err := store.Providers().Create(context.Background(), Provider{ + PackID: packID, + ProviderID: "provider-1", + DisplayName: "Provider 1", + BaseURL: "https://provider.example", + Platform: "openai", + }) + if err != nil { + t.Fatalf("Providers().Create() error = %v", err) + } + batchID, err := store.ImportBatches().Create(context.Background(), ImportBatch{ + HostID: hostID, + PackID: packID, + ProviderID: providerID, + Mode: "partial", + BatchStatus: "succeeded", + AccessStatus: "self_service_ready", + }) + if err != nil { + t.Fatalf("ImportBatches().Create() error = %v", err) + } + + if _, err := store.ReconcileRuns().Create(context.Background(), ReconcileRun{ + BatchID: batchID, + HostID: hostID, + ProviderID: providerID, + Status: "active", + SummaryJSON: `{"ok":true}`, + }); err != nil { + t.Fatalf("Create() error = %v", err) + } + + runs, err := store.ReconcileRuns().GetByProviderIDAndHostID(context.Background(), providerID, hostID) + if err != nil { + t.Fatalf("GetByProviderIDAndHostID() error = %v", err) + } + if len(runs) != 1 { + t.Fatalf("reconcile runs = %d, want 1", len(runs)) + } + if runs[0].CreatedAt == "" { + t.Fatal("CreatedAt = empty, want timestamp") + } +} diff --git a/internal/worker/runner.go b/internal/worker/runner.go new file mode 100644 index 00000000..4cbf437b --- /dev/null +++ b/internal/worker/runner.go @@ -0,0 +1,65 @@ +package worker + +import ( + "context" + "time" +) + +type Job interface { + Name() string + Run(context.Context) error +} + +type Logger func(format string, args ...any) + +type Runner struct { + jobs []Job + interval time.Duration + logger Logger +} + +func NewRunner(jobs []Job, interval time.Duration, logger Logger) *Runner { + return &Runner{ + jobs: append([]Job(nil), jobs...), + interval: interval, + logger: logger, + } +} + +func (r *Runner) Start(ctx context.Context) { + if r == nil { + return + } + go func() { + r.runOnce(ctx) + if ctx.Err() != nil || r.interval <= 0 { + return + } + + ticker := time.NewTicker(r.interval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + r.runOnce(ctx) + } + } + }() +} + +func (r *Runner) runOnce(ctx context.Context) { + for _, job := range r.jobs { + if ctx.Err() != nil { + return + } + if job == nil { + continue + } + if err := job.Run(ctx); err != nil && ctx.Err() == nil && r.logger != nil { + r.logger("%s: %v", job.Name(), err) + } + } +} diff --git a/internal/worker/runner_test.go b/internal/worker/runner_test.go new file mode 100644 index 00000000..8643f385 --- /dev/null +++ b/internal/worker/runner_test.go @@ -0,0 +1,53 @@ +package worker + +import ( + "context" + "sync" + "testing" + "time" +) + +func TestRunnerStartRunsJobsImmediatelyAndOnSchedule(t *testing.T) { + t.Parallel() + + job := &stubJob{} + runner := NewRunner([]Job{job}, 10*time.Millisecond, nil) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + runner.Start(ctx) + + deadline := time.Now().Add(250 * time.Millisecond) + for { + if job.Count() >= 2 { + break + } + if time.Now().After(deadline) { + t.Fatalf("job count = %d, want at least 2 scheduled runs", job.Count()) + } + time.Sleep(5 * time.Millisecond) + } +} + +type stubJob struct { + mu sync.Mutex + count int +} + +func (j *stubJob) Name() string { + return "stub" +} + +func (j *stubJob) Run(context.Context) error { + j.mu.Lock() + defer j.mu.Unlock() + j.count++ + return nil +} + +func (j *stubJob) Count() int { + j.mu.Lock() + defer j.mu.Unlock() + return j.count +} diff --git a/tests/integration/store_init_test.go b/tests/integration/store_init_test.go index c35eb44d..662e934b 100644 --- a/tests/integration/store_init_test.go +++ b/tests/integration/store_init_test.go @@ -139,6 +139,17 @@ func TestStoreAppliesLatestMigration(t *testing.T) { } } + for _, column := range []string{ + "host_id", + "subscription_users_json", + "subscription_days", + "probe_api_key", + } { + if !tableColumnExists(t, store.SQLDB(), "import_runs", column) { + t.Fatalf("column %q missing from import_runs", column) + } + } + for _, column := range []string{ "api_key_fingerprint", "canonical_model_families_json",