Harden host deletion and test stability

This commit is contained in:
phamnazage-jpg
2026-05-25 07:30:07 +08:00
parent 916569ccc5
commit 5e76fb20d0
12 changed files with 240 additions and 61 deletions

View File

@@ -37,6 +37,10 @@
- 调通细节与诊断经验已沉淀到:
- `docs/REAL_HOST_ACCEPTANCE_LEARNINGS.md`
- `docs/REAL_HOST_ARTIFACT_RETENTION.md`
- 2026-05-24 本地代码门禁修复已继续收口三类非回归点:
- `go test -race ./... -count=1` 现已再次真实通过;根因不是业务逻辑,而是多个测试包并行 `sqlite.Open()` 时与 `modernc.org/sqlite` 初始化路径的 race 噪音。当前已把 `internal/app`、`internal/provision`、`internal/reconcile` 的测试 SQLite 打开路径收口到串行 helper,关闭这类假红灯,同时保持 sqlite 包内测试不引入导入环。
- `DELETE /api/hosts/{hostID}` 不再默认放行危险级联删除;`hosts` repo 现在会先统计 `import_batches / managed_resources / reconcile_runs` 三类运行态依赖,有残留时返回 `409 host_in_use`,避免误删状态库里的回滚/对账真相。
- 控制面 JSON 请求体现在统一受 `MaxBytesReader` 限制;超限请求会明确返回 `413 request_too_large`,不再允许无界 body 直接进入解码路径。
## 本轮已完成
@@ -88,6 +92,11 @@
14. relay-manager latest-head 已补宿主升级后的 capability 自愈
- 针对 `API returned 403: Forbidden` 这类 `/responses` 误判 advisory,控制面现在会在 access closure 与 reconcile rerun 中把目标 account 的 `openai_responses_supported` 修正为 `false`,随后重试 gateway `/v1/chat/completions`
- 这样即使宿主升级或异步 probe 把 capability 标记覆写错,控制面也能在“安装后确认”与“后台持续对账”两个环节重新拉回可用状态
15. 2026-05-24 本地质量门禁补丁已完成
- 新增 repo 级删除保护:`internal/store/sqlite/hosts_repo.go` 引入 `RuntimeDependencyCountsByHostID` 与 `HostDeleteBlocker`
- 新增回归测试:`TestHostsRepoDeleteByHostIDBlocksWhenRuntimeStateExists`、`TestBatchImportRejectsOversizedJSONBody`、`TestDecodeJSON/rejects oversized request body`
- `internal/app/http_api.go` 现已统一限制 JSON request body 大小,并把 host 删除占用态映射为 `host_in_use`
- `internal/app` / `internal/provision` / `internal/reconcile` 测试 SQLite 打开路径已改为串行 helper,当前 `go test -race ./... -count=1` 重新恢复为绿
## 已验证门禁

View File

@@ -5,12 +5,10 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
@@ -20,6 +18,7 @@ import (
"sub2api-cn-relay-manager/internal/provision"
"sub2api-cn-relay-manager/internal/reconcile"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestServeExposesHealthz(t *testing.T) {
@@ -497,6 +496,19 @@ func TestDecodeJSON(t *testing.T) {
t.Fatalf("Message = %q, want single object error", err.Message)
}
})
t.Run("rejects oversized request body", func(t *testing.T) {
payload := `{"host_base_url":"https://example.com","pack_path":"` + strings.Repeat("x", int(maxJSONBodyBytes)) + `"}`
request := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(payload))
var got InstallPackRequest
err := decodeJSON(request, &got)
if err == nil {
t.Fatal("decodeJSON() error = nil, want oversized error")
}
if err.StatusCode != http.StatusRequestEntityTooLarge || err.Code != "request_too_large" {
t.Fatalf("decodeJSON() = %#v, want request_too_large", err)
}
})
}
func TestWriteJSON(t *testing.T) {
@@ -975,20 +987,12 @@ func TestHostSupportStatusRequiresPlansCapability(t *testing.T) {
func openAppTestStore(t *testing.T) *sqlite.DB {
t.Helper()
dbPath := filepath.Join(t.TempDir(), "state.db")
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(dbPath))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
return store
return testutil.OpenSQLiteStore(t, testutil.SQLiteTestDSN(t, "state.db", true))
}
func closeAppTestStore(t *testing.T, store *sqlite.DB) {
t.Helper()
if err := store.Close(); err != nil {
t.Fatalf("store.Close() error = %v", err)
}
testutil.CloseSQLiteStore(t, store)
}
func assertJSONContains(t *testing.T, payload []byte, key string, want any) {

View File

@@ -2,12 +2,11 @@ package app
import (
"context"
"fmt"
"net/http/httptest"
"path/filepath"
"testing"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestResumePendingBatchImportRunsCompletesStoredRun(t *testing.T) {
@@ -16,11 +15,8 @@ func TestResumePendingBatchImportRunsCompletesStoredRun(t *testing.T) {
server := httptest.NewServer(newBatchImportActionStubServer(t))
defer server.Close()
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db")))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
dsn := testutil.SQLiteTestDSN(t, "state.db", true)
store := testutil.OpenSQLiteStore(t, dsn)
defer closeAppTestStore(t, store)
if _, err := store.SQLDB().Exec("PRAGMA foreign_keys = OFF"); err != nil {
t.Fatalf("disable foreign keys pragma error = %v", err)

View File

@@ -1545,11 +1545,13 @@ func TestActionSetHostClosuresAndAccessPreview(t *testing.T) {
t.Fatalf("AccessPreview(subscription) = %+v, want available=false", preview)
}
if err := actions.DeleteHost(context.Background(), "host-main"); err != nil {
t.Fatalf("DeleteHost() error = %v", err)
}
if _, err := store.Hosts().GetByHostID(context.Background(), "host-main"); err == nil {
t.Fatal("DeleteHost() did not remove host-main")
if err := actions.DeleteHost(context.Background(), "host-main"); err == nil {
t.Fatal("DeleteHost() error = nil, want host_in_use conflict")
} else {
httpErr, ok := err.(*httpError)
if !ok || httpErr.StatusCode != http.StatusConflict || httpErr.Code != "host_in_use" {
t.Fatalf("DeleteHost() error = %T %v, want *httpError host_in_use conflict", err, err)
}
}
}

View File

@@ -49,6 +49,8 @@ type ActionSet struct {
AccessPreview func(context.Context, AccessPreviewRequest) (AccessPreviewResult, error)
}
// maxJSONBodyBytes is the byte limit (1 << 20, i.e. 1 MiB) that decodeJSON
// passes to http.MaxBytesReader when reading a JSON request body.
const maxJSONBodyBytes int64 = 1 << 20
type HostInfo struct {
HostID string `json:"host_id"`
BaseURL string `json:"base_url"`
@@ -834,9 +836,17 @@ func handleDeleteHost(w http.ResponseWriter, r *http.Request, fn func(context.Co
}
func decodeJSON(r *http.Request, dest any) *httpError {
if r == nil {
return &httpError{StatusCode: http.StatusBadRequest, Code: "bad_request", Message: "request is required"}
}
r.Body = http.MaxBytesReader(nil, r.Body, maxJSONBodyBytes)
decoder := json.NewDecoder(r.Body)
decoder.DisallowUnknownFields()
if err := decoder.Decode(dest); err != nil {
var maxBytesErr *http.MaxBytesError
if errors.As(err, &maxBytesErr) {
return &httpError{StatusCode: http.StatusRequestEntityTooLarge, Code: "request_too_large", Message: fmt.Sprintf("request body exceeds %d bytes", maxJSONBodyBytes)}
}
return &httpError{StatusCode: http.StatusBadRequest, Code: "bad_request", Message: fmt.Sprintf("decode request body: %v", err)}
}
if err := decoder.Decode(&struct{}{}); err != nil && !errors.Is(err, io.EOF) {
@@ -870,6 +880,10 @@ func classifyError(err error) *httpError {
if errors.As(err, &upstreamErr) {
return &httpError{StatusCode: http.StatusBadGateway, Code: "host_request_failed", Message: err.Error(), UpstreamStatus: upstreamErr.StatusCode}
}
var hostDeleteBlocker *sqlite.HostDeleteBlocker
if errors.As(err, &hostDeleteBlocker) {
return &httpError{StatusCode: http.StatusConflict, Code: "host_in_use", Message: err.Error()}
}
message := err.Error()
switch {
case strings.Contains(message, "already installed") || strings.Contains(message, "checksum drift"):
@@ -1254,7 +1268,10 @@ func NewActionSet(sqliteDSN string) ActionSet {
return err
}
defer store.Close()
return store.Hosts().DeleteByHostID(ctx, hostID)
if err := store.Hosts().DeleteByHostID(ctx, hostID); err != nil {
return classifyError(err)
}
return nil
},
ListPacks: func(ctx context.Context) ([]PackInfo, error) {
store, err := sqlite.Open(ctx, sqliteDSN)

View File

@@ -5,11 +5,11 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestBatchImportHTTP(t *testing.T) {
@@ -139,11 +139,8 @@ func TestBatchImportHTTP(t *testing.T) {
server := httptest.NewServer(newBatchImportActionStubServer(t))
defer server.Close()
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db")))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
dsn := testutil.SQLiteTestDSN(t, "state.db", true)
store := testutil.OpenSQLiteStore(t, dsn)
defer closeAppTestStore(t, store)
if _, err := store.Hosts().Create(context.Background(), sqlite.Host{
@@ -260,6 +257,24 @@ func TestBatchImportWrapperFunctions(t *testing.T) {
})
}
func TestBatchImportRejectsOversizedJSONBody(t *testing.T) {
t.Parallel()
handler := NewAPIHandler("secret-token", ActionSet{
CreateBatchImportRun: func(_ context.Context, req CreateBatchImportRunRequest) (BatchImportRunCreateResponse, error) {
t.Fatal("CreateBatchImportRun should not be called for oversized body")
return BatchImportRunCreateResponse{}, nil
},
})
payload := `{"host_id":"host-1","mode":"strict","access_mode":"self_service","probe_api_key":"probe-key","entries":[{"base_url":"https://kimi.example.com/v1","api_key":"` + strings.Repeat("x", int(maxJSONBodyBytes)) + `"}]}`
req := httptest.NewRequest(http.MethodPost, "/api/batch-import/runs", strings.NewReader(payload))
req.Header.Set("Authorization", "Bearer secret-token")
res := httptestRecorder(handler, req)
assertStatusCode(t, res, http.StatusRequestEntityTooLarge)
assertJSONContains(t, res.Body().Bytes(), "error.code", "request_too_large")
}
func newBatchImportActionStubServer(t *testing.T) http.Handler {
t.Helper()

View File

@@ -6,7 +6,6 @@ import (
"fmt"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"time"
@@ -14,6 +13,7 @@ import (
"sub2api-cn-relay-manager/internal/pack"
"sub2api-cn-relay-manager/internal/provision"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestRunReconcileBackgroundSweepCreatesReconcileRunForLatestSuccessfulBatch(t *testing.T) {
@@ -86,11 +86,7 @@ func TestRunReconcileBackgroundSweepSkipsRecentReconcileRun(t *testing.T) {
func openReconcileBackgroundTestStore(t *testing.T) *sqlite.DB {
t.Helper()
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db")))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
store := testutil.OpenSQLiteStore(t, testutil.SQLiteTestDSN(t, "state.db", true))
if _, err := store.SQLDB().Exec("PRAGMA foreign_keys = OFF"); err != nil {
t.Fatalf("disable foreign keys pragma error = %v", err)
}

View File

@@ -5,7 +5,6 @@ import (
"database/sql"
"encoding/json"
"fmt"
"path/filepath"
"strings"
"testing"
@@ -13,6 +12,7 @@ import (
"sub2api-cn-relay-manager/internal/host/sub2api"
"sub2api-cn-relay-manager/internal/pack"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestRuntimeImportServicePersistsOperationalState(t *testing.T) {
@@ -706,21 +706,12 @@ func TestRuntimeImportServiceImportReconcilesExistingChannelConfiguration(t *tes
func openProvisionTestStore(t *testing.T) *sqlite.DB {
t.Helper()
dbPath := filepath.Join(t.TempDir(), "state.db")
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(dbPath))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
return store
return testutil.OpenSQLiteStore(t, testutil.SQLiteTestDSN(t, "state.db", true))
}
func closeProvisionTestStore(t *testing.T, store *sqlite.DB) {
t.Helper()
if err := store.Close(); err != nil {
t.Fatalf("store.Close() error = %v", err)
}
testutil.CloseSQLiteStore(t, store)
}
func seedProvisionHost(t *testing.T, store *sqlite.DB, hostID, baseURL string) int64 {

View File

@@ -4,12 +4,12 @@ import (
"context"
"errors"
"fmt"
"path/filepath"
"testing"
"sub2api-cn-relay-manager/internal/host/sub2api"
"sub2api-cn-relay-manager/internal/pack"
"sub2api-cn-relay-manager/internal/store/sqlite"
"sub2api-cn-relay-manager/internal/testutil"
)
func TestRerunAccountProbesReturnsErrorForInvalidProbeSummary(t *testing.T) {
@@ -513,13 +513,7 @@ type reconcileFixture struct {
func openReconcileTestStore(t *testing.T) *sqlite.DB {
t.Helper()
dsn := fmt.Sprintf("file:%s?_busy_timeout=5000&_pragma=foreign_keys(0)", filepath.ToSlash(filepath.Join(t.TempDir(), "state.db")))
store, err := sqlite.Open(context.Background(), dsn)
if err != nil {
t.Fatalf("sqlite.Open() error = %v", err)
}
return store
return testutil.OpenSQLiteStore(t, testutil.SQLiteTestDSN(t, "state.db", true))
}
func closeReconcileTestStore(t *testing.T, store *sqlite.DB) {

View File

@@ -2,6 +2,7 @@ package sqlite
import (
"context"
"database/sql"
"fmt"
"strings"
)
@@ -158,6 +159,49 @@ func (r *HostsRepo) ListAll(ctx context.Context) ([]Host, error) {
}
return hosts, nil
}
// RuntimeDependencyCountsByHostID counts the import_batches, managed_resources
// and reconcile_runs rows that reference the host identified by hostID.
//
// hostID is trimmed first and an empty value is rejected. The host is resolved
// through GetByHostID, whose error is returned unchanged. On success the result
// holds the host's HostID and the three counts; on any failure a zero
// HostDeleteBlocker is returned together with the error.
func (r *HostsRepo) RuntimeDependencyCountsByHostID(ctx context.Context, hostID string) (HostDeleteBlocker, error) {
	hostID = strings.TrimSpace(hostID)
	if hostID == "" {
		return HostDeleteBlocker{}, fmt.Errorf("host_id is required")
	}

	host, err := r.GetByHostID(ctx, hostID)
	if err != nil {
		return HostDeleteBlocker{}, err
	}

	counts := HostDeleteBlocker{HostID: host.HostID}

	// Each query binds host.ID (taken from the resolved host), not the hostID
	// string argument. Queries run in this order and stop at the first failure.
	steps := []struct {
		label string
		query string
		dest  *int
	}{
		{"import batches", `SELECT COUNT(1) FROM import_batches WHERE host_id = ?`, &counts.ImportBatchCount},
		{"managed resources", `SELECT COUNT(1) FROM managed_resources WHERE host_id = ?`, &counts.ManagedResourceCount},
		{"reconcile runs", `SELECT COUNT(1) FROM reconcile_runs WHERE host_id = ?`, &counts.ReconcileRunCount},
	}
	for _, step := range steps {
		if scanErr := r.db.QueryRowContext(ctx, step.query, host.ID).Scan(step.dest); scanErr != nil {
			return HostDeleteBlocker{}, fmt.Errorf("count %s for host %q: %w", step.label, hostID, scanErr)
		}
	}
	return counts, nil
}
// HostDeleteBlocker holds per-table runtime dependency counts for one host.
// *HostDeleteBlocker implements error; its message states that the host cannot
// be deleted while runtime state exists and lists the three counts.
type HostDeleteBlocker struct {
	// HostID identifies the host the counts belong to.
	HostID string
	// ImportBatchCount is reported as import_batches in the error message.
	ImportBatchCount int
	// ManagedResourceCount is reported as managed_resources in the error message.
	ManagedResourceCount int
	// ReconcileRunCount is reported as reconcile_runs in the error message.
	ReconcileRunCount int
}

// Error implements the error interface. A nil receiver produces a generic
// message; otherwise the message names the host and includes every count.
func (e *HostDeleteBlocker) Error() string {
	if e != nil {
		return fmt.Sprintf(
			"host %q cannot be deleted while runtime state exists (import_batches=%d managed_resources=%d reconcile_runs=%d)",
			e.HostID, e.ImportBatchCount, e.ManagedResourceCount, e.ReconcileRunCount,
		)
	}
	return "host delete is blocked"
}
func (r *HostsRepo) DeleteByHostID(ctx context.Context, hostID string) error {
hostID = strings.TrimSpace(hostID)
@@ -165,6 +209,17 @@ func (r *HostsRepo) DeleteByHostID(ctx context.Context, hostID string) error {
return fmt.Errorf("host_id is required")
}
blocker, err := r.RuntimeDependencyCountsByHostID(ctx, hostID)
if err != nil {
if err == sql.ErrNoRows {
return fmt.Errorf("host %q not found", hostID)
}
return fmt.Errorf("resolve host %q runtime dependencies: %w", hostID, err)
}
if blocker.ImportBatchCount > 0 || blocker.ManagedResourceCount > 0 || blocker.ReconcileRunCount > 0 {
return &blocker
}
result, err := r.db.ExecContext(ctx, `DELETE FROM hosts WHERE host_id = ?`, hostID)
if err != nil {
return fmt.Errorf("delete host %q: %w", hostID, err)

View File

@@ -13,12 +13,12 @@ import (
func openTestDB(t *testing.T) *DB {
t.Helper()
dbPath := filepath.Join(t.TempDir(), "test.db")
dsn := "file:" + filepath.ToSlash(dbPath) + "?_pragma=foreign_keys(0)"
dsn := "file:" + filepath.ToSlash(dbPath) + "?_busy_timeout=5000&_pragma=foreign_keys(0)"
store, err := Open(context.Background(), dsn)
if err != nil {
t.Fatalf("Open() error = %v", err)
}
t.Cleanup(func() { store.Close() })
t.Cleanup(func() { _ = store.Close() })
return store
}
@@ -26,12 +26,12 @@ func openTestDB(t *testing.T) *DB {
func openTestDBWithFK(t *testing.T) *DB {
t.Helper()
dbPath := filepath.Join(t.TempDir(), "test-fk.db")
dsn := "file:" + filepath.ToSlash(dbPath)
dsn := "file:" + filepath.ToSlash(dbPath) + "?_busy_timeout=5000"
store, err := Open(context.Background(), dsn)
if err != nil {
t.Fatalf("Open() error = %v", err)
}
t.Cleanup(func() { store.Close() })
t.Cleanup(func() { _ = store.Close() })
return store
}
@@ -279,6 +279,47 @@ func TestHostsRepoDeleteByHostID(t *testing.T) {
t.Fatalf("ListAll() after delete len = %d, want 0", len(hosts))
}
}
// TestHostsRepoDeleteByHostIDBlocksWhenRuntimeStateExists checks that
// DeleteByHostID refuses to delete a host that still has dependent rows, and
// that the returned error unwraps to a *HostDeleteBlocker reporting a count of
// 1 for import batches, managed resources and reconcile runs.
func TestHostsRepoDeleteByHostIDBlocksWhenRuntimeStateExists(t *testing.T) {
	ctx := context.Background()
	store := openTestDBWithFK(t)

	// The host row is discovered through the batch created by createTestBatch.
	batchID := createTestBatch(t, store)
	host, err := store.Hosts().GetByID(ctx, mustHostRowIDForBatch(t, store, batchID))
	if err != nil {
		t.Fatalf("Hosts().GetByID() error = %v", err)
	}

	resource := ManagedResource{
		BatchID:        batchID,
		HostID:         host.ID,
		ResourceType:   "group",
		HostResourceID: "group_1",
		ResourceName:   "group",
	}
	if _, err := store.ManagedResources().Create(ctx, resource); err != nil {
		t.Fatalf("ManagedResources().Create() error = %v", err)
	}

	run := ReconcileRun{
		BatchID:     batchID,
		HostID:      host.ID,
		ProviderID:  mustProviderIDForBatch(t, store, batchID),
		Status:      "active",
		SummaryJSON: `{}`,
	}
	if _, err := store.ReconcileRuns().Create(ctx, run); err != nil {
		t.Fatalf("ReconcileRuns().Create() error = %v", err)
	}

	err = store.Hosts().DeleteByHostID(ctx, host.HostID)
	if err == nil {
		t.Fatal("DeleteByHostID() error = nil, want blocked error")
	}
	var blocker *HostDeleteBlocker
	if !errors.As(err, &blocker) {
		t.Fatalf("DeleteByHostID() error = %T %v, want HostDeleteBlocker", err, err)
	}
	allOne := blocker.ImportBatchCount == 1 && blocker.ManagedResourceCount == 1 && blocker.ReconcileRunCount == 1
	if !allOne {
		t.Fatalf("blocker = %+v, want all dependency counts = 1", blocker)
	}
}
func TestHostsRepoUpdateProbeByHostID(t *testing.T) {
store := openTestDB(t)
@@ -361,3 +402,20 @@ func TestHostsRepoDeleteByHostIDEmptyError(t *testing.T) {
t.Fatal("DeleteByHostID('') error = nil, want error")
}
}
// mustHostRowIDForBatch returns the host_id column of the import_batches row
// whose id is batchID, failing the test immediately if the query or scan errors.
func mustHostRowIDForBatch(t *testing.T, store *DB, batchID int64) int64 {
	t.Helper()

	const query = `SELECT host_id FROM import_batches WHERE id = ?`
	var rowID int64
	scanErr := store.SQLDB().QueryRow(query, batchID).Scan(&rowID)
	if scanErr != nil {
		t.Fatalf("query host_id for batch %d error = %v", batchID, scanErr)
	}
	return rowID
}
// mustProviderIDForBatch returns the provider_id column of the import_batches
// row whose id is batchID, failing the test immediately if the query or scan
// errors.
func mustProviderIDForBatch(t *testing.T, store *DB, batchID int64) int64 {
	t.Helper()

	const query = `SELECT provider_id FROM import_batches WHERE id = ?`
	var rowID int64
	scanErr := store.SQLDB().QueryRow(query, batchID).Scan(&rowID)
	if scanErr != nil {
		t.Fatalf("query provider_id for batch %d error = %v", batchID, scanErr)
	}
	return rowID
}

View File

@@ -0,0 +1,42 @@
package testutil
import (
"context"
"fmt"
"path/filepath"
"sync"
"testing"
"sub2api-cn-relay-manager/internal/store/sqlite"
)
// sqliteOpenMu is held by OpenSQLiteStore for the duration of each sqlite.Open
// call, so opens made through that helper run one at a time.
var sqliteOpenMu sync.Mutex
// SQLiteTestDSN builds a "file:" DSN for a database named fileName inside a
// fresh t.TempDir() directory. The DSN always sets _busy_timeout=5000; when
// disableForeignKeys is true it also appends _pragma=foreign_keys(0).
func SQLiteTestDSN(t testing.TB, fileName string, disableForeignKeys bool) string {
	t.Helper()

	// ToSlash keeps the path portion slash-separated regardless of the OS separator.
	dbFile := filepath.ToSlash(filepath.Join(t.TempDir(), fileName))

	pragma := ""
	if disableForeignKeys {
		pragma = "&_pragma=foreign_keys(0)"
	}
	return fmt.Sprintf("file:%s?_busy_timeout=5000%s", dbFile, pragma)
}
// OpenSQLiteStore opens dsn with sqlite.Open while holding sqliteOpenMu, and
// fails the test via t.Fatalf if the open returns an error. The mutex is
// released before the error is inspected.
func OpenSQLiteStore(t testing.TB, dsn string) *sqlite.DB {
	t.Helper()

	db, openErr := func() (*sqlite.DB, error) {
		sqliteOpenMu.Lock()
		defer sqliteOpenMu.Unlock()
		return sqlite.Open(context.Background(), dsn)
	}()
	if openErr != nil {
		t.Fatalf("sqlite.Open() error = %v", openErr)
	}
	return db
}
// CloseSQLiteStore closes store and fails the test via t.Fatalf if Close
// returns an error.
func CloseSQLiteStore(t testing.TB, store *sqlite.DB) {
	t.Helper()

	closeErr := store.Close()
	if closeErr == nil {
		return
	}
	t.Fatalf("store.Close() error = %v", closeErr)
}