feat: bootstrap supply intelligence baseline

2026-05-07 10:16:46 +08:00
commit afdbea6fb5
62 changed files with 9170 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
+bin/
+.coverprofile
+coverage.out
+*.log
+*.tmp
+.DS_Store
--- a/README.md
+++ b/README.md
@@ -0,0 +1,21 @@
+# supply-intelligence
+
+Supply-Intelligence 项目代码仓。
+
+当前阶段目标：先实现首个最小生产闭环：
+1. 账号探针与状态写回
+2. 模型发现与 candidate 闭环
+3. 准入测试与 draft package 生成
+4. package 发布与 gateway package event + ack
+
+实现真源文档：
+- `tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
+- `tech/BASELINE_TECHLEAD_V2.md`
+- `tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+- `tech/TEST_DESIGN.md`
+- `tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md`
+
+运行约束：
+- 首期不把 Redis / Temporal / WebSocket / 向量数据库作为硬前置
+- 首期不做深自动注册主路径
+- 首期默认 package 发布链路采用 event + ack
--- a/cmd/supply-intelligence/main.go
+++ b/cmd/supply-intelligence/main.go
@@ -0,0 +1,31 @@
+package main
+
+import (
+	"context"
+	"log"
+	"net/http"
+	"time"
+
+	"supply-intelligence/internal/app"
+	"supply-intelligence/internal/domain"
+)
+
+// main wires the application, registers an initial routing state for
+// account 1, starts the background gateway poller, and serves the HTTP API
+// on :8080 until the listener fails.
+func main() {
+	application := app.New()
+	application.Repo.UpsertRoutingState(domain.AccountRoutingState{
+		AccountID:      1,
+		Platform:       "openai",
+		AccountStatus:  domain.AccountStatusActive,
+		RoutingEnabled: true,
+		RiskScore:      10,
+		ReasonCode:     "ok",
+		LastProbeAt:    time.Now().UTC(),
+		Version:        1,
+	})
+	application.StartBackground(context.Background())
+
+	server := &http.Server{
+		Addr:    ":8080",
+		Handler: application.Server.Routes(),
+		// Bound how long a client may take to send request headers so idle
+		// or slow connections cannot pin server goroutines indefinitely.
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+	log.Println("supply-intelligence listening on :8080")
+	err := server.ListenAndServe()
+
+	// log.Fatal exits through os.Exit, which skips deferred calls, so the
+	// background poller is stopped explicitly before reporting the error.
+	application.StopBackground()
+	if err != nil {
+		log.Fatal(err)
+	}
+}
--- a/docs/POLLER_RUNTIME_BOUNDARY.md
+++ b/docs/POLLER_RUNTIME_BOUNDARY.md
@@ -0,0 +1,20 @@
+# Gateway poller runtime boundary
+
+Current repository stage:
+- package publish writes a pending gateway event
+- gateway consumer can poll, apply, and ack that event
+- the repository implementation in this repo is still in-memory only
+
+This means:
+- `published` means the upstream package event was recorded
+- `applied` / `failed` mean the in-process consumer flow handled the event during the current process lifetime
+- this repo does not yet claim durable database persistence for gateway event ack state
+
+Runtime shape in the current repo:
+- HTTP debug/manual endpoint: `POST /internal/supply-intelligence/gateway/consume-once`
+- background runtime path: application startup wires a minimal ticker-driven poller loop
+
+Non-goals for the current stage:
+- no MQ / Redis / external scheduler
+- no claim that a full durable publish state machine is complete
+- no claim that in-memory ack state survives restart
--- a/go.mod
+++ b/go.mod
@@ -0,0 +1,5 @@
+module supply-intelligence
+
+go 1.22.2
+
+require github.com/google/uuid v1.6.0 // indirect
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
--- a/internal/admission/repository.go
+++ b/internal/admission/repository.go
@@ -0,0 +1,27 @@
+package admission
+
+import "context"
+
+// CandidateRepository defines the persistence layer for candidates.
+// The admission service uses it to load a candidate, record the outcome of
+// an admission run, and list candidates by status.
+type CandidateRepository interface {
+	// GetCandidateByIDContext returns the candidate with the given ID.
+	// The bool result reports whether a candidate was found.
+	GetCandidateByIDContext(ctx context.Context, candidateID string) (Candidate, bool)
+	// UpdateCandidateStatus stores a new status for the candidate together
+	// with an optional failure code and summary (empty strings when unused).
+	UpdateCandidateStatus(ctx context.Context, candidateID string, status CandidateStatus, failureCode, failureSummary string) error
+	// ListCandidatesByStatus returns the candidates that are in the given status.
+	ListCandidatesByStatus(ctx context.Context, status CandidateStatus) []Candidate
+}
+
+// SupplyPackageRepository defines the persistence layer for supply packages.
+type SupplyPackageRepository interface {
+	// UpsertDraftPackage creates or updates the draft package for the
+	// platform/model pair and returns its package ID.
+	UpsertDraftPackage(ctx context.Context, platform, model string, source string) (packageID int64, err error)
+	// GetDraftPackage looks up the package for the platform/model pair.
+	// The bool result reports whether one was found.
+	GetDraftPackage(ctx context.Context, platform, model string) (DraftPackage, bool)
+}
+
+// DraftPackage represents a draft supply package created after admission passes.
+// CreatedAt is carried as a string; its format is not fixed by this type.
+type DraftPackage struct {
+	PackageID int64  `json:"package_id"`
+	Platform  string `json:"platform"`
+	Model     string `json:"model"`
+	Status    string `json:"status"` // draft, active, deprecated
+	Source    string `json:"source"`
+	CreatedAt string `json:"created_at"`
+	Version   int64  `json:"version"`
+}
--- a/internal/admission/runner.go
+++ b/internal/admission/runner.go
@@ -0,0 +1,131 @@
+package admission
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+	"time"
+)
+
+// HTTPTestRunner implements TestRunner by making real HTTP requests.
+type HTTPTestRunner struct {
+	// client is the shared HTTP client used for every test case.
+	client *http.Client
+	// now is a clock hook set by the constructor; Run measures latency with
+	// time.Now directly and does not read it.
+	now    func() time.Time
+}
+
+// NewHTTPTestRunner builds a runner that issues real HTTP calls through a
+// client with a 60-second overall timeout and a UTC clock.
+func NewHTTPTestRunner() *HTTPTestRunner {
+	httpClient := &http.Client{Timeout: 60 * time.Second}
+	utcNow := func() time.Time { return time.Now().UTC() }
+	return &HTTPTestRunner{client: httpClient, now: utcNow}
+}
+
+// Run performs one test case as an HTTP request and reports the outcome.
+//
+// A case passes when the response status is in the 2xx range. Request
+// construction and transport failures are returned in the Error field;
+// transport failures also carry the measured latency. ResponseLen counts at
+// most the first 4096 bytes of the response body.
+func (r *HTTPTestRunner) Run(ctx context.Context, tc TestCase) TestCaseResult {
+	// payload stays nil (no request body) when the case defines no body.
+	var payload io.Reader
+	if len(tc.Body) > 0 {
+		payload = bytes.NewBufferString(tc.Body)
+	}
+
+	request, err := http.NewRequestWithContext(ctx, tc.Method, tc.Endpoint, payload)
+	if err != nil {
+		return TestCaseResult{Error: err.Error()}
+	}
+
+	for name, value := range tc.Headers {
+		request.Header.Set(name, value)
+	}
+	// Default to JSON when the case did not choose a content type itself.
+	if request.Header.Get("Content-Type") == "" {
+		request.Header.Set("Content-Type", "application/json")
+	}
+
+	sentAt := time.Now()
+	response, err := r.client.Do(request)
+	elapsedMs := int(time.Since(sentAt).Milliseconds())
+	if err != nil {
+		return TestCaseResult{
+			LatencyMs: elapsedMs,
+			Error:     err.Error(),
+		}
+	}
+	defer response.Body.Close()
+
+	// Only a bounded prefix is read; a read error merely shortens the
+	// reported length and does not affect the pass/fail verdict.
+	prefix, _ := io.ReadAll(io.LimitReader(response.Body, 4096))
+
+	return TestCaseResult{
+		Passed:      response.StatusCode >= 200 && response.StatusCode < 300,
+		StatusCode:  response.StatusCode,
+		LatencyMs:   elapsedMs,
+		ResponseLen: len(prefix),
+		Error:       "",
+	}
+}
+
+// BuildTestSuiteForPlatform returns the standard admission suite for the
+// given platform. Platforms without a built-in suite get a suite with an
+// empty (non-nil) case list.
+func BuildTestSuiteForPlatform(platform, baseURL, apiKey string) TestSuite {
+	if platform == "openai" {
+		return buildOpenAITestSuite(baseURL, apiKey)
+	}
+	if platform == "anthropic" {
+		return buildAnthropicTestSuite(baseURL, apiKey)
+	}
+	return TestSuite{Platform: platform, Cases: []TestCase{}}
+}
+
+// buildOpenAITestSuite assembles the OpenAI suite: a model-list GET and a
+// minimal chat-completion POST, both authenticated with a bearer token.
+// An empty baseURL falls back to https://api.openai.com.
+func buildOpenAITestSuite(baseURL, apiKey string) TestSuite {
+	root := baseURL
+	if root == "" {
+		root = "https://api.openai.com"
+	}
+	bearer := "Bearer " + apiKey
+
+	listModels := TestCase{
+		ID:          "openai-models-list",
+		Name:        "List Models",
+		Endpoint:    root + "/v1/models",
+		Method:      http.MethodGet,
+		Headers:     map[string]string{"Authorization": bearer},
+		TimeoutSecs: 30,
+	}
+	chatCompletion := TestCase{
+		ID:          "openai-chat-completion",
+		Name:        "Chat Completion",
+		Endpoint:    root + "/v1/chat/completions",
+		Method:      http.MethodPost,
+		Headers:     map[string]string{"Authorization": bearer, "Content-Type": "application/json"},
+		Body:        `{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hello"}],"max_tokens":10}`,
+		TimeoutSecs: 30,
+	}
+
+	return TestSuite{
+		Platform: "openai",
+		Cases:    []TestCase{listModels, chatCompletion},
+	}
+}
+
+// buildAnthropicTestSuite assembles the Anthropic suite: a single minimal
+// messages POST authenticated via the x-api-key header.
+// An empty baseURL falls back to https://api.anthropic.com.
+func buildAnthropicTestSuite(baseURL, apiKey string) TestSuite {
+	root := baseURL
+	if root == "" {
+		root = "https://api.anthropic.com"
+	}
+
+	messages := TestCase{
+		ID:          "anthropic-messages",
+		Name:        "Claude Messages",
+		Endpoint:    root + "/v1/messages",
+		Method:      http.MethodPost,
+		Headers:     map[string]string{"x-api-key": apiKey, "anthropic-version": "2023-06-01", "Content-Type": "application/json"},
+		Body:        `{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":"hello"}],"max_tokens":10}`,
+		TimeoutSecs: 30,
+	}
+
+	return TestSuite{
+		Platform: "anthropic",
+		Cases:    []TestCase{messages},
+	}
+}
--- a/internal/admission/runner_test.go
+++ b/internal/admission/runner_test.go
@@ -0,0 +1,169 @@
+package admission
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// TestHTTPTestRunner_Run_Success checks that a 200 response from a local
+// test server yields a passing result with the status code recorded and a
+// non-negative latency.
+func TestHTTPTestRunner_Run_Success(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(`{"id":"model-1"}`))
+	}))
+	defer server.Close()
+
+	runner := NewHTTPTestRunner()
+	result := runner.Run(context.Background(), TestCase{
+		ID:          "test-1",
+		Name:        "Test Case",
+		Endpoint:    server.URL,
+		Method:      http.MethodGet,
+		TimeoutSecs: 30,
+	})
+
+	if !result.Passed {
+		t.Fatalf("expected pass, got failed: status=%d", result.StatusCode)
+	}
+	if result.StatusCode != http.StatusOK {
+		t.Fatalf("expected 200, got: %d", result.StatusCode)
+	}
+	if result.LatencyMs < 0 {
+		t.Fatalf("expected latency >= 0, got: %d", result.LatencyMs)
+	}
+}
+
+// TestHTTPTestRunner_Run_Non2xx_Fails checks that a 500 response is reported
+// as a failed case while still recording the status code.
+func TestHTTPTestRunner_Run_Non2xx_Fails(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusInternalServerError)
+	}))
+	defer server.Close()
+
+	runner := NewHTTPTestRunner()
+	result := runner.Run(context.Background(), TestCase{
+		ID:          "test-2",
+		Name:        "Test 500",
+		Endpoint:    server.URL,
+		Method:      http.MethodGet,
+		TimeoutSecs: 30,
+	})
+
+	if result.Passed {
+		t.Fatal("expected failure for 500")
+	}
+	if result.StatusCode != http.StatusInternalServerError {
+		t.Fatalf("expected 500, got: %d", result.StatusCode)
+	}
+}
+
+// TestHTTPTestRunner_Run_Timeout checks that a request whose context expires
+// (50ms) before the server answers (500ms) is reported with a non-empty Error.
+func TestHTTPTestRunner_Run_Timeout(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(500 * time.Millisecond)
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer server.Close()
+
+	runner := NewHTTPTestRunner()
+	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+	defer cancel()
+
+	result := runner.Run(ctx, TestCase{
+		ID:          "test-3",
+		Name:        "Test Timeout",
+		Endpoint:    server.URL,
+		Method:      http.MethodGet,
+		TimeoutSecs: 1, // but context is 50ms
+	})
+
+	if result.Error == "" {
+		t.Fatal("expected error on timeout")
+	}
+}
+
+// TestHTTPTestRunner_Run_ContextCanceled checks that running a case with an
+// already-canceled context is reported with a non-empty Error.
+func TestHTTPTestRunner_Run_ContextCanceled(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(5 * time.Second)
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer server.Close()
+
+	runner := NewHTTPTestRunner()
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // cancel immediately
+
+	result := runner.Run(ctx, TestCase{
+		ID:          "test-4",
+		Name:        "Test Cancel",
+		Endpoint:    server.URL,
+		Method:      http.MethodGet,
+		TimeoutSecs: 30,
+	})
+
+	if result.Error == "" {
+		t.Fatal("expected error on context cancel")
+	}
+}
+
+// TestBuildTestSuiteForPlatform_OpenAI checks that the OpenAI suite is
+// labelled "openai", is non-empty, and starts with a GET case.
+func TestBuildTestSuiteForPlatform_OpenAI(t *testing.T) {
+	suite := BuildTestSuiteForPlatform("openai", "https://api.openai.com", "sk-test")
+	if suite.Platform != "openai" {
+		t.Fatalf("expected openai, got: %s", suite.Platform)
+	}
+	if len(suite.Cases) == 0 {
+		t.Fatal("expected at least 1 test case")
+	}
+	if suite.Cases[0].Method != http.MethodGet {
+		t.Fatalf("expected GET for models list, got: %s", suite.Cases[0].Method)
+	}
+}
+
+// TestBuildTestSuiteForPlatform_Anthropic checks that the Anthropic suite is
+// labelled "anthropic" and contains at least one case.
+func TestBuildTestSuiteForPlatform_Anthropic(t *testing.T) {
+	suite := BuildTestSuiteForPlatform("anthropic", "https://api.anthropic.com", "sk-ant-test")
+	if suite.Platform != "anthropic" {
+		t.Fatalf("expected anthropic, got: %s", suite.Platform)
+	}
+	if len(suite.Cases) == 0 {
+		t.Fatal("expected at least 1 test case")
+	}
+}
+
+// TestBuildTestSuiteForPlatform_Unknown checks that an unrecognised platform
+// yields a suite with no cases.
+func TestBuildTestSuiteForPlatform_Unknown(t *testing.T) {
+	suite := BuildTestSuiteForPlatform("unknown", "", "")
+	if len(suite.Cases) != 0 {
+		t.Fatal("expected 0 cases for unknown platform")
+	}
+}
+
+// TestHTTPTestRunner_Run_PostWithJSONBody checks that a POST case without an
+// explicit Content-Type header is sent as application/json and passes on 200.
+// NOTE(review): the request body is captured with a single Read call and is
+// never asserted, so body delivery is not actually verified here.
+func TestHTTPTestRunner_Run_PostWithJSONBody(t *testing.T) {
+	var receivedBody string
+	var receivedContentType string
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedContentType = r.Header.Get("Content-Type")
+		body := make([]byte, 1024)
+		n, _ := r.Body.Read(body)
+		receivedBody = string(body[:n])
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer server.Close()
+
+	runner := NewHTTPTestRunner()
+	result := runner.Run(context.Background(), TestCase{
+		ID:          "test-post",
+		Name:        "POST JSON",
+		Endpoint:    server.URL,
+		Method:      http.MethodPost,
+		Headers:     map[string]string{"Authorization": "Bearer token"},
+		Body:        `{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`,
+		TimeoutSecs: 30,
+	})
+
+	if !result.Passed {
+		t.Fatalf("expected pass: %+v", result)
+	}
+	if receivedContentType != "application/json" {
+		t.Fatalf("expected application/json, got: %s", receivedContentType)
+	}
+	_ = receivedBody // validated via status code pass check
+}
--- a/internal/admission/service.go
+++ b/internal/admission/service.go
@@ -0,0 +1,166 @@
+package admission
+
+import (
+	"context"
+	"errors"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Sentinel errors for the admission workflow; compare with errors.Is.
+var (
+	// ErrCandidateNotFound is returned when the repository has no candidate for the ID.
+	ErrCandidateNotFound    = errors.New("candidate not found")
+	// ErrInvalidCandidateID is returned when the candidate ID is empty.
+	ErrInvalidCandidateID   = errors.New("invalid candidate id")
+	// ErrTestTimeout signals an admission test timeout; nothing in this file returns it.
+	ErrTestTimeout          = errors.New("admission test timed out")
+	// ErrCandidateNotRunnable is returned when the candidate is not in pending_admission status.
+	ErrCandidateNotRunnable = errors.New("candidate not in runnable state")
+)
+
+// TestRunner executes a single test case and reports its outcome.
+// Failures are reported through the returned TestCaseResult rather than
+// through an error value.
+type TestRunner interface {
+	Run(ctx context.Context, tc TestCase) TestCaseResult
+}
+
+// TestCaseResult is the outcome of a single test case execution.
+type TestCaseResult struct {
+	// Passed reports whether the case succeeded.
+	Passed      bool
+	// StatusCode is the HTTP status of the response, when one was received.
+	StatusCode  int
+	// LatencyMs is the measured request duration in milliseconds.
+	LatencyMs   int
+	// Error holds the failure message when the case could not be executed; empty otherwise.
+	Error       string
+	// ResponseLen is the number of response body bytes the runner read.
+	ResponseLen int
+}
+
+// Service orchestrates the admission testing workflow: it loads a candidate,
+// runs the platform's test suite through the runner, records the resulting
+// status, and creates a draft package when every case passes.
+type Service struct {
+	candidateRepo CandidateRepository
+	packageRepo   SupplyPackageRepository
+	testSuites    map[string]TestSuite // key = platform
+	runner        TestRunner
+	now           func() time.Time // clock used for TestResult.TestedAt
+}
+
+// NewService builds an admission service from its repositories, the known
+// test suites, and a runner. Suites are indexed by platform; when two suites
+// share a platform the later one wins. The service clock reports UTC time.
+func NewService(candidateRepo CandidateRepository, packageRepo SupplyPackageRepository, suites []TestSuite, runner TestRunner) *Service {
+	byPlatform := make(map[string]TestSuite)
+	for i := range suites {
+		byPlatform[suites[i].Platform] = suites[i]
+	}
+
+	utcNow := func() time.Time { return time.Now().UTC() }
+
+	svc := &Service{
+		candidateRepo: candidateRepo,
+		packageRepo:   packageRepo,
+		testSuites:    byPlatform,
+		runner:        runner,
+		now:           utcNow,
+	}
+	return svc
+}
+
+// RunAdmission executes the full admission test for a candidate.
+//
+// The candidate must exist and be in pending_admission status. When no test
+// suite is registered for its platform the candidate is admitted without
+// running anything. Otherwise every case in the suite is run: any failure
+// rejects the candidate (the first failure supplies the code and summary),
+// while a clean run admits it and upserts a draft package.
+//
+// Returns ErrInvalidCandidateID for an empty ID, ErrCandidateNotFound when
+// the candidate is missing, ErrCandidateNotRunnable when it is in another
+// status, and any error raised while persisting the resulting status.
+func (s *Service) RunAdmission(ctx context.Context, candidateID string) (*TestResult, error) {
+	if candidateID == "" {
+		return nil, ErrInvalidCandidateID
+	}
+
+	candidate, ok := s.candidateRepo.GetCandidateByIDContext(ctx, candidateID)
+	if !ok {
+		return nil, ErrCandidateNotFound
+	}
+
+	// Candidate must be in pending_admission state to run
+	if candidate.Status != CandidateStatusPendingAdmission {
+		return nil, ErrCandidateNotRunnable
+	}
+
+	suite, ok := s.testSuites[candidate.Platform]
+	if !ok {
+		// No test suite for this platform — auto-pass (no known test cases).
+		// The status write must succeed, otherwise the reported result and
+		// the stored status would disagree.
+		if err := s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusAdmitted, "", ""); err != nil {
+			return nil, err
+		}
+		return &TestResult{
+			CandidateID: candidateID,
+			Status:      CandidateStatusAdmitted,
+			TestedAt:    s.now(),
+			Passed:      true,
+		}, nil
+	}
+
+	// Execute all test cases
+	var failedCount int
+	var failureCode string
+	var failureSummary string
+
+	for _, tc := range suite.Cases {
+		// A non-positive TimeoutSecs means "no per-case deadline"; deriving a
+		// zero-length timeout would hand the runner an already-expired context.
+		caseCtx, cancel := ctx, context.CancelFunc(func() {})
+		if tc.TimeoutSecs > 0 {
+			caseCtx, cancel = context.WithTimeout(ctx, time.Duration(tc.TimeoutSecs)*time.Second)
+		}
+		result := s.runner.Run(caseCtx, tc)
+		cancel()
+
+		if !result.Passed {
+			failedCount++
+			// Only the first failure is reported; later ones just count.
+			if failureCode == "" {
+				failureCode = classifyFailure(result, tc)
+				failureSummary = formatFailure(result, tc)
+			}
+		}
+	}
+
+	testedAt := s.now()
+
+	if failedCount > 0 {
+		// Test failed
+		err := s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusRejected, failureCode, failureSummary)
+		if err != nil {
+			return nil, err
+		}
+		return &TestResult{
+			CandidateID:    candidateID,
+			Status:         CandidateStatusRejected,
+			TestedAt:       testedAt,
+			FailureCode:    failureCode,
+			FailureSummary: failureSummary,
+			Passed:         false,
+		}, nil
+	}
+
+	// All cases passed — generate draft package. failureCode/failureSummary
+	// are still empty here because no case failed.
+	if _, err := s.packageRepo.UpsertDraftPackage(ctx, candidate.Platform, candidate.Model, candidate.Source); err != nil {
+		// Draft generation failed — still mark as admitted but record the error
+		failureCode = "draft_generation_failed"
+		failureSummary = err.Error()
+	}
+	// Propagate a failed status write, matching the rejection path above.
+	if err := s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusAdmitted, failureCode, failureSummary); err != nil {
+		return nil, err
+	}
+
+	return &TestResult{
+		CandidateID: candidateID,
+		Status:      CandidateStatusAdmitted,
+		TestedAt:    testedAt,
+		Passed:      true,
+	}, nil
+}
+
+// classifyFailure determines the failure code from a failed test case result.
+//
+// Execution errors map to "timeout" when the message mentions the context
+// deadline and to "execution_error" otherwise; HTTP statuses map to
+// "upstream_error" (>= 500) or "client_error" (>= 400); anything else is
+// "unknown_failure".
+func classifyFailure(result TestCaseResult, tc TestCase) string {
+	if result.Error != "" {
+		// HTTP client errors wrap the deadline message with the method and
+		// URL (and sometimes a Client.Timeout suffix), so match by substring
+		// rather than by equality.
+		if strings.Contains(result.Error, context.DeadlineExceeded.Error()) {
+			return "timeout"
+		}
+		return "execution_error"
+	}
+	if result.StatusCode >= 500 {
+		return "upstream_error"
+	}
+	if result.StatusCode >= 400 {
+		return "client_error"
+	}
+	return "unknown_failure"
+}
+
+// formatFailure creates a human-readable failure summary of the form
+// "<case name>: <error>" or "<case name>: status=<code>".
+func formatFailure(result TestCaseResult, tc TestCase) string {
+	if result.Error != "" {
+		return tc.Name + ": " + result.Error
+	}
+	// strconv.Itoa renders the decimal digits; string(rune(code)) would emit
+	// the Unicode character with that code point instead.
+	return tc.Name + ": status=" + strconv.Itoa(result.StatusCode)
+}
+
+// GetRunnableCandidates lists the candidates eligible for admission testing,
+// i.e. those currently in pending_admission status.
+func (s *Service) GetRunnableCandidates(ctx context.Context) []Candidate {
+	pending := s.candidateRepo.ListCandidatesByStatus(ctx, CandidateStatusPendingAdmission)
+	return pending
+}
--- a/internal/admission/service_test.go
+++ b/internal/admission/service_test.go
@@ -0,0 +1,201 @@
+package admission
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+)
+
+// mockCandidateRepo is an in-memory CandidateRepository keyed by candidate ID.
+type mockCandidateRepo struct {
+	candidates map[string]Candidate
+}
+
+// GetCandidateByIDContext returns the stored candidate and whether it exists.
+func (r *mockCandidateRepo) GetCandidateByIDContext(ctx context.Context, candidateID string) (Candidate, bool) {
+	c, ok := r.candidates[candidateID]
+	return c, ok
+}
+
+// UpdateCandidateStatus rewrites status, reason code, and update time for a
+// known candidate; unknown IDs are ignored. failureSummary is not stored and
+// the call never fails.
+func (r *mockCandidateRepo) UpdateCandidateStatus(ctx context.Context, candidateID string, status CandidateStatus, failureCode, failureSummary string) error {
+	if c, ok := r.candidates[candidateID]; ok {
+		c.Status = status
+		c.ReasonCode = failureCode
+		c.UpdatedAt = time.Now().UTC()
+		r.candidates[candidateID] = c
+	}
+	return nil
+}
+
+// ListCandidatesByStatus returns candidates matching status; an empty status
+// matches every candidate. Order follows map iteration and is not stable.
+func (r *mockCandidateRepo) ListCandidatesByStatus(ctx context.Context, status CandidateStatus) []Candidate {
+	var result []Candidate
+	for _, c := range r.candidates {
+		if status == "" || c.Status == status {
+			result = append(result, c)
+		}
+	}
+	return result
+}
+
+// mockPackageRepo is an in-memory SupplyPackageRepository keyed by
+// "platform/model" with a monotonically increasing package ID.
+type mockPackageRepo struct {
+	drafts map[string]DraftPackage
+	nextID int64
+}
+
+// UpsertDraftPackage stores a draft under "platform/model", always assigning
+// a fresh ID (an existing entry is overwritten), and never fails.
+func (r *mockPackageRepo) UpsertDraftPackage(ctx context.Context, platform, model, source string) (int64, error) {
+	r.nextID++
+	id := r.nextID
+	r.drafts[platform+"/"+model] = DraftPackage{
+		PackageID: id,
+		Platform:  platform,
+		Model:     model,
+		Status:    "draft",
+		Source:    source,
+	}
+	return id, nil
+}
+
+// GetDraftPackage returns the draft stored for "platform/model", if any.
+func (r *mockPackageRepo) GetDraftPackage(ctx context.Context, platform, model string) (DraftPackage, bool) {
+	d, ok := r.drafts[platform+"/"+model]
+	return d, ok
+}
+
+// mockTestRunner is a TestRunner that returns canned results keyed by test
+// case ID.
+type mockTestRunner struct {
+	results map[string]TestCaseResult
+}
+
+// Run returns the canned result for tc.ID, or a passing 200 result when no
+// entry exists (lookups on a nil map also fall through to the default).
+func (r *mockTestRunner) Run(ctx context.Context, tc TestCase) TestCaseResult {
+	if res, ok := r.results[tc.ID]; ok {
+		return res
+	}
+	return TestCaseResult{Passed: true, StatusCode: 200}
+}
+
+// TestRunAdmission_PassesAllCases checks that a pending candidate whose suite
+// passes is admitted and that exactly one draft package is created.
+func TestRunAdmission_PassesAllCases(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
+		"cand-1": {CandidateID: "cand-1", Platform: "openai", Model: "gpt-4", Status: CandidateStatusPendingAdmission},
+	}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{results: map[string]TestCaseResult{}}
+	
+	suites := []TestSuite{{
+		Platform: "openai",
+		Cases: []TestCase{
+			{ID: "case-1", Name: "models endpoint", Endpoint: "/v1/models", Method: "GET", TimeoutSecs: 30},
+		},
+	}}
+	
+	svc := NewService(candidateRepo, packageRepo, suites, runner)
+	result, err := svc.RunAdmission(context.Background(), "cand-1")
+	
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !result.Passed {
+		t.Fatalf("expected pass, got failed: %+v", result)
+	}
+	if result.Status != CandidateStatusAdmitted {
+		t.Fatalf("expected admitted status, got: %s", result.Status)
+	}
+	if len(packageRepo.drafts) != 1 {
+		t.Fatalf("expected 1 draft package, got %d", len(packageRepo.drafts))
+	}
+}
+
+// TestRunAdmission_FailsOneCase checks that a failing case (canned 500)
+// rejects the candidate, sets a failure code, and creates no draft package.
+func TestRunAdmission_FailsOneCase(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
+		"cand-2": {CandidateID: "cand-2", Platform: "openai", Model: "gpt-4", Status: CandidateStatusPendingAdmission},
+	}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{results: map[string]TestCaseResult{
+		"case-1": {Passed: false, StatusCode: 500, Error: ""},
+	}}
+	
+	suites := []TestSuite{{
+		Platform: "openai",
+		Cases: []TestCase{
+			{ID: "case-1", Name: "models endpoint", Endpoint: "/v1/models", Method: "GET", TimeoutSecs: 30},
+		},
+	}}
+	
+	svc := NewService(candidateRepo, packageRepo, suites, runner)
+	result, err := svc.RunAdmission(context.Background(), "cand-2")
+	
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result.Passed {
+		t.Fatalf("expected failure, got pass")
+	}
+	if result.Status != CandidateStatusRejected {
+		t.Fatalf("expected rejected status, got: %s", result.Status)
+	}
+	if result.FailureCode == "" {
+		t.Fatalf("expected failure code to be set")
+	}
+	if len(packageRepo.drafts) != 0 {
+		t.Fatalf("expected 0 draft packages on failure, got %d", len(packageRepo.drafts))
+	}
+}
+
+// TestRunAdmission_CandidateNotFound checks that an unknown candidate ID
+// yields ErrCandidateNotFound.
+func TestRunAdmission_CandidateNotFound(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{results: map[string]TestCaseResult{}}
+	
+	svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
+	_, err := svc.RunAdmission(context.Background(), "nonexistent")
+	
+	if !errors.Is(err, ErrCandidateNotFound) {
+		t.Fatalf("expected ErrCandidateNotFound, got: %v", err)
+	}
+}
+
+// TestRunAdmission_CandidateNotRunnable checks that a candidate that is
+// already admitted cannot be run again and yields ErrCandidateNotRunnable.
+func TestRunAdmission_CandidateNotRunnable(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
+		"cand-3": {CandidateID: "cand-3", Platform: "openai", Model: "gpt-4", Status: CandidateStatusAdmitted},
+	}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{results: map[string]TestCaseResult{}}
+	
+	svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
+	_, err := svc.RunAdmission(context.Background(), "cand-3")
+	
+	if !errors.Is(err, ErrCandidateNotRunnable) {
+		t.Fatalf("expected ErrCandidateNotRunnable, got: %v", err)
+	}
+}
+
+// TestRunAdmission_NoTestSuite_AutoPass checks that a candidate on a platform
+// with no registered suite is reported as passed without an error.
+func TestRunAdmission_NoTestSuite_AutoPass(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
+		"cand-4": {CandidateID: "cand-4", Platform: "unknown-platform", Model: "some-model", Status: CandidateStatusPendingAdmission},
+	}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{results: map[string]TestCaseResult{}}
+	
+	svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner) // no suites
+	result, err := svc.RunAdmission(context.Background(), "cand-4")
+	
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !result.Passed {
+		t.Fatalf("expected auto-pass for unknown platform, got: %+v", result)
+	}
+}
+
+// TestGetRunnableCandidates checks that only the two pending_admission
+// candidates (out of three) are returned as runnable.
+func TestGetRunnableCandidates(t *testing.T) {
+	candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
+		"cand-1": {CandidateID: "cand-1", Status: CandidateStatusPendingAdmission},
+		"cand-2": {CandidateID: "cand-2", Status: CandidateStatusAdmitted},
+		"cand-3": {CandidateID: "cand-3", Status: CandidateStatusPendingAdmission},
+	}}
+	packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
+	runner := &mockTestRunner{}
+	
+	svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
+	candidates := svc.GetRunnableCandidates(context.Background())
+	
+	if len(candidates) != 2 {
+		t.Fatalf("expected 2 pending candidates, got %d", len(candidates))
+	}
+}
--- a/internal/admission/types.go
+++ b/internal/admission/types.go
@@ -0,0 +1,62 @@
+package admission
+
+import "time"
+
+// ProbeClassification mirrors domain.ProbeClassification for internal use.
+// NOTE(review): nothing in the admission code visible here reads these
+// values — confirm they are still needed.
+type ProbeClassification string
+
+// Known probe classifications.
+const (
+	ProbeClassificationSuccess         ProbeClassification = "success"
+	ProbeClassificationExplicitFailure ProbeClassification = "explicit_failure"
+	ProbeClassificationInconclusive    ProbeClassification = "inconclusive"
+)
+
+// CandidateStatus mirrors domain.DiscoveryCandidateStatus. Values are
+// converted to and from the domain type by plain string conversion, so the
+// literals here must stay in sync with the domain package.
+type CandidateStatus string
+
+// Candidate lifecycle states used by the admission workflow.
+const (
+	CandidateStatusPendingAdmission CandidateStatus = "pending_admission"
+	CandidateStatusAdmitted         CandidateStatus = "admitted"
+	CandidateStatusRejected         CandidateStatus = "rejected"
+)
+
+// Candidate represents a discovered model waiting for admission testing.
+type Candidate struct {
+	CandidateID  string          `json:"candidate_id"`
+	AccountID    int64           `json:"account_id"`
+	Platform     string          `json:"platform"`
+	Model        string          `json:"model"`
+	Status       CandidateStatus `json:"status"`
+	Source       string          `json:"source"`
+	ReasonCode   string          `json:"reason_code,omitempty"`
+	DiscoveredAt time.Time       `json:"discovered_at"`
+	UpdatedAt    time.Time       `json:"updated_at"`
+	Version      int64           `json:"version"`
+}
+
+// TestResult records the outcome of an admission test run.
+// FailureCode and FailureSummary are omitted from JSON when empty.
+type TestResult struct {
+	CandidateID    string          `json:"candidate_id"`
+	Status         CandidateStatus `json:"status"` // admitted or rejected
+	TestedAt       time.Time       `json:"tested_at"`
+	FailureCode    string          `json:"failure_code,omitempty"`
+	FailureSummary string          `json:"failure_summary,omitempty"`
+	Passed         bool            `json:"passed"`
+}
+
+// TestCase defines a single test case within an admission test run: one HTTP
+// request (method, endpoint, optional headers and body) plus a per-case
+// timeout in seconds.
+type TestCase struct {
+	ID          string            `json:"id"`
+	Name        string            `json:"name"`
+	Endpoint    string            `json:"endpoint"`
+	Method      string            `json:"method"`
+	Headers     map[string]string `json:"headers,omitempty"`
+	Body        string            `json:"body,omitempty"`
+	TimeoutSecs int               `json:"timeout_secs"`
+}
+
+// TestSuite defines a collection of test cases for a model type.
+// The admission service indexes suites by Platform.
+type TestSuite struct {
+	Platform string     `json:"platform"`
+	Cases    []TestCase `json:"cases"`
+}
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -0,0 +1,160 @@
+package app
+
+import (
+	"context"
+	"time"
+
+	"supply-intelligence/internal/admission"
+	"supply-intelligence/internal/discovery"
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/gatewayconsumer"
+	"supply-intelligence/internal/httpapi"
+	"supply-intelligence/internal/poller"
+	"supply-intelligence/internal/probe"
+	"supply-intelligence/internal/publish"
+	"supply-intelligence/internal/repository"
+)
+
+// Application bundles the in-memory repository, every service built on top
+// of it, the gateway poller runtime, and the HTTP server.
+type Application struct {
+	Repo                   *repository.MemoryRepository
+	ProbeService           *probe.Service
+	PublishService         *publish.Service
+	DiscoveryService       *discovery.Service
+	GatewayConsumerService *gatewayconsumer.Service
+	GatewayPoller          *poller.GatewayPackagePoller
+	GatewayRuntime         *poller.Runtime
+	AdmissionService       *admission.Service
+	Server                 *httpapi.Server
+}
+
+// New wires a complete Application around a fresh in-memory repository:
+// probe, publish, discovery, and gateway-consumer services, a gateway poller
+// runtime constructed with a one-second duration, the admission service, and
+// the HTTP server.
+// NOTE(review): the admission suites are built with an empty API key, so the
+// Authorization / x-api-key headers they send carry no credential — confirm
+// keys are injected before these suites are run against real providers.
+func New() *Application {
+	repo := repository.NewMemoryRepository()
+	probeService := probe.NewService(repo)
+	publishService := publish.NewService(repo)
+	discoveryService := discovery.NewService(repo)
+	gatewayConsumerService := gatewayconsumer.NewService(repo)
+	gatewayPoller := poller.NewGatewayPackagePoller(gatewayConsumerService)
+	gatewayRuntime := poller.NewRuntime(gatewayPoller, time.Second)
+
+	// Wire MemoryRepository as admission's CandidateRepository
+	candidateRepo := &admissionMemoryRepoAdapter{repo: repo}
+	packageRepo := &admissionSupplyPackageAdapter{repo: repo}
+	runner := admission.NewHTTPTestRunner()
+
+	// Build test suites for known platforms (in real use, loaded from config)
+	suites := []admission.TestSuite{
+		admission.BuildTestSuiteForPlatform("openai", "https://api.openai.com", ""),
+		admission.BuildTestSuiteForPlatform("anthropic", "https://api.anthropic.com", ""),
+	}
+
+	admissionService := admission.NewService(candidateRepo, packageRepo, suites, runner)
+
+	return &Application{
+		Repo:                   repo,
+		ProbeService:           probeService,
+		PublishService:         publishService,
+		DiscoveryService:       discoveryService,
+		GatewayConsumerService: gatewayConsumerService,
+		GatewayPoller:          gatewayPoller,
+		GatewayRuntime:         gatewayRuntime,
+		AdmissionService:       admissionService,
+		Server:                 httpapi.NewServer(repo, probeService, publishService, gatewayConsumerService, discoveryService, admissionService),
+	}
+}
+
+// StartBackground starts the gateway poller runtime with ctx. It is a no-op
+// on a nil Application or when no runtime is configured.
+func (a *Application) StartBackground(ctx context.Context) {
+	if a != nil && a.GatewayRuntime != nil {
+		a.GatewayRuntime.Start(ctx)
+	}
+}
+
+// StopBackground stops the gateway poller runtime. It is a no-op on a nil
+// Application or when no runtime is configured.
+func (a *Application) StopBackground() {
+	if a != nil && a.GatewayRuntime != nil {
+		a.GatewayRuntime.Stop()
+	}
+}
+
+// IsInMemoryGatewayState reports whether the application holds a repository;
+// it returns false for a nil Application or a nil Repo.
+func (a *Application) IsInMemoryGatewayState() bool {
+	if a == nil {
+		return false
+	}
+	return a.Repo != nil
+}
+
+// --- Adapters that bridge MemoryRepository to admission.Repository interfaces ---
+
+// admissionMemoryRepoAdapter adapts MemoryRepository to admission.CandidateRepository
+// by converting between domain.DiscoveryCandidate and admission.Candidate.
+type admissionMemoryRepoAdapter struct {
+	repo *repository.MemoryRepository
+}
+
+// GetCandidateByIDContext looks up a discovery candidate in the repository
+// and returns it converted to the admission representation. The bool result
+// is false (with a zero Candidate) when the repository has no such candidate.
+func (a *admissionMemoryRepoAdapter) GetCandidateByIDContext(ctx context.Context, candidateID string) (admission.Candidate, bool) {
+	found, exists := a.repo.GetDiscoveryCandidateByIDContext(ctx, candidateID)
+	if exists {
+		return toAdmissionCandidate(found), true
+	}
+	return admission.Candidate{}, false
+}
+
+// UpdateCandidateStatus forwards the status change to the repository after
+// converting the admission status to its domain equivalent.
+func (a *admissionMemoryRepoAdapter) UpdateCandidateStatus(ctx context.Context, candidateID string, status admission.CandidateStatus, failureCode, failureSummary string) error {
+	domainStatus := domain.DiscoveryCandidateStatus(status)
+	return a.repo.UpdateCandidateStatus(ctx, candidateID, domainStatus, failureCode, failureSummary)
+}
+
+// ListCandidatesByStatus asks the repository for discovery candidates with
+// the given status and returns them converted to admission candidates, in
+// the same order. The result is an empty, non-nil slice when none match.
+func (a *admissionMemoryRepoAdapter) ListCandidatesByStatus(ctx context.Context, status admission.CandidateStatus) []admission.Candidate {
+	found := a.repo.ListDiscoveryCandidatesContext(ctx, domain.DiscoveryCandidateStatus(status))
+	converted := make([]admission.Candidate, 0, len(found))
+	for _, item := range found {
+		converted = append(converted, toAdmissionCandidate(item))
+	}
+	return converted
+}
+
+// toAdmissionCandidate copies a domain discovery candidate field by field
+// into the admission package's Candidate, converting the status type.
+func toAdmissionCandidate(c domain.DiscoveryCandidate) admission.Candidate {
+	var out admission.Candidate
+	out.CandidateID = c.CandidateID
+	out.AccountID = c.AccountID
+	out.Platform = c.Platform
+	out.Model = c.Model
+	out.Status = admission.CandidateStatus(c.Status)
+	out.Source = c.Source
+	out.ReasonCode = c.ReasonCode
+	out.DiscoveredAt = c.DiscoveredAt
+	out.UpdatedAt = c.UpdatedAt
+	out.Version = c.Version
+	return out
+}
+
+// admissionSupplyPackageAdapter adapts MemoryRepository to admission.SupplyPackageRepository
+// by mapping between domain.SupplyPackage and admission.DraftPackage.
+type admissionSupplyPackageAdapter struct {
+	repo *repository.MemoryRepository
+}
+
+// UpsertDraftPackage returns the ID of the package already stored for the
+// platform/model pair, or stores a new package with status "draft" and
+// returns the ID read back from the repository.
+// An existing package is returned as-is regardless of its status or source.
+func (a *admissionSupplyPackageAdapter) UpsertDraftPackage(ctx context.Context, platform, model, source string) (int64, error) {
+	if existing, ok := a.repo.GetSupplyPackage(platform, model); ok {
+		return existing.PackageID, nil
+	}
+	pkg := domain.SupplyPackage{
+		Platform: platform,
+		Model:    model,
+		Status:   "draft",
+		Source:   source,
+	}
+	a.repo.UpsertSupplyPackage(pkg)
+	// Read the package back to obtain its ID.
+	if newPkg, ok := a.repo.GetSupplyPackage(platform, model); ok {
+		return newPkg.PackageID, nil
+	}
+	// NOTE(review): reaching this point means the package just written could
+	// not be read back, yet the caller receives ID 0 with a nil error —
+	// consider returning an error here instead.
+	return 0, nil
+}
+
+// GetDraftPackage returns the repository's package for the platform/model
+// pair mapped to an admission.DraftPackage (ID, platform, model, status and
+// source only). The bool result is false when no package is stored.
+func (a *admissionSupplyPackageAdapter) GetDraftPackage(ctx context.Context, platform, model string) (admission.DraftPackage, bool) {
+	stored, exists := a.repo.GetSupplyPackage(platform, model)
+	if !exists {
+		return admission.DraftPackage{}, false
+	}
+
+	var draft admission.DraftPackage
+	draft.PackageID = stored.PackageID
+	draft.Platform = stored.Platform
+	draft.Model = stored.Model
+	draft.Status = stored.Status
+	draft.Source = stored.Source
+	return draft, true
+}
--- a/internal/app/app_test.go
+++ b/internal/app/app_test.go
@@ -0,0 +1,85 @@
+package app
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// TestNewApplication verifies that New returns an application with every
+// top-level component populated.
+func TestNewApplication(t *testing.T) {
+	application := New()
+	// Cases are evaluated top to bottom, so the first missing component is the
+	// one reported, and the nil check on application guards the field reads.
+	switch {
+	case application == nil:
+		t.Fatalf("expected application")
+	case application.Repo == nil:
+		t.Fatalf("expected repository")
+	case application.ProbeService == nil:
+		t.Fatalf("expected probe service")
+	case application.PublishService == nil:
+		t.Fatalf("expected publish service")
+	case application.DiscoveryService == nil:
+		t.Fatalf("expected discovery service")
+	case application.GatewayConsumerService == nil:
+		t.Fatalf("expected gateway consumer service")
+	case application.GatewayPoller == nil:
+		t.Fatalf("expected gateway poller")
+	case application.GatewayRuntime == nil:
+		t.Fatalf("expected gateway runtime")
+	case application.Server == nil:
+		t.Fatalf("expected server")
+	}
+}
+
+// TestApplicationStartBackgroundPollsEvents seeds one pending package event,
+// starts the background runtime, and waits up to 1.5s for that event to be
+// reported as applied.
+func TestApplicationStartBackgroundPollsEvents(t *testing.T) {
+	application := New()
+	seeded := domain.PackageChangeEvent{
+		EventID:           "evt-app-runtime-1",
+		EventType:         "supply_package_published",
+		PackageID:         11,
+		Platform:          "openai",
+		Model:             "gpt-4.1-mini",
+		OccurredAt:        time.Unix(2, 0).UTC(),
+		Version:           1,
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	}
+	application.Repo.AppendPackageEvent(seeded)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	application.StartBackground(ctx)
+	defer application.StopBackground()
+
+	// Poll every 20ms until the deadline; succeed as soon as the single event
+	// has been applied.
+	deadline := time.Now().Add(1500 * time.Millisecond)
+	for ; time.Now().Before(deadline); time.Sleep(20 * time.Millisecond) {
+		events, _ := application.Repo.ListPackageEventsAfter("")
+		if len(events) == 1 && events[0].GatewaySyncStatus == domain.GatewaySyncStatusApplied {
+			return
+		}
+	}
+
+	final, _ := application.Repo.ListPackageEventsAfter("")
+	t.Fatalf("expected background runtime to apply event, got %+v", final)
+}
+
+// TestApplicationStartBackgroundHandlesNilRuntime checks that StartBackground
+// can be called with the gateway runtime cleared and leaves it nil.
+func TestApplicationStartBackgroundHandlesNilRuntime(t *testing.T) {
+	app := New()
+	app.GatewayRuntime = nil
+
+	app.StartBackground(context.Background())
+
+	if rt := app.GatewayRuntime; rt != nil {
+		t.Fatalf("expected nil runtime guard to keep runtime nil")
+	}
+}
+
+// TestApplicationReportsInMemoryGatewayState checks that a freshly built
+// application reports its gateway state as in-memory.
+func TestApplicationReportsInMemoryGatewayState(t *testing.T) {
+	if inMemory := New().IsInMemoryGatewayState(); !inMemory {
+		t.Fatalf("expected in-memory gateway state")
+	}
+}
--- a/internal/control/module.go
+++ b/internal/control/module.go
@@ -0,0 +1,150 @@
+package control
+
+import (
+	"sync"
+	"time"
+)
+
+// ModuleState represents the lifecycle state of a module.
+type ModuleState string
+
+const (
+	// ModuleStateActive is the state every gate starts in (see NewModuleGate).
+	ModuleStateActive  ModuleState = "active"
+	// ModuleStateClosing is entered by ModuleGate.Close from the active state.
+	ModuleStateClosing ModuleState = "closing"
+	// ModuleStateClosed is set by ModuleGate.MarkClosed.
+	ModuleStateClosed  ModuleState = "closed"
+)
+
+// ModuleGate controls the enable/disable/close lifecycle of a module.
+// Its methods take mu, so a gate can be shared between goroutines; it must be
+// used through a pointer because it embeds a mutex.
+type ModuleGate struct {
+	mu       sync.RWMutex // guards the fields below
+	enabled  bool         // configured on/off switch; cleared by MarkClosed
+	state    ModuleState  // current lifecycle state
+	closedAt *time.Time   // UTC time Close moved the gate to closing; nil before that
+}
+
+// NewModuleGate returns a gate in the active state whose enabled flag is set
+// to the given value.
+func NewModuleGate(enabled bool) *ModuleGate {
+	gate := new(ModuleGate)
+	gate.enabled = enabled
+	gate.state = ModuleStateActive
+	return gate
+}
+
+// IsEnabled reports whether the module is accepting new tasks, i.e. the gate
+// is both enabled and still in the active state.
+func (g *ModuleGate) IsEnabled() bool {
+	g.mu.RLock()
+	accepting := g.enabled && g.state == ModuleStateActive
+	g.mu.RUnlock()
+	return accepting
+}
+
+// Close signals the module to stop accepting new tasks. An active gate moves
+// to the closing state and records the current UTC time in closedAt; a gate in
+// any other state is left untouched.
+func (g *ModuleGate) Close() {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	if g.state != ModuleStateActive {
+		return
+	}
+	stamp := time.Now().UTC()
+	g.state = ModuleStateClosing
+	g.closedAt = &stamp
+}
+
+// MarkClosed marks the module as fully closed (no in-flight tasks): the gate
+// is disabled and its state becomes closed, regardless of the previous state.
+func (g *ModuleGate) MarkClosed() {
+	g.mu.Lock()
+	g.enabled = false
+	g.state = ModuleStateClosed
+	g.mu.Unlock()
+}
+
+// State returns the gate's current lifecycle state, read under the read lock.
+func (g *ModuleGate) State() ModuleState {
+	g.mu.RLock()
+	current := g.state
+	g.mu.RUnlock()
+	return current
+}
+
+// ModuleController manages all module gates: one gate each for the probes,
+// discovery, admission and publish modules.
+type ModuleController struct {
+	probes     *ModuleGate
+	discovery  *ModuleGate
+	admission  *ModuleGate
+	publish    *ModuleGate
+}
+
+// NewModuleController builds a controller whose four gates are all created
+// with the same enabled flag.
+func NewModuleController(enabled bool) *ModuleController {
+	ctrl := &ModuleController{}
+	ctrl.probes = NewModuleGate(enabled)
+	ctrl.discovery = NewModuleGate(enabled)
+	ctrl.admission = NewModuleGate(enabled)
+	ctrl.publish = NewModuleGate(enabled)
+	return ctrl
+}
+
+// ShutdownInitiate closes every module gate (stop accepting new tasks), in the
+// order probes, discovery, admission, publish.
+func (c *ModuleController) ShutdownInitiate() {
+	for _, gate := range []*ModuleGate{c.probes, c.discovery, c.admission, c.publish} {
+		gate.Close()
+	}
+}
+
+// ShutdownComplete marks every module gate as fully closed, in the order
+// probes, discovery, admission, publish.
+func (c *ModuleController) ShutdownComplete() {
+	for _, gate := range []*ModuleGate{c.probes, c.discovery, c.admission, c.publish} {
+		gate.MarkClosed()
+	}
+}
+
+// IsInflight reports whether any module gate is currently in the closing
+// state. It inspects gate state only; no task counter is consulted here.
+func (c *ModuleController) IsInflight() bool {
+	for _, gate := range []*ModuleGate{c.probes, c.discovery, c.admission, c.publish} {
+		if gate.State() == ModuleStateClosing {
+			return true
+		}
+	}
+	return false
+}
+
+// GetModuleState returns the state of the module called name. Recognised names
+// are "probes", "discovery", "admission" and "publish"; any other name yields
+// the empty ModuleState.
+func (c *ModuleController) GetModuleState(name string) ModuleState {
+	var gate *ModuleGate
+	switch name {
+	case "probes":
+		gate = c.probes
+	case "discovery":
+		gate = c.discovery
+	case "admission":
+		gate = c.admission
+	case "publish":
+		gate = c.publish
+	default:
+		return ""
+	}
+	return gate.State()
+}
+
+// ModuleStatus is a snapshot of all module states, as produced by
+// ModuleController.Status. It serialises to JSON with one key per module.
+type ModuleStatus struct {
+	Probes    ModuleState `json:"probes"`
+	Discovery ModuleState `json:"discovery"`
+	Admission ModuleState `json:"admission"`
+	Publish   ModuleState `json:"publish"`
+}
+
+// Status returns a snapshot of all module states. Each gate is read in turn
+// under its own lock, so the snapshot is not atomic across gates.
+func (c *ModuleController) Status() ModuleStatus {
+	var snapshot ModuleStatus
+	snapshot.Probes = c.probes.State()
+	snapshot.Discovery = c.discovery.State()
+	snapshot.Admission = c.admission.State()
+	snapshot.Publish = c.publish.State()
+	return snapshot
+}
+
+// RejectIfNotEnabled returns ErrModuleClosed when the gate is not accepting new
+// tasks (see IsEnabled) and nil otherwise. moduleName is currently unused; the
+// returned error does not mention it.
+func (g *ModuleGate) RejectIfNotEnabled(moduleName string) error {
+	if g.IsEnabled() {
+		return nil
+	}
+	return ErrModuleClosed
+}
+
+// ErrModuleClosed is the shared error value returned by
+// ModuleGate.RejectIfNotEnabled when a gate is not accepting new tasks.
+var ErrModuleClosed = &ModuleClosedError{}
+
+// ModuleClosedError is the error type behind ErrModuleClosed. It carries no
+// fields.
+type ModuleClosedError struct{}
+
+// Error implements the error interface with a fixed message.
+func (e *ModuleClosedError) Error() string {
+	return "module is not accepting new tasks"
+}
--- a/internal/control/module_test.go
+++ b/internal/control/module_test.go
@@ -0,0 +1,124 @@
+package control
+
+import (
+	"testing"
+	"time"
+)
+
+// TestModuleGate_IsEnabled checks that a gate created enabled reports enabled.
+func TestModuleGate_IsEnabled(t *testing.T) {
+	if gate := NewModuleGate(true); !gate.IsEnabled() {
+		t.Fatal("expected enabled")
+	}
+}
+
+// TestModuleGate_IsDisabled checks that a gate created disabled reports
+// disabled.
+func TestModuleGate_IsDisabled(t *testing.T) {
+	if gate := NewModuleGate(false); gate.IsEnabled() {
+		t.Fatal("expected disabled")
+	}
+}
+
+// TestModuleGate_Close checks that Close moves an active gate to closing.
+func TestModuleGate_Close(t *testing.T) {
+	gate := NewModuleGate(true)
+	gate.Close()
+
+	if got := gate.State(); got != ModuleStateClosing {
+		t.Fatalf("expected closing, got: %s", got)
+	}
+}
+
+// TestModuleGate_MarkClosed checks that Close followed by MarkClosed leaves the
+// gate closed and no longer enabled.
+func TestModuleGate_MarkClosed(t *testing.T) {
+	gate := NewModuleGate(true)
+	gate.Close()
+	gate.MarkClosed()
+
+	if got := gate.State(); got != ModuleStateClosed {
+		t.Fatalf("expected closed, got: %s", got)
+	}
+	if stillEnabled := gate.IsEnabled(); stillEnabled {
+		t.Fatal("expected not enabled after closed")
+	}
+}
+
+// TestModuleGate_RejectIfNotEnabled checks that an enabled gate accepts work
+// and that the same gate rejects it once Close has been called.
+func TestModuleGate_RejectIfNotEnabled(t *testing.T) {
+	gate := NewModuleGate(true)
+	if err := gate.RejectIfNotEnabled("test"); err != nil {
+		t.Fatal("expected no error when enabled")
+	}
+
+	gate.Close()
+	if err := gate.RejectIfNotEnabled("test"); err == nil {
+		t.Fatal("expected error when closing")
+	}
+}
+
+// TestModuleController_ShutdownInitiate checks that ShutdownInitiate moves
+// every gate, not just the first two, into the closing state.
+func TestModuleController_ShutdownInitiate(t *testing.T) {
+	c := NewModuleController(true)
+	c.ShutdownInitiate()
+
+	gates := []struct {
+		name string
+		gate *ModuleGate
+	}{
+		{"probes", c.probes},
+		{"discovery", c.discovery},
+		{"admission", c.admission},
+		{"publish", c.publish},
+	}
+	for _, tc := range gates {
+		if got := tc.gate.State(); got != ModuleStateClosing {
+			t.Fatalf("%s should be closing, got: %s", tc.name, got)
+		}
+	}
+}
+
+// TestModuleController_ShutdownComplete checks that ShutdownComplete leaves
+// every gate, not just probes, in the closed state and no longer enabled.
+func TestModuleController_ShutdownComplete(t *testing.T) {
+	c := NewModuleController(true)
+	c.ShutdownInitiate()
+	c.ShutdownComplete()
+
+	gates := []struct {
+		name string
+		gate *ModuleGate
+	}{
+		{"probes", c.probes},
+		{"discovery", c.discovery},
+		{"admission", c.admission},
+		{"publish", c.publish},
+	}
+	for _, tc := range gates {
+		if got := tc.gate.State(); got != ModuleStateClosed {
+			t.Fatalf("%s should be closed, got: %s", tc.name, got)
+		}
+		if tc.gate.IsEnabled() {
+			t.Fatalf("%s should not be enabled after shutdown", tc.name)
+		}
+	}
+}
+
+// TestModuleController_IsInflight checks that the controller reports in-flight
+// between ShutdownInitiate and ShutdownComplete, and not afterwards.
+func TestModuleController_IsInflight(t *testing.T) {
+	ctrl := NewModuleController(true)
+
+	ctrl.ShutdownInitiate()
+	if inflight := ctrl.IsInflight(); !inflight {
+		t.Fatal("expected in-flight during closing")
+	}
+
+	ctrl.ShutdownComplete()
+	if inflight := ctrl.IsInflight(); inflight {
+		t.Fatal("expected not in-flight after closed")
+	}
+}
+
+// TestModuleController_GetModuleState checks the lookup for a known module and
+// the empty result for an unknown name.
+func TestModuleController_GetModuleState(t *testing.T) {
+	ctrl := NewModuleController(true)
+
+	if got := ctrl.GetModuleState("probes"); got != ModuleStateActive {
+		t.Fatalf("expected active, got: %s", got)
+	}
+	if got := ctrl.GetModuleState("unknown"); got != "" {
+		t.Fatalf("expected empty for unknown module")
+	}
+}
+
+// TestModuleController_Status checks that a fresh controller reports the
+// probes module as active in its status snapshot.
+func TestModuleController_Status(t *testing.T) {
+	snapshot := NewModuleController(true).Status()
+
+	if got := snapshot.Probes; got != ModuleStateActive {
+		t.Fatalf("expected active, got: %s", got)
+	}
+}
+
+// TestModuleGate_ClosedAt checks that closedAt is unset on a fresh gate, is
+// recorded when Close moves the gate to closing, and is not overwritten by a
+// second Close.
+func TestModuleGate_ClosedAt(t *testing.T) {
+	g := NewModuleGate(true)
+	if g.closedAt != nil {
+		t.Fatal("expected closedAt to be unset before Close")
+	}
+
+	before := time.Now().UTC()
+	g.Close()
+
+	if g.State() != ModuleStateClosing {
+		t.Fatal("expected closing state")
+	}
+
+	// closedAt should be set when entering closing state
+	if g.closedAt == nil {
+		t.Fatal("expected closedAt to be set when closing")
+	}
+	if g.closedAt.Before(before) {
+		t.Fatalf("expected closedAt (%v) not to precede the Close call (%v)", *g.closedAt, before)
+	}
+
+	// Close is a no-op outside the active state, so the timestamp must stick.
+	first := *g.closedAt
+	g.Close()
+	if !g.closedAt.Equal(first) {
+		t.Fatalf("expected closedAt to stay %v after a second Close, got %v", first, *g.closedAt)
+	}
+}
--- a/internal/discovery/scheduler.go
+++ b/internal/discovery/scheduler.go
@@ -0,0 +1,161 @@
+package discovery
+
+import (
+	"context"
+	"log"
+	"sort"
+	"time"
+
+	"supply-intelligence/internal/integration"
+)
+
+// SchedulerTrigger defines how discovery is invoked.
+// NOTE(review): no code in this file reads these values yet; the meanings
+// below are taken from the constant names only.
+type SchedulerTrigger int
+
+const (
+	// TriggerManual is the zero value.
+	TriggerManual SchedulerTrigger = iota
+	// TriggerScheduled presumably marks a periodic run; confirm with callers.
+	TriggerScheduled
+	// TriggerNewAccount presumably marks a run caused by a newly added account; confirm with callers.
+	TriggerNewAccount
+)
+
+// SupplierAdapterRegistry holds all registered platform adapters.
+// The map is accessed without locking, so the registry is not safe for
+// concurrent Register calls alongside other use.
+type SupplierAdapterRegistry struct {
+	// adapters is keyed by the value each adapter returns from Platform().
+	adapters map[string]integration.SupplierAdapter
+}
+
+// NewSupplierAdapterRegistry returns an empty registry that is ready for
+// Register calls.
+func NewSupplierAdapterRegistry() *SupplierAdapterRegistry {
+	registry := new(SupplierAdapterRegistry)
+	registry.adapters = map[string]integration.SupplierAdapter{}
+	return registry
+}
+
+// Register stores adapter under the name reported by adapter.Platform(),
+// replacing any adapter previously registered under that name.
+func (r *SupplierAdapterRegistry) Register(adapter integration.SupplierAdapter) {
+	key := adapter.Platform()
+	r.adapters[key] = adapter
+}
+
+// Get returns the adapter registered for platform; the boolean reports whether
+// one was found.
+func (r *SupplierAdapterRegistry) Get(platform string) (integration.SupplierAdapter, bool) {
+	registered, present := r.adapters[platform]
+	return registered, present
+}
+
+// ListPlatforms returns the names of all registered platforms in ascending
+// lexical order. Go randomises map iteration order, so the names are sorted to
+// give callers such as ScanAllPlatforms a stable, reproducible order. The
+// result is a fresh, non-nil slice on every call.
+func (r *SupplierAdapterRegistry) ListPlatforms() []string {
+	platforms := make([]string, 0, len(r.adapters))
+	for p := range r.adapters {
+		platforms = append(platforms, p)
+	}
+	sort.Strings(platforms)
+	return platforms
+}
+
+// ScanResult holds the outcome of a platform scan.
+type ScanResult struct {
+	// Platform is the platform name the scan was run for.
+	Platform    string
+	// NewModels counts candidates that RecordCandidate reported as created.
+	NewModels   int
+	// NOTE(review): ScanPlatform in this file never fills RemovedModels.
+	RemovedModels []string // models that were in candidates but not in supplier list
+	// Errors collects error texts gathered during the scan.
+	Errors      []string
+}
+
+// DiscoveryScheduler orchestrates periodic and on-demand discovery scans.
+type DiscoveryScheduler struct {
+	// service records each discovered model as a candidate.
+	service   *Service
+	// registry supplies the per-platform adapters that list models.
+	registry  *SupplierAdapterRegistry
+	// now supplies the DiscoveredAt timestamp; NewDiscoveryScheduler sets it to the current UTC time.
+	now       func() time.Time
+}
+
+// NewDiscoveryScheduler wires a scheduler to the given candidate service and
+// adapter registry, using the current UTC time as its clock.
+func NewDiscoveryScheduler(service *Service, registry *SupplierAdapterRegistry) *DiscoveryScheduler {
+	utcNow := func() time.Time { return time.Now().UTC() }
+	scheduler := &DiscoveryScheduler{service: service, registry: registry}
+	scheduler.now = utcNow
+	return scheduler
+}
+
+// ScanAllPlatforms runs discovery across all registered platforms.
+//
+// A failure on one platform does not stop the scan: it is recorded as a
+// ScanResult whose Errors holds the error text, and scanning moves on to the
+// next platform. The only error returned is ctx.Err(): once ctx is cancelled
+// or past its deadline, no further platform is started and the results
+// gathered so far are returned together with that error.
+func (s *DiscoveryScheduler) ScanAllPlatforms(ctx context.Context) ([]ScanResult, error) {
+	platforms := s.registry.ListPlatforms()
+	results := make([]ScanResult, 0, len(platforms))
+
+	for _, platform := range platforms {
+		// Do not start more supplier calls once the caller has given up.
+		if err := ctx.Err(); err != nil {
+			return results, err
+		}
+
+		result, err := s.ScanPlatform(ctx, platform)
+		if err != nil {
+			// Per-platform failures are reported in the result list, not as
+			// the function's error, so the remaining platforms still run.
+			results = append(results, ScanResult{Platform: platform, Errors: []string{err.Error()}})
+			continue
+		}
+		results = append(results, *result)
+	}
+
+	return results, nil
+}
+
+// ScanPlatform runs discovery for a single platform.
+//
+// It returns (nil, ErrPlatformNotSupported) when no adapter is registered for
+// platform. Otherwise it asks the adapter for the model list using the first
+// account returned by loadAccountsForPlatform and records every model as a
+// candidate. A GetModels failure is returned as the error, alongside a result
+// whose Errors contains the prefixed message. RecordCandidate failures are only
+// appended to Errors and do not abort the scan.
+func (s *DiscoveryScheduler) ScanPlatform(ctx context.Context, platform string) (*ScanResult, error) {
+	adapter, registered := s.registry.Get(platform)
+	if !registered {
+		return nil, ErrPlatformNotSupported
+	}
+
+	outcome := &ScanResult{Platform: platform}
+
+	// In production these accounts come from the database.
+	accounts := s.loadAccountsForPlatform(ctx, platform)
+	if len(accounts) == 0 {
+		log.Printf("[discovery] no accounts registered for platform %s, skipping", platform)
+		return outcome, nil
+	}
+
+	// Only the first account is queried (in production this would fan out).
+	source := accounts[0]
+	models, err := adapter.GetModels(ctx, source)
+	if err != nil {
+		outcome.Errors = append(outcome.Errors, "GetModels: "+err.Error())
+		return outcome, err
+	}
+
+	log.Printf("[discovery] platform=%s found %d models", platform, len(models))
+
+	for _, model := range models {
+		recorded, recErr := s.service.RecordCandidate(ctx, RecordCandidateInput{
+			CandidateID:  platform + "-" + model.ModelID,
+			AccountID:    source.AccountID,
+			Platform:     platform,
+			Model:        model.ModelID,
+			Source:       "official_api",
+			DiscoveredAt: s.now(),
+		})
+		if recErr != nil {
+			outcome.Errors = append(outcome.Errors, "RecordCandidate: "+recErr.Error())
+			continue
+		}
+		if !recorded.Created {
+			// Already known: nothing to count or log.
+			continue
+		}
+		outcome.NewModels++
+		log.Printf("[discovery] new candidate: platform=%s model=%s", platform, model.ModelID)
+	}
+
+	return outcome, nil
+}
+
+// loadAccountsForPlatform returns supplier accounts for a platform.
+// In production this would query the accounts table (active accounts for the
+// platform); for now it always returns one placeholder account with ID 1, an
+// empty API key and the platform's default base URL. ctx is not used.
+func (s *DiscoveryScheduler) loadAccountsForPlatform(ctx context.Context, platform string) []integration.SupplierAccount {
+	var placeholder integration.SupplierAccount
+	placeholder.AccountID = 1
+	placeholder.Platform = platform
+	placeholder.APIKey = ""
+	placeholder.BaseURL = defaultBaseURL(platform)
+	return []integration.SupplierAccount{placeholder}
+}
+
+// defaultBaseURL returns the built-in API base URL for "openai" and
+// "anthropic", and the empty string for every other platform name.
+func defaultBaseURL(platform string) string {
+	if platform == "openai" {
+		return "https://api.openai.com"
+	}
+	if platform == "anthropic" {
+		return "https://api.anthropic.com"
+	}
+	return ""
+}
--- a/internal/discovery/service.go
+++ b/internal/discovery/service.go
@@ -0,0 +1,99 @@
+package discovery
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+var (
+	// ErrInvalidCandidateInput is returned by Service.RecordCandidate when the
+	// service is not usable or a required input field is missing.
+	ErrInvalidCandidateInput   = errors.New("invalid candidate input")
+	// ErrPlatformNotSupported is returned by DiscoveryScheduler.ScanPlatform
+	// when no adapter is registered for the requested platform.
+	ErrPlatformNotSupported   = errors.New("platform not supported in registry")
+)
+
+// CandidateRepository is the storage the discovery Service depends on.
+type CandidateRepository interface {
+	// GetDiscoveryCandidateByIDContext looks a candidate up by its candidate ID.
+	GetDiscoveryCandidateByIDContext(ctx context.Context, candidateID string) (domain.DiscoveryCandidate, bool)
+	// FindDiscoveryCandidateContext looks a candidate up by (accountID, platform, model).
+	FindDiscoveryCandidateContext(ctx context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool)
+	// UpsertDiscoveryCandidateContext stores candidate and returns the value the Service hands back to its caller.
+	UpsertDiscoveryCandidateContext(ctx context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate
+	// ListDiscoveryCandidatesContext lists candidates for the given status.
+	ListDiscoveryCandidatesContext(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate
+}
+
+// Service records and lists discovery candidates on top of a
+// CandidateRepository.
+type Service struct {
+	repo CandidateRepository
+	// now supplies the timestamp used when the input carries no DiscoveredAt.
+	now  func() time.Time
+}
+
+// RecordCandidateInput is the request accepted by Service.RecordCandidate.
+// CandidateID, Platform, Model and Source must be non-blank and AccountID must
+// be positive; ReasonCode is optional. A zero DiscoveredAt means "use the
+// service clock".
+type RecordCandidateInput struct {
+	CandidateID  string
+	AccountID    int64
+	Platform     string
+	Model        string
+	Source       string
+	ReasonCode   string
+	DiscoveredAt time.Time
+}
+
+// RecordCandidateOutput is the result of Service.RecordCandidate.
+type RecordCandidateOutput struct {
+	// Candidate is the stored candidate (new, updated, or pre-existing).
+	Candidate domain.DiscoveryCandidate `json:"candidate"`
+	// Created is true only when this call inserted a new candidate.
+	Created   bool                      `json:"created"`
+}
+
+// NewService builds a discovery Service backed by repo, with a clock that
+// returns the current time in UTC.
+func NewService(repo CandidateRepository) *Service {
+	utcNow := func() time.Time { return time.Now().UTC() }
+	return &Service{repo: repo, now: utcNow}
+}
+
+// RecordCandidate stores a discovered (account, platform, model) candidate.
+//
+// String inputs are whitespace-trimmed before use. The call resolves in this
+// order:
+//  1. A nil service/repository or a missing required field yields
+//     ErrInvalidCandidateInput.
+//  2. A candidate already stored under the same CandidateID is returned
+//     untouched (Created=false), even if the other fields differ.
+//  3. A candidate already stored under the same (AccountID, Platform, Model)
+//     gets its Source, ReasonCode and UpdatedAt replaced and its Version
+//     incremented (Created=false); its original CandidateID is kept.
+//  4. Otherwise a new candidate is inserted in pending-admission status with
+//     Version 1 (Created=true).
+//
+// The timestamp is input.DiscoveredAt in UTC, or the service clock when that
+// is zero.
+func (s *Service) RecordCandidate(ctx context.Context, input RecordCandidateInput) (RecordCandidateOutput, error) {
+	if s == nil || s.repo == nil {
+		return RecordCandidateOutput{}, ErrInvalidCandidateInput
+	}
+
+	var (
+		id       = strings.TrimSpace(input.CandidateID)
+		platform = strings.TrimSpace(input.Platform)
+		model    = strings.TrimSpace(input.Model)
+		source   = strings.TrimSpace(input.Source)
+		reason   = strings.TrimSpace(input.ReasonCode)
+	)
+	switch {
+	case id == "", input.AccountID <= 0, platform == "", model == "", source == "":
+		return RecordCandidateOutput{}, ErrInvalidCandidateInput
+	}
+
+	// Replay of a known candidate ID: hand back what is stored, unchanged.
+	if byID, found := s.repo.GetDiscoveryCandidateByIDContext(ctx, id); found {
+		return RecordCandidateOutput{Candidate: byID}, nil
+	}
+
+	stamp := input.DiscoveredAt.UTC()
+	if stamp.IsZero() {
+		stamp = s.now()
+	}
+
+	byKey, found := s.repo.FindDiscoveryCandidateContext(ctx, input.AccountID, platform, model)
+	if !found {
+		fresh := domain.DiscoveryCandidate{
+			CandidateID:  id,
+			AccountID:    input.AccountID,
+			Platform:     platform,
+			Model:        model,
+			Source:       source,
+			Status:       domain.DiscoveryCandidateStatusPendingAdmission,
+			ReasonCode:   reason,
+			DiscoveredAt: stamp,
+			UpdatedAt:    stamp,
+			Version:      1,
+		}
+		return RecordCandidateOutput{Candidate: s.repo.UpsertDiscoveryCandidateContext(ctx, fresh), Created: true}, nil
+	}
+
+	// Same business key under a different ID: refresh metadata in place.
+	byKey.Source = source
+	byKey.ReasonCode = reason
+	byKey.UpdatedAt = stamp
+	byKey.Version++
+	saved := s.repo.UpsertDiscoveryCandidateContext(ctx, byKey)
+	return RecordCandidateOutput{Candidate: saved}, nil
+}
+
+// ListCandidates returns the repository's candidates for status, or nil when
+// the service or its repository is nil.
+func (s *Service) ListCandidates(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate {
+	if usable := s != nil && s.repo != nil; !usable {
+		return nil
+	}
+	return s.repo.ListDiscoveryCandidatesContext(ctx, status)
+}
--- a/internal/discovery/service_test.go
+++ b/internal/discovery/service_test.go
@@ -0,0 +1,160 @@
+package discovery
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/repository"
+)
+
+// TestRecordCandidateCreatesPendingAdmissionCandidate checks that a first-time
+// record is created in pending-admission status at version 1, with both
+// timestamps equal to the supplied discovery time.
+func TestRecordCandidateCreatesPendingAdmissionCandidate(t *testing.T) {
+	svc := NewService(repository.NewMemoryRepository())
+	discoveredAt := time.Unix(100, 0).UTC()
+	input := RecordCandidateInput{
+		CandidateID:  "cand-1",
+		AccountID:    10,
+		Platform:     "openai",
+		Model:        "gpt-4.1-mini",
+		Source:       "manual_seed",
+		ReasonCode:   "new_model",
+		DiscoveredAt: discoveredAt,
+	}
+
+	out, err := svc.RecordCandidate(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !out.Created {
+		t.Fatalf("expected created candidate")
+	}
+
+	got := out.Candidate
+	if got.Status != domain.DiscoveryCandidateStatusPendingAdmission {
+		t.Fatalf("unexpected status: %q", got.Status)
+	}
+	if got.Version != 1 {
+		t.Fatalf("unexpected version: %d", got.Version)
+	}
+	if !(got.DiscoveredAt.Equal(discoveredAt) && got.UpdatedAt.Equal(discoveredAt)) {
+		t.Fatalf("unexpected timestamps: %+v", got)
+	}
+}
+
+// TestRecordCandidateIsIdempotentByCandidateID checks that replaying a known
+// candidate ID with different fields returns the originally stored candidate.
+func TestRecordCandidateIsIdempotentByCandidateID(t *testing.T) {
+	ctx := context.Background()
+	svc := NewService(repository.NewMemoryRepository())
+
+	original := RecordCandidateInput{
+		CandidateID: "cand-1",
+		AccountID:   10,
+		Platform:    "openai",
+		Model:       "gpt-4.1-mini",
+		Source:      "manual_seed",
+	}
+	first, err := svc.RecordCandidate(ctx, original)
+	if err != nil {
+		t.Fatalf("unexpected first error: %v", err)
+	}
+
+	replay := RecordCandidateInput{
+		CandidateID: "cand-1",
+		AccountID:   99,
+		Platform:    "other",
+		Model:       "other-model",
+		Source:      "other_source",
+	}
+	second, err := svc.RecordCandidate(ctx, replay)
+	if err != nil {
+		t.Fatalf("unexpected second error: %v", err)
+	}
+	if second.Created {
+		t.Fatalf("expected idempotent replay")
+	}
+
+	preserved := second.Candidate.AccountID == first.Candidate.AccountID &&
+		second.Candidate.Platform == first.Candidate.Platform &&
+		second.Candidate.Model == first.Candidate.Model
+	if !preserved {
+		t.Fatalf("expected original candidate to be preserved: %+v", second.Candidate)
+	}
+}
+
+// TestRecordCandidateDeduplicatesByBusinessKey checks that a second record for
+// the same (account, platform, model) under a new candidate ID updates the
+// existing candidate's metadata and version instead of creating a new one.
+func TestRecordCandidateDeduplicatesByBusinessKey(t *testing.T) {
+	ctx := context.Background()
+	svc := NewService(repository.NewMemoryRepository())
+	firstAt, secondAt := time.Unix(100, 0).UTC(), time.Unix(200, 0).UTC()
+
+	if _, err := svc.RecordCandidate(ctx, RecordCandidateInput{
+		CandidateID:  "cand-1",
+		AccountID:    10,
+		Platform:     "openai",
+		Model:        "gpt-4.1-mini",
+		Source:       "manual_seed",
+		ReasonCode:   "first",
+		DiscoveredAt: firstAt,
+	}); err != nil {
+		t.Fatalf("unexpected first error: %v", err)
+	}
+
+	out, err := svc.RecordCandidate(ctx, RecordCandidateInput{
+		CandidateID:  "cand-2",
+		AccountID:    10,
+		Platform:     "openai",
+		Model:        "gpt-4.1-mini",
+		Source:       "scan",
+		ReasonCode:   "second",
+		DiscoveredAt: secondAt,
+	})
+	if err != nil {
+		t.Fatalf("unexpected second error: %v", err)
+	}
+	if out.Created {
+		t.Fatalf("expected business-key dedupe")
+	}
+
+	got := out.Candidate
+	if got.CandidateID != "cand-1" {
+		t.Fatalf("expected original candidate id to be retained: %+v", got)
+	}
+	if !(got.Source == "scan" && got.ReasonCode == "second") {
+		t.Fatalf("expected metadata update: %+v", got)
+	}
+	if got.Version != 2 {
+		t.Fatalf("expected version bump, got %d", got.Version)
+	}
+	if !got.UpdatedAt.Equal(secondAt) {
+		t.Fatalf("expected updated timestamp to change: %+v", got)
+	}
+}
+
+// TestRecordCandidateRejectsInvalidInput checks that an empty input is
+// rejected with an error.
+func TestRecordCandidateRejectsInvalidInput(t *testing.T) {
+	svc := NewService(repository.NewMemoryRepository())
+
+	if _, err := svc.RecordCandidate(context.Background(), RecordCandidateInput{}); err == nil {
+		t.Fatalf("expected invalid input error")
+	}
+}
+
+// TestListCandidatesFiltersByStatus seeds one pending and one admitted
+// candidate and checks that listing by pending-admission returns only the
+// former.
+func TestListCandidatesFiltersByStatus(t *testing.T) {
+	ctx := context.Background()
+	repo := repository.NewMemoryRepository()
+
+	seeds := []domain.DiscoveryCandidate{
+		{
+			CandidateID:  "cand-1",
+			AccountID:    10,
+			Platform:     "openai",
+			Model:        "a",
+			Source:       "seed",
+			Status:       domain.DiscoveryCandidateStatusPendingAdmission,
+			DiscoveredAt: time.Unix(100, 0).UTC(),
+			UpdatedAt:    time.Unix(100, 0).UTC(),
+			Version:      1,
+		},
+		{
+			CandidateID:  "cand-2",
+			AccountID:    11,
+			Platform:     "openai",
+			Model:        "b",
+			Source:       "seed",
+			Status:       domain.DiscoveryCandidateStatusAdmitted,
+			DiscoveredAt: time.Unix(200, 0).UTC(),
+			UpdatedAt:    time.Unix(200, 0).UTC(),
+			Version:      1,
+		},
+	}
+	for _, seed := range seeds {
+		repo.UpsertDiscoveryCandidateContext(ctx, seed)
+	}
+
+	pending := NewService(repo).ListCandidates(ctx, domain.DiscoveryCandidateStatusPendingAdmission)
+	if !(len(pending) == 1 && pending[0].CandidateID == "cand-1") {
+		t.Fatalf("unexpected filtered items: %+v", pending)
+	}
+}
--- a/internal/domain/types.go
+++ b/internal/domain/types.go
@@ -0,0 +1,132 @@
+package domain
+
+import "time"
+
+// AccountStatus is the account status value carried in
+// AccountRoutingState.AccountStatus. The constants below list the known
+// values; their string forms are what appears in JSON.
+type AccountStatus string
+
+const (
+	AccountStatusActive        AccountStatus = "active"
+	AccountStatusSuspended     AccountStatus = "suspended"
+	AccountStatusDisabled      AccountStatus = "disabled"
+	AccountStatusPendingVerify AccountStatus = "pending_verify"
+	AccountStatusPendingEnable AccountStatus = "pending_enable"
+)
+
+// ProbeClassification is the outcome category carried in
+// ProbeResult.Classification.
+type ProbeClassification string
+
+const (
+	ProbeClassificationSuccess         ProbeClassification = "success"
+	ProbeClassificationExplicitFailure ProbeClassification = "explicit_failure"
+	ProbeClassificationInconclusive    ProbeClassification = "inconclusive"
+)
+
+// DiscoveryCandidateStatus is the status value carried in
+// DiscoveryCandidate.Status.
+type DiscoveryCandidateStatus string
+
+const (
+	DiscoveryCandidateStatusPendingAdmission DiscoveryCandidateStatus = "pending_admission"
+	DiscoveryCandidateStatusAdmitted         DiscoveryCandidateStatus = "admitted"
+	DiscoveryCandidateStatusRejected         DiscoveryCandidateStatus = "rejected"
+)
+
+// GatewaySyncStatus is the gateway synchronisation status carried on a
+// PackageChangeEvent and echoed in a PackageChangeAck.
+type GatewaySyncStatus string
+
+const (
+	GatewaySyncStatusPending GatewaySyncStatus = "pending"
+	GatewaySyncStatusApplied GatewaySyncStatus = "applied"
+	GatewaySyncStatusFailed  GatewaySyncStatus = "failed"
+)
+
+// GatewayAckResult is the result reported in an acknowledgement
+// (PackageChangeAck.Result). SyncStatus maps it to a GatewaySyncStatus.
+type GatewayAckResult string
+
+const (
+	GatewayAckResultApplied GatewayAckResult = "applied"
+	GatewayAckResultFailed  GatewayAckResult = "failed"
+)
+
+// SyncStatus maps an ack result to the matching sync status: applied maps to
+// applied, failed maps to failed, and any other value maps to pending.
+func (r GatewayAckResult) SyncStatus() GatewaySyncStatus {
+	if r == GatewayAckResultApplied {
+		return GatewaySyncStatusApplied
+	}
+	if r == GatewayAckResultFailed {
+		return GatewaySyncStatusFailed
+	}
+	return GatewaySyncStatusPending
+}
+
+// ProbeResult is one probe observation for an account: which account, how the
+// outcome was classified, a reason code, and when it was observed. It has no
+// JSON tags.
+type ProbeResult struct {
+	AccountID      int64
+	Classification ProbeClassification
+	ReasonCode     string
+	ObservedAt     time.Time
+}
+
+// AccountRoutingState is the per-account routing record: account identity and
+// platform, its status, whether routing is enabled, a risk score, the reason
+// code, the time of the last probe, and a version number.
+type AccountRoutingState struct {
+	AccountID      int64         `json:"account_id"`
+	Platform       string        `json:"platform"`
+	AccountStatus  AccountStatus `json:"account_status"`
+	RoutingEnabled bool          `json:"routing_enabled"`
+	RiskScore      int           `json:"risk_score"`
+	ReasonCode     string        `json:"reason_code"`
+	LastProbeAt    time.Time     `json:"last_probe_at"`
+	Version        int64         `json:"version"`
+}
+
+// PackageChangeEvent describes a change to a supply package together with its
+// gateway synchronisation status. Consumer, ConsumerDetail and AckedAt are
+// omitted from JSON when empty.
+type PackageChangeEvent struct {
+	EventID           string            `json:"event_id"`
+	EventType         string            `json:"event_type"`
+	PackageID         int64             `json:"package_id"`
+	Platform          string            `json:"platform"`
+	Model             string            `json:"model"`
+	OccurredAt        time.Time         `json:"occurred_at"`
+	Version           int64             `json:"version"`
+	GatewaySyncStatus GatewaySyncStatus `json:"gateway_sync_status"`
+	Consumer          string            `json:"consumer,omitempty"`
+	ConsumerDetail    string            `json:"consumer_detail,omitempty"`
+	AckedAt           *time.Time        `json:"acked_at,omitempty"`
+}
+
+// PackageChangeAck is a consumer's acknowledgement of a package change event:
+// which event, which consumer, the result, optional detail text, when it was
+// acknowledged, and the resulting sync status (JSON key "gateway_sync_status").
+type PackageChangeAck struct {
+	EventID   string            `json:"event_id"`
+	Consumer  string            `json:"consumer"`
+	Result    GatewayAckResult  `json:"result"`
+	Detail    string            `json:"detail,omitempty"`
+	AckedAt   time.Time         `json:"acked_at"`
+	SyncState GatewaySyncStatus `json:"gateway_sync_status"`
+}
+
+// GatewayAppliedSnapshot records, per consumer, the last package change event
+// that was applied: its event ID, package, platform, model, version and
+// result, plus the time the snapshot was updated.
+type GatewayAppliedSnapshot struct {
+	Consumer           string    `json:"consumer"`
+	LastEventID        string    `json:"last_event_id"`
+	LastPackageID      int64     `json:"last_package_id"`
+	LastPlatform       string    `json:"last_platform"`
+	LastModel          string    `json:"last_model"`
+	LastAppliedVersion int64     `json:"last_applied_version"`
+	LastResult         string    `json:"last_result"`
+	UpdatedAt          time.Time `json:"updated_at"`
+}
+
+// DiscoveryCandidate is a model discovered for an account on a platform,
+// together with where it came from, its admission status, timestamps and a
+// version number. ReasonCode is omitted from JSON when empty.
+type DiscoveryCandidate struct {
+	CandidateID  string                   `json:"candidate_id"`
+	AccountID    int64                    `json:"account_id"`
+	Platform     string                   `json:"platform"`
+	Model        string                   `json:"model"`
+	Source       string                   `json:"source"`
+	Status       DiscoveryCandidateStatus `json:"status"`
+	ReasonCode   string                   `json:"reason_code,omitempty"`
+	DiscoveredAt time.Time                `json:"discovered_at"`
+	UpdatedAt    time.Time                `json:"updated_at"`
+	Version      int64                    `json:"version"`
+}
+
+// SupplyPackage represents a supply package in the system, identified by
+// PackageID and described by its platform and model. Status is a free-form
+// string rather than a typed constant.
+type SupplyPackage struct {
+	PackageID int64     `json:"package_id"`
+	Platform  string    `json:"platform"`
+	Model     string    `json:"model"`
+	Status    string    `json:"status"` // draft, active, deprecated
+	Source    string    `json:"source"`
+	CreatedAt time.Time `json:"created_at"`
+	UpdatedAt time.Time `json:"updated_at"`
+	Version   int64     `json:"version"`
+}
--- a/internal/gatewayconsumer/service.go
+++ b/internal/gatewayconsumer/service.go
@@ -0,0 +1,110 @@
+package gatewayconsumer
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// ErrInvalidConsumeInput is returned by Service.ConsumeOnce when the service,
+// its repository or its applier is nil, or when the applier reports a result
+// other than applied or failed.
+var ErrInvalidConsumeInput = errors.New("invalid consume input")
+
+// PackageChangeRepository is the storage the gateway consumer Service depends
+// on.
+type PackageChangeRepository interface {
+	// ListPackageEventsAfter returns events for the given cursor together with
+	// the cursor value that ConsumeOnce reports back as NextCursor.
+	ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string)
+	// AckPackageEvent records the consumer's result for an event and returns
+	// the updated event.
+	AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt time.Time) (domain.PackageChangeEvent, error)
+	// UpsertGatewayAppliedSnapshot stores the consumer's last-applied snapshot.
+	UpsertGatewayAppliedSnapshot(snapshot domain.GatewayAppliedSnapshot) domain.GatewayAppliedSnapshot
+}
+
+// Service consumes pending package change events: it applies each one through
+// applier and acknowledges the outcome in the repository.
+type Service struct {
+	repo     PackageChangeRepository
+	// now supplies the acknowledgement timestamp.
+	now      func() time.Time
+	// applier applies one event and reports the result plus a detail message.
+	applier  func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string)
+	// consumer is the default consumer name used when the input names none.
+	consumer string
+}
+
+// ConsumeOnceInput is the request for Service.ConsumeOnce. Both fields are
+// whitespace-trimmed; a blank Consumer falls back to the service default.
+type ConsumeOnceInput struct {
+	Consumer string
+	Cursor   string
+}
+
+// ConsumeOnceOutput is the result of Service.ConsumeOnce: the consumer name
+// that was used, the cursor returned by the repository, and one item per event
+// that was processed in this call.
+type ConsumeOnceOutput struct {
+	Consumer   string                      `json:"consumer"`
+	NextCursor string                      `json:"next_cursor"`
+	Items      []ConsumedPackageChangeItem `json:"items"`
+}
+
+// ConsumedPackageChangeItem reports how one event was handled: the event and
+// package IDs and sync status from the acknowledged event, plus the applier's
+// result and detail text (omitted from JSON when empty).
+type ConsumedPackageChangeItem struct {
+	EventID           string                   `json:"event_id"`
+	PackageID         int64                    `json:"package_id"`
+	GatewaySyncStatus domain.GatewaySyncStatus `json:"gateway_sync_status"`
+	Result            domain.GatewayAckResult  `json:"result"`
+	Detail            string                   `json:"detail,omitempty"`
+}
+
+// NewService builds a consumer Service backed by repo with the default
+// consumer name "gateway", a UTC clock, and a built-in applier. The built-in
+// applier reports failure for any event whose model name contains "fail"
+// (case-insensitive) and reports applied for every other event; replace it
+// with SetApplier.
+func NewService(repo PackageChangeRepository) *Service {
+	defaultApplier := func(_ context.Context, event domain.PackageChangeEvent) (domain.GatewayAckResult, string) {
+		lowered := strings.ToLower(event.Model)
+		if !strings.Contains(lowered, "fail") {
+			return domain.GatewayAckResultApplied, "applied to gateway snapshot"
+		}
+		return domain.GatewayAckResultFailed, "simulated apply failure"
+	}
+
+	svc := &Service{repo: repo, consumer: "gateway", applier: defaultApplier}
+	svc.now = func() time.Time { return time.Now().UTC() }
+	return svc
+}
+
+// SetApplier replaces the function used to apply each event. Passing nil makes
+// subsequent ConsumeOnce calls fail with ErrInvalidConsumeInput. The receiver
+// must be non-nil.
+func (s *Service) SetApplier(applier func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string)) {
+	s.applier = applier
+}
+
+// ConsumeOnce processes one batch of package change events.
+//
+// It lists events after the (trimmed) cursor, skips every event that is not
+// pending, runs the applier on each pending one, and acknowledges the outcome
+// in the repository. For an applied result the consumer's snapshot is upserted
+// as well. The output carries one item per acknowledged event and the cursor
+// returned by the repository.
+//
+// Errors: ErrInvalidConsumeInput when the service, repository or applier is
+// nil, or when the applier returns a result other than applied/failed; any
+// error from AckPackageEvent is returned as-is. On error the output is empty,
+// but acknowledgements already issued earlier in the same call are not undone.
+func (s *Service) ConsumeOnce(ctx context.Context, input ConsumeOnceInput) (ConsumeOnceOutput, error) {
+	if s == nil || s.repo == nil || s.applier == nil {
+		return ConsumeOnceOutput{}, ErrInvalidConsumeInput
+	}
+	// A blank consumer name falls back to the service default.
+	consumer := strings.TrimSpace(input.Consumer)
+	if consumer == "" {
+		consumer = s.consumer
+	}
+	items, nextCursor := s.repo.ListPackageEventsAfter(strings.TrimSpace(input.Cursor))
+	result := ConsumeOnceOutput{Consumer: consumer, NextCursor: nextCursor, Items: make([]ConsumedPackageChangeItem, 0, len(items))}
+	for _, event := range items {
+		// Only pending events are applied; already-acked ones are skipped.
+		if event.GatewaySyncStatus != domain.GatewaySyncStatusPending {
+			continue
+		}
+		ackResult, detail := s.applier(ctx, event)
+		if ackResult != domain.GatewayAckResultApplied && ackResult != domain.GatewayAckResultFailed {
+			return ConsumeOnceOutput{}, ErrInvalidConsumeInput
+		}
+		ackedAt := s.now()
+		// NOTE(review): the snapshot is written before the ack, so if
+		// AckPackageEvent fails below the snapshot already reflects this event
+		// while the event itself stays pending — confirm this is acceptable.
+		if ackResult == domain.GatewayAckResultApplied {
+			s.repo.UpsertGatewayAppliedSnapshot(domain.GatewayAppliedSnapshot{
+				Consumer:           consumer,
+				LastEventID:        event.EventID,
+				LastPackageID:      event.PackageID,
+				LastPlatform:       event.Platform,
+				LastModel:          event.Model,
+				LastAppliedVersion: event.Version,
+				LastResult:         string(ackResult),
+				UpdatedAt:          ackedAt,
+			})
+		}
+		updated, err := s.repo.AckPackageEvent(event.EventID, consumer, ackResult, detail, ackedAt)
+		if err != nil {
+			return ConsumeOnceOutput{}, err
+		}
+		// Report IDs and status from the stored event, result/detail from the applier.
+		result.Items = append(result.Items, ConsumedPackageChangeItem{
+			EventID:           updated.EventID,
+			PackageID:         updated.PackageID,
+			GatewaySyncStatus: updated.GatewaySyncStatus,
+			Result:            ackResult,
+			Detail:            detail,
+		})
+	}
+	return result, nil
+}
--- a/internal/gatewayconsumer/service_test.go
+++ b/internal/gatewayconsumer/service_test.go
@@ -0,0 +1,89 @@
+package gatewayconsumer
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/repository"
+)
+
+// TestServiceConsumeOnceAppliedAndFailed seeds two pending events and expects
+// one pass with the service's default applier to ack the first as applied and
+// the second as failed, with the applied snapshot pointing at the first event.
+func TestServiceConsumeOnceAppliedAndFailed(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	seeds := []domain.PackageChangeEvent{
+		{
+			EventID:           "evt-applied",
+			EventType:         "supply_package_published",
+			PackageID:         101,
+			Platform:          "openai",
+			Model:             "gpt-4.1-mini",
+			Version:           3,
+			OccurredAt:        time.Unix(10, 0).UTC(),
+			GatewaySyncStatus: domain.GatewaySyncStatusPending,
+		},
+		{
+			EventID:           "evt-failed",
+			EventType:         "supply_package_published",
+			PackageID:         102,
+			Platform:          "openai",
+			Model:             "gpt-fail-model",
+			Version:           4,
+			OccurredAt:        time.Unix(20, 0).UTC(),
+			GatewaySyncStatus: domain.GatewaySyncStatusPending,
+		},
+	}
+	for _, seed := range seeds {
+		store.AppendPackageEvent(seed)
+	}
+
+	svc := NewService(store)
+	// Pin the clock so ack timestamps are deterministic.
+	svc.now = func() time.Time { return time.Unix(30, 0).UTC() }
+
+	got, err := svc.ConsumeOnce(context.Background(), ConsumeOnceInput{Consumer: "gateway"})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if n := len(got.Items); n != 2 {
+		t.Fatalf("unexpected item count: %d", n)
+	}
+	first, second := got.Items[0], got.Items[1]
+	if first.GatewaySyncStatus != domain.GatewaySyncStatusApplied {
+		t.Fatalf("unexpected first status: %+v", first)
+	}
+	if second.GatewaySyncStatus != domain.GatewaySyncStatusFailed {
+		t.Fatalf("unexpected second status: %+v", second)
+	}
+
+	// The repository itself must reflect the same outcome.
+	stored := store.ListPackageEvents()
+	if stored[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
+		t.Fatalf("expected applied event, got %+v", stored[0])
+	}
+	if stored[1].GatewaySyncStatus != domain.GatewaySyncStatusFailed {
+		t.Fatalf("expected failed event, got %+v", stored[1])
+	}
+	snap, found := store.GetGatewayAppliedSnapshot("gateway")
+	if !found {
+		t.Fatal("expected applied snapshot")
+	}
+	if snap.LastEventID != "evt-applied" || snap.LastPackageID != 101 {
+		t.Fatalf("unexpected snapshot: %+v", snap)
+	}
+}
+
+// TestServiceConsumeOnceRejectsInvalidApplierResult installs an applier that
+// reports an unrecognised result and expects ConsumeOnce to fail with
+// ErrInvalidConsumeInput.
+func TestServiceConsumeOnceRejectsInvalidApplierResult(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	pending := domain.PackageChangeEvent{
+		EventID:           "evt-1",
+		EventType:         "supply_package_published",
+		PackageID:         101,
+		Platform:          "openai",
+		Model:             "gpt-4.1-mini",
+		Version:           3,
+		OccurredAt:        time.Unix(10, 0).UTC(),
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	}
+	store.AppendPackageEvent(pending)
+
+	svc := NewService(store)
+	bogusApplier := func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string) {
+		return domain.GatewayAckResult("unknown"), "bad"
+	}
+	svc.SetApplier(bogusApplier)
+
+	if _, err := svc.ConsumeOnce(context.Background(), ConsumeOnceInput{}); err != ErrInvalidConsumeInput {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
--- a/internal/httpapi/parse.go
+++ b/internal/httpapi/parse.go
@@ -0,0 +1,12 @@
+package httpapi
+
+import "strconv"
+
+// parseInt64 parses input as a base-10 signed 64-bit integer.
+//
+// On success the value is returned and, when target is non-nil, also stored
+// through target. On failure it returns 0 together with the strconv error and
+// leaves target untouched.
+func parseInt64(input string, target *int64) (int64, error) {
+	value, err := strconv.ParseInt(input, 10, 64)
+	if err != nil {
+		return 0, err
+	}
+	// Guard the store: a nil target used to panic here even though the value
+	// is also available as the first return value.
+	if target != nil {
+		*target = value
+	}
+	return value, nil
+}
--- a/internal/httpapi/server.go
+++ b/internal/httpapi/server.go
@@ -0,0 +1,415 @@
+package httpapi
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"io"
+	"net/http"
+	"strings"
+	"time"
+
+	"supply-intelligence/internal/admission"
+	"supply-intelligence/internal/discovery"
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/gatewayconsumer"
+	"supply-intelligence/internal/probe"
+	"supply-intelligence/internal/publish"
+	"supply-intelligence/internal/repository"
+)
+
+// Server is the HTTP front end of supply-intelligence. It holds the in-memory
+// repository and the services its handlers delegate to. Handlers that need
+// one of the services check it for nil and answer 500 when it is missing; the
+// repository is used without such a check.
+type Server struct {
+	repo                   *repository.MemoryRepository // read directly by the routing-state, package-change list and ack handlers
+	probeService           *probe.Service               // backs the probe/evaluate endpoint
+	publishService         *publish.Service             // backs the publish/package-event endpoint
+	gatewayConsumerService *gatewayconsumer.Service     // backs the gateway/consume-once endpoint
+	discoveryService       *discovery.Service           // backs the discovery/candidates endpoints
+	admissionService       *admission.Service           // backs the admission endpoints
+}
+
+// packageChangesResponse is the JSON body written by handleListPackageChanges:
+// the listed events plus the cursor value returned by the repository.
+type packageChangesResponse struct {
+	Items      []domain.PackageChangeEvent `json:"items"`
+	NextCursor string                      `json:"next_cursor"`
+}
+
+// discoveryCandidatesResponse is the JSON body written by
+// handleListDiscoveryCandidates.
+type discoveryCandidatesResponse struct {
+	Items []domain.DiscoveryCandidate `json:"items"`
+}
+
+// NewServer wires a Server from the given repository and services. The
+// arguments are stored as passed, without nil checks.
+func NewServer(repo *repository.MemoryRepository, probeService *probe.Service, publishService *publish.Service, gatewayConsumerService *gatewayconsumer.Service, discoveryService *discovery.Service, admissionService *admission.Service) *Server {
+	srv := &Server{
+		repo:                   repo,
+		probeService:           probeService,
+		publishService:         publishService,
+		gatewayConsumerService: gatewayConsumerService,
+		discoveryService:       discoveryService,
+		admissionService:       admissionService,
+	}
+	return srv
+}
+
+// Routes builds a fresh ServeMux with every endpoint of the server registered
+// and returns it as an http.Handler. Method checks are done inside the
+// individual handlers, not by the mux.
+func (s *Server) Routes() http.Handler {
+	routes := []struct {
+		pattern string
+		handler func(http.ResponseWriter, *http.Request)
+	}{
+		{"/healthz", s.handleHealth},
+		{"/internal/supply-intelligence/accounts/", s.handleGetRoutingState},
+		{"/internal/supply-intelligence/probe/evaluate", s.handleEvaluateProbe},
+		{"/internal/supply-intelligence/publish/package-event", s.handlePublishPackageEvent},
+		{"/internal/supply-intelligence/discovery/candidates", s.handleDiscoveryCandidates},
+		{"/internal/supply-intelligence/gateway/package-changes", s.handleListPackageChanges},
+		{"/internal/supply-intelligence/gateway/package-changes/", s.handleAckPackageChange},
+		{"/internal/supply-intelligence/gateway/consume-once", s.handleConsumeOnce},
+		{"/internal/supply-intelligence/admission/run", s.handleAdmissionRun},
+		{"/internal/supply-intelligence/admission/candidates", s.handleAdmissionCandidates},
+	}
+
+	mux := http.NewServeMux()
+	for _, route := range routes {
+		mux.HandleFunc(route.pattern, route.handler)
+	}
+	return mux
+}
+
+// handleHealth answers every request, whatever its method, with 200 and the
+// body {"status":"ok"}.
+func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
+	health := map[string]string{"status": "ok"}
+	writeJSON(w, http.StatusOK, health)
+}
+
+// handleGetRoutingState serves GET .../accounts/<account_id>/routing-state.
+//
+// Responses: 405 for non-GET methods, 404 when the path does not end in
+// /routing-state or no state is stored for the account, 400 when the account
+// id is not a base-10 integer, and 200 with the stored routing state otherwise.
+func (s *Server) handleGetRoutingState(w http.ResponseWriter, r *http.Request) {
+	const (
+		routePrefix = "/internal/supply-intelligence/accounts/"
+		routeSuffix = "/routing-state"
+	)
+
+	if r.Method != http.MethodGet {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+
+	rest := strings.TrimPrefix(r.URL.Path, routePrefix)
+	if !strings.HasSuffix(rest, routeSuffix) {
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
+		return
+	}
+
+	// Whatever sits between the prefix and the suffix is the account id.
+	var accountID int64
+	_, err := parseInt64(strings.TrimSuffix(rest, routeSuffix), &accountID)
+	if err != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_account_id"})
+		return
+	}
+
+	if state, found := s.repo.GetRoutingState(accountID); found {
+		writeJSON(w, http.StatusOK, state)
+		return
+	}
+	writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
+}
+
+// handleEvaluateProbe serves POST .../probe/evaluate: it validates the JSON
+// payload, forwards it to the probe service and writes the evaluation result.
+//
+// Responses: 405 for non-POST methods, 500 when no probe service is
+// configured, 400 for malformed JSON, a non-positive account_id, an empty
+// platform or current_status, or any error returned by the probe service (the
+// error text is echoed in the body), and 200 with the result otherwise.
+func (s *Server) handleEvaluateProbe(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+	if s.probeService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "probe_service_unavailable"})
+		return
+	}
+
+	var payload struct {
+		AccountID      int64  `json:"account_id"`
+		Platform       string `json:"platform"`
+		CurrentStatus  string `json:"current_status"`
+		StatusCode     int    `json:"status_code"`
+		TransportError string `json:"transport_error"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+		return
+	}
+	if payload.AccountID <= 0 {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_account_id"})
+		return
+	}
+	if payload.Platform == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_platform"})
+		return
+	}
+	if payload.CurrentStatus == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_current_status"})
+		return
+	}
+
+	// A non-empty transport_error is turned back into an error value for the
+	// probe service; an empty one means the probe got an HTTP response.
+	var transportErr error
+	if payload.TransportError != "" {
+		transportErr = errors.New(payload.TransportError)
+	}
+
+	// Evaluate under the request context, like every other handler in this
+	// file, so that cancellation of the request reaches the probe service.
+	// This previously used context.Background(). The explicit type keeps the
+	// file's context import in use.
+	var ctx context.Context = r.Context()
+	result, err := s.probeService.EvaluateHTTPResult(ctx, probe.EvaluateInput{
+		AccountID:      payload.AccountID,
+		Platform:       payload.Platform,
+		CurrentStatus:  domainAccountStatus(payload.CurrentStatus),
+		StatusCode:     payload.StatusCode,
+		TransportError: transportErr,
+	})
+	if err != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
+		return
+	}
+	writeJSON(w, http.StatusOK, result)
+}
+
+// handlePublishPackageEvent serves POST .../publish/package-event and records
+// a package-published event through the publish service.
+//
+// occurred_at is optional; when present it must be RFC 3339, and when absent
+// the zero time is passed on to the service.
+//
+// Responses: 405 for non-POST methods, 500 when no publish service is
+// configured or the service fails unexpectedly, 400 for malformed JSON, an
+// unparsable occurred_at, or ErrInvalidPublishInput, and 200 with the
+// recorded event otherwise.
+func (s *Server) handlePublishPackageEvent(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+	if s.publishService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "publish_service_unavailable"})
+		return
+	}
+
+	var body struct {
+		EventID    string `json:"event_id"`
+		PackageID  int64  `json:"package_id"`
+		Platform   string `json:"platform"`
+		Model      string `json:"model"`
+		Version    int64  `json:"version"`
+		OccurredAt string `json:"occurred_at"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+		return
+	}
+
+	var when time.Time
+	if body.OccurredAt != "" {
+		var parseErr error
+		when, parseErr = time.Parse(time.RFC3339, body.OccurredAt)
+		if parseErr != nil {
+			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_occurred_at"})
+			return
+		}
+	}
+
+	input := publish.RecordPackagePublishedInput{
+		EventID:    body.EventID,
+		PackageID:  body.PackageID,
+		Platform:   body.Platform,
+		Model:      body.Model,
+		Version:    body.Version,
+		OccurredAt: when,
+	}
+	recorded, err := s.publishService.RecordPackagePublished(r.Context(), input)
+	switch {
+	case err == nil:
+		writeJSON(w, http.StatusOK, recorded)
+	case errors.Is(err, publish.ErrInvalidPublishInput):
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_publish_input"})
+	default:
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
+	}
+}
+
+// handleDiscoveryCandidates dispatches .../discovery/candidates by method:
+// POST creates a candidate, GET lists candidates, anything else gets 405.
+func (s *Server) handleDiscoveryCandidates(w http.ResponseWriter, r *http.Request) {
+	if r.Method == http.MethodPost {
+		s.handleCreateDiscoveryCandidate(w, r)
+		return
+	}
+	if r.Method == http.MethodGet {
+		s.handleListDiscoveryCandidates(w, r)
+		return
+	}
+	writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+}
+
+// handleCreateDiscoveryCandidate records a discovery candidate from the JSON
+// request body through the discovery service.
+//
+// A discovered_at that is blank after trimming is passed on as the zero time;
+// otherwise the value as sent (untrimmed) must parse as RFC 3339.
+//
+// Responses: 500 when no discovery service is configured or the service fails
+// unexpectedly, 400 for malformed JSON, an unparsable discovered_at, or
+// ErrInvalidCandidateInput, and 200 with the service output otherwise.
+func (s *Server) handleCreateDiscoveryCandidate(w http.ResponseWriter, r *http.Request) {
+	if s.discoveryService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "discovery_service_unavailable"})
+		return
+	}
+
+	var body struct {
+		CandidateID  string `json:"candidate_id"`
+		AccountID    int64  `json:"account_id"`
+		Platform     string `json:"platform"`
+		Model        string `json:"model"`
+		Source       string `json:"source"`
+		ReasonCode   string `json:"reason_code"`
+		DiscoveredAt string `json:"discovered_at"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+		return
+	}
+
+	var when time.Time
+	if strings.TrimSpace(body.DiscoveredAt) != "" {
+		var parseErr error
+		when, parseErr = time.Parse(time.RFC3339, body.DiscoveredAt)
+		if parseErr != nil {
+			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_discovered_at"})
+			return
+		}
+	}
+
+	input := discovery.RecordCandidateInput{
+		CandidateID:  body.CandidateID,
+		AccountID:    body.AccountID,
+		Platform:     body.Platform,
+		Model:        body.Model,
+		Source:       body.Source,
+		ReasonCode:   body.ReasonCode,
+		DiscoveredAt: when,
+	}
+	recorded, err := s.discoveryService.RecordCandidate(r.Context(), input)
+	switch {
+	case err == nil:
+		writeJSON(w, http.StatusOK, recorded)
+	case errors.Is(err, discovery.ErrInvalidCandidateInput):
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_candidate_input"})
+	default:
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
+	}
+}
+
+// handleListDiscoveryCandidates lists discovery candidates, filtered by the
+// optional "status" query parameter.
+//
+// Responses: 500 when no discovery service is configured, 400 when status is
+// set to an unrecognised value, and 200 with the matching items otherwise.
+func (s *Server) handleListDiscoveryCandidates(w http.ResponseWriter, r *http.Request) {
+	if s.discoveryService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "discovery_service_unavailable"})
+		return
+	}
+
+	rawStatus := strings.TrimSpace(r.URL.Query().Get("status"))
+	filter, valid := parseDiscoveryCandidateStatus(rawStatus)
+	if !valid {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_status"})
+		return
+	}
+
+	items := s.discoveryService.ListCandidates(r.Context(), filter)
+	writeJSON(w, http.StatusOK, discoveryCandidatesResponse{Items: items})
+}
+
+// parseDiscoveryCandidateStatus converts a raw query value into a candidate
+// status. An empty string is accepted and yields the empty status; any value
+// other than pending-admission, admitted or rejected is reported as invalid
+// (second return value false).
+func parseDiscoveryCandidateStatus(raw string) (domain.DiscoveryCandidateStatus, bool) {
+	if raw == "" {
+		return "", true
+	}
+
+	candidate := domain.DiscoveryCandidateStatus(raw)
+	known := candidate == domain.DiscoveryCandidateStatusPendingAdmission ||
+		candidate == domain.DiscoveryCandidateStatusAdmitted ||
+		candidate == domain.DiscoveryCandidateStatusRejected
+	if !known {
+		return "", false
+	}
+	return candidate, true
+}
+
+// handleListPackageChanges serves GET .../gateway/package-changes. It lists
+// the package events the repository returns after the trimmed "cursor" query
+// value and writes them with the next cursor; non-GET methods get 405.
+func (s *Server) handleListPackageChanges(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+
+	cursor := strings.TrimSpace(r.URL.Query().Get("cursor"))
+	events, next := s.repo.ListPackageEventsAfter(cursor)
+	resp := packageChangesResponse{Items: events, NextCursor: next}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+// handleAckPackageChange serves POST .../gateway/package-changes/<event_id>/ack
+// and acks the event in the repository, stamped with the current UTC time.
+//
+// A blank consumer in the payload defaults to "gateway".
+//
+// Responses: 405 for non-POST methods, 404 when the path does not end in /ack
+// or the repository reports ErrEventNotFound, 400 for malformed JSON or a
+// result the repository does not recognise, 500 for any other repository
+// error, and 204 with no body on success.
+func (s *Server) handleAckPackageChange(w http.ResponseWriter, r *http.Request) {
+	const (
+		routePrefix = "/internal/supply-intelligence/gateway/package-changes/"
+		ackSuffix   = "/ack"
+	)
+
+	if r.Method != http.MethodPost {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+
+	rest := strings.TrimPrefix(r.URL.Path, routePrefix)
+	if !strings.HasSuffix(rest, ackSuffix) {
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
+		return
+	}
+	eventID := strings.TrimSuffix(rest, ackSuffix)
+
+	var body struct {
+		Consumer string `json:"consumer"`
+		Result   string `json:"result"`
+		Detail   string `json:"detail"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+		return
+	}
+
+	result := domain.GatewayAckResult(body.Result)
+	if !repository.IsGatewayAckResult(result) {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_result"})
+		return
+	}
+
+	consumerName := strings.TrimSpace(body.Consumer)
+	if consumerName == "" {
+		consumerName = "gateway"
+	}
+
+	_, err := s.repo.AckPackageEvent(eventID, consumerName, result, body.Detail, time.Now().UTC())
+	switch {
+	case err == nil:
+		w.WriteHeader(http.StatusNoContent)
+	case errors.Is(err, repository.ErrEventNotFound):
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
+	default:
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
+	}
+}
+
+// handleConsumeOnce serves POST .../gateway/consume-once and runs one consume
+// pass on the gateway consumer service.
+//
+// The JSON body is optional: an empty body leaves consumer and cursor blank.
+//
+// Responses: 405 for non-POST methods, 500 when no gateway consumer service
+// is configured or the pass fails, 400 for a body that is present but not
+// valid JSON, and 200 with the pass output otherwise.
+func (s *Server) handleConsumeOnce(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+	if s.gatewayConsumerService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "gateway_consumer_unavailable"})
+		return
+	}
+	var payload struct {
+		Consumer string `json:"consumer"`
+		Cursor   string `json:"cursor"`
+	}
+	if r.Body != nil {
+		// The decoder reports an empty body as io.EOF, which is tolerated.
+		// Match the sentinel with errors.Is instead of comparing error text.
+		if err := json.NewDecoder(r.Body).Decode(&payload); err != nil && !errors.Is(err, io.EOF) {
+			writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+			return
+		}
+	}
+	out, err := s.gatewayConsumerService.ConsumeOnce(r.Context(), gatewayconsumer.ConsumeOnceInput{Consumer: payload.Consumer, Cursor: payload.Cursor})
+	if err != nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "consume_failed"})
+		return
+	}
+	writeJSON(w, http.StatusOK, out)
+}
+
+// writeJSON sets the Content-Type header to application/json, writes the
+// given status code, and streams body to w as JSON.
+func writeJSON(w http.ResponseWriter, status int, body any) {
+	headers := w.Header()
+	headers.Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+
+	// The status line is already written at this point, so an encode error
+	// cannot be turned into a different response; it is deliberately dropped.
+	encoder := json.NewEncoder(w)
+	_ = encoder.Encode(body)
+}
+
+// handleAdmissionRun serves POST .../admission/run and runs admission for the
+// candidate named by candidate_id in the JSON body.
+//
+// Responses: 405 for non-POST methods, 500 when no admission service is
+// configured or the run fails unexpectedly, 400 for malformed JSON or a blank
+// candidate_id, 404 for ErrCandidateNotFound, 409 for ErrCandidateNotRunnable,
+// and 200 with the run result otherwise.
+func (s *Server) handleAdmissionRun(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+	if s.admissionService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_service_unavailable"})
+		return
+	}
+
+	var body struct {
+		CandidateID string `json:"candidate_id"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
+		return
+	}
+	// Blankness is checked on the trimmed value, but the id is passed to the
+	// service exactly as received.
+	if strings.TrimSpace(body.CandidateID) == "" {
+		writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_candidate_id"})
+		return
+	}
+
+	outcome, err := s.admissionService.RunAdmission(r.Context(), body.CandidateID)
+	switch {
+	case err == nil:
+		writeJSON(w, http.StatusOK, outcome)
+	case errors.Is(err, admission.ErrCandidateNotFound):
+		writeJSON(w, http.StatusNotFound, map[string]string{"error": "candidate_not_found"})
+	case errors.Is(err, admission.ErrCandidateNotRunnable):
+		writeJSON(w, http.StatusConflict, map[string]string{"error": "candidate_not_runnable"})
+	default:
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_run_failed"})
+	}
+}
+
+// handleAdmissionCandidates serves GET .../admission/candidates and writes the
+// candidates the admission service reports as runnable under an "items" key.
+// Non-GET methods get 405; a missing admission service gets 500.
+func (s *Server) handleAdmissionCandidates(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
+		return
+	}
+	if s.admissionService == nil {
+		writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_service_unavailable"})
+		return
+	}
+
+	runnable := s.admissionService.GetRunnableCandidates(r.Context())
+	resp := map[string]any{"items": runnable}
+	writeJSON(w, http.StatusOK, resp)
+}
+
+// domainAccountStatus converts a raw status string into domain.AccountStatus.
+// It is a plain type conversion: the value is not validated here.
+func domainAccountStatus(raw string) domain.AccountStatus {
+	return domain.AccountStatus(raw)
+}
--- a/internal/httpapi/server_integration_test.go
+++ b/internal/httpapi/server_integration_test.go
@@ -0,0 +1,149 @@
+package httpapi_test
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"supply-intelligence/internal/app"
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/probe"
+)
+
+// TestApplicationServerRoutes drives the fully wired application: a probe
+// evaluation with status code 401 for account 7 is expected to come back with
+// account status "suspended", after which the routing state of that account
+// must be readable over GET.
+func TestApplicationServerRoutes(t *testing.T) {
+	application := app.New()
+
+	probeBody := bytes.NewBufferString(`{"account_id":7,"platform":"openai","current_status":"active","status_code":401}`)
+	probeReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/probe/evaluate", probeBody)
+	probeRec := httptest.NewRecorder()
+	application.Server.Routes().ServeHTTP(probeRec, probeReq)
+
+	if probeRec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", probeRec.Code, probeRec.Body.String())
+	}
+
+	var evaluated probe.EvaluateOutput
+	if err := json.NewDecoder(probeRec.Body).Decode(&evaluated); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	state := evaluated.RoutingState
+	if state.AccountID != 7 || state.AccountStatus != "suspended" {
+		t.Fatalf("unexpected state: %+v", state)
+	}
+
+	stateReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/accounts/7/routing-state", nil)
+	stateRec := httptest.NewRecorder()
+	application.Server.Routes().ServeHTTP(stateRec, stateReq)
+	if stateRec.Code != http.StatusOK {
+		t.Fatalf("unexpected get status: %d body=%s", stateRec.Code, stateRec.Body.String())
+	}
+}
+
+// TestPublishConsumeOnceListAppliedIntegration publishes one event, runs a
+// consume-once pass, and expects the package-changes listing to show that
+// event as applied with next cursor "1".
+func TestPublishConsumeOnceListAppliedIntegration(t *testing.T) {
+	application := app.New()
+	// serve runs one request against a freshly built route table.
+	serve := func(req *http.Request) *httptest.ResponseRecorder {
+		rec := httptest.NewRecorder()
+		application.Server.Routes().ServeHTTP(rec, req)
+		return rec
+	}
+
+	published := serve(httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", bytes.NewBufferString(`{"event_id":"evt-integration-1","package_id":501,"platform":"openai","model":"gpt-4.1-mini","version":9,"occurred_at":"2026-05-06T20:30:00Z"}`)))
+	if published.Code != http.StatusOK {
+		t.Fatalf("unexpected publish status: %d body=%s", published.Code, published.Body.String())
+	}
+
+	consumed := serve(httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", bytes.NewBufferString(`{"consumer":"gateway"}`)))
+	if consumed.Code != http.StatusOK {
+		t.Fatalf("unexpected consume status: %d body=%s", consumed.Code, consumed.Body.String())
+	}
+
+	listed := serve(httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil))
+	if listed.Code != http.StatusOK {
+		t.Fatalf("unexpected list status: %d body=%s", listed.Code, listed.Body.String())
+	}
+	var page struct {
+		Items      []domain.PackageChangeEvent `json:"items"`
+		NextCursor string                      `json:"next_cursor"`
+	}
+	if err := json.NewDecoder(listed.Body).Decode(&page); err != nil {
+		t.Fatalf("decode list error: %v", err)
+	}
+	if len(page.Items) != 1 || page.Items[0].EventID != "evt-integration-1" {
+		t.Fatalf("unexpected list items: %+v", page.Items)
+	}
+	if page.NextCursor != "1" {
+		t.Fatalf("unexpected next cursor: %+v", page)
+	}
+	if only := page.Items[0]; only.GatewaySyncStatus != domain.GatewaySyncStatusApplied {
+		t.Fatalf("unexpected sync status: %+v", only)
+	}
+}
+
+// TestPublishConsumeOnceListFailedIntegration publishes an event for model
+// "gpt-fail-model", runs a consume-once pass, and expects the package-changes
+// listing to show that event as failed with next cursor "1".
+func TestPublishConsumeOnceListFailedIntegration(t *testing.T) {
+	application := app.New()
+	// serve runs one request against a freshly built route table.
+	serve := func(req *http.Request) *httptest.ResponseRecorder {
+		rec := httptest.NewRecorder()
+		application.Server.Routes().ServeHTTP(rec, req)
+		return rec
+	}
+
+	published := serve(httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", bytes.NewBufferString(`{"event_id":"evt-integration-failed","package_id":502,"platform":"openai","model":"gpt-fail-model","version":10,"occurred_at":"2026-05-06T20:31:00Z"}`)))
+	if published.Code != http.StatusOK {
+		t.Fatalf("unexpected publish status: %d body=%s", published.Code, published.Body.String())
+	}
+
+	consumed := serve(httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", bytes.NewBufferString(`{"consumer":"gateway"}`)))
+	if consumed.Code != http.StatusOK {
+		t.Fatalf("unexpected consume status: %d body=%s", consumed.Code, consumed.Body.String())
+	}
+
+	listed := serve(httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil))
+	if listed.Code != http.StatusOK {
+		t.Fatalf("unexpected list status: %d body=%s", listed.Code, listed.Body.String())
+	}
+	var page struct {
+		Items      []domain.PackageChangeEvent `json:"items"`
+		NextCursor string                      `json:"next_cursor"`
+	}
+	if err := json.NewDecoder(listed.Body).Decode(&page); err != nil {
+		t.Fatalf("decode list error: %v", err)
+	}
+	if len(page.Items) != 1 || page.Items[0].EventID != "evt-integration-failed" {
+		t.Fatalf("unexpected list items: %+v", page.Items)
+	}
+	if page.NextCursor != "1" {
+		t.Fatalf("unexpected next cursor: %+v", page)
+	}
+	if only := page.Items[0]; only.GatewaySyncStatus != domain.GatewaySyncStatusFailed {
+		t.Fatalf("unexpected sync status: %+v", only)
+	}
+}
+
+// TestDiscoveryCandidateCreateAndListIntegration creates one discovery
+// candidate over HTTP and expects it to be the only item when listing with
+// status=pending_admission.
+func TestDiscoveryCandidateCreateAndListIntegration(t *testing.T) {
+	application := app.New()
+
+	candidateJSON := bytes.NewBufferString(`{"candidate_id":"cand-int-1","account_id":701,"platform":"openai","model":"gpt-4.1-mini","source":"manual_seed","reason_code":"new_model","discovered_at":"2026-05-06T20:30:00Z"}`)
+	postReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", candidateJSON)
+	postRec := httptest.NewRecorder()
+	application.Server.Routes().ServeHTTP(postRec, postReq)
+	if postRec.Code != http.StatusOK {
+		t.Fatalf("unexpected create status: %d body=%s", postRec.Code, postRec.Body.String())
+	}
+
+	getReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/discovery/candidates?status=pending_admission", nil)
+	getRec := httptest.NewRecorder()
+	application.Server.Routes().ServeHTTP(getRec, getReq)
+	if getRec.Code != http.StatusOK {
+		t.Fatalf("unexpected list status: %d body=%s", getRec.Code, getRec.Body.String())
+	}
+
+	var page struct {
+		Items []domain.DiscoveryCandidate `json:"items"`
+	}
+	if err := json.NewDecoder(getRec.Body).Decode(&page); err != nil {
+		t.Fatalf("decode list error: %v", err)
+	}
+	if len(page.Items) != 1 || page.Items[0].CandidateID != "cand-int-1" {
+		t.Fatalf("unexpected discovery list items: %+v", page.Items)
+	}
+}
--- a/internal/httpapi/server_test.go
+++ b/internal/httpapi/server_test.go
@@ -0,0 +1,266 @@
+package httpapi
+
+import (
+	"bytes"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/discovery"
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/gatewayconsumer"
+	"supply-intelligence/internal/probe"
+	"supply-intelligence/internal/publish"
+	"supply-intelligence/internal/repository"
+)
+
+// TestServerRoutingStateEndpoint stores a routing state for account 101 and
+// expects the routing-state endpoint to return it with status 200.
+func TestServerRoutingStateEndpoint(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	seeded := domain.AccountRoutingState{
+		AccountID:      101,
+		Platform:       "openai",
+		AccountStatus:  domain.AccountStatusActive,
+		RoutingEnabled: true,
+		RiskScore:      10,
+		ReasonCode:     "ok",
+		LastProbeAt:    time.Unix(100, 0).UTC(),
+		Version:        3,
+	}
+	store.UpsertRoutingState(seeded)
+	// The admission service is left nil; this test never hits its endpoints.
+	srv := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	rec := httptest.NewRecorder()
+	srv.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/accounts/101/routing-state", nil))
+
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+	var returned domain.AccountRoutingState
+	if err := json.NewDecoder(rec.Body).Decode(&returned); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if returned.AccountID != 101 || returned.AccountStatus != domain.AccountStatusActive {
+		t.Fatalf("unexpected payload: %+v", returned)
+	}
+}
+
+// TestServerProbeEvaluateEndpointPaths posts three probe payloads (a 200, a
+// 401 and a transport error) to the evaluate endpoint and checks the expected
+// classification, account status, reason code and routing flag of each.
+func TestServerProbeEvaluateEndpointPaths(t *testing.T) {
+	cases := []struct {
+		name           string
+		payload        string
+		status         int
+		classification domain.ProbeClassification
+		accountStatus  domain.AccountStatus
+		reasonCode     string
+		routingEnabled bool
+	}{
+		{
+			name:           "success",
+			payload:        `{"account_id":201,"platform":"openai","current_status":"suspended","status_code":200}`,
+			status:         http.StatusOK,
+			classification: domain.ProbeClassificationSuccess,
+			accountStatus:  domain.AccountStatusActive,
+			reasonCode:     "ok",
+			routingEnabled: true,
+		},
+		{
+			name:           "explicit_failure",
+			payload:        `{"account_id":202,"platform":"openai","current_status":"active","status_code":401}`,
+			status:         http.StatusOK,
+			classification: domain.ProbeClassificationExplicitFailure,
+			accountStatus:  domain.AccountStatusSuspended,
+			reasonCode:     "auth_rejected",
+			routingEnabled: false,
+		},
+		{
+			name:           "inconclusive",
+			payload:        `{"account_id":203,"platform":"openai","current_status":"suspended","transport_error":"dial tcp timeout"}`,
+			status:         http.StatusOK,
+			classification: domain.ProbeClassificationInconclusive,
+			accountStatus:  domain.AccountStatusSuspended,
+			reasonCode:     "transport_error",
+			routingEnabled: false,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Each case gets its own repository and server.
+			store := repository.NewMemoryRepository()
+			srv := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+			rec := httptest.NewRecorder()
+			srv.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/probe/evaluate", bytes.NewBufferString(tc.payload)))
+			if rec.Code != tc.status {
+				t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+			}
+
+			var evaluated probe.EvaluateOutput
+			if err := json.NewDecoder(rec.Body).Decode(&evaluated); err != nil {
+				t.Fatalf("decode error: %v", err)
+			}
+			if evaluated.Classification != tc.classification {
+				t.Fatalf("unexpected classification: %q", evaluated.Classification)
+			}
+			state := evaluated.RoutingState
+			if state.AccountStatus != tc.accountStatus {
+				t.Fatalf("unexpected account status: %q", state.AccountStatus)
+			}
+			if state.ReasonCode != tc.reasonCode {
+				t.Fatalf("unexpected reason code: %q", state.ReasonCode)
+			}
+			if state.RoutingEnabled != tc.routingEnabled {
+				t.Fatalf("unexpected routing enabled: %v", state.RoutingEnabled)
+			}
+		})
+	}
+}
+
+// TestServerPublishPackageEventEndpoint posts one package event and expects
+// the response to echo its id, carry the package-published event type, and
+// start with gateway sync status pending.
+func TestServerPublishPackageEventEndpoint(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	srv := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	payload := bytes.NewBufferString(`{"event_id":"evt-1","package_id":1001,"platform":"openai","model":"gpt-4.1-mini","version":7,"occurred_at":"2026-05-06T20:30:00Z"}`)
+	rec := httptest.NewRecorder()
+	srv.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", payload))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected publish status: %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var recorded domain.PackageChangeEvent
+	if err := json.NewDecoder(rec.Body).Decode(&recorded); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if recorded.EventID != "evt-1" || recorded.EventType != publish.PackagePublishedEventType {
+		t.Fatalf("unexpected event: %+v", recorded)
+	}
+	if status := recorded.GatewaySyncStatus; status != domain.GatewaySyncStatusPending {
+		t.Fatalf("unexpected sync status: %q", status)
+	}
+}
+
+// TestServerPackageChangeListAndAck seeds one pending event, expects the list
+// endpoint to return it with next cursor "1", then acks it as applied over
+// HTTP and expects the repository to show the event as applied.
+func TestServerPackageChangeListAndAck(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	store.AppendPackageEvent(domain.PackageChangeEvent{
+		EventID:           "evt-1",
+		EventType:         publish.PackagePublishedEventType,
+		PackageID:         1001,
+		Platform:          "openai",
+		Model:             "gpt-4.1-mini",
+		OccurredAt:        time.Unix(5, 0).UTC(),
+		Version:           7,
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	})
+	srv := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	listRec := httptest.NewRecorder()
+	srv.Routes().ServeHTTP(listRec, httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil))
+	if listRec.Code != http.StatusOK {
+		t.Fatalf("unexpected list status: %d body=%s", listRec.Code, listRec.Body.String())
+	}
+	var page struct {
+		Items      []domain.PackageChangeEvent `json:"items"`
+		NextCursor string                      `json:"next_cursor"`
+	}
+	if err := json.NewDecoder(listRec.Body).Decode(&page); err != nil {
+		t.Fatalf("decode list error: %v", err)
+	}
+	if len(page.Items) != 1 || page.NextCursor != "1" {
+		t.Fatalf("unexpected list response: %+v", page)
+	}
+
+	ackBody := bytes.NewBufferString(`{"consumer":"gateway","result":"applied","detail":"ok"}`)
+	ackRec := httptest.NewRecorder()
+	srv.Routes().ServeHTTP(ackRec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/package-changes/evt-1/ack", ackBody))
+	if ackRec.Code != http.StatusNoContent {
+		t.Fatalf("unexpected ack status: %d body=%s", ackRec.Code, ackRec.Body.String())
+	}
+
+	// Read the event back from the repository rather than over HTTP.
+	afterAck, _ := store.ListPackageEventsAfter("")
+	if len(afterAck) != 1 || afterAck[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
+		t.Fatalf("unexpected ack state: %+v", afterAck)
+	}
+}
+
+// TestServerPackageChangeListWithCursor seeds two pending events and checks
+// that listing with cursor=1 returns only the second event and reports "2" as
+// the next cursor.
+func TestServerPackageChangeListWithCursor(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	seeds := []domain.PackageChangeEvent{
+		{EventID: "evt-1", EventType: publish.PackagePublishedEventType, PackageID: 1001, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(5, 0).UTC(), Version: 7, GatewaySyncStatus: domain.GatewaySyncStatusPending},
+		{EventID: "evt-2", EventType: publish.PackagePublishedEventType, PackageID: 1002, Platform: "openai", Model: "gpt-4.1", OccurredAt: time.Unix(6, 0).UTC(), Version: 8, GatewaySyncStatus: domain.GatewaySyncStatusPending},
+	}
+	for _, seed := range seeds {
+		store.AppendPackageEvent(seed)
+	}
+	server := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	rec := httptest.NewRecorder()
+	server.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes?cursor=1", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected status: %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var page struct {
+		Items      []domain.PackageChangeEvent `json:"items"`
+		NextCursor string                      `json:"next_cursor"`
+	}
+	if err := json.NewDecoder(rec.Body).Decode(&page); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if len(page.Items) != 1 || page.Items[0].EventID != "evt-2" || page.NextCursor != "2" {
+		t.Fatalf("unexpected cursor response: %+v", page)
+	}
+}
+
+// TestServerConsumeOnceEndpoint seeds one event expected to apply and one
+// expected to fail (model "gpt-fail-model"), triggers a single consume pass
+// over HTTP, and checks the per-event results in order.
+func TestServerConsumeOnceEndpoint(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	store.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-apply", EventType: publish.PackagePublishedEventType, PackageID: 1001, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(5, 0).UTC(), Version: 7, GatewaySyncStatus: domain.GatewaySyncStatusPending})
+	store.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-fail", EventType: publish.PackagePublishedEventType, PackageID: 1002, Platform: "openai", Model: "gpt-fail-model", OccurredAt: time.Unix(6, 0).UTC(), Version: 8, GatewaySyncStatus: domain.GatewaySyncStatusPending})
+	server := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	payload := bytes.NewBufferString(`{"consumer":"gateway"}`)
+	rec := httptest.NewRecorder()
+	server.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", payload))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("unexpected consume status: %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var out gatewayconsumer.ConsumeOnceOutput
+	if err := json.NewDecoder(rec.Body).Decode(&out); err != nil {
+		t.Fatalf("decode error: %v", err)
+	}
+	if len(out.Items) != 2 {
+		t.Fatalf("unexpected consume output length: %+v", out)
+	}
+
+	// First item: applied, with a non-empty detail.
+	firstOK := out.Items[0].Result == domain.GatewayAckResultApplied &&
+		out.Items[0].GatewaySyncStatus == domain.GatewaySyncStatusApplied &&
+		out.Items[0].Detail != ""
+	if !firstOK {
+		t.Fatalf("unexpected first consume item: %+v", out.Items[0])
+	}
+
+	// Second item: failed, with a non-empty detail.
+	secondOK := out.Items[1].Result == domain.GatewayAckResultFailed &&
+		out.Items[1].GatewaySyncStatus == domain.GatewaySyncStatusFailed &&
+		out.Items[1].Detail != ""
+	if !secondOK {
+		t.Fatalf("unexpected second consume item: %+v", out.Items[1])
+	}
+}
+
+// TestServerDiscoveryCandidateCreateAndList creates a discovery candidate over
+// HTTP and then lists candidates filtered by status=pending_admission,
+// expecting exactly the candidate that was just created.
+func TestServerDiscoveryCandidateCreateAndList(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	server := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	// Create the candidate.
+	createPayload := bytes.NewBufferString(`{"candidate_id":"cand-1","account_id":301,"platform":"openai","model":"gpt-4.1-mini","source":"manual_seed","reason_code":"new_model","discovered_at":"2026-05-06T20:30:00Z"}`)
+	createRec := httptest.NewRecorder()
+	server.Routes().ServeHTTP(createRec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", createPayload))
+	if createRec.Code != http.StatusOK {
+		t.Fatalf("unexpected create status: %d body=%s", createRec.Code, createRec.Body.String())
+	}
+
+	// List candidates awaiting admission.
+	listRec := httptest.NewRecorder()
+	server.Routes().ServeHTTP(listRec, httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/discovery/candidates?status=pending_admission", nil))
+	if listRec.Code != http.StatusOK {
+		t.Fatalf("unexpected list status: %d body=%s", listRec.Code, listRec.Body.String())
+	}
+	var listing struct {
+		Items []domain.DiscoveryCandidate `json:"items"`
+	}
+	if err := json.NewDecoder(listRec.Body).Decode(&listing); err != nil {
+		t.Fatalf("decode list error: %v", err)
+	}
+
+	matches := len(listing.Items) == 1 &&
+		listing.Items[0].CandidateID == "cand-1" &&
+		listing.Items[0].Status == domain.DiscoveryCandidateStatusPendingAdmission
+	if !matches {
+		t.Fatalf("unexpected discovery list response: %+v", listing.Items)
+	}
+}
+
+// TestServerDiscoveryCandidateRejectsInvalidInput posts a candidate with an
+// empty candidate_id and a zero account_id and expects 400 Bad Request.
+func TestServerDiscoveryCandidateRejectsInvalidInput(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	server := NewServer(store, probe.NewService(store), publish.NewService(store), gatewayconsumer.NewService(store), discovery.NewService(store), nil)
+
+	invalid := bytes.NewBufferString(`{"candidate_id":"","account_id":0}`)
+	rec := httptest.NewRecorder()
+	server.Routes().ServeHTTP(rec, httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", invalid))
+
+	if got := rec.Code; got != http.StatusBadRequest {
+		t.Fatalf("unexpected status: %d body=%s", got, rec.Body.String())
+	}
+}
--- a/internal/integration/adapter.go
+++ b/internal/integration/adapter.go
@@ -0,0 +1,67 @@
+package integration
+
+import (
+	"context"
+
+	"supply-intelligence/internal/domain"
+)
+
+// AccountStateReader defines the interface for reading account routing state
+// from the supply-api repository layer.
+//
+// GetRoutingStateContext returns the routing state for accountID together with
+// a bool. The stub implementation in this file returns a zero-value state and
+// false; presumably the bool reports whether a state was found — confirm
+// against the concrete repository.
+type AccountStateReader interface {
+	GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool)
+}
+
+// CandidateStore defines the interface for persisting model candidates.
+//
+// It offers lookup by candidate ID, lookup by (accountID, platform, model),
+// an upsert that returns a candidate value, and a listing filtered by status.
+// None of the methods return an error.
+// NOTE(review): the bool results presumably mean "found" — confirm against
+// the implementing repository.
+type CandidateStore interface {
+	GetDiscoveryCandidateByIDContext(ctx context.Context, candidateID string) (domain.DiscoveryCandidate, bool)
+	FindDiscoveryCandidateContext(ctx context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool)
+	UpsertDiscoveryCandidateContext(ctx context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate
+	ListDiscoveryCandidatesContext(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate
+}
+
+// PackageEventStore defines the interface for persisting package change events.
+//
+// Only AppendPackageEventContext takes a context; ListPackageEventsAfter and
+// AckPackageEvent do not. ListPackageEventsAfter returns events plus a string,
+// presumably the next cursor — confirm against the implementation.
+// NOTE(review): ackedAt is untyped (interface{}); presumably a time.Time is
+// expected — confirm before relying on it.
+type PackageEventStore interface {
+	AppendPackageEventContext(ctx context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error)
+	ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string)
+	AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt interface{}) (domain.PackageChangeEvent, error)
+}
+
+// ProbeLogStore defines the interface for persisting probe execution logs.
+//
+// AppendProbeLog stores one ProbeExecutionLog; ListProbeLogsByAccount returns
+// logs for accountID, taking a limit argument (presumably a maximum number of
+// entries — confirm against the implementation).
+type ProbeLogStore interface {
+	AppendProbeLog(ctx context.Context, log ProbeExecutionLog) error
+	ListProbeLogsByAccount(ctx context.Context, accountID int64, limit int) ([]ProbeExecutionLog, error)
+}
+
+// ProbeExecutionLog represents a single probe execution record.
+//
+// NOTE(review): field alignment is not gofmt-clean, and ExecutedAt is untyped;
+// per its inline comment it may hold either a time.Time or a string, so
+// readers must type-switch on it.
+type ProbeExecutionLog struct {
+	LogID         int64
+	AccountID     int64
+	Platform      string
+	ProbeResult   domain.ProbeClassification
+	FailureClass  string
+	HTTPStatus    int
+	LatencyMs     int
+	RiskScore     int
+	EvaluatedTransition string
+	ExecutedAt    interface{} // time.Time or string
+	RequestID     string
+	Version       int64
+}
+
+// NewAccountStateAdapter wraps repo in an AccountStateAdapter and returns it.
+// The result is never nil. repo is stored as-is; nothing in this file reads it
+// yet because the adapter's lookup method is still a stub.
+func NewAccountStateAdapter(repo interface{}) *AccountStateAdapter {
+	adapter := new(AccountStateAdapter)
+	adapter.repo = repo
+	return adapter
+}
+
+// AccountStateAdapter implements AccountStateReader over supply-api repository.
+// repo is held untyped; its concrete type is not fixed anywhere in this file.
+type AccountStateAdapter struct {
+	repo interface{}
+}
+
+// GetRoutingStateContext is a placeholder: it ignores ctx and accountID and
+// always returns a zero-value routing state with false.
+func (a *AccountStateAdapter) GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool) {
+	// TODO: implement when supply-api integration is ready
+	// This will call into supply-api's account repository
+	var none domain.AccountRoutingState
+	return none, false
+}
--- a/internal/integration/platform.go
+++ b/internal/integration/platform.go
@@ -0,0 +1,242 @@
+package integration
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+)
+
+// SupplierAdapter defines the interface for interacting with a supplier platform.
+// This file provides two implementations: OpenAIAdapter and AnthropicAdapter.
+type SupplierAdapter interface {
+	// Platform returns the platform name (e.g., "openai", "anthropic")
+	Platform() string
+
+	// ProbeAccount sends a health check request to the supplier API
+	// Returns the HTTP response details needed for probe classification.
+	// Failures are reported inside ProbeResult.TransportError, not as a
+	// separate error return.
+	ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult
+
+	// GetModels fetches the list of available models from the supplier
+	GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error)
+
+	// HealthCheck verifies connectivity to the supplier API
+	HealthCheck(ctx context.Context, account SupplierAccount) error
+}
+
+// SupplierAccount holds credentials and configuration for a supplier account.
+//
+// In this file, OpenAIAdapter.ProbeAccount uses Endpoint verbatim as the probe
+// URL when it is non-empty; OpenAIAdapter.GetModels and the Anthropic adapter
+// build their URL from BaseURL only and do not read Endpoint.
+type SupplierAccount struct {
+	AccountID   int64
+	Platform   string
+	APIKey     string
+	BaseURL    string // defaults to supplier's public endpoint if empty
+	Endpoint   string // custom endpoint override
+}
+
+// ProbeResult holds the raw result of a probe request.
+//
+// The adapters in this file set either TransportError (request construction or
+// transport failure, StatusCode left at zero) or StatusCode plus ResponseBody
+// (a snippet of at most 1024 bytes of the response body).
+type ProbeResult struct {
+	StatusCode     int
+	TransportError error
+	ResponseBody   string
+}
+
+// ModelInfo describes a model available from a supplier.
+// ContextLength is zero when the supplier response carries no context size.
+type ModelInfo struct {
+	ModelID       string // supplier's model identifier
+	ModelName     string // display name
+	ContextLength int    // max context length in tokens
+	IsActive      bool   // whether the model is currently available
+}
+
+// NewOpenAIAdapter builds a SupplierAdapter for OpenAI and OpenAI-compatible
+// APIs that sends every request through httpClient.
+func NewOpenAIAdapter(httpClient HTTPClient) SupplierAdapter {
+	adapter := new(OpenAIAdapter)
+	adapter.httpClient = httpClient
+	return adapter
+}
+
+// OpenAIAdapter implements SupplierAdapter for OpenAI and OpenAI-compatible APIs.
+// All outbound requests go through httpClient, which makes the adapter
+// testable with a stub client.
+type OpenAIAdapter struct {
+	httpClient HTTPClient
+}
+
+// Platform reports the fixed platform key of this adapter, "openai".
+func (a *OpenAIAdapter) Platform() string {
+	const platform = "openai"
+	return platform
+}
+
+// ProbeAccount issues an authenticated GET against the account's probe URL and
+// reports the raw HTTP outcome for later classification.
+//
+// The probe URL is account.Endpoint when set; otherwise it is
+// account.BaseURL + "/v1/models", with BaseURL defaulting to
+// https://api.openai.com. Request-construction and transport failures are
+// returned in ProbeResult.TransportError with a zero StatusCode. When a
+// response arrives, its status code and up to 1024 bytes of its body are
+// returned.
+func (a *OpenAIAdapter) ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult {
+	endpoint := account.Endpoint
+	if endpoint == "" {
+		baseURL := account.BaseURL
+		if baseURL == "" {
+			baseURL = "https://api.openai.com"
+		}
+		endpoint = baseURL + "/v1/models"
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return ProbeResult{TransportError: err}
+	}
+	req.Header.Set("Authorization", "Bearer "+account.APIKey)
+	req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return ProbeResult{TransportError: err}
+	}
+	defer resp.Body.Close()
+
+	// A single Read is allowed to return fewer bytes than are available, so
+	// keep reading until the snippet buffer is full or the body ends or fails.
+	// Read errors are not surfaced: the status code is what gets classified
+	// and the body snippet is best-effort.
+	snippet := make([]byte, 1024)
+	filled := 0
+	for filled < len(snippet) {
+		n, readErr := resp.Body.Read(snippet[filled:])
+		filled += n
+		if readErr != nil {
+			break
+		}
+	}
+
+	return ProbeResult{
+		StatusCode:   resp.StatusCode,
+		ResponseBody: string(snippet[:filled]),
+	}
+}
+
+// GetModels fetches the model list from {BaseURL}/v1/models, with BaseURL
+// defaulting to https://api.openai.com. account.Endpoint is not consulted.
+//
+// It returns an error when the request cannot be built or sent, when the
+// response status is not 200 OK (as *UnexpectedStatusError), or when the body
+// is not valid JSON. Only entries whose "object" field equals "model" are
+// returned; each is marked active and uses its ID as display name.
+func (a *OpenAIAdapter) GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error) {
+	baseURL := account.BaseURL
+	if baseURL == "" {
+		baseURL = "https://api.openai.com"
+	}
+	endpoint := baseURL + "/v1/models"
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Authorization", "Bearer "+account.APIKey)
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	// Error responses (401, 429, 5xx, ...) are JSON too and would otherwise
+	// decode into an empty list that looks like "account has no models".
+	if resp.StatusCode != http.StatusOK {
+		return nil, &UnexpectedStatusError{StatusCode: resp.StatusCode, Status: resp.Status}
+	}
+
+	// Parse the OpenAI models list response
+	// {"object": "list", "data": [{"id": "gpt-4", "object": "model", ...}, ...]}
+	var raw struct {
+		Data []struct {
+			ID      string `json:"id"`
+			Object  string `json:"object"`
+			Context int    `json:"context_window,omitempty"`
+		} `json:"data"`
+	}
+	if err := decodeJSON(resp, &raw); err != nil {
+		return nil, err
+	}
+
+	models := make([]ModelInfo, 0, len(raw.Data))
+	for _, m := range raw.Data {
+		if m.Object == "model" {
+			models = append(models, ModelInfo{
+				ModelID:       m.ID,
+				ModelName:     m.ID,
+				ContextLength: m.Context,
+				IsActive:      true,
+			})
+		}
+	}
+	return models, nil
+}
+
+// UnexpectedStatusError reports a supplier response whose HTTP status was not
+// the one the adapter required.
+type UnexpectedStatusError struct {
+	StatusCode int    // numeric HTTP status of the response
+	Status     string // status text as reported by the response; may be empty
+}
+
+// Error implements the error interface. It prefers the response's own status
+// text and falls back to the standard text for StatusCode.
+func (e *UnexpectedStatusError) Error() string {
+	status := e.Status
+	if status == "" {
+		status = http.StatusText(e.StatusCode)
+	}
+	return "unexpected response status: " + status
+}
+
+// HealthCheck probes the account and reports whether the supplier API was
+// reachable. A transport error is returned as-is. Both 200 and 401 count as
+// healthy (the API answered, regardless of credential validity); any other
+// status yields ErrHealthCheckFailed.
+func (a *OpenAIAdapter) HealthCheck(ctx context.Context, account SupplierAccount) error {
+	probe := a.ProbeAccount(ctx, account)
+	switch {
+	case probe.TransportError != nil:
+		return probe.TransportError
+	case probe.StatusCode == http.StatusOK, probe.StatusCode == http.StatusUnauthorized:
+		return nil
+	default:
+		return ErrHealthCheckFailed
+	}
+}
+
+// NewAnthropicAdapter builds a SupplierAdapter for the Anthropic API that
+// sends every request through httpClient.
+func NewAnthropicAdapter(httpClient HTTPClient) SupplierAdapter {
+	adapter := new(AnthropicAdapter)
+	adapter.httpClient = httpClient
+	return adapter
+}
+
+// AnthropicAdapter implements SupplierAdapter for Anthropic Claude API.
+// All outbound requests go through httpClient.
+type AnthropicAdapter struct {
+	httpClient HTTPClient
+}
+
+// Platform reports the fixed platform key of this adapter, "anthropic".
+func (a *AnthropicAdapter) Platform() string {
+	const platform = "anthropic"
+	return platform
+}
+
+// ProbeAccount issues a GET to {BaseURL}/v1/models, with BaseURL defaulting to
+// https://api.anthropic.com, authenticating via the x-api-key header and
+// pinning anthropic-version to 2023-06-01. account.Endpoint is not consulted.
+//
+// Request-construction and transport failures are returned in
+// ProbeResult.TransportError with a zero StatusCode. When a response arrives,
+// its status code and up to 1024 bytes of its body are returned.
+func (a *AnthropicAdapter) ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult {
+	baseURL := account.BaseURL
+	if baseURL == "" {
+		baseURL = "https://api.anthropic.com"
+	}
+	endpoint := baseURL + "/v1/models"
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return ProbeResult{TransportError: err}
+	}
+	req.Header.Set("x-api-key", account.APIKey)
+	req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
+	req.Header.Set("anthropic-version", "2023-06-01")
+
+	resp, err := a.httpClient.Do(req)
+	if err != nil {
+		return ProbeResult{TransportError: err}
+	}
+	defer resp.Body.Close()
+
+	// A single Read is allowed to return fewer bytes than are available, so
+	// keep reading until the snippet buffer is full or the body ends or fails.
+	// Read errors are not surfaced: the status code is what gets classified
+	// and the body snippet is best-effort.
+	snippet := make([]byte, 1024)
+	filled := 0
+	for filled < len(snippet) {
+		n, readErr := resp.Body.Read(snippet[filled:])
+		filled += n
+		if readErr != nil {
+			break
+		}
+	}
+
+	return ProbeResult{
+		StatusCode:   resp.StatusCode,
+		ResponseBody: string(snippet[:filled]),
+	}
+}
+
+// GetModels returns a hard-coded catalogue of Claude models; it performs no
+// network call and ignores both ctx and account. Every entry is reported as
+// active with a 200000-token context length, and the error is always nil.
+//
+// NOTE(review): the original rationale was that Anthropic exposes no models
+// list endpoint, yet ProbeAccount on this adapter requests {BaseURL}/v1/models
+// — confirm whether this static list should be replaced by a live fetch.
+func (a *AnthropicAdapter) GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error) {
+	const claudeContextLength = 200000
+	catalogue := [...]struct{ id, name string }{
+		{"claude-3-5-sonnet-20241022", "Claude 3.5 Sonnet"},
+		{"claude-3-5-haiku-20241022", "Claude 3.5 Haiku"},
+		{"claude-3-opus-20240229", "Claude 3 Opus"},
+		{"claude-3-sonnet-20240229", "Claude 3 Sonnet"},
+		{"claude-3-haiku-20240307", "Claude 3 Haiku"},
+	}
+
+	models := make([]ModelInfo, 0, len(catalogue))
+	for _, entry := range catalogue {
+		models = append(models, ModelInfo{
+			ModelID:       entry.id,
+			ModelName:     entry.name,
+			ContextLength: claudeContextLength,
+			IsActive:      true,
+		})
+	}
+	return models, nil
+}
+
+// HealthCheck probes the account and reports whether the Anthropic API was
+// reachable. A transport error is returned as-is. Both 200 (success) and 401
+// (auth failure) count as healthy because the API answered; any other status
+// yields ErrHealthCheckFailed.
+func (a *AnthropicAdapter) HealthCheck(ctx context.Context, account SupplierAccount) error {
+	probe := a.ProbeAccount(ctx, account)
+	switch {
+	case probe.TransportError != nil:
+		return probe.TransportError
+	case probe.StatusCode == http.StatusOK, probe.StatusCode == http.StatusUnauthorized:
+		return nil
+	default:
+		return ErrHealthCheckFailed
+	}
+}
+
+// HTTPClient is the minimal request-sending interface the platform adapters
+// depend on; it exists for testability. Its single method has the same
+// signature as (*http.Client).Do.
+type HTTPClient interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
+// DefaultHTTPClient is an HTTPClient that forwards every request to
+// http.DefaultClient.
+//
+// NOTE(review): http.DefaultClient has no Timeout configured, so requests are
+// bounded only by the request context's deadline, if it has one.
+type DefaultHTTPClient struct{}
+
+// Do sends req through http.DefaultClient and returns its response and error
+// unchanged.
+func (c *DefaultHTTPClient) Do(req *http.Request) (*http.Response, error) {
+	shared := http.DefaultClient
+	return shared.Do(req)
+}
+
+// NewDefaultHTTPClient returns a fresh *DefaultHTTPClient as an HTTPClient.
+func NewDefaultHTTPClient() HTTPClient {
+	client := new(DefaultHTTPClient)
+	return client
+}
+
+// HealthCheckError is the error type behind ErrHealthCheckFailed. It carries
+// no fields.
+type HealthCheckError struct{}
+
+// Error implements the error interface with a fixed message.
+func (*HealthCheckError) Error() string {
+	return "health check failed"
+}
+
+// ErrHealthCheckFailed is the value the adapters' HealthCheck methods return
+// when the probe got a response whose status is neither 200 nor 401.
+var ErrHealthCheckFailed = &HealthCheckError{}
+
+// decodeJSON decodes the first JSON value in resp.Body into v and returns the
+// decoder's error. It does not close the body and does not inspect the
+// response status.
+func decodeJSON(resp *http.Response, v interface{}) error {
+	decoder := json.NewDecoder(resp.Body)
+	return decoder.Decode(v)
+}
--- a/internal/poller/gateway_package_poller.go
+++ b/internal/poller/gateway_package_poller.go
@@ -0,0 +1,38 @@
+package poller
+
+import (
+	"context"
+
+	"supply-intelligence/internal/gatewayconsumer"
+)
+
+// GatewayPackagePoller drives the gateway consumer one pass at a time and
+// remembers the cursor returned by the last successful pass.
+//
+// NOTE(review): cursor is read and written without any lock, so PollOnce and
+// Cursor are not safe to call from different goroutines concurrently.
+type GatewayPackagePoller struct {
+	consumer *gatewayconsumer.Service
+	cursor   string
+}
+
+// NewGatewayPackagePoller returns a poller bound to consumer with an empty
+// starting cursor.
+func NewGatewayPackagePoller(consumer *gatewayconsumer.Service) *GatewayPackagePoller {
+	poller := new(GatewayPackagePoller)
+	poller.consumer = consumer
+	return poller
+}
+
+// PollOnce runs a single consume pass as consumer "gateway", starting from the
+// poller's stored cursor.
+//
+// On success it stores the returned NextCursor and returns the pass output.
+// On a nil poller or nil consumer it returns
+// gatewayconsumer.ErrInvalidConsumeInput. On a consume error the cursor is
+// left untouched and a zero-value output is returned with the error.
+func (p *GatewayPackagePoller) PollOnce(ctx context.Context) (gatewayconsumer.ConsumeOnceOutput, error) {
+	var empty gatewayconsumer.ConsumeOnceOutput
+	if p == nil || p.consumer == nil {
+		return empty, gatewayconsumer.ErrInvalidConsumeInput
+	}
+
+	input := gatewayconsumer.ConsumeOnceInput{Consumer: "gateway", Cursor: p.cursor}
+	result, err := p.consumer.ConsumeOnce(ctx, input)
+	if err != nil {
+		return empty, err
+	}
+
+	p.cursor = result.NextCursor
+	return result, nil
+}
+
+// Cursor returns the cursor stored by the last successful PollOnce, or the
+// empty string for a nil poller or one that has not polled yet.
+func (p *GatewayPackagePoller) Cursor() string {
+	if p != nil {
+		return p.cursor
+	}
+	return ""
+}
--- a/internal/poller/gateway_package_poller_test.go
+++ b/internal/poller/gateway_package_poller_test.go
@@ -0,0 +1,28 @@
+package poller
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/gatewayconsumer"
+	"supply-intelligence/internal/repository"
+)
+
+// TestGatewayPackagePollerPollOnce seeds one pending event, polls once, and
+// checks that the event is returned and the poller's cursor matches the
+// NextCursor of the pass.
+func TestGatewayPackagePollerPollOnce(t *testing.T) {
+	store := repository.NewMemoryRepository()
+	store.AppendPackageEvent(domain.PackageChangeEvent{
+		EventID:           "evt-1",
+		EventType:         "supply_package_published",
+		PackageID:         1,
+		Platform:          "openai",
+		Model:             "gpt-4.1-mini",
+		OccurredAt:        time.Unix(1, 0).UTC(),
+		Version:           1,
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	})
+	subject := NewGatewayPackagePoller(gatewayconsumer.NewService(store))
+
+	out, err := subject.PollOnce(context.Background())
+	if err != nil {
+		t.Fatalf("unexpected poll error: %v", err)
+	}
+	if len(out.Items) != 1 || out.Items[0].EventID != "evt-1" {
+		t.Fatalf("unexpected output: %+v", out)
+	}
+	if got := subject.Cursor(); got != out.NextCursor {
+		t.Fatalf("expected cursor to advance: poller=%q out=%q", got, out.NextCursor)
+	}
+}
--- a/internal/poller/runtime.go
+++ b/internal/poller/runtime.go
@@ -0,0 +1,53 @@
+package poller
+
+import (
+	"context"
+	"sync"
+	"time"
+)
+
+// Runtime runs a GatewayPackagePoller on a fixed interval in a background
+// goroutine.
+//
+// cancel is non-nil while the runtime counts as started (see Start and Stop);
+// wg tracks the background goroutine so Stop can wait for it to exit.
+// NOTE(review): cancel is accessed without a lock, so concurrent Start/Stop
+// calls are not synchronized.
+type Runtime struct {
+	poller   *GatewayPackagePoller
+	interval time.Duration
+	cancel   context.CancelFunc
+	wg       sync.WaitGroup
+}
+
+// NewRuntime returns a Runtime that will drive poller every interval once
+// started. A zero or negative interval is replaced by one second.
+func NewRuntime(poller *GatewayPackagePoller, interval time.Duration) *Runtime {
+	rt := &Runtime{poller: poller, interval: time.Second}
+	if interval > 0 {
+		rt.interval = interval
+	}
+	return rt
+}
+
+// Start launches the background polling goroutine and returns true, or
+// returns false without doing anything when r is nil, has no poller, or is
+// already started (cancel is set).
+//
+// The goroutine polls once immediately and then once per interval tick until
+// the context derived from parent is cancelled. PollOnce results and errors
+// are discarded.
+// NOTE(review): if parent is cancelled externally the goroutine exits but
+// cancel stays set, so Start keeps returning false until Stop is called.
+func (r *Runtime) Start(parent context.Context) bool {
+	if r == nil || r.poller == nil || r.cancel != nil {
+		return false
+	}
+	ctx, cancel := context.WithCancel(parent)
+	r.cancel = cancel
+	// Register the goroutine before launching it so Stop's Wait cannot miss it.
+	r.wg.Add(1)
+	go func() {
+		defer r.wg.Done()
+		ticker := time.NewTicker(r.interval)
+		defer ticker.Stop()
+		for {
+			// Poll first, then wait: the first pass runs without delay.
+			_, _ = r.poller.PollOnce(ctx)
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+			}
+		}
+	}()
+	return true
+}
+
+// Stop cancels the background goroutine, waits for it to exit, and clears
+// cancel so that Start can be called again. It is a no-op on a nil Runtime or
+// one that is not started.
+func (r *Runtime) Stop() {
+	if r == nil || r.cancel == nil {
+		return
+	}
+	r.cancel()
+	// Block until the polling goroutine has returned.
+	r.wg.Wait()
+	r.cancel = nil
+}
--- a/internal/poller/runtime_test.go
+++ b/internal/poller/runtime_test.go
@@ -0,0 +1,54 @@
+package poller
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/gatewayconsumer"
+	"supply-intelligence/internal/repository"
+)
+
+// TestRuntimeStartsBackgroundPolling seeds one pending event, starts a Runtime
+// with a 10ms interval, and waits up to 500ms for the background poller to
+// move the event to the applied sync status.
+func TestRuntimeStartsBackgroundPolling(t *testing.T) {
+	repo := repository.NewMemoryRepository()
+	repo.AppendPackageEvent(domain.PackageChangeEvent{
+		EventID:           "evt-runtime-1",
+		EventType:         "supply_package_published",
+		PackageID:         1,
+		Platform:          "openai",
+		Model:             "gpt-4.1-mini",
+		OccurredAt:        time.Unix(1, 0).UTC(),
+		Version:           1,
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	})
+
+	service := gatewayconsumer.NewService(repo)
+	poller := NewGatewayPackagePoller(service)
+	runtime := NewRuntime(poller, 10*time.Millisecond)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	if !runtime.Start(ctx) {
+		t.Fatalf("expected runtime to start")
+	}
+	defer runtime.Stop()
+
+	// Poll the repository until the event is applied or the deadline passes.
+	deadline := time.Now().Add(500 * time.Millisecond)
+	for time.Now().Before(deadline) {
+		items, _ := repo.ListPackageEventsAfter("")
+		if len(items) == 1 && items[0].GatewaySyncStatus == domain.GatewaySyncStatusApplied {
+			return
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+
+	// Deadline reached: report the final repository state in the failure.
+	items, _ := repo.ListPackageEventsAfter("")
+	t.Fatalf("expected background polling to apply event, got %+v", items)
+}
+
+// TestRuntimeStartRequiresPoller checks that a zero-value Runtime, which has
+// no poller, refuses to start.
+func TestRuntimeStartRequiresPoller(t *testing.T) {
+	bare := &Runtime{}
+	started := bare.Start(context.Background())
+	if started {
+		t.Fatalf("expected runtime without poller to refuse start")
+	}
+}
--- a/internal/probe/driver.go
+++ b/internal/probe/driver.go
@@ -0,0 +1,138 @@
+package probe
+
+import (
+	"context"
+	"log"
+	"time"
+
+	"github.com/google/uuid"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/integration"
+)
+
+// ProbeLogRepository defines where probe execution logs are persisted.
+// The Driver treats it as optional: a nil repository disables probe logging.
+type ProbeLogRepository interface {
+	AppendProbeLog(ctx context.Context, outcome ProbeOutcome) error
+}
+
+// Driver orchestrates a full probe run: load targets → execute → evaluate → persist state
+//
+// adapters is keyed by platform name; accounts whose platform has no adapter
+// are probed through executor instead. logRepo may be nil. now supplies the
+// timestamps recorded on outcomes.
+type Driver struct {
+	executor  *ProbeExecutor
+	evaluator *Service // reuse the existing probe.Service as evaluator
+	logRepo   ProbeLogRepository
+	adapters  map[string]integration.SupplierAdapter
+	now       func() time.Time
+}
+
+// NewDriver creates a probe driver with all dependencies wired together.
+//
+// repo backs the evaluator service; logRepo receives probe log entries and may
+// be nil; adapters maps platform names to platform-specific adapters and may
+// be nil or empty, in which case every account uses the generic HTTP probe.
+// The generic probe executor is given this package's default HTTP client,
+// which carries a request timeout, so a supplier that never answers cannot
+// block a probe indefinitely even when the caller's context has no deadline.
+func NewDriver(
+	repo RoutingStateRepository,
+	logRepo ProbeLogRepository,
+	adapters map[string]integration.SupplierAdapter,
+) *Driver {
+	return &Driver{
+		executor:  NewProbeExecutor(NewDefaultHTTPClient()),
+		evaluator: NewService(repo),
+		logRepo:   logRepo,
+		adapters:  adapters,
+		now:       func() time.Time { return time.Now().UTC() },
+	}
+}
+
+// RunProbeForAccount probes a single account and hands the outcome to
+// persistOutcome for evaluation and logging.
+//
+// When an adapter is registered for account.Platform it performs the probe;
+// otherwise a generic HTTP GET is sent to account.Endpoint (falling back to
+// account.BaseURL) with a Bearer Authorization header. Both paths record the
+// timestamp taken before the request and the measured latency. It returns an
+// error when the generic probe target is invalid or when evaluation fails;
+// transport failures are carried inside the outcome, not returned.
+func (d *Driver) RunProbeForAccount(ctx context.Context, account integration.SupplierAccount) error {
+	var outcome ProbeOutcome
+
+	if adapter, ok := d.adapters[account.Platform]; ok {
+		// Use platform-specific adapter. Stamp and time the request here so
+		// adapter probes carry the same ExecutedAt/LatencyMs data as probes
+		// sent through the generic executor.
+		executedAt := d.now()
+		start := time.Now()
+		result := adapter.ProbeAccount(ctx, account)
+		outcome = ProbeOutcome{
+			AccountID:      account.AccountID,
+			Platform:       account.Platform,
+			StatusCode:     result.StatusCode,
+			TransportError: result.TransportError,
+			LatencyMs:      int(time.Since(start).Milliseconds()),
+			ResponseBody:   result.ResponseBody,
+			RequestID:      "prb-" + uuid.New().String(),
+			ExecutedAt:     executedAt,
+		}
+	} else {
+		// Fall back to generic HTTP probe
+		target := ProbeTarget{
+			AccountID:  account.AccountID,
+			Platform:   account.Platform,
+			Endpoint:   account.Endpoint,
+			AuthHeader: "Bearer " + account.APIKey,
+		}
+		if target.Endpoint == "" {
+			target.Endpoint = account.BaseURL
+		}
+
+		var err error
+		outcome, err = d.executor.ExecuteProbe(ctx, target)
+		if err != nil {
+			return err
+		}
+	}
+
+	return d.persistOutcome(ctx, account.AccountID, account.Platform, outcome)
+}
+
+// persistOutcome drives the outcome through: load current state → evaluate →
+// probe log → transition log.
+//
+// It returns the evaluation error, if any. A failure to append the probe log
+// is logged but does not fail the call, so logging problems never mask a
+// successful evaluation. When evaluation fails, no probe log entry is written.
+// NOTE(review): this function does not write routing state itself; presumably
+// Service.EvaluateHTTPResult persists the new state — confirm.
+func (d *Driver) persistOutcome(ctx context.Context, accountID int64, platform string, outcome ProbeOutcome) error {
+	// 1. Load current routing state. The second result is ignored, so an
+	// account without stored state is evaluated from the zero-value state.
+	currentState, _ := d.evaluator.repo.GetRoutingStateContext(ctx, accountID)
+
+	// 2. Evaluate (uses the existing Service.EvaluateHTTPResult)
+	evalOutput, err := d.evaluator.EvaluateHTTPResult(ctx, EvaluateInput{
+		AccountID:      accountID,
+		Platform:       platform,
+		CurrentStatus:  currentState.AccountStatus,
+		StatusCode:     outcome.StatusCode,
+		TransportError: outcome.TransportError,
+	})
+	if err != nil {
+		log.Printf("[probe] failed to evaluate outcome for account %d: %v", accountID, err)
+		return err
+	}
+
+	// 3. Log the probe execution. ResponseBody is not copied into the entry.
+	if d.logRepo != nil {
+		logEntry := ProbeOutcome{
+			AccountID:      accountID,
+			Platform:       platform,
+			StatusCode:     outcome.StatusCode,
+			TransportError: outcome.TransportError,
+			LatencyMs:      outcome.LatencyMs,
+			RequestID:      outcome.RequestID,
+			ExecutedAt:     outcome.ExecutedAt,
+		}
+		// Best-effort: report the failure instead of dropping it silently.
+		if logErr := d.logRepo.AppendProbeLog(ctx, logEntry); logErr != nil {
+			log.Printf("[probe] failed to append probe log for account %d request=%s: %v", accountID, outcome.RequestID, logErr)
+		}
+	}
+
+	// 4. Log state transition
+	transition := describeTransition(currentState.AccountStatus, evalOutput.RoutingState.AccountStatus)
+	log.Printf("[probe] account=%d platform=%s %s->%s classification=%s risk=%d transition=%s",
+		accountID, platform,
+		currentState.AccountStatus, evalOutput.RoutingState.AccountStatus,
+		evalOutput.Classification, evalOutput.RoutingState.RiskScore,
+		transition)
+
+	return nil
+}
+
+// describeTransition renders a status change for log output: "no_change" when
+// both statuses are equal, otherwise "<from>_to_<to>".
+func describeTransition(from, to domain.AccountStatus) string {
+	if from != to {
+		return string(from) + "_to_" + string(to)
+	}
+	return "no_change"
+}
--- a/internal/probe/evaluator.go
+++ b/internal/probe/evaluator.go
@@ -0,0 +1,44 @@
+package probe
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+
+	"supply-intelligence/internal/domain"
+)
+
+// ErrUnknownStatusCode is wrapped by ClassifyHTTPResult when a status code
+// falls outside every class it recognizes; match it with errors.Is.
+var ErrUnknownStatusCode = errors.New("unknown probe status code")
+
+// ClassifyHTTPResult maps a probe's HTTP outcome to a classification and a
+// reason code.
+//
+//   - any transport error:        inconclusive, "transport_error"
+//   - 200:                        success, "ok"
+//   - 401, 403:                   explicit failure, "auth_rejected"
+//   - 429 and every status >=500: inconclusive, "upstream_unstable"
+//   - any other status >=400:     inconclusive, "unexpected_client_error"
+//
+// Every remaining status (below 400 and not 200) yields empty results and an
+// error wrapping ErrUnknownStatusCode. A transport error takes precedence over
+// the status code.
+func ClassifyHTTPResult(statusCode int, transportErr error) (domain.ProbeClassification, string, error) {
+	if transportErr != nil {
+		return domain.ProbeClassificationInconclusive, "transport_error", nil
+	}
+
+	switch {
+	case statusCode == http.StatusOK:
+		return domain.ProbeClassificationSuccess, "ok", nil
+	case statusCode == http.StatusUnauthorized, statusCode == http.StatusForbidden:
+		return domain.ProbeClassificationExplicitFailure, "auth_rejected", nil
+	case statusCode == http.StatusTooManyRequests, statusCode >= 500:
+		return domain.ProbeClassificationInconclusive, "upstream_unstable", nil
+	case statusCode >= 400:
+		return domain.ProbeClassificationInconclusive, "unexpected_client_error", nil
+	}
+	return "", "", fmt.Errorf("%w: %d", ErrUnknownStatusCode, statusCode)
+}
--- a/internal/probe/evaluator_test.go
+++ b/internal/probe/evaluator_test.go
@@ -0,0 +1,47 @@
+package probe
+
+import (
+	"errors"
+	"testing"
+
+	"supply-intelligence/internal/domain"
+)
+
+// TestClassifyHTTPResult is a table test over representative status codes and
+// a transport error, checking the classification and reason code returned by
+// ClassifyHTTPResult. No case in the table currently sets wantErr.
+func TestClassifyHTTPResult(t *testing.T) {
+	type testCase struct {
+		name       string
+		statusCode int
+		err        error
+		wantClass  domain.ProbeClassification
+		wantReason string
+		wantErr    bool
+	}
+	cases := []testCase{
+		{name: "200 success", statusCode: 200, wantClass: domain.ProbeClassificationSuccess, wantReason: "ok"},
+		{name: "401 explicit failure", statusCode: 401, wantClass: domain.ProbeClassificationExplicitFailure, wantReason: "auth_rejected"},
+		{name: "403 explicit failure", statusCode: 403, wantClass: domain.ProbeClassificationExplicitFailure, wantReason: "auth_rejected"},
+		{name: "429 inconclusive", statusCode: 429, wantClass: domain.ProbeClassificationInconclusive, wantReason: "upstream_unstable"},
+		{name: "503 inconclusive", statusCode: 503, wantClass: domain.ProbeClassificationInconclusive, wantReason: "upstream_unstable"},
+		{name: "transport error inconclusive", err: errors.New("timeout"), wantClass: domain.ProbeClassificationInconclusive, wantReason: "transport_error"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			class, reason, err := ClassifyHTTPResult(tc.statusCode, tc.err)
+
+			switch {
+			case tc.wantErr && err == nil:
+				t.Fatalf("expected error, got nil")
+			case tc.wantErr:
+				// Expected failure observed; results are not inspected.
+				return
+			case err != nil:
+				t.Fatalf("unexpected error: %v", err)
+			}
+
+			if class != tc.wantClass {
+				t.Fatalf("classification mismatch: got %q want %q", class, tc.wantClass)
+			}
+			if reason != tc.wantReason {
+				t.Fatalf("reason mismatch: got %q want %q", reason, tc.wantReason)
+			}
+		})
+	}
+}
--- a/internal/probe/executor.go
+++ b/internal/probe/executor.go
@@ -0,0 +1,125 @@
+package probe
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
+	"github.com/google/uuid"
+)
+
+// HTTPClient defines the interface for making HTTP requests during probing.
+// Its single method has the same signature as (*http.Client).Do, so an
+// *http.Client satisfies it directly.
+type HTTPClient interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
+// DefaultHTTPClient wraps the standard http.Client.
+// Construct it with NewDefaultHTTPClient; the zero value has a nil client and
+// its Do method would dereference it.
+type DefaultHTTPClient struct {
+	client *http.Client
+}
+
+// NewDefaultHTTPClient returns a DefaultHTTPClient backed by a dedicated
+// http.Client whose overall request timeout is 30 seconds.
+func NewDefaultHTTPClient() *DefaultHTTPClient {
+	const probeTimeout = 30 * time.Second
+	inner := &http.Client{Timeout: probeTimeout}
+	return &DefaultHTTPClient{client: inner}
+}
+
+// Do sends req through the wrapped http.Client and returns its response and
+// error unchanged.
+func (c *DefaultHTTPClient) Do(req *http.Request) (*http.Response, error) {
+	resp, err := c.client.Do(req)
+	return resp, err
+}
+
+// ProbeTarget represents an account to be probed.
+// Endpoint is the full URL requested by ExecuteProbe and must be non-empty.
+// AuthHeader, when non-empty, is sent verbatim as the Authorization header.
+type ProbeTarget struct {
+	AccountID   int64
+	Platform   string
+	Endpoint   string
+	AuthHeader string // Bearer token or API key
+}
+
+// ProbeOutcome is the result of executing a probe against a target.
+// As produced by ExecuteProbe, either TransportError is set (StatusCode stays
+// zero) or StatusCode and ResponseBody describe the response received.
+type ProbeOutcome struct {
+	AccountID      int64
+	Platform       string
+	StatusCode     int
+	TransportError error
+	LatencyMs      int
+	ResponseBody   string // truncated, for debugging
+	RequestID      string
+	ExecutedAt     time.Time
+}
+
+// ProbeExecutor sends HTTP requests to supplier endpoints and captures the raw
+// outcome (status, latency, body snippet). Classification of the outcome is
+// done elsewhere (see ClassifyHTTPResult); now supplies the ExecutedAt stamp.
+type ProbeExecutor struct {
+	httpClient HTTPClient
+	now        func() time.Time
+}
+
+// NewProbeExecutor creates a probe executor that sends requests through
+// client and stamps outcomes with the current UTC time.
+//
+// If client is nil, it falls back to NewDefaultHTTPClient, whose underlying
+// http.Client has a request timeout. http.DefaultClient is deliberately not
+// used as the fallback: it has no timeout, so a supplier that never responds
+// would block the probe for as long as the caller's context allows.
+func NewProbeExecutor(client HTTPClient) *ProbeExecutor {
+	if client == nil {
+		client = NewDefaultHTTPClient()
+	}
+	return &ProbeExecutor{
+		httpClient: client,
+		now:        func() time.Time { return time.Now().UTC() },
+	}
+}
+
+// ExecuteProbe sends one HTTP GET to target.Endpoint and captures the outcome.
+//
+// It returns ErrInvalidProbeTarget (possibly wrapped) when the endpoint is
+// empty or the request cannot be constructed. A transport failure is not an
+// error return: it is stored in the outcome's TransportError and the error is
+// nil. On a response, the outcome carries the status code and at most 1024
+// bytes of the body. LatencyMs covers the client's Do call only.
+func (e *ProbeExecutor) ExecuteProbe(ctx context.Context, target ProbeTarget) (ProbeOutcome, error) {
+	// Request ID and timestamp are taken up front, before any validation.
+	id := uuid.New().String()
+	stampedAt := e.now()
+
+	if target.Endpoint == "" {
+		return ProbeOutcome{}, ErrInvalidProbeTarget
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.Endpoint, nil)
+	if err != nil {
+		return ProbeOutcome{}, fmt.Errorf("%w: %v", ErrInvalidProbeTarget, err)
+	}
+	req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
+	req.Header.Set("Accept", "application/json")
+	if auth := target.AuthHeader; auth != "" {
+		req.Header.Set("Authorization", auth)
+	}
+
+	sentAt := time.Now()
+	resp, doErr := e.httpClient.Do(req)
+	elapsedMs := int(time.Since(sentAt).Milliseconds())
+
+	outcome := ProbeOutcome{
+		AccountID:  target.AccountID,
+		Platform:   target.Platform,
+		LatencyMs:  elapsedMs,
+		RequestID:  id,
+		ExecutedAt: stampedAt,
+	}
+
+	switch {
+	case doErr != nil:
+		// Transport failure: reported through the outcome, not the error.
+		outcome.TransportError = doErr
+	case resp != nil:
+		defer resp.Body.Close()
+		outcome.StatusCode = resp.StatusCode
+
+		// Keep at most 1 KiB of the body; read errors are ignored.
+		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
+		outcome.ResponseBody = string(snippet)
+	}
+
+	return outcome, nil
+}
+
+// ErrInvalidProbeTarget is returned (possibly wrapped) by ExecuteProbe when
+// the target has no endpoint or a request cannot be built for it.
+var ErrInvalidProbeTarget = errors.New("invalid probe target")
--- a/internal/probe/executor_test.go
+++ b/internal/probe/executor_test.go
@@ -0,0 +1,219 @@
+package probe
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+)
+
+// mockHTTPClient is a test double whose Do hands back the preconfigured Resp
+// and Err for every request.
+type mockHTTPClient struct {
+	Resp *http.Response
+	Err  error
+}
+
+// Do mimics a context-aware client: a request whose context is already done
+// fails with that context's error, otherwise the canned Resp/Err pair is
+// returned unchanged.
+func (m *mockHTTPClient) Do(req *http.Request) (*http.Response, error) {
+	if ctxErr := req.Context().Err(); ctxErr != nil {
+		return nil, ctxErr
+	}
+	return m.Resp, m.Err
+}
+
+// TestProbeExecutor_ExecuteProbe_Success probes a test server that answers 200
+// and checks the status code, latency and request ID on the outcome.
+func TestProbeExecutor_ExecuteProbe_Success(t *testing.T) {
+	handler := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(`{"status":"ok"}`))
+	}
+	srv := httptest.NewServer(http.HandlerFunc(handler))
+	defer srv.Close()
+
+	target := ProbeTarget{
+		AccountID:  1,
+		Platform:   "openai",
+		Endpoint:   srv.URL,
+		AuthHeader: "Bearer test-key",
+	}
+	// A nil client makes the executor fall back to http.DefaultClient.
+	got, err := NewProbeExecutor(nil).ExecuteProbe(context.Background(), target)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	switch {
+	case got.StatusCode != http.StatusOK:
+		t.Fatalf("expected 200, got: %d", got.StatusCode)
+	case got.LatencyMs < 0:
+		t.Fatalf("expected latency >= 0, got: %d", got.LatencyMs)
+	case got.RequestID == "":
+		t.Fatal("expected request_id to be set")
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_ExplicitFailure checks that a 401 response is
+// reported on the outcome rather than as a Go error.
+func TestProbeExecutor_ExecuteProbe_ExplicitFailure(t *testing.T) {
+	reject := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusUnauthorized)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(reject))
+	defer srv.Close()
+
+	target := ProbeTarget{
+		AccountID: 2,
+		Platform:  "openai",
+		Endpoint:  srv.URL,
+	}
+	got, err := NewProbeExecutor(nil).ExecuteProbe(context.Background(), target)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if code := got.StatusCode; code != http.StatusUnauthorized {
+		t.Fatalf("expected 401, got: %d", code)
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_Inconclusive_429 checks that a 429 response is
+// passed through on the outcome without an error.
+func TestProbeExecutor_ExecuteProbe_Inconclusive_429(t *testing.T) {
+	throttle := func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusTooManyRequests)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(throttle))
+	defer srv.Close()
+
+	target := ProbeTarget{
+		AccountID: 3,
+		Platform:  "openai",
+		Endpoint:  srv.URL,
+	}
+	got, err := NewProbeExecutor(nil).ExecuteProbe(context.Background(), target)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if code := got.StatusCode; code != http.StatusTooManyRequests {
+		t.Fatalf("expected 429, got: %d", code)
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_TransportError checks that a client-level
+// failure lands in TransportError with a zero status code and a nil error.
+func TestProbeExecutor_ExecuteProbe_TransportError(t *testing.T) {
+	failing := &mockHTTPClient{Err: errors.New("connection refused")}
+	target := ProbeTarget{
+		AccountID: 4,
+		Platform:  "openai",
+		Endpoint:  "http://localhost:9999",
+	}
+
+	got, err := NewProbeExecutor(failing).ExecuteProbe(context.Background(), target)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	switch {
+	case got.TransportError == nil:
+		t.Fatal("expected transport error to be set")
+	case got.StatusCode != 0:
+		t.Fatalf("expected status 0 on transport error, got: %d", got.StatusCode)
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_InvalidTarget checks that an empty endpoint is
+// rejected with ErrInvalidProbeTarget before any request is sent.
+func TestProbeExecutor_ExecuteProbe_InvalidTarget(t *testing.T) {
+	executor := NewProbeExecutor(nil)
+
+	_, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
+		AccountID: 5,
+		Platform:  "openai",
+		Endpoint:  "", // empty endpoint
+	})
+
+	if err == nil {
+		t.Fatal("expected error for empty endpoint")
+	}
+	// Pin the sentinel so an unrelated failure cannot satisfy the test.
+	if !errors.Is(err, ErrInvalidProbeTarget) {
+		t.Fatalf("expected ErrInvalidProbeTarget, got: %v", err)
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_ContextCanceled checks that a probe whose
+// context expires mid-request reports the deadline through TransportError.
+func TestProbeExecutor_ExecuteProbe_ContextCanceled(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Stall well past the client deadline, but give up as soon as the
+		// client goes away: server.Close waits for in-flight handlers, so an
+		// unconditional sleep would hold the test for the full five seconds.
+		select {
+		case <-r.Context().Done():
+			return
+		case <-time.After(5 * time.Second):
+		}
+		w.WriteHeader(http.StatusOK)
+	}))
+	defer server.Close()
+
+	executor := NewProbeExecutor(nil)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+	defer cancel()
+
+	outcome, err := executor.ExecuteProbe(ctx, ProbeTarget{
+		AccountID: 6,
+		Platform:  "openai",
+		Endpoint:  server.URL,
+	})
+
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if outcome.TransportError == nil {
+		t.Fatal("expected context deadline exceeded transport error")
+	}
+	if !errors.Is(outcome.TransportError, context.DeadlineExceeded) {
+		t.Fatalf("expected context.DeadlineExceeded, got: %v", outcome.TransportError)
+	}
+}
+
+// TestProbeExecutor_ExecuteProbe_ResponseBodyTruncated serves a 10KB body and
+// checks that the outcome keeps no more than 1024 bytes of it.
+func TestProbeExecutor_ExecuteProbe_ResponseBodyTruncated(t *testing.T) {
+	payload := []byte(strings.Repeat("x", 10*1024)) // 10KB
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		w.Write(payload)
+	}))
+	defer srv.Close()
+
+	target := ProbeTarget{
+		AccountID: 7,
+		Platform:  "openai",
+		Endpoint:  srv.URL,
+	}
+	got, err := NewProbeExecutor(nil).ExecuteProbe(context.Background(), target)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if size := len(got.ResponseBody); size > 1024 {
+		t.Fatalf("expected body truncated to <=1024, got: %d", size)
+	}
+}
+
+// TestProbeExecutor_SetsUserAgentAndAcceptHeader captures the headers seen by
+// the server and checks User-Agent, Accept and Authorization.
+func TestProbeExecutor_SetsUserAgentAndAcceptHeader(t *testing.T) {
+	var seen http.Header
+	capture := func(w http.ResponseWriter, r *http.Request) {
+		seen = r.Header.Clone()
+		w.WriteHeader(http.StatusOK)
+	}
+	srv := httptest.NewServer(http.HandlerFunc(capture))
+	defer srv.Close()
+
+	target := ProbeTarget{
+		AccountID:  8,
+		Platform:   "openai",
+		Endpoint:   srv.URL,
+		AuthHeader: "Bearer my-key",
+	}
+	// Only the request as observed by the server matters here.
+	_, _ = NewProbeExecutor(nil).ExecuteProbe(context.Background(), target)
+
+	if seen == nil {
+		t.Fatal("server handler was not called — check test setup")
+	}
+	if seen.Get("User-Agent") == "" {
+		t.Fatal("expected User-Agent header to be set")
+	}
+	if accept := seen.Get("Accept"); accept != "application/json" {
+		t.Fatalf("expected Accept: application/json, got: %s", accept)
+	}
+	if seen.Get("Authorization") != "Bearer my-key" {
+		t.Fatalf("expected Authorization header to be set")
+	}
+}
--- a/internal/probe/service.go
+++ b/internal/probe/service.go
@@ -0,0 +1,95 @@
+package probe
+
+import (
+	"context"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// RoutingStateRepository is the persistence contract Service uses to read and
+// write per-account routing state.
+type RoutingStateRepository interface {
+	// GetRoutingStateContext returns the stored state for accountID; the bool
+	// reports whether a state was found.
+	GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool)
+	// UpsertRoutingStateContext stores state and returns the value that was
+	// stored.
+	UpsertRoutingStateContext(ctx context.Context, state domain.AccountRoutingState) domain.AccountRoutingState
+}
+
+// Service evaluates probe results and writes the resulting routing state to a
+// RoutingStateRepository.
+type Service struct {
+	repo RoutingStateRepository
+	// now supplies the observation timestamp; tests replace it with a fixed
+	// clock.
+	now func() time.Time
+}
+
+// EvaluateInput describes one probe observation passed to EvaluateHTTPResult.
+type EvaluateInput struct {
+	AccountID int64
+	// Platform may be empty; it is then taken from the stored routing state
+	// when one exists.
+	Platform string
+	// CurrentStatus is the status the state machine starts from.
+	CurrentStatus domain.AccountStatus
+	// StatusCode and TransportError are handed to ClassifyHTTPResult.
+	StatusCode     int
+	TransportError error
+}
+
+// EvaluateOutput is the result of EvaluateHTTPResult: the classification and
+// reason code of the observation plus the routing state returned by the
+// repository upsert.
+type EvaluateOutput struct {
+	Classification domain.ProbeClassification `json:"classification"`
+	ReasonCode     string                     `json:"reason_code"`
+	RoutingState   domain.AccountRoutingState `json:"routing_state"`
+}
+
+// NewService returns a Service backed by repo whose clock reports the current
+// time in UTC.
+func NewService(repo RoutingStateRepository) *Service {
+	utcNow := func() time.Time { return time.Now().UTC() }
+	return &Service{repo: repo, now: utcNow}
+}
+
+func (s *Service) EvaluateHTTPResult(ctx context.Context, input EvaluateInput) (EvaluateOutput, error) {
+	classification, reasonCode, err := ClassifyHTTPResult(input.StatusCode, input.TransportError)
+	if err != nil {
+		return EvaluateOutput{}, err
+	}
+
+	observedAt := s.now()
+	nextStatus := NextAccountStatus(input.CurrentStatus, classification)
+	state := domain.AccountRoutingState{
+		AccountID:      input.AccountID,
+		Platform:       input.Platform,
+		AccountStatus:  nextStatus,
+		RoutingEnabled: nextStatus == domain.AccountStatusActive,
+		RiskScore:      riskScoreFor(nextStatus, classification),
+		ReasonCode:     reasonCode,
+		LastProbeAt:    observedAt,
+		Version:        1,
+	}
+
+	if previous, ok := s.repo.GetRoutingStateContext(ctx, input.AccountID); ok {
+		state.Version = previous.Version + 1
+		if state.Platform == "" {
+			state.Platform = previous.Platform
+		}
+	}
+
+	persisted := s.repo.UpsertRoutingStateContext(ctx, state)
+	return EvaluateOutput{
+		Classification: classification,
+		ReasonCode:     reasonCode,
+		RoutingState:   persisted,
+	}, nil
+}
+
+// riskScoreFor maps a probe classification, and for explicit failures the
+// resulting account status, to a risk score: success 20, inconclusive 60,
+// explicit failure 80/90/100, anything else 0.
+func riskScoreFor(status domain.AccountStatus, classification domain.ProbeClassification) int {
+	if classification == domain.ProbeClassificationSuccess {
+		return 20
+	}
+	if classification == domain.ProbeClassificationInconclusive {
+		return 60
+	}
+	if classification != domain.ProbeClassificationExplicitFailure {
+		return 0
+	}
+
+	// Explicit failure: the score depends on the status the account ended in.
+	switch status {
+	case domain.AccountStatusDisabled:
+		return 100
+	case domain.AccountStatusSuspended:
+		return 90
+	}
+	return 80
+}
--- a/internal/probe/service_test.go
+++ b/internal/probe/service_test.go
@@ -0,0 +1,115 @@
+package probe
+
+import (
+	"context"
+	"errors"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/repository"
+)
+
+// TestServiceEvaluateHTTPResultSuccess feeds a 200 for a suspended account with
+// no stored state and expects an active, routable state at version 1.
+func TestServiceEvaluateHTTPResultSuccess(t *testing.T) {
+	svc := NewService(repository.NewMemoryRepository())
+	svc.now = func() time.Time { return time.Unix(1000, 0).UTC() }
+
+	input := EvaluateInput{
+		AccountID:     1,
+		Platform:      "openai",
+		CurrentStatus: domain.AccountStatusSuspended,
+		StatusCode:    200,
+	}
+	out, err := svc.EvaluateHTTPResult(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if out.Classification != domain.ProbeClassificationSuccess {
+		t.Fatalf("unexpected classification: %q", out.Classification)
+	}
+
+	state := out.RoutingState
+	switch {
+	case state.AccountStatus != domain.AccountStatusActive:
+		t.Fatalf("unexpected account status: %q", state.AccountStatus)
+	case !state.RoutingEnabled:
+		t.Fatalf("expected routing enabled")
+	case state.ReasonCode != "ok":
+		t.Fatalf("unexpected reason code: %q", state.ReasonCode)
+	case state.Version != 1:
+		t.Fatalf("unexpected version: %d", state.Version)
+	}
+}
+
+// TestServiceEvaluateHTTPResultExplicitFailure seeds an active account at
+// version 4, feeds a 401 and expects a suspended, non-routable state at
+// version 5.
+func TestServiceEvaluateHTTPResultExplicitFailure(t *testing.T) {
+	repo := repository.NewMemoryRepository()
+	svc := NewService(repo)
+	svc.now = func() time.Time { return time.Unix(1001, 0).UTC() }
+
+	seed := domain.AccountRoutingState{
+		AccountID:      2,
+		Platform:       "openai",
+		AccountStatus:  domain.AccountStatusActive,
+		RoutingEnabled: true,
+		RiskScore:      20,
+		ReasonCode:     "ok",
+		LastProbeAt:    time.Unix(999, 0).UTC(),
+		Version:        4,
+	}
+	repo.UpsertRoutingState(seed)
+
+	input := EvaluateInput{
+		AccountID:     2,
+		Platform:      "openai",
+		CurrentStatus: domain.AccountStatusActive,
+		StatusCode:    401,
+	}
+	out, err := svc.EvaluateHTTPResult(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if out.Classification != domain.ProbeClassificationExplicitFailure {
+		t.Fatalf("unexpected classification: %q", out.Classification)
+	}
+
+	state := out.RoutingState
+	switch {
+	case state.AccountStatus != domain.AccountStatusSuspended:
+		t.Fatalf("unexpected account status: %q", state.AccountStatus)
+	case state.RoutingEnabled:
+		t.Fatalf("expected routing disabled")
+	case state.ReasonCode != "auth_rejected":
+		t.Fatalf("unexpected reason code: %q", state.ReasonCode)
+	case state.Version != 5:
+		t.Fatalf("unexpected version: %d", state.Version)
+	}
+}
+
+// TestServiceEvaluateHTTPResultInconclusive feeds a transport error for a
+// suspended account and expects the status to stay suspended with risk 60.
+func TestServiceEvaluateHTTPResultInconclusive(t *testing.T) {
+	svc := NewService(repository.NewMemoryRepository())
+	svc.now = func() time.Time { return time.Unix(1002, 0).UTC() }
+
+	input := EvaluateInput{
+		AccountID:      3,
+		Platform:       "openai",
+		CurrentStatus:  domain.AccountStatusSuspended,
+		TransportError: errors.New("dial tcp timeout"),
+	}
+	out, err := svc.EvaluateHTTPResult(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if out.Classification != domain.ProbeClassificationInconclusive {
+		t.Fatalf("unexpected classification: %q", out.Classification)
+	}
+
+	state := out.RoutingState
+	switch {
+	case state.AccountStatus != domain.AccountStatusSuspended:
+		t.Fatalf("unexpected account status: %q", state.AccountStatus)
+	case state.RoutingEnabled:
+		t.Fatalf("expected routing disabled for suspended account")
+	case state.ReasonCode != "transport_error":
+		t.Fatalf("unexpected reason code: %q", state.ReasonCode)
+	case state.RiskScore != 60:
+		t.Fatalf("unexpected risk score: %d", state.RiskScore)
+	}
+}
--- a/internal/probe/state_machine.go
+++ b/internal/probe/state_machine.go
@@ -0,0 +1,23 @@
+package probe
+
+import "supply-intelligence/internal/domain"
+
+// NextAccountStatus computes the account status that follows a probe.
+//
+// A success always yields active. An explicit failure demotes one step: active
+// becomes suspended and suspended becomes disabled. Every other combination,
+// including inconclusive probes, leaves the status unchanged.
+func NextAccountStatus(current domain.AccountStatus, classification domain.ProbeClassification) domain.AccountStatus {
+	explicitFailure := classification == domain.ProbeClassificationExplicitFailure
+
+	switch {
+	case classification == domain.ProbeClassificationSuccess:
+		return domain.AccountStatusActive
+	case explicitFailure && current == domain.AccountStatusActive:
+		return domain.AccountStatusSuspended
+	case explicitFailure && current == domain.AccountStatusSuspended:
+		return domain.AccountStatusDisabled
+	}
+	return current
+}
--- a/internal/probe/state_machine_test.go
+++ b/internal/probe/state_machine_test.go
@@ -0,0 +1,30 @@
+package probe
+
+import (
+	"testing"
+
+	"supply-intelligence/internal/domain"
+)
+
+// TestNextAccountStatus walks the probe state machine through every
+// classification, including the recovery and terminal transitions.
+func TestNextAccountStatus(t *testing.T) {
+	tests := []struct {
+		name           string
+		current        domain.AccountStatus
+		classification domain.ProbeClassification
+		want           domain.AccountStatus
+	}{
+		{name: "success keeps active", current: domain.AccountStatusActive, classification: domain.ProbeClassificationSuccess, want: domain.AccountStatusActive},
+		{name: "success reactivates suspended", current: domain.AccountStatusSuspended, classification: domain.ProbeClassificationSuccess, want: domain.AccountStatusActive},
+		{name: "explicit failure active to suspended", current: domain.AccountStatusActive, classification: domain.ProbeClassificationExplicitFailure, want: domain.AccountStatusSuspended},
+		{name: "explicit failure suspended to disabled", current: domain.AccountStatusSuspended, classification: domain.ProbeClassificationExplicitFailure, want: domain.AccountStatusDisabled},
+		{name: "explicit failure keeps disabled", current: domain.AccountStatusDisabled, classification: domain.ProbeClassificationExplicitFailure, want: domain.AccountStatusDisabled},
+		{name: "inconclusive keeps active", current: domain.AccountStatusActive, classification: domain.ProbeClassificationInconclusive, want: domain.AccountStatusActive},
+		{name: "inconclusive keeps suspended", current: domain.AccountStatusSuspended, classification: domain.ProbeClassificationInconclusive, want: domain.AccountStatusSuspended},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := NextAccountStatus(tt.current, tt.classification)
+			if got != tt.want {
+				t.Fatalf("status mismatch: got %q want %q", got, tt.want)
+			}
+		})
+	}
+}
--- a/internal/publish/README.md
+++ b/internal/publish/README.md
@@ -0,0 +1,16 @@
+# Publish semantics boundary
+
+This package only records package-published events and emits gateway-consumable change records.
+It does not implement a full publish state machine, admission workflow, or downstream routing synchronization.
+
+Current repository boundary:
+- `published` means the upstream package event has been recorded
+- `pending` means the downstream gateway consumer has not yet confirmed handling
+- `applied` / `failed` means the current repository's consumer flow updated event state during the running process
+- current gateway event state in this repo is in-memory only, not durable across restart
+
+Current runtime shape:
+- manual/debug entry: `POST /internal/supply-intelligence/gateway/consume-once`
+- minimal background path: application startup also runs a ticker-driven gateway poller
+
+This avoids claiming that `published = applied`, and also avoids claiming that the current in-memory repository is a durable production persistence layer.
--- a/internal/publish/service.go
+++ b/internal/publish/service.go
@@ -0,0 +1,59 @@
+package publish
+
+import (
+	"context"
+	"errors"
+	"strings"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// PackagePublishedEventType is the EventType written on every event recorded by
+// RecordPackagePublished.
+const PackagePublishedEventType = "supply_package_published"
+
+// ErrInvalidPublishInput is returned by RecordPackagePublished when the service
+// has no repository or a required input field is missing or non-positive.
+var ErrInvalidPublishInput = errors.New("invalid publish input")
+
+// PackageEventRepository is the storage contract Service needs: appending one
+// package change event and getting back the stored value.
+type PackageEventRepository interface {
+	AppendPackageEventContext(ctx context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error)
+}
+
+// Service records package-published events through a PackageEventRepository.
+type Service struct {
+	repo PackageEventRepository
+}
+
+// RecordPackagePublishedInput carries the fields of a package-published event.
+// EventID, Platform and Model must be non-blank; PackageID and Version must be
+// positive. A zero OccurredAt is replaced with the current UTC time.
+type RecordPackagePublishedInput struct {
+	EventID    string
+	PackageID  int64
+	Platform   string
+	Model      string
+	Version    int64
+	OccurredAt time.Time
+}
+
+// NewService returns a publish Service that appends events to repo.
+func NewService(repo PackageEventRepository) *Service {
+	svc := new(Service)
+	svc.repo = repo
+	return svc
+}
+
+// RecordPackagePublished validates input and appends a pending
+// package-published event to the repository.
+//
+// String fields are stored trimmed and OccurredAt is stored in UTC, defaulting
+// to the current time when zero. ErrInvalidPublishInput is returned when the
+// service or its repository is nil, or when any required field is blank or
+// non-positive; otherwise the repository's result is returned unchanged.
+func (s *Service) RecordPackagePublished(ctx context.Context, input RecordPackagePublishedInput) (domain.PackageChangeEvent, error) {
+	if s == nil || s.repo == nil {
+		return domain.PackageChangeEvent{}, ErrInvalidPublishInput
+	}
+
+	eventID := strings.TrimSpace(input.EventID)
+	platform := strings.TrimSpace(input.Platform)
+	model := strings.TrimSpace(input.Model)
+
+	switch {
+	case eventID == "", platform == "", model == "":
+		return domain.PackageChangeEvent{}, ErrInvalidPublishInput
+	case input.PackageID <= 0, input.Version <= 0:
+		return domain.PackageChangeEvent{}, ErrInvalidPublishInput
+	}
+
+	occurredAt := input.OccurredAt.UTC()
+	if occurredAt.IsZero() {
+		occurredAt = time.Now().UTC()
+	}
+
+	return s.repo.AppendPackageEventContext(ctx, domain.PackageChangeEvent{
+		EventID:           eventID,
+		EventType:         PackagePublishedEventType,
+		PackageID:         input.PackageID,
+		Platform:          platform,
+		Model:             model,
+		OccurredAt:        occurredAt,
+		Version:           input.Version,
+		GatewaySyncStatus: domain.GatewaySyncStatusPending,
+	})
+}
--- a/internal/publish/service_test.go
+++ b/internal/publish/service_test.go
@@ -0,0 +1,66 @@
+package publish
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+	"supply-intelligence/internal/repository"
+)
+
+// TestServiceRecordPackagePublished records one event and checks both the
+// returned value and the copy stored in the repository.
+func TestServiceRecordPackagePublished(t *testing.T) {
+	repo := repository.NewMemoryRepository()
+	svc := NewService(repo)
+	when := time.Unix(1715000000, 0)
+
+	input := RecordPackagePublishedInput{
+		EventID:    "evt-publish-1",
+		PackageID:  1001,
+		Platform:   "openai",
+		Model:      "gpt-4.1-mini",
+		Version:    3,
+		OccurredAt: when,
+	}
+	recorded, err := svc.RecordPackagePublished(context.Background(), input)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	// Returned event.
+	if recorded.EventID != "evt-publish-1" || recorded.EventType != PackagePublishedEventType {
+		t.Fatalf("unexpected event: %+v", recorded)
+	}
+	if !recorded.OccurredAt.Equal(when.UTC()) {
+		t.Fatalf("unexpected occurred_at: %s", recorded.OccurredAt)
+	}
+	if recorded.GatewaySyncStatus != domain.GatewaySyncStatusPending {
+		t.Fatalf("unexpected sync status: %q", recorded.GatewaySyncStatus)
+	}
+
+	// Stored event.
+	stored := repo.ListPackageEvents()
+	if len(stored) != 1 {
+		t.Fatalf("unexpected items length: %d", len(stored))
+	}
+	first := stored[0]
+	if first.EventID != recorded.EventID || first.Version != 3 {
+		t.Fatalf("unexpected stored event: %+v", first)
+	}
+	if first.GatewaySyncStatus != domain.GatewaySyncStatusPending {
+		t.Fatalf("unexpected stored sync status: %+v", first)
+	}
+}
+
+// TestServiceRecordPackagePublishedRejectsInvalidInput checks that blank and
+// zero fields are rejected with ErrInvalidPublishInput.
+func TestServiceRecordPackagePublishedRejectsInvalidInput(t *testing.T) {
+	svc := NewService(repository.NewMemoryRepository())
+	invalid := RecordPackagePublishedInput{
+		EventID:   " ",
+		PackageID: 0,
+		Platform:  "",
+		Model:     "",
+		Version:   0,
+	}
+
+	_, err := svc.RecordPackagePublished(context.Background(), invalid)
+	switch {
+	case err == nil:
+		t.Fatal("expected error")
+	case err != ErrInvalidPublishInput:
+		t.Fatalf("unexpected error: %v", err)
+	}
+}
--- a/internal/repository/memory.go
+++ b/internal/repository/memory.go
@@ -0,0 +1,278 @@
+package repository
+
+import (
+	"context"
+	"errors"
+	"sort"
+	"strconv"
+	"sync"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// ErrEventNotFound is returned by AckPackageEvent when no event is stored under
+// the given event ID.
+var ErrEventNotFound = errors.New("event not found")
+
+// IsGatewayAckResult reports whether result is one of the two recognised ack
+// results, applied or failed.
+func IsGatewayAckResult(result domain.GatewayAckResult) bool {
+	switch result {
+	case domain.GatewayAckResultApplied, domain.GatewayAckResultFailed:
+		return true
+	}
+	return false
+}
+
+// MemoryRepository is an in-process store for routing states, package change
+// events, gateway applied snapshots, discovery candidates and supply packages.
+// Its methods take mu around every map access. Nothing is persisted outside
+// the process.
+type MemoryRepository struct {
+	mu sync.RWMutex
+	// routingStates is keyed by account ID.
+	routingStates map[int64]domain.AccountRoutingState
+	// packageEvents is keyed by event ID.
+	packageEvents map[string]domain.PackageChangeEvent
+	// appliedSnapshot is keyed by consumer name.
+	appliedSnapshot map[string]domain.GatewayAppliedSnapshot
+	// discoveryCandidates is keyed by candidate ID.
+	discoveryCandidates map[string]domain.DiscoveryCandidate
+	// supplyPackages is keyed by platform+"_"+model.
+	supplyPackages map[string]domain.SupplyPackage
+}
+
+// NewMemoryRepository returns an empty repository with every map initialised,
+// so it can be written to immediately.
+func NewMemoryRepository() *MemoryRepository {
+	return &MemoryRepository{
+		routingStates:       map[int64]domain.AccountRoutingState{},
+		packageEvents:       map[string]domain.PackageChangeEvent{},
+		appliedSnapshot:     map[string]domain.GatewayAppliedSnapshot{},
+		discoveryCandidates: map[string]domain.DiscoveryCandidate{},
+		supplyPackages:      map[string]domain.SupplyPackage{},
+	}
+}
+
+// UpsertRoutingState stores state under its AccountID, replacing any previous
+// value. It is the context-free form of UpsertRoutingStateContext and discards
+// the stored value.
+func (r *MemoryRepository) UpsertRoutingState(state domain.AccountRoutingState) {
+	r.upsertRoutingState(state)
+}
+
+// UpsertRoutingStateContext stores state under its AccountID and returns it.
+// The context is ignored by this in-memory implementation.
+func (r *MemoryRepository) UpsertRoutingStateContext(_ context.Context, state domain.AccountRoutingState) domain.AccountRoutingState {
+	return r.upsertRoutingState(state)
+}
+
+// upsertRoutingState writes state into routingStates under the write lock and
+// returns the value unchanged.
+func (r *MemoryRepository) upsertRoutingState(state domain.AccountRoutingState) domain.AccountRoutingState {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	r.routingStates[state.AccountID] = state
+	return state
+}
+
+// GetRoutingState returns the routing state stored for accountID; the bool
+// reports whether one exists.
+func (r *MemoryRepository) GetRoutingState(accountID int64) (domain.AccountRoutingState, bool) {
+	return r.getRoutingState(accountID)
+}
+
+// GetRoutingStateContext is GetRoutingState with a context parameter; the
+// context is ignored by this in-memory implementation.
+func (r *MemoryRepository) GetRoutingStateContext(_ context.Context, accountID int64) (domain.AccountRoutingState, bool) {
+	return r.getRoutingState(accountID)
+}
+
+// getRoutingState reads routingStates under the read lock.
+func (r *MemoryRepository) getRoutingState(accountID int64) (domain.AccountRoutingState, bool) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	state, ok := r.routingStates[accountID]
+	return state, ok
+}
+
+// AppendPackageEvent stores evt via AppendPackageEventContext with a background
+// context. Both results are discarded; the in-memory append always returns a
+// nil error.
+func (r *MemoryRepository) AppendPackageEvent(evt domain.PackageChangeEvent) {
+	_, _ = r.AppendPackageEventContext(context.Background(), evt)
+}
+
+// AppendPackageEventContext stores evt under its EventID and returns the stored
+// value. A zero OccurredAt becomes the current UTC time and an empty
+// GatewaySyncStatus becomes pending. An event already stored under the same ID
+// is replaced. The context is ignored and the error is always nil.
+func (r *MemoryRepository) AppendPackageEventContext(_ context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	stored := evt
+	if stored.OccurredAt.IsZero() {
+		stored.OccurredAt = time.Now().UTC()
+	}
+	if stored.GatewaySyncStatus == "" {
+		stored.GatewaySyncStatus = domain.GatewaySyncStatusPending
+	}
+
+	r.packageEvents[stored.EventID] = stored
+	return stored, nil
+}
+
+// ListPackageEvents returns every stored event in the order produced by
+// ListPackageEventsAfter with an empty cursor; the next-cursor is dropped.
+func (r *MemoryRepository) ListPackageEvents() []domain.PackageChangeEvent {
+	items, _ := r.ListPackageEventsAfter("")
+	return items
+}
+
+// ListPackageEventsAfter returns the events that follow cursor, ordered by
+// OccurredAt and then EventID, together with a cursor for the next call.
+//
+// Cursor forms:
+//   - "" returns every event.
+//   - a decimal integer is an offset into the ordered list, clamped to
+//     [0, len].
+//   - anything else is treated as an event ID; the page starts after that
+//     event, or at the beginning when the ID is unknown.
+//
+// The returned cursor is the total number of stored events, or "" when there
+// are no events or the requested page is empty.
+//
+// NOTE(review): an exhausted cursor yields "", which restarts from the first
+// event if fed back in — confirm callers keep their previous cursor in that
+// case. Offsets are positions in a time-ordered list, so an event appended
+// with an earlier OccurredAt shifts them, and an event ID made only of digits
+// is read as an offset.
+func (r *MemoryRepository) ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	// Snapshot the map into a slice so it can be given a stable order.
+	items := make([]domain.PackageChangeEvent, 0, len(r.packageEvents))
+	for _, evt := range r.packageEvents {
+		items = append(items, evt)
+	}
+	sort.Slice(items, func(i, j int) bool {
+		if items[i].OccurredAt.Equal(items[j].OccurredAt) {
+			return items[i].EventID < items[j].EventID
+		}
+		return items[i].OccurredAt.Before(items[j].OccurredAt)
+	})
+	if cursor == "" {
+		return items, nextCursorFor(items)
+	}
+	start := 0
+	if idx, err := strconv.Atoi(cursor); err == nil {
+		// Numeric cursor: an offset, clamped into range.
+		if idx < 0 {
+			idx = 0
+		}
+		if idx > len(items) {
+			idx = len(items)
+		}
+		start = idx
+	} else {
+		// Non-numeric cursor: resume after the event with that ID.
+		for i, evt := range items {
+			if evt.EventID == cursor {
+				start = i + 1
+				break
+			}
+		}
+	}
+	if start >= len(items) {
+		return []domain.PackageChangeEvent{}, ""
+	}
+	filtered := append([]domain.PackageChangeEvent(nil), items[start:]...)
+	return filtered, nextCursorFor(items)
+}
+
+// nextCursorFor renders the number of items as a decimal cursor, or "" when
+// there are none.
+func nextCursorFor(items []domain.PackageChangeEvent) string {
+	if n := len(items); n > 0 {
+		return strconv.Itoa(n)
+	}
+	return ""
+}
+
+// AckPackageEvent records a consumer acknowledgement on the event stored under
+// eventID and returns the updated event.
+//
+// The consumer, detail, the sync status derived from result and the ack time
+// are written onto the event; a zero ackedAt becomes the current UTC time.
+// ErrEventNotFound is returned when eventID is unknown.
+func (r *MemoryRepository) AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt time.Time) (domain.PackageChangeEvent, error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	stored, found := r.packageEvents[eventID]
+	if !found {
+		return domain.PackageChangeEvent{}, ErrEventNotFound
+	}
+
+	when := ackedAt
+	if when.IsZero() {
+		when = time.Now().UTC()
+	}
+
+	stored.Consumer, stored.ConsumerDetail = consumer, detail
+	stored.GatewaySyncStatus = result.SyncStatus()
+	stored.AckedAt = &when
+	r.packageEvents[eventID] = stored
+	return stored, nil
+}
+
+// UpsertGatewayAppliedSnapshot stores snapshot under its Consumer, replacing
+// any previous one, and returns the stored value. A zero UpdatedAt becomes the
+// current UTC time.
+func (r *MemoryRepository) UpsertGatewayAppliedSnapshot(snapshot domain.GatewayAppliedSnapshot) domain.GatewayAppliedSnapshot {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	stored := snapshot
+	if stored.UpdatedAt.IsZero() {
+		stored.UpdatedAt = time.Now().UTC()
+	}
+	r.appliedSnapshot[stored.Consumer] = stored
+	return stored
+}
+
+// GetGatewayAppliedSnapshot returns the snapshot stored for consumer; the bool
+// reports whether one exists.
+func (r *MemoryRepository) GetGatewayAppliedSnapshot(consumer string) (domain.GatewayAppliedSnapshot, bool) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	snapshot, ok := r.appliedSnapshot[consumer]
+	return snapshot, ok
+}
+
+// GetDiscoveryCandidateByIDContext returns the candidate stored under
+// candidateID; the bool reports whether one exists. The context is ignored.
+func (r *MemoryRepository) GetDiscoveryCandidateByIDContext(_ context.Context, candidateID string) (domain.DiscoveryCandidate, bool) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	candidate, ok := r.discoveryCandidates[candidateID]
+	return candidate, ok
+}
+
+// FindDiscoveryCandidateContext scans the stored candidates for one matching
+// accountID, platform and model and returns the first match found; the bool
+// is false when none matches. The context is ignored.
+func (r *MemoryRepository) FindDiscoveryCandidateContext(_ context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	for _, c := range r.discoveryCandidates {
+		if c.AccountID != accountID {
+			continue
+		}
+		if c.Platform != platform || c.Model != model {
+			continue
+		}
+		return c, true
+	}
+	return domain.DiscoveryCandidate{}, false
+}
+
+// UpsertDiscoveryCandidateContext stores candidate under its CandidateID and
+// returns the stored value. A zero DiscoveredAt becomes the current UTC time
+// and a zero UpdatedAt is set to DiscoveredAt. The context is ignored.
+func (r *MemoryRepository) UpsertDiscoveryCandidateContext(_ context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	stored := candidate
+	if stored.DiscoveredAt.IsZero() {
+		stored.DiscoveredAt = time.Now().UTC()
+	}
+	if stored.UpdatedAt.IsZero() {
+		stored.UpdatedAt = stored.DiscoveredAt
+	}
+
+	r.discoveryCandidates[stored.CandidateID] = stored
+	return stored
+}
+
+// ListDiscoveryCandidatesContext returns the candidates whose Status equals
+// status, or all candidates when status is empty, ordered by DiscoveredAt and
+// then CandidateID. The context is ignored.
+func (r *MemoryRepository) ListDiscoveryCandidatesContext(_ context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	matches := make([]domain.DiscoveryCandidate, 0, len(r.discoveryCandidates))
+	for _, c := range r.discoveryCandidates {
+		if status == "" || c.Status == status {
+			matches = append(matches, c)
+		}
+	}
+
+	sort.Slice(matches, func(a, b int) bool {
+		left, right := matches[a], matches[b]
+		if !left.DiscoveredAt.Equal(right.DiscoveredAt) {
+			return left.DiscoveredAt.Before(right.DiscoveredAt)
+		}
+		return left.CandidateID < right.CandidateID
+	})
+	return matches
+}
+
+// --- SupplyPackage methods ---
+
+// UpsertSupplyPackage creates or updates the supply package for
+// pkg.Platform and pkg.Model.
+//
+// When a package already exists for that pair, its PackageID and CreatedAt are
+// kept and the version becomes the stored version plus one, overriding the
+// values on pkg. A zero CreatedAt becomes the current UTC time and UpdatedAt
+// is always refreshed.
+func (r *MemoryRepository) UpsertSupplyPackage(pkg domain.SupplyPackage) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	// One clock reading keeps CreatedAt and UpdatedAt identical on first insert.
+	now := time.Now().UTC()
+
+	key := pkg.Platform + "_" + pkg.Model
+	if existing, ok := r.supplyPackages[key]; ok {
+		pkg.PackageID = existing.PackageID
+		pkg.Version = existing.Version + 1
+		pkg.CreatedAt = existing.CreatedAt
+	}
+	if pkg.CreatedAt.IsZero() {
+		pkg.CreatedAt = now
+	}
+	pkg.UpdatedAt = now
+	r.supplyPackages[key] = pkg
+}
+
+// GetSupplyPackage looks up the supply package stored for the given platform
+// and model; the bool reports whether one exists.
+func (r *MemoryRepository) GetSupplyPackage(platform, model string) (domain.SupplyPackage, bool) {
+	r.mu.RLock()
+	found, exists := r.supplyPackages[platform+"_"+model]
+	r.mu.RUnlock()
+	return found, exists
+}
+
+// ListSupplyPackages returns the supply packages whose Status equals status,
+// or all packages when status is empty. The result is ordered by Platform and
+// then Model so that repeated calls return the same sequence instead of map
+// iteration order.
+func (r *MemoryRepository) ListSupplyPackages(status string) []domain.SupplyPackage {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+	items := make([]domain.SupplyPackage, 0, len(r.supplyPackages))
+	for _, pkg := range r.supplyPackages {
+		if status == "" || pkg.Status == status {
+			items = append(items, pkg)
+		}
+	}
+	sort.Slice(items, func(i, j int) bool {
+		if items[i].Platform != items[j].Platform {
+			return items[i].Platform < items[j].Platform
+		}
+		return items[i].Model < items[j].Model
+	})
+	return items
+}
+
+// UpdateCandidateStatus sets the status and reason code of an existing
+// candidate, refreshes UpdatedAt and increments its version. It is used by the
+// admission service. An error is returned when candidateID is unknown.
+//
+// NOTE(review): ctx and failureSummary are accepted but not used, so the
+// failure summary is never stored — confirm whether DiscoveryCandidate should
+// carry it.
+func (r *MemoryRepository) UpdateCandidateStatus(ctx context.Context, candidateID string, status domain.DiscoveryCandidateStatus, failureCode, failureSummary string) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	c, ok := r.discoveryCandidates[candidateID]
+	if !ok {
+		return errors.New("candidate not found")
+	}
+	c.Status = status
+	c.ReasonCode = failureCode
+	c.UpdatedAt = time.Now().UTC()
+	c.Version++
+	r.discoveryCandidates[candidateID] = c
+	return nil
+}
--- a/internal/repository/memory_test.go
+++ b/internal/repository/memory_test.go
@@ -0,0 +1,136 @@
+package repository
+
+import (
+	"testing"
+	"time"
+
+	"supply-intelligence/internal/domain"
+)
+
+// TestMemoryRepositoryRoutingState stores one routing state and reads it back.
+func TestMemoryRepositoryRoutingState(t *testing.T) {
+	repo := NewMemoryRepository()
+	repo.UpsertRoutingState(domain.AccountRoutingState{
+		AccountID:      1,
+		Platform:       "openai",
+		AccountStatus:  domain.AccountStatusActive,
+		RoutingEnabled: true,
+		Version:        1,
+	})
+
+	stored, found := repo.GetRoutingState(1)
+	if !found {
+		t.Fatalf("expected routing state")
+	}
+	if status := stored.AccountStatus; status != domain.AccountStatusActive {
+		t.Fatalf("unexpected status: %q", status)
+	}
+}
+
+// TestMemoryRepositoryPackageEventsAndAck appends one event, lists it, then
+// acknowledges it and checks the status, consumer metadata and ack time.
+func TestMemoryRepositoryPackageEventsAndAck(t *testing.T) {
+	repo := NewMemoryRepository()
+	repo.AppendPackageEvent(domain.PackageChangeEvent{
+		EventID:    "evt-1",
+		EventType:  "supply_package_published",
+		PackageID:  1,
+		Platform:   "openai",
+		Model:      "gpt-4.1-mini",
+		OccurredAt: time.Unix(10, 0).UTC(),
+		Version:    2,
+	})
+
+	if listed := repo.ListPackageEvents(); len(listed) != 1 {
+		t.Fatalf("expected 1 event, got %d", len(listed))
+	}
+
+	ackTime := time.Unix(20, 0).UTC()
+	acked, err := repo.AckPackageEvent("evt-1", "gateway", domain.GatewayAckResultApplied, "ok", ackTime)
+	if err != nil {
+		t.Fatalf("unexpected ack error: %v", err)
+	}
+
+	switch {
+	case acked.GatewaySyncStatus != domain.GatewaySyncStatusApplied:
+		t.Fatalf("unexpected ack status: %+v", acked)
+	case acked.Consumer != "gateway" || acked.ConsumerDetail != "ok":
+		t.Fatalf("unexpected consumer metadata: %+v", acked)
+	case acked.AckedAt == nil || !acked.AckedAt.Equal(ackTime):
+		t.Fatalf("unexpected ack time: %+v", acked)
+	}
+}
+
+// TestMemoryRepositoryListPackageEventsAfterCursor stores two events and checks
+// the full page for an empty cursor and the tail page for cursor "1".
+func TestMemoryRepositoryListPackageEventsAfterCursor(t *testing.T) {
+	repo := NewMemoryRepository()
+	seed := []domain.PackageChangeEvent{
+		{EventID: "evt-1", EventType: "supply_package_published", PackageID: 1, Platform: "openai", Model: "a", OccurredAt: time.Unix(10, 0).UTC(), Version: 1},
+		{EventID: "evt-2", EventType: "supply_package_published", PackageID: 2, Platform: "openai", Model: "b", OccurredAt: time.Unix(20, 0).UTC(), Version: 2},
+	}
+	for _, evt := range seed {
+		repo.AppendPackageEvent(evt)
+	}
+
+	page, next := repo.ListPackageEventsAfter("")
+	if len(page) != 2 || next != "2" {
+		t.Fatalf("unexpected initial page: len=%d next=%q", len(page), next)
+	}
+
+	page, next = repo.ListPackageEventsAfter("1")
+	if len(page) != 1 || page[0].EventID != "evt-2" || next != "2" {
+		t.Fatalf("unexpected cursor page: items=%+v next=%q", page, next)
+	}
+}
+
+// TestMemoryRepositoryDiscoveryCandidateCRUD stores a candidate and fetches it
+// back by ID.
+func TestMemoryRepositoryDiscoveryCandidateCRUD(t *testing.T) {
+	repo := NewMemoryRepository()
+	seenAt := time.Unix(10, 0).UTC()
+
+	repo.UpsertDiscoveryCandidateContext(nil, domain.DiscoveryCandidate{
+		CandidateID:  "cand-1",
+		AccountID:    1,
+		Platform:     "openai",
+		Model:        "gpt-4.1-mini",
+		Source:       "seed",
+		Status:       domain.DiscoveryCandidateStatusPendingAdmission,
+		DiscoveredAt: seenAt,
+		UpdatedAt:    seenAt,
+		Version:      1,
+	})
+
+	stored, found := repo.GetDiscoveryCandidateByIDContext(nil, "cand-1")
+	if !found || stored.CandidateID != "cand-1" {
+		t.Fatalf("expected candidate, got %+v ok=%v", stored, found)
+	}
+}
+
+// TestMemoryRepositoryFindDiscoveryCandidateByBusinessKey stores a candidate
+// and finds it again by account, platform and model.
+func TestMemoryRepositoryFindDiscoveryCandidateByBusinessKey(t *testing.T) {
+	repo := NewMemoryRepository()
+	seenAt := time.Unix(10, 0).UTC()
+
+	seed := domain.DiscoveryCandidate{
+		CandidateID:  "cand-1",
+		AccountID:    1,
+		Platform:     "openai",
+		Model:        "gpt-4.1-mini",
+		Source:       "seed",
+		Status:       domain.DiscoveryCandidateStatusPendingAdmission,
+		DiscoveredAt: seenAt,
+		UpdatedAt:    seenAt,
+		Version:      1,
+	}
+	repo.UpsertDiscoveryCandidateContext(nil, seed)
+
+	match, found := repo.FindDiscoveryCandidateContext(nil, 1, "openai", "gpt-4.1-mini")
+	if !found || match.CandidateID != "cand-1" {
+		t.Fatalf("expected candidate by business key, got %+v ok=%v", match, found)
+	}
+}
+
+// TestMemoryRepositoryListDiscoveryCandidatesByStatusAndOrder inserts two
+// candidates newest-first and checks both the status filter and the
+// oldest-first ordering of the unfiltered list.
+func TestMemoryRepositoryListDiscoveryCandidatesByStatusAndOrder(t *testing.T) {
+	repo := NewMemoryRepository()
+	newer := time.Unix(20, 0).UTC()
+	older := time.Unix(10, 0).UTC()
+
+	seed := []domain.DiscoveryCandidate{
+		{
+			CandidateID:  "cand-2",
+			AccountID:    2,
+			Platform:     "openai",
+			Model:        "b",
+			Source:       "seed",
+			Status:       domain.DiscoveryCandidateStatusAdmitted,
+			DiscoveredAt: newer,
+			UpdatedAt:    newer,
+			Version:      1,
+		},
+		{
+			CandidateID:  "cand-1",
+			AccountID:    1,
+			Platform:     "openai",
+			Model:        "a",
+			Source:       "seed",
+			Status:       domain.DiscoveryCandidateStatusPendingAdmission,
+			DiscoveredAt: older,
+			UpdatedAt:    older,
+			Version:      1,
+		},
+	}
+	for _, candidate := range seed {
+		repo.UpsertDiscoveryCandidateContext(nil, candidate)
+	}
+
+	pending := repo.ListDiscoveryCandidatesContext(nil, domain.DiscoveryCandidateStatusPendingAdmission)
+	if len(pending) != 1 || pending[0].CandidateID != "cand-1" {
+		t.Fatalf("unexpected filtered items: %+v", pending)
+	}
+
+	everything := repo.ListDiscoveryCandidatesContext(nil, "")
+	if len(everything) != 2 || everything[0].CandidateID != "cand-1" || everything[1].CandidateID != "cand-2" {
+		t.Fatalf("unexpected ordering: %+v", everything)
+	}
+}
--- a/migrations/0001_init.sql
+++ b/migrations/0001_init.sql
@@ -0,0 +1,21 @@
+CREATE TABLE IF NOT EXISTS supply_intelligence_account_routing_states ( -- one row per account: account_id is the primary key
+    account_id BIGINT PRIMARY KEY,
+    platform TEXT NOT NULL,
+    account_status TEXT NOT NULL,
+    routing_enabled BOOLEAN NOT NULL DEFAULT TRUE, -- rows default to routing enabled
+    risk_score INTEGER NOT NULL DEFAULT 0,
+    reason_code TEXT NOT NULL DEFAULT '', -- never NULL; defaults to the empty string
+    last_probe_at TIMESTAMPTZ NOT NULL,
+    version BIGINT NOT NULL DEFAULT 1 -- NOTE(review): presumably a revision counter for optimistic locking; confirm against the repository code
+);
+
+CREATE TABLE IF NOT EXISTS supply_intelligence_package_change_events ( -- one row per package change event, keyed by event_id
+    event_id TEXT PRIMARY KEY,
+    event_type TEXT NOT NULL,
+    package_id BIGINT NOT NULL, -- NOTE(review): no foreign key is declared on this column; confirm which table owns package_id
+    platform TEXT NOT NULL,
+    model TEXT NOT NULL,
+    occurred_at TIMESTAMPTZ NOT NULL,
+    version BIGINT NOT NULL,
+    ack_status TEXT NOT NULL DEFAULT 'pending' -- rows default to 'pending' when no ack status is supplied
+);
--- a/migrations/0002_admission.sql
+++ b/migrations/0002_admission.sql
@@ -0,0 +1,69 @@
+-- Migration 0002: Admission Testing & Model Candidates
+-- Adds model_candidates table and supply_packages draft support
+
+CREATE TABLE IF NOT EXISTS supply_intelligence_model_candidates ( -- model candidates, keyed by candidate_id
+    candidate_id TEXT PRIMARY KEY,
+    account_id BIGINT NOT NULL,
+    platform TEXT NOT NULL,
+    model TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'pending_admission', -- rows default to 'pending_admission'
+    source TEXT NOT NULL DEFAULT 'official_api',
+    reason_code TEXT DEFAULT '', -- NOTE(review): nullable here, while routing_states.reason_code is NOT NULL; confirm this is intended
+    failure_summary TEXT DEFAULT '', -- nullable as well (no NOT NULL constraint)
+    discovered_at TIMESTAMPTZ NOT NULL,
+    last_test_at TIMESTAMPTZ, -- nullable; presumably NULL until a test has run (confirm)
+    updated_at TIMESTAMPTZ NOT NULL,
+    version BIGINT NOT NULL DEFAULT 1,
+    UNIQUE(platform, model) -- NOTE(review): account_id is not part of this key, but the in-memory repository test looks candidates up by (account_id, platform, model); confirm
+);
+
+CREATE INDEX IF NOT EXISTS idx_candidates_status ON supply_intelligence_model_candidates(status);
+CREATE INDEX IF NOT EXISTS idx_candidates_platform ON supply_intelligence_model_candidates(platform);
+CREATE INDEX IF NOT EXISTS idx_candidates_discovered ON supply_intelligence_model_candidates(discovered_at DESC);
+
+-- The sequence is created first: DEFAULT nextval(...) is resolved when the table is created.
+CREATE SEQUENCE IF NOT EXISTS admission_test_id_seq;
+CREATE TABLE IF NOT EXISTS supply_intelligence_admission_test_logs (
+    test_id BIGINT PRIMARY KEY DEFAULT nextval('admission_test_id_seq'),
+    candidate_id TEXT NOT NULL REFERENCES supply_intelligence_model_candidates(candidate_id),
+    status TEXT NOT NULL,
+    failure_code TEXT,
+    failure_summary TEXT,
+    tested_at TIMESTAMPTZ NOT NULL,
+    version BIGINT NOT NULL DEFAULT 1
+);
+
+-- The sequence is created first: DEFAULT nextval(...) is resolved when the table is created.
+CREATE SEQUENCE IF NOT EXISTS supply_package_id_seq;
+CREATE TABLE IF NOT EXISTS supply_intelligence_supply_packages (
+    package_id BIGINT PRIMARY KEY DEFAULT nextval('supply_package_id_seq'),
+    platform TEXT NOT NULL,
+    model TEXT NOT NULL,
+    status TEXT NOT NULL DEFAULT 'draft',
+    source TEXT NOT NULL DEFAULT 'si_auto',
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    version BIGINT NOT NULL DEFAULT 1,
+    UNIQUE(platform, model)
+);
+
+-- New fields to extend routing states (via migration, not replacement)
+-- routing_states already has account_id as PK; add probe_execution_logs
+-- The sequence is created first: DEFAULT nextval(...) is resolved when the table is created.
+CREATE SEQUENCE IF NOT EXISTS probe_log_id_seq;
+CREATE TABLE IF NOT EXISTS supply_intelligence_probe_execution_logs (
+    log_id BIGINT PRIMARY KEY DEFAULT nextval('probe_log_id_seq'),
+    account_id BIGINT NOT NULL,
+    platform TEXT NOT NULL,
+    probe_result TEXT NOT NULL,
+    failure_class TEXT,
+    http_status INTEGER,
+    latency_ms INTEGER,
+    risk_score INTEGER NOT NULL,
+    evaluated_transition TEXT NOT NULL,
+    executed_at TIMESTAMPTZ NOT NULL,
+    request_id TEXT NOT NULL,
+    version BIGINT NOT NULL DEFAULT 1
+);
+
+CREATE INDEX IF NOT EXISTS idx_probe_logs_account_time ON supply_intelligence_probe_execution_logs(account_id, executed_at DESC);
--- a/prd/PRD.md
+++ b/prd/PRD.md
@@ -0,0 +1,568 @@
+# 供应链智能增强系统（Supply Intelligence）PRD
+
+> 状态说明（2026-05 收敛修订）：本文件保留为历史版本参考，已不再作为当前实现真源。
+> 当前产品真源以“2026-05 新 PM 基线 + tech/BASELINE_TECHLEAD_V2.md + 已收敛的测试/部署/任务决议文档”为准。
+> 若本文件与上述新真源冲突，以新真源为准，尤其是以下方面不得再按本文件旧口径执行：
+> 1. pricing / prediction / 向量检索 / 广义开放平台能力
+> 2. 探针 timeout / TCP / DNS 触发惩罚性降级
+> 3. 自动发布 / 自动下架 / disabled 自动恢复
+> 4. gateway 强耦合同步热更新主路径
+> 5. 以独立平台化重部署作为默认落地方式
+
+> 文档版本：v1.0
+> 撰写日期：2026-04-27
+> 撰写人：PM（产品经理）
+> 评审状态：待 TechLead 评审
+
+---
+
+## 1. 概述
+
+### 1.1 一句话价值
+通过自动化探针、全网扫描与准入测试，让平台供应链中的供应商账号、可用模型列表始终保持最新且可路由，消除人工维护滞后导致的可用性黑洞。
+
+### 1.2 用户问题
+- 运营团队每日需要人工检查数十个供应商账号的状态（额度、密钥失效、TOS 变更），遗漏率高。
+- 新模型上线后，平台未能及时感知，导致流量无法路由到新模型，竞争力下降。
+- 供应商账号过期或密钥失效后，网关仍在尝试路由请求，直接引发用户端报错。
+- 手动在各运营商后台注册账号、申请 API Key 的周期以天计，阻碍新供应商的快速接入。
+
+### 1.3 业务意义
+- 将供应链信息的保鲜周期从“人工天级”缩短到“自动分钟级”。
+- 降低因供应商/模型失效导致的用户可见错误率。
+- 缩短新模型上线到平台可售卖之间的上市时间（Time-to-Market）。
+- 为后续动态定价、智能路由提供实时、准确的供应链数据底座。
+
+---
+
+## 2. 目标
+
+### 2.1 业务目标
+| 目标编号 | 目标描述 | 度量方式 |
+|---------|---------|---------|
+| BG-01 | 供应商账号异常状态从发生到被标记的平均时间 ≤ 15 分钟 | 从供应商侧异常发生到本系统将其 status 改为 `suspended` 或 `disabled` 的时间差 |
+| BG-02 | 全网新模型从发布到进入平台可售卖列表的平均时间 ≤ 4 小时 | 从模型在官方文档/接口中出现到本系统将其对应的 supply_package 状态置为 `active` 的时间差 |
+| BG-03 | 因供应商账号失效导致的用户可见错误率下降 80% | 对比上线前 30 天与上线后 30 天，网关返回 502/503 且根因指向供应商失效的请求占比 |
+| BG-04 | 人工维护供应商基础信息的工作量减少 70% | 运营团队每周在供应商信息维护上投入的小时数对比 |
+
+### 2.2 用户目标
+- **平台运营团队**：在一个界面看到所有供应商账号的健康度、模型覆盖度、待处理事项，不再需要逐家登录供应商后台确认。
+- **供应链管理人员**：新供应商或新模型的接入流程从“人工申请-测试-录入”变为“自动发现-自动测试-人工确认一键上架”。
+- **技术负责人**：系统具备明确的熔断、降级、审计能力，自动化操作不引入新的稳定性风险。
+- **商务负责人**：新模型上架速度成为可量化指标，可用于对外商务承诺。
+
+### 2.3 成功定义
+项目被判定为成功的条件是：
+1. BG-01、BG-03、BG-04 三项指标在正式上线后 30 天内全部达成。
+2. 系统在连续 7 天内未出现因本系统自身故障导致的供应商状态误标记（false positive 率 ≤ 1%）。
+3. 所有自动化操作（状态变更、模型录入、账号注册）具备完整审计日志，且日志保留 ≥ 90 天。
+
+---
+
+## 3. 范围
+
+### 3.1 In Scope
+
+#### 模块 A：供应商品质探针（Supply Health Probe）
+- 对已录入 `supply_accounts` 的账号，按配置周期发起连通性、额度、密钥有效性探针。
+- 根据探针结果，自动将账号状态在 `active`、`suspended`、`disabled` 之间迁移（需满足状态机规则，不允许直接 `active` → `disabled`，必须经过 `suspended`）。
+- 对探针结果生成风险评分，写入 `supply_accounts.risk_score` 与 `risk_reason`。
+- 对状态变更事件写入审计日志。
+
+#### 模块 B：全网模型发现（Model Discovery）
+- 对接各供应商官方 API / 文档 / 变更源，扫描其已发布的模型列表。
+- 将扫描到的模型与平台现有 `supply_packages` 中的 `platform` + `model` 组合进行比对，识别“新增模型”。
+- 对新增模型创建候选记录（`supply_intelligence.model_candidates` 表，状态为 `discovered`），等待准入测试。
+- 对已从官方列表下架但平台仍有 `active` 套餐的模型，标记为 `deprecated`，触发告警通知运营团队。
+
+#### 模块 C：模型准入测试（Model Admission Test）
+- 对状态为 `discovered` 的候选模型，使用标准化测试用例集（覆盖 chat/completion/embedding 等 endpoint）进行功能验证。
+- 测试维度包括：接口可用性、响应格式合规性、延迟 P50/P99、token 计数一致性、错误码映射正确性。
+- 测试通过后，候选模型状态迁移为 `test_passed`，并自动生成一份 `supply_package` 草稿（`draft` 状态），等待运营团队确认后发布。
+- 测试失败的模型状态迁移为 `test_failed`，记录失败原因与日志，保留 30 天后自动清理。
+
+#### 模块 D：账号自动注册（Account Auto-Registration）
+- 针对支持自动化注册流程的供应商（需配置化白名单），系统通过其公开注册接口或模拟浏览器流程完成账号注册。
+- 注册成功后，自动申请 API Key，将凭证加密后写入 `supply_accounts`，状态置为 `pending`。
+- 注册过程中涉及的手机/邮箱验证，接入平台已集成的 SMS/邮件网关；若 SMS/邮件网关未就绪，该供应商的自动注册能力必须 fail-closed（拒绝启动，不静默降级）。
+- 注册行为必须写入审计日志，凭证指纹写入 `credential_fingerprint`。
+
+#### 模块 E：运营工作台（Operations Dashboard）
+- 展示待处理候选模型列表、待确认供应商状态变更、自动注册任务队列。
+- 提供“一键确认上架”、“忽略此模型”、“手动触发探针”三个人工干预入口。
+- 展示供应链覆盖率（平台已上架模型数 / 全网可发现模型数）。
+
+### 3.2 Out of Scope
+| 编号 | 内容 | 原因 |
+|-----|------|------|
+| OOS-01 | 供应商侧计费系统对接与自动充值 | 属于财务结算域，不在供应链智能范围内 |
+| OOS-02 | 基于发现结果的动态定价算法 | 属于 pricing-engine 项目，本系统只生成 package 草稿中的建议价 |
+| OOS-03 | 供应商账号的 TOS 法律合规性自动审查 | 法律文本语义分析超出当前工程边界，本系统只做“TOS 变更标记” |
+| OOS-04 | 不支持公开注册接口的供应商（如需要企业资质审核、线下合同）的自动注册 | 无法工程化闭环，保留人工注册入口 |
+| OOS-05 | 对供应商内部模型版本迭代（如从 gpt-4-turbo 到 gpt-4-turbo-2024-04-09）的语义级差异分析 | 成本过高，只识别模型 ID 维度的新增/下架 |
+| OOS-06 | 跨供应商的模型能力等价性判定（如“模型 A 是否等价于模型 B”） | 属于模型评估平台，非供应链基础能力 |
+
+### 3.3 假设与依赖
+| 编号 | 假设/依赖 | 影响 |
+|-----|----------|------|
+| ASP-01 | 各供应商均提供可公开访问的模型列表接口或文档页面 | 若某供应商关闭列表接口，该供应商的模型发现能力降级为手动录入 |
+| ASP-02 | 账号自动注册仅针对已签署技术合作框架协议、允许自动化注册的供应商 | 法律合规问题由商务团队前置解决 |
+| ASP-03 | `supply-api` 现有的 `supply_accounts` 表结构在上线前不做破坏性变更 | 本系统的新增表需通过标准 migration 脚本创建 |
+| ASP-04 | 平台已具备 SMS/邮件网关的运行时能力，或本模块的自动注册可被条件关闭 | 参照 `supply-api/CLAUDE.md` 中“条件能力必须 fail-closed”原则 |
+| ASP-05 | 探针任务调度依赖平台统一的 job scheduler（如内部 cron 或 Temporal），不重新造调度器 | 若 scheduler 不可用，探针模块延迟启动 |
+| ASP-06 | 测试用例集的维护由 QA 团队负责，本系统负责调度执行与结果收集 | 测试用例本身不在本系统代码库内管理 |
+
+---
+
+## 4. 用户场景
+
+### 4.1 主流程
+
+#### 场景 S1：供应商账号自动探针与状态变更
+```
+1. 调度器按配置周期（默认 5 分钟）触发对供应商账号 A 的探针任务。
+2. 探针模块调用供应商健康检查端点（或发送一条低成本测试请求）。
+3. 供应商返回 401/403 或超时 > 10 秒，探针判定为“密钥失效或账号异常”。
+4. 系统检查该账号当前状态：
+   a. 若为 active → 改为 suspended，risk_score 设为 80，risk_reason 写入“密钥失效”。
+   b. 若为 suspended 且连续 3 次探针失败 → 改为 disabled。
+5. 状态变更事件写入审计日志（object_type=supply_account, action=auto_suspend）。
+6. 向运营团队发送告警通知（钉钉/企业微信），包含账号、供应商、原因、时间。
+```
+
+#### 场景 S2：全网扫描发现新模型
+```
+1. 调度器每 1 小时触发一次全网扫描任务。
+2. 扫描模块向各供应商的模型列表接口发起请求，解析出当前所有 model_id。
+3. 与 supply_packages 中 status ∈ {active, paused, draft} 的记录去重比对。
+4. 发现供应商 X 新增模型 "new-model-v1"，平台暂无记录。
+5. 在 model_candidates 表中插入一条记录：
+   - platform = X, model_id = "new-model-v1"
+   - status = discovered
+   - discovered_at = NOW()
+6. 触发准入测试流水线（异步任务）。
+```
+
+#### 场景 S3：新模型准入测试通过并上架
+```
+1. 准入测试模块从 model_candidates 取出 status = discovered 的记录。
+2. 使用对应供应商的测试账号，发送标准化测试请求集（≥ 5 个不同用例）。
+3. 所有用例返回 HTTP 200，响应体符合 OpenAI-compatible schema，延迟 P99 < 30 秒。
+4. 将 candidate 状态更新为 test_passed，并生成 supply_package 草稿：
+   - platform = X, model = "new-model-v1"
+   - status = draft
+   - price_per_1m_input / price_per_1m_output 使用预设默认值（可配置）
+5. 运营工作台出现“待上架新模型”卡片。
+6. 运营人员点击“确认上架”，package 状态改为 active，进入 gateway 路由表。
+```
+
+#### 场景 S4：供应商账号自动注册
+```
+1. 运营人员在后台勾选“启用供应商 Y 的自动注册”，并配置注册参数（如邮箱域名、账号前缀规则）。
+2. 系统检测到供应商 Y 的可用账号数 < 配置阈值（如 < 2 个 active 账号）。
+3. 触发自动注册任务：
+   a. 调用供应商 Y 的注册接口，提交随机生成的用户名、密码、企业邮箱。
+   b. 等待并解析注册确认邮件，点击确认链接（或输入邮件验证码）。
+   c. 登录账号后台，申请 API Key。
+4. 将 API Key 经 KMS 加密后写入 supply_accounts，status = pending。
+5. 触发自动验证（复用现有 Verify 流程），验证通过后 status 改为 active。
+```
+
+### 4.2 异常流程
+
+#### 场景 E1：探针遭遇供应商 Rate Limit
+```
+1. 探针请求返回 429。
+2. 该次探针标记为 inconclusive，不计入连续失败次数。
+3. 调度器在指数退避后（1min → 2min → 4min）重试，最多重试 3 次。
+4. 若 3 次后仍为 429，本次探针周期跳过该账号，记录日志，不触发状态变更。
+```
+
+#### 场景 E2：模型准入测试超时
+```
+1. 某测试用例在 60 秒内未收到响应。
+2. 该用例标记为 timeout，测试流水线整体标记为 test_failed。
+3. candidate 状态更新为 test_failed，失败原因写入 "admission_test_timeout"。
+4. 运营工作台展示失败详情，运营人员可选择：
+   a. 手动重新触发测试；
+   b. 标记为 ignore，该 model_id 在 7 天内不再自动扫描。
+```
+
+#### 场景 E3：自动注册时 SMS/邮件网关不可用
+```
+1. 注册流程进行到验证码接收步骤。
+2. 调用 SMS/邮件网关返回 503 或超时。
+3. 该注册任务整体失败，写入审计日志（action=auto_register_failed）。
+4. 依据 fail-closed 原则，不向用户或上游返回任何“注册成功”的虚假状态。
+5. 任务进入死信队列，24 小时后由人工或系统重试。
+```
+
+### 4.3 边缘流程
+
+#### 场景 B1：供应商模型 ID 变更（非新增/下架，而是重命名）
+- 扫描模块发现旧 model_id 消失、新 model_id 出现，但模型能力描述高度相似。
+- 系统无法自动判定为“重命名”还是“旧模型下架+新模型上线”。
+- 生成一条运营待办事项，由运营人员人工确认关系，系统不做自动关联。
+
+#### 场景 B2：运营人员手动暂停自动探针
+- 运营人员可在后台对单个供应商账号勾选“暂停自动探针”。
+- 该账号的探针任务在调度器中被跳过，但运营人员仍可手动触发单次探针。
+- 暂停状态写入 `supply_accounts` 的扩展字段（或通过新增 `auto_probe_enabled` 字段），探针模块读取该字段后决定是否执行。
+
+#### 场景 B3：账号处于 suspended 期间收到用户请求
+- 本系统不直接处理流量路由，但需向 gateway 提供实时供应商状态查询接口。
+- gateway 在路由决策时查询该接口，若账号为 suspended/disabled，则将该账号从候选池移除。
+- 该接口的 SLA：P99 延迟 < 50ms，可用性 ≥ 99.9%。
+
+### 4.4 用户故事
+
+| 编号 | 角色 | 故事 | 验收对应 |
+|-----|------|------|---------|
+| US-01 | 运营人员 | 我想在一个页面看到所有供应商账号最近一次探针的时间和结果，以便快速定位异常账号 | AC-01, AC-02 |
+| US-02 | 供应链管理员 | 我想在新模型被系统发现后收到通知，并在工作台一键确认上架，以便缩短上市时间 | AC-04, AC-06, AC-11 |
+| US-03 | 技术负责人 | 我想所有自动化状态变更都有审计日志和回滚记录，以便在误操作时追溯和恢复 | AC-10 |
+| US-04 | 商务负责人 | 我想看到平台模型覆盖率与竞品对比的报表，以便对外展示平台能力 | 待补充（当前 AC 列表未覆盖） |
+| US-05 | 运营人员 | 我想对特定账号暂停自动探针，以便在供应商维护窗口期避免误报警 | 待补充（当前 AC 列表未覆盖） |
+| US-06 | 供应链管理员 | 我想对支持自动注册的供应商配置自动补货策略，以便在可用账号不足时自动补充 | AC-08, AC-09 |
+
+---
+
+## 5. 验收标准（AC）
+
+> 以下每条 AC 均为可测试、无模糊词的要求。QA 可直接据此编写测试用例。
+
+### 模块 A：供应商品质探针
+
+**AC-01 探针覆盖度**
+- 给定 `supply_accounts` 中 `status` 为 `active` 或 `suspended` 的账号数量 N，系统在任意时刻 T，最近 15 分钟内被探针覆盖的账号数量 M 必须满足 M/N ≥ 99%。
+- 测试方法：插入 100 条测试账号记录，观察 15 分钟窗口内探针日志覆盖的不同账号数是否 ≥ 99。
+
+**AC-02 状态变更正确性**
+- 给定一个 `status=active` 的账号，模拟其返回 401 连续 1 次，系统在 60 秒内将其 `status` 更新为 `suspended`。
+- 给定一个 `status=suspended` 的账号，模拟其连续 3 次探针返回 401，系统在 60 秒内将其 `status` 更新为 `disabled`。
+- 给定一个 `status=active` 的账号，模拟其返回 429 单次，其 `status` 在 15 分钟内保持 `active` 不变。
+- 测试方法：Mock 供应商响应，查询数据库字段值。
+
+**AC-03 误报率**
+- 在 7 天连续运行测试中，探针将实际正常的账号标记为 `suspended` 或 `disabled` 的次数 ≤ 总探针次数的 1%。
+- 测试方法：使用全部正常的测试账号运行 7 天，统计状态误变更次数。
+
+### 模块 B：全网模型发现
+
+**AC-04 新模型发现延迟**
+- 给定一个已对接的供应商，在其模型列表中新增一个 model_id，系统在 2 个扫描周期（默认 2 小时）内将该 model_id 写入 `model_candidates` 且 `status=discovered`。
+- 测试方法：Mock 供应商模型列表接口，在 T0 新增 model_id，T0+2h 查询数据库验证。
+
+**AC-05 已下架模型告警**
+- 给定一个 `supply_packages` 中 `status=active` 的 model_id，在供应商侧该 model_id 消失后，系统在 2 个扫描周期内：
+  - 将该 package 的 `status` 保持 `active` 不变（不自动下架，避免误伤）；
+  - 在运营工作台生成一条“模型已下架”告警待办；
+  - 向运营人员发送通知。
+- 测试方法：Mock 供应商模型列表，移除 model_id，验证告警产生与 package 状态未变。
+
+### 模块 C：模型准入测试
+
+**AC-06 准入测试通过**
+- 给定一个 `status=discovered` 的 candidate，其供应商测试账号正常，系统在 30 分钟内完成全部测试用例执行，candidate 状态变为 `test_passed`，且自动生成一条 `supply_packages` 记录（`status=draft`）。
+- 测试方法：使用真实或 Mock 供应商响应，验证数据库状态与 package 草稿字段完整性。
+
+**AC-07 准入测试失败**
+- 给定一个 `status=discovered` 的 candidate，模拟其接口返回 500 或响应格式不合法，系统在 30 分钟内将 candidate 状态更新为 `test_failed`，`failure_reason` 字段非空，且运营工作台展示失败详情。
+- 测试方法：Mock 供应商返回 500，验证数据库字段与前端展示。
+
+### 模块 D：账号自动注册
+
+**AC-08 自动注册成功**
+- 给定一个已配置自动注册白名单的供应商，配置其可用账号数阈值为 2，当前可用账号数为 1，系统在 10 分钟内触发注册流程，在 30 分钟内完成注册、密钥申请、凭证加密存储，最终 `supply_accounts` 中新增一条 `status=active` 的记录。
+- 测试方法：使用供应商沙箱环境或高保真 Mock，验证端到端流程与数据库记录。
+
+**AC-09 自动注册 fail-closed**
+- 给定自动注册配置 `enabled=true`，但 SMS/邮件网关返回 503 或超时，系统在 60 秒内将注册任务标记为 `failed`，不向任何上游返回成功状态码，审计日志中包含 `action=auto_register_failed` 与错误详情。
+- 测试方法：Mock SMS 网关返回 503，验证接口响应、数据库状态、审计日志。
+
+### 模块 E：运营工作台与通用
+
+**AC-10 审计日志完整性**
+- 任意自动化操作（状态变更、candidate 状态迁移、自动注册、手动触发探针）发生后 5 秒内，审计存储中必须存在对应记录，字段包含：
+  - `object_type`、`object_id`、`action`、`result_code`、`before_state`（变更前）、`after_state`（变更后）、`request_id`。
+- 测试方法：触发各项操作，查询审计存储验证字段完整性。
+
+**AC-11 运营工作台干预**
+- 运营人员点击“一键确认上架”后，对应的 `supply_packages` 记录在 3 秒内从 `draft` 变为 `active`。
+- 运营人员点击“忽略此模型”后，该 candidate 在 7 天内不再出现在待处理列表中，且 7 天后自动恢复为 `discovered`。
+- 测试方法：E2E 测试或 UI 自动化测试。
+
+**AC-12 配置热更新**
+- 探针周期、扫描周期、测试超时时间、自动注册阈值等配置项，在修改配置文件并下发后 60 秒内生效，不重启进程。
+- 测试方法：修改配置，观察调度器行为变化时间差。
+
+---
+
+## 6. 边缘情况与失败路径
+
+| 编号 | 边缘/失败场景 | 系统行为 | 验证方式 |
+|-----|-------------|---------|---------|
+| FP-01 | 供应商探针接口完全不可用（DNS 失败、TCP 超时） | 标记为 inconclusive，按 429 退避逻辑处理，不直接变更状态 | 模拟 iptables DROP，验证状态不变 |
+| FP-02 | 供应商返回 200 但响应体为空或格式突变 | 解析失败视为 inconclusive，记录 error_log，不触发状态变更 | Mock 返回空 JSON，验证状态与日志 |
+| FP-03 | 同一账号在探针执行期间被运营人员手动变更状态 | 乐观锁冲突：探针更新时 version 不匹配，更新失败，探针记录冲突日志，由下次探针或运营人员覆盖 | 并发测试：手动 update 同时触发探针 |
+| FP-04 | 模型准入测试期间，测试账号被探针标记为 suspended | 准入测试流水线检测到测试账号不可用，任务标记为 `test_failed`，原因写为 `test_account_unavailable` | Mock 测试账号 suspended，验证流水线行为 |
+| FP-05 | 自动注册时供应商注册接口返回 400（如邮箱已被注册） | 任务标记为 `failed`，原因写入具体错误码，同一邮箱不再重复使用，审计日志记录完整请求/响应摘要（脱敏后） | Mock 注册接口返回 400，验证数据库与日志 |
+| FP-06 | 自动注册成功后，验证步骤发现密钥无效 | 账号状态保持 `pending`，自动注册任务标记为 `verify_failed`，触发告警，不进入 active | Mock verify 返回失败，验证状态机 |
+| FP-07 | 全网扫描时供应商模型列表分页异常（如页码越界返回 500） | 扫描任务记录分页失败，已获取的部分模型仍正常处理，失败页在下一周期重试 | Mock 分页接口第 3 页返回 500，验证整体任务不中断 |
+| FP-08 | 数据库在探针执行期间不可用 | 探针任务失败，记录错误，不触发状态变更；调度器按配置重试；连续失败 5 次后暂停该批次探针，触发系统级告警 | 模拟 PostgreSQL 断开，验证行为 |
+| FP-09 | 运营人员同时点击“确认上架”与“忽略此模型” | 乐观锁或幂等键保证只有一个操作生效，第二个操作返回 409 Conflict，界面提示“该模型已被处理” | 并发 UI 操作测试 |
+| FP-10 | 凭证加密 KMS 服务在自动注册期间不可用 | 注册流程在加密步骤阻塞，等待 KMS 恢复或超时（60 秒）；超时后任务标记为 `failed`，明文凭证不得落盘 | Mock KMS 超时，验证明文不出现在日志/数据库 |
+
+---
+
+## 7. 上线与运营准备
+
+### 7.1 发布策略
+- **阶段 1（灰度）**：选择 2 个非核心供应商（如测试环境专用供应商）开启自动探针与模型发现，观察 7 天。
+- **阶段 2（扩展）**：覆盖全部供应商的探针与发现能力，但自动状态变更仅对 `sandbox` 环境账号生效，生产环境账号的探针结果只生成告警，不自动改状态。
+- **阶段 3（全量）**：生产环境账号启用自动状态变更，模型准入测试与自动注册按需逐步开启。
+
+### 7.2 灰度/回滚
+- 灰度开关通过配置中心控制，维度包括：
+  - `probe.enabled`：全局探针开关
+  - `probe.auto_transition.supplier_ids`：允许自动状态变更的供应商白名单
+  - `discovery.enabled`：全网扫描开关
+  - `admission_test.enabled`：准入测试开关
+  - `auto_registration.enabled`：自动注册开关
+- 回滚条件（任一触发即全量关闭对应模块）：
+  - 1 小时内探针误报率 > 5%
+  - 自动状态变更导致用户可见错误率上升（对比基线）> 2%
+  - 自动注册任务连续失败率 > 50%（持续 1 小时）
+- 回滚操作：修改配置中心对应开关为 `false`，60 秒内生效，已变更的状态不自动回退，由运营人员人工审核。
+
+### 7.3 埋点/监控/告警
+
+#### 埋点事件
+| 事件名 | 触发时机 | 关键属性 |
+|-------|---------|---------|
+| `si_probe_executed` | 每次探针执行完成 | `platform`, `account_id`, `result`, `latency_ms` |
+| `si_state_transitioned` | 账号状态自动变更 | `platform`, `account_id`, `from_status`, `to_status`, `reason` |
+| `si_model_discovered` | 发现新模型 | `platform`, `model_id`, `discovery_source` |
+| `si_admission_test_completed` | 准入测试完成 | `platform`, `model_id`, `result`, `duration_sec` |
+| `si_auto_register_completed` | 自动注册完成 | `platform`, `result`, `duration_sec` |
+
+#### 监控指标（Prometheus）
+| 指标名 | 类型 | 说明 |
+|-------|------|------|
+| `si_probe_latency_seconds` | Histogram | 探针请求延迟 |
+| `si_probe_result_total` | Counter | 探针结果分类（success/failure/inconclusive） |
+| `si_state_transition_total` | Counter | 状态变更次数 |
+| `si_discovery_models_total` | Gauge | 当前候选模型数量（按 status 分标签） |
+| `si_admission_test_duration_seconds` | Histogram | 准入测试耗时 |
+| `si_auto_register_result_total` | Counter | 自动注册结果分类 |
+
+#### 告警规则
+| 告警名 | 条件 | 通知对象 | 级别 |
+|-------|------|---------|------|
+| 探针大面积失败 | 1 小时内探针失败率 > 20% | 技术负责人 | P1 |
+| 供应商账号全部失效 | 某供应商 active 账号数 = 0 持续 > 10 分钟 | 运营+技术 | P0 |
+| 自动注册连续失败 | 1 小时内自动注册失败率 > 50% | 供应链管理员 | P1 |
+| 新模型堆积未处理 | `status=discovered` 的候选模型数 > 20 且持续 > 24 小时 | 运营团队 | P2 |
+| 系统自身健康异常 | 本服务 `/actuator/health/ready` 返回非 200 持续 > 1 分钟 | 技术负责人 | P0 |
+
+### 7.4 FAQ（预置）
+**Q1：自动状态变更会不会把正常的供应商误杀掉？**
+A：探针采用“连续失败才降级”策略，active → suspended 需 1 次明确失败，suspended → disabled 需连续 3 次失败。运营人员可随时在后台暂停单个账号的自动探针。
+
+**Q2：模型准入测试失败了，我还能手动上架吗？**
+A：可以。运营人员可以在工作台查看失败详情，选择“手动强制上架”，此时系统生成 package 草稿但标记为 `manually_forced`，并强制要求运营人员填写强制上架理由，该理由写入审计日志。
+
+**Q3：自动注册生成的账号归属谁？**
+A：自动注册账号的 `user_id` / `supplier_user_id` 关联到平台运营系统账号（可配置），收益结算走平台统一账户。
+
+---
+
+## 8. 商业化与价值闭环
+
+### 8.1 收益路径
+| 路径 | 描述 | 量化 |
+|-----|------|------|
+| 直接收益 | 新模型上架速度提升 → 平台可售模型数增加 → 订单量增长 | 每提前 1 天上架一个热点模型，预估带来 X 订单增量（需商务提供历史数据基线） |
+| 成本节省 | 运营人力减少 → 供应链维护 headcount 或工时下降 | 按 BG-04 目标，每周节省 70% 工时，折算年化人力成本 |
+| 质量溢价 | 供应商失效导致的客诉减少 → NPS 提升 → 客户续约率提升 | 减少的客诉数 × 单客诉处理成本 + 续约率提升带来的 LTV 增量 |
+
+### 8.2 北极星指标
+- **供应链新鲜度指数（Supply Freshness Index, SFI）**
+  - 定义：SFI = (过去 1 小时成功探针的账号数 / 应探针账号总数) × (过去 24 小时进入 active 的新模型数 / 过去 24 小时发现的新模型总数)
+  - 目标值：SFI ≥ 0.95
+  - 采集周期：每小时计算一次，写入时序数据库
+
+### 8.3 失败判定线
+项目在以下任一条件触发时，判定为失败并启动止损：
+1. 上线后 30 天内，因本系统导致的供应商状态误变更（false positive）累计 > 50 次。
+2. 上线后 30 天内，因自动状态变更或自动注册导致用户可见支付/使用故障 > 3 次。
+3. SFI 连续 7 天 < 0.70，且技术团队无法给出明确修复排期。
+4. 自动注册模块因供应商接口变更导致连续 14 天成功率 < 30%，且无替代方案。
+
+### 8.4 止损条件
+- 触发失败判定线后，PM 与 TechLead 在 24 小时内决定是否：
+  - **降级**：关闭自动状态变更与自动注册，仅保留探针监控与模型发现（纯观测模式）。
+  - **下线**：完全卸载本系统，回退至纯人工维护模式，保留审计日志备查。
+- 无论降级或下线，已生成的 supply_package 草稿和已注册的账号不受影响，由运营人员人工接管。
+
+---
+
+## 9. 依赖与风险
+
+### 9.1 外部依赖
+| 依赖方 | 依赖内容 | 风险等级 | 缓解措施 |
+|-------|---------|---------|---------|
+| 各供应商 | 模型列表接口、注册接口、探针端点的稳定性与兼容性 | 高 | 接口变更监测；Mock 回归测试集；供应商接口版本锁定 |
+| SMS/邮件网关 | 自动注册验证码接收 | 中 | fail-closed；备用邮箱池；人工兜底流程 |
+| KMS 服务 | 新注册账号凭证加密 | 中 | 加密失败阻塞落盘，任务进死信队列 |
+| 平台 Job Scheduler | 定时任务调度 | 低 | 调度失败时探针/扫描延迟，不引入错误状态 |
+| supply-api 现有服务 | 复用 Verify、AccountStore、PackageStore、AuditStore | 低 | 接口契约冻结；变更需双方 CR |
+
+### 9.2 技术风险
+| 风险编号 | 风险描述 | 概率 | 影响 | 应对 |
+|---------|---------|------|------|------|
+| R-01 | 探针频率过高导致供应商侧将我们视为攻击源，封禁平台 IP | 中 | 高 | 探针频率可配置；使用平台统一出口 IP 池；对每家供应商遵守其 rate limit 文档 |
+| R-02 | 供应商模型列表接口返回缓存旧数据，导致“已下架模型”误判 | 中 | 中 | 列表接口响应加 TTL 校验；结合官方文档 RSS/变更日志交叉验证 |
+| R-03 | 自动注册的浏览器自动化流程（如 Selenium/Playwright）因供应商前端改版失效 | 高 | 中 | 优先使用官方 API 注册；浏览器自动化作为 fallback；前端改版监控 |
+| R-04 | 准入测试用例不足以覆盖供应商实际兼容性问题，导致 test_passed 但上线后用户报错 | 中 | 高 | 测试用例由 QA 维护并定期评审；上线后 24h 内对新模型增加采样监控 |
+| R-05 | 数据库 model_candidates 表数据膨胀，影响查询性能 | 低 | 中 | 设置自动清理策略：test_failed 且超过 30 天未手动处理的记录自动删除 |
+
+### 9.3 合规与隐私风险
+- 自动注册过程中收集的邮箱、手机号属于个人信息，需符合平台隐私政策与相关法律法规。
+- 凭证指纹（`credential_fingerprint`）仅存储哈希值，不得存储明文 API Key。
+- 审计日志中的请求/响应摘要需脱敏，不得包含完整 credential。
+
+---
+
+## 10. 技术栈与集成约束
+
+### 统一技术栈
+本项目必须与立交桥主项目保持一致：
+- **语言**: Go 1.22+
+- **HTTP框架**: 标准库 `net/http` + 自定义中间件（禁止引入 Gin/Echo 等第三方框架，保持与 gateway/ 和 supply-api/ 的一致性）
+- **数据库**: PostgreSQL 15+ ，驱动 `jackc/pgx/v5`
+- **缓存**: Redis，客户端 `redis/go-redis/v9`
+- **配置**: YAML + Viper，环境变量覆盖敏感字段
+- **日志/审计**: 结构化日志，审计事件模型与 supply-api/ 一致
+- **错误码**: `{SOURCE}_{CATEGORY}_{CODE}` 格式，例如 `SUP_INT_4001`
+- **健康检查**: `/actuator/health` 、 `/actuator/health/live` 、 `/actuator/health/ready`
+- **测试**: Go testing + testify，覆盖率门槛 domain ≥ 70%、service/handler ≥ 80%
+
+### 独立运行与集成运行
+本系统必须同时支持两种运行模式：
+
+| 模式 | 特征 | 部署方式 | 适用场景 |
+|------|------|---------|---------|
+| **独立运行** | 自有 `cmd/supply-intelligence/main.go`，独立数据库 schema，独立 docker-compose | `docker-compose up` 或单独容器 | 外部用户只需要供应链管理能力，不想接入立交桥全套 |
+| **集成运行** | 作为 Go module 被 `supply-api/` 引入，共享数据库连接池和配置，通过内部接口注册 | 编译时作为子模块编译，运行时挂载到 supply-api 主进程 | 立交桥用户希望获得一体化供应链能力 |
+
+**集成约束**:
+- 独立运行时，系统必须提供完整的 HTTP API 和运营工作台。
+- 集成运行时，系统必须提供 `IntegrationPlugin` 接口，允许主程序通过配置开关启用/禁用各模块。
+- 数据库 schema 必须使用独立的 `supply_intelligence_` 前缀，避免与主项目表名冲突。
+- 配置文件必须支持分离加载：独立运行时读取自己的 `config.yaml`，集成运行时合并到主项目配置。
+
+### NewAPI / Sub2API 适配支持
+本系统的核心能力必须能够对接 NewAPI 和 Sub2API 系统：
+- **供应商状态同步**: 提供标准化的供应商健康状态接口，NewAPI/Sub2API 可定期获取供应商可用性状态。
+- **模型列表推送**: 提供 `/models` 接口返回平台已发现、已测试通过的模型列表，NewAPI/Sub2API 可消费此数据自动补充自己的模型库。
+- **账号注册适配**: 自动注册模块通过适配层支持 NewAPI/Sub2API 的账号管理 API，实现跨平台账号生命周期管理。
+- **独立部署时**: 通过配置文件指定 NewAPI/Sub2API 的管理端点地址和鉴权信息，本系统通过适配层（Adapter）与之交互。
+- **集成部署时**: 若立交桥 gateway/ 已接入 NewAPI/Sub2API，本系统通过 supply-api/ 的内部接口操作上游状态。
+
+### 对外接口契约
+- 必须提供 OpenAPI 3.0 接口文档，确保 NewAPI/Sub2API 开发者可以独立接入。
+- 接口路径前缀默认为 `/api/v1/supply-intelligence/`，集成运行时可通过配置改为 `/internal/supply-intelligence/` 。
+
+---
+
+## 11. 阶段门控结论
+
+### 11.1 当前状态
+**可进入 TechLead 评审，但需补充以下信息后方可进入开发排期：**
+
+1. **供应商接口清单**：需由商务/技术团队提供 Phase 1 目标供应商的模型列表接口文档、注册接口文档（或明确标注哪些供应商不支持自动注册）。
+2. **测试用例集范围**：需 QA 团队确认准入测试用例集的初始版本（≥ 5 个用例/模型类型）及维护 SLA。
+3. **Job Scheduler 契约**：需明确平台统一调度器的接口契约（如任务提交格式、超时控制、死信策略）。
+4. **KMS 与 SMS 网关就绪状态**：生产环境 KMS 与 SMS/邮件网关当前不可用，需寻找合适的供应商并确认集成方案。若短期内无法就绪，自动注册模块（Phase 3）需明确为远期交付，当前 Phase 1/2 不受影响。
+
+### 11.2 建议开发优先级
+| 阶段 | 内容 | 目标 |
+|-----|------|------|
+| Phase 1 | 供应商品质探针（模块 A）+ 运营工作台观测视图（模块 E 只读部分） | 解决最痛的可用性黑洞问题，7 天灰度验证 |
+| Phase 2 | 全网模型发现（模块 B）+ 模型准入测试（模块 C） | 解决新模型上市滞后问题 |
+| Phase 3 | 账号自动注册（模块 D）+ 运营工作台完整干预能力（模块 E 读写部分） | 解决供应商账号补充效率问题 |
+
+### 11.3 门控决策
+- **不阻塞 TechLead 评审**：PRD 中需求边界、验收标准、失败路径已清晰。
+- **阻塞开发排期**：直到上述 4 项补充信息（供应商接口清单、测试用例集、Job Scheduler 契约、KMS/SMS 就绪状态）以文档形式补充到本 PRD 附录后，方可进入技术方案设计（HLD）阶段。
+- **技术栈与集成约束已明确**：统一 Go 标准库、独立/集成双模式、NewAPI/Sub2API 适配层已纳入范围。
+
+---
+
+## 附录 A：新增数据表草案（供 TechLead 参考，非最终 Schema）
+
+> 本附录仅用于需求对齐，最终 Schema 由 TechLead 设计并通过标准 SQL migration 落地。
+
+### A.1 model_candidates
+| 字段 | 类型 | 说明 |
+|-----|------|------|
+| id | BIGINT PK | 自增 |
+| platform | VARCHAR(50) | 供应商标识，与 supply_accounts.platform 同枚举 |
+| model_id | VARCHAR(100) | 模型标识 |
+| model_name | VARCHAR(200) | 可读的模型名称（从供应商接口获取） |
+| status | VARCHAR(20) | `discovered`, `testing`, `test_passed`, `test_failed`, `ignored`, `expired` |
+| discovered_at | TIMESTAMPTZ | 首次发现时间 |
+| tested_at | TIMESTAMPTZ | 最近一次测试时间 |
+| failure_reason | TEXT | 测试失败原因 |
+| ignored_until | TIMESTAMPTZ | 忽略有效期 |
+| created_at | TIMESTAMPTZ | |
+| updated_at | TIMESTAMPTZ | |
+
+唯一约束：`(platform, model_id)`
+
+### A.2 auto_registration_tasks
+| 字段 | 类型 | 说明 |
+|-----|------|------|
+| id | BIGINT PK | 自增 |
+| platform | VARCHAR(50) | 目标供应商 |
+| task_type | VARCHAR(20) | `register`, `verify`, `rotate_key` |
+| status | VARCHAR(20) | `pending`, `running`, `completed`, `failed`, `dead_letter` |
+| context | JSONB | 任务上下文（如申请的邮箱、注册步骤状态机） |
+| result_account_id | BIGINT | 成功后关联的 supply_accounts.id |
+| failure_reason | TEXT | |
+| retry_count | INT DEFAULT 0 | |
+| next_retry_at | TIMESTAMPTZ | |
+| created_at | TIMESTAMPTZ | |
+| updated_at | TIMESTAMPTZ | |
+
+### A.3 probe_execution_logs
+| 字段 | 类型 | 说明 |
+|-----|------|------|
+| id | BIGINT PK | 自增 |
+| account_id | BIGINT FK | supply_accounts.id |
+| probe_type | VARCHAR(20) | `connectivity`, `quota`, `key_validity` |
+| result | VARCHAR(20) | `success`, `failure`, `inconclusive` |
+| http_status | INT | |
+| latency_ms | INT | |
+| error_code | VARCHAR(50) | 平台内部错误码 |
+| error_message | TEXT | |
+| executed_at | TIMESTAMPTZ | |
+
+索引：`account_id + executed_at DESC`，保留策略 30 天。
+
+---
+
+## 自检清单
+
+- [x] 已明确真实目标（降低供应商失效导致的错误率、缩短新模型上市时间、减少人工维护工时），不是只复述功能。
+- [x] 已写清 In Scope / Out of Scope，边界以模块和具体场景描述。
+- [x] 每个 AC 都可被 QA 或测试用例直接验证（含具体数值、时间、状态、测试方法）。
+- [x] 已覆盖异常流（Rate Limit、超时、网关不可用）、边缘流（模型 ID 变更、手动暂停探针、并发操作）与失败路径（共 10 条）。
+- [x] 已补齐上线、运营、监控、回滚要求（灰度三阶段、回滚条件、埋点、监控指标、告警规则、预置 FAQ）。
+- [x] 已定义商业化/价值闭环（直接收益、成本节省、质量溢价三条路径）。
+- [x] 已定义成功指标（BG-01/03/04 + SFI）与失败判定线（4 条止损条件）。
+- [x] 已明确当前是否可进入 TechLead 阶段：可进入 TechLead 评审，但需补充 4 项信息后方可进入开发排期。
+- [x] 没有使用"优化、支持、友好、尽量、快速"等模糊词替代明确要求；所有时间、比例、次数均为具体数值或明确公式。
+
+---
--- a/prd/competitor-analysis.md
+++ b/prd/competitor-analysis.md
@@ -0,0 +1,188 @@
+# Supply-Intelligence 供应链智能增强 — 竞品分析报告
+
+## 1. 竞品范围
+
+| 竞品 | 项目地址 | 技术栈 | 相关能力 |
+|-------|---------|--------|---------|
+| **LiteLLM** | berriai/litellm | Python/FastAPI | 模型定价数据库、自动路由、新模型告警、部署冷却、容灾切换 |
+| **Sub2API** | Wei-Shaw/sub2api | Go/Gin/Ent | 模型定价镜像、代理管理、账号/订阅管理、用量统计、公告系统 |
+| **NewAPI / OneAPI** | Calcium-Ion/new-api | Go/Gin/GORM | 渠道管理、模型配置、上游状态监控 |
+
+---
+
+## 2. 核心能力对标
+
+### 2.1 模型定价与供应商数据库
+
+#### LiteLLM Model Prices Database
+LiteLLM 维护了行业内最完整的模型定价数据库 `model_prices_and_context_window_backup.json`：
+
+**关键特征**:
+- 覆盖 100+ 供应商、1000+ 模型
+- 每个模型包含：input_cost_per_token, output_cost_per_token, context_window, max_tokens, supports_vision, supports_function_calling 等
+- 支持分层定价（tiered_pricing）：如 >128k tokens 时使用不同单价
+- 支持批量定价（batch pricing）
+- 支持音频 token 定价
+- 支持自定义成本覆盖
+
+**更新机制**:
+- 主数据库内置在代码中，通过版本发布更新
+- 支持远程拉取更新（可配置镜像源）
+- Sub2API 就是从 LiteLLM 上游镜像此文件
+
+#### Sub2API Pricing Service
+Sub2API 的定价服务是被动消费型的（从上游获取）：
+
+**关键设计**:
+- 远程拉取 LiteLLM 镜像 `model_prices_and_context_window.json`
+- 本地 fallback 文件缓存
+- SHA256 hash 验证更新
+- 模型家族回退算法：未知模型按命名规则回退到已知模型
+  - 例如：gpt-5.3 未知 → 回退到 gpt-5.1
+  - 例如：claude-unknown → 回退到 claude-sonnet
+- 动态价格字段优先级配置
+
+**缺陷**:
+- 被动获取，无主动发现新模型能力
+- 无模型质量探针（仅依赖定价数据）
+- 无自动测试和准入检查
+
+### 2.2 供应商/渠道管理
+
+#### Sub2API Proxy & Account Management
+Sub2API 提供了完整的上游管理能力：
+
+**代理管理** (`Proxy` schema):
+```go
+type Proxy struct {
+    name     string   // 代理名称
+    protocol string   // 协议
+    host     string   // 主机
+    port     int      // 端口
+    username string   // 用户名（可选）
+    password string   // 密码（可选）
+    status   string   // active / inactive
+}
+```
+
+**账号管理** (`Account` schema):
+- 支持多个上游供应商
+- 每个账号关联一个代理（Proxy）
+- 支持账号分组（AccountGroup）
+- 软删除机制
+
+**用量统计** (`UsageLog`):
+- 详细记录每次请求的模型、token数、成本、时间戳
+- `UsageCleanupTask`: 定期清理过期用量数据
+
+#### NewAPI/OneAPI 渠道管理
+- 支持多个上游渠道配置
+- 渠道状态监控（可用/不可用）
+- 支持渠道优先级和权重
+- 支持渠道购买次数限制
+
+### 2.3 自动路由与容灾
+
+#### LiteLLM Router & Auto-Router
+LiteLLM 的路由系统是其核心竞争力：
+
+**路由策略**:
+- **lowest_latency**: 选择响应最快的部署
+- **lowest_cost**: 选择成本最低的部署
+- **lowest_tpm_rpm**: TPM/RPM 最低
+- **least_busy**: 负载最低
+- **auto_router**: 语义路由（基于请求内容匹配最适模型）
+- **budget_limiter**: 按 key/team 限制预算
+
+**容灾机制**:
+- **Cooldown**: 连续失败的部署自动进入 cooldown，暂时从路由池移除
+- **Fallback**: 主模型失败时自动切换到备用模型
+- **Retries**: 可配置重试次数和策略
+
+**新模型告警** (`new_model_added`):
+- 当新模型上线时发送 Slack 告警
+- 但仅限于通知，无结构化的准入测试流程
+
+### 2.4 用户与订阅管理
+
+#### Sub2API 用户体系
+- `User`: 基础用户信息
+- `UserSubscription`: 订阅计划、配额、到期时间
+- `UserAttributeDefinition` / `UserAttributeValue`: 用户自定义属性
+- `PromoCode` / `RedeemCode`: 营销代码系统
+- `SecuritySecret`: 安全凭证管理
+
+---
+
+## 3. 差距分析（我们的机会）
+
+| 能力维度 | 竞品现状 | 我们的机会 |
+|---------|---------|---------|
+| **模型发现** | LiteLLM 被动维护定价库，Sub2API 被动镜像 | 主动全网扫描发现新模型（爬取供应商 API、HN、Twitter、官方文档） |
+| **准入测试** | 竞品均不具备 | 自动化准入测试流程，含功能、性能、成本、安全等维度 |
+| **质量探针** | LiteLLM 仅有基础 cooldown，无深度探针 | 多维度品质探针：连通性、配额、延迟、错误率、响应质量 |
+| **自动注册** | 竞品均不支持 | 自动在供应商后台注册账号、申请 API Key |
+| **账号生命周期** | Sub2API 有基础账号管理，无自动更新 | 自动轮换密钥、检测过期、自动补充账号 |
+| **供应商健康大盘** | Sub2API 有用量统计，无综合健康视图 | 统一供应商健康大盘，实时可视化 |
+| **模型比价** | LiteLLM 有定价库，但无比价能力 | 同类模型多供应商价格对比，智能推荐最优供应商 |
+| **运营工作台** | 竞品均为散点式管理 | 统一运营工作台，支持干预操作（暂停、强制切换、测试触发） |
+| **模型下线预测** | LiteLLM 有新模型告警，但无下线预测 | 基于用量趋势和供应商动态预测模型下线 |
+| **自动化闭环** | 竞品均为人工配置 | 发现 → 测试 → 准入 → 上线 → 监控 → 下线 全自动化 |
+
+---
+
+## 4. 对产品规划的影响
+
+### 强化方向
+
+1. **模型定价数据库参考 LiteLLM**：
+   - 维护标准化的模型定价数据库，支持 input/output cost、context window、功能支持等字段
+   - 支持远程更新和本地 fallback
+   - 支持模型家族回退
+
+2. **供应商账号管理参考 Sub2API**：
+   - 代理（Proxy）管理：协议、主机、端口、状态
+   - 账号分组：AccountGroup
+   - 软删除机制
+   - 安全凭证管理
+
+3. **用量统计参考 Sub2API**：
+   - 详细 UsageLog 记录
+   - 定期清理机制
+   - 用户-订阅-用量关联
+
+4. **路由策略参考 LiteLLM**：
+   - 多种路由策略（latency、cost、load、semantic）
+   - 容灾切换机制
+   - 部署冷却
+
+### 新增差异化能力
+
+5. **主动全网模型发现**：竞品均为被动维护，我们应主动扫描
+6. **自动准入测试**：竞品不具备，是核心差异化
+7. **自动账号注册**：竞品不支持，是核心差异化
+8. **智能推荐**：基于价格、质量、位置的供应商推荐
+9. **预测性分析**：模型下线预测、供应商变动预测
+
+---
+
+## 5. 对技术规划的影响
+
+### 应引入的设计模式
+
+| 设计模式 | 来源 | 应用场景 |
+|---------|------|---------|
+| **Model Prices Database** | LiteLLM | 模型定价数据库，支持远程更新和本地 fallback |
+| **SHA256 Hash 验证** | Sub2API | 定价数据更新的完整性验证 |
+| **模型家族回退** | Sub2API | 未知模型的智能回退 |
+| **Proxy + Account 关联** | Sub2API | 上游代理与账号的关联管理 |
+| **UsageLog + CleanupTask** | Sub2API | 用量记录与定期清理 |
+| **路由策略抽象** | LiteLLM | 支持多种路由策略的插件化设计 |
+| **Cooldown + Fallback** | LiteLLM | 故障部署的自动处理 |
+
+### 技术避坑
+
+1. **不重复造轮子**: 定价数据库可以直接复用 LiteLLM 的开源数据，不需要自己维护
+2. **发现与测试解耦**: 模型发现和准入测试应该解耦，支持独立触发和组合触发
+3. **注册模块的可扩展性**: 每个供应商的注册流程不同，需要抽象接口 + 具体实现
+4. **测试隔离**: 准入测试不得影响生产环境，必须使用独立账号或模拟环境
--- a/specs/功能清单.md
+++ b/specs/功能清单.md
@@ -0,0 +1,243 @@
+# Supply Intelligence 功能清单（按钮级任务版）
+
+> 状态说明（2026-05 收敛修订）：本文件为旧版按钮级任务清单，已不再作为当前实施真源。
+> 当前实施真源以“2026-05 新 PM 基线 + tech/BASELINE_TECHLEAD_V2.md + 首期消费闭环决议”为准。
+> 下列旧任务类型已明确废止或降期，不得继续直接派发给 Engineer：
+> - gateway 管理接口热更新主路径
+> - pricing / prediction / 向量检索 / SFI 仪表盘等超范围能力
+> - 自动注册深链路作为本期硬门槛
+> - 以 Temporal / 独立 worker / 独立平台骨架为默认落地前提
+
+> 版本：v1.0
+> 日期：2026-04-27
+> 说明：每个任务 5 分钟可完成，可直接安排进任务管理
+
+---
+
+## Phase 1：模块 A（探针）+ 模块 E（工作台只读观测）
+
+### 模块 A1：探针管理基础
+
+#### A1.1 供应商账号列表页
+- [ ] **任务**：实现供应商账号列表页路由 `/supply/dashboard/accounts`
+- [ ] **任务**：在账号列表渲染数据表格，每行显示：账号ID / 供应商名称 / 账号标识(昵称) / 当前状态(徽章) / 风险评分 / 最近探针时间 / 操作
+- [ ] **任务**：账号状态徽章颜色：active=绿色 / suspended=黄色 / disabled=红色
+- [ ] **任务**：账号行风险评分显示为进度条（0-100），>80 显示红色
+- [ ] **任务**：账号行渲染"查看详情"按钮，点击展开显示最近 5 次探针结果
+- [ ] **任务**：账号列表支持分页，每页 50 条
+- [ ] **任务**：账号列表支持按供应商名称筛选（下拉框）
+- [ ] **任务**：账号列表支持按状态筛选（全部 / active / suspended / disabled）
+- [ ] **任务**：账号列表支持按风险评分范围筛选（滑块）
+
+#### A1.2 账号详情页
+- [ ] **任务**：实现账号详情页路由 `/supply/dashboard/accounts/{account_id}`
+- [ ] **任务**：详情页渲染账号基本信息区块：账号ID / 供应商 / 状态 / 创建时间 / 最近探针时间
+- [ ] **任务**：详情页渲染探针历史时间线，每条显示：探针时间 / 结果(成功/失败/不可判定) / 延迟 / HTTP状态码 / 风险评分
+- [ ] **任务**：详情页渲染"手动触发探针"按钮，点击后立即执行一次探针，显示加载状态，完成后刷新时间线
+- [ ] **任务**：详情页渲染"暂停此账号探针"开关按钮（默认关闭），开启后该账号不参与自动探针
+- [ ] **任务**：详情页渲染"查看历史状态变更"按钮，点击展开状态变更记录（时间 / 从 → 到 / 原因）
+
+#### A1.3 探针后端核心
+- [ ] **任务**：实现探针调度器（基于主仓既有调度能力或轻量本地调度器，每 5 分钟轮询所有 active/suspended 账号）
+- [ ] **任务**：实现探针执行器，对单个账号发起 HTTP GET/POST 请求，记录响应码/延迟/返回体
+- [ ] **任务**：实现探针结果评估逻辑：HTTP 200 = 成功 / 401/403 = 明确失败 / 429/5xx/超时/格式突变 = 不可判定
+- [ ] **任务**：实现状态机：`active` 收到 1 次 explicit_failure → `suspended`；`suspended` 连续 3 次 explicit_failure → `disabled`
+- [ ] **任务**：实现 429/暂时性错误指数退避：1min → 2min → 4min 重试，超 3 次则本次跳过并保留状态
+- [ ] **任务**：实现探针结果写入 `supply_intelligence_probe_logs` 表，保留 30 天
+
+### 模块 A2：供应商适配层
+
+#### A2.1 供应商适配器框架
+- [ ] **任务**：定义 `SupplierAdapter` 接口：`(Probe(ctx context.Context, account Account) ProbeResult, GetModels(ctx context.Context, account Account) ([]Model, error))`
+- [ ] **任务**：实现 `SupplierAdapterRegistry` map，按供应商名称注册适配器实例
+- [ ] **任务**：实现配置文件加载供应商适配器列表（`suppliers[].name` + `suppliers[].adapter`）
+- [ ] **任务**：每个适配器实现health check端点探测（发送测试请求验证连通性）
+
+#### A2.2 Phase 1 目标供应商适配（2个）
+- [ ] **任务**：实现 OpenAI 供应商适配器（Probe：用 /v1/models 查询；获取模型列表：用 /v1/models）
+- [ ] **任务**：实现 Anthropic 供应商适配器（Probe：用 /v1/models 查询；获取模型列表：用 /v1/models）
+- [ ] **任务**：适配器配置项：API Base URL / API Key（加密存储）/ 是否允许受控自动补给 / Rate Limit 阈值
+
+### 模块 E1：运营工作台（只读观测部分）
+
+#### E1.1 工作台首页
+- [ ] **任务**：实现工作台首页路由 `/supply/dashboard`
+- [ ] **任务**：首页渲染 4 个统计卡片：账号总数(按状态颜色分段) / 本小时新发现模型数 / 待处理候选模型数 / 受控自动补给任务队列长度
+- [ ] **任务**：首页渲染候选处理与账号健康摘要（避免引入 SFI 仪表盘等超范围指标体系）
+- [ ] **任务**：首页渲染"探针健康度"简表，显示各供应商最后探针结果（绿色OK/黄色不可判定/红色明确失败/灰色未探）
+
+#### E1.2 待处理事项列表
+- [ ] **任务**：在工作台首页渲染"待处理" Tab，展示以下待办项：
+  - 风险评分 > 70 的账号（红色高亮）
+  - 状态 = discovered 的候选模型（待准入测试）
+  - 受控自动补给失败或待验证的任务（待人工介入）
+  - 模型已下架告警（待确认）
+- [ ] **任务**：每项待办渲染"处理"按钮，点击进入对应详情页
+- [ ] **任务**：每项待办渲染"忽略"按钮，点击后该项从待办列表暂时移除（3小时后重现）
+
+---
+
+## Phase 2：模块 B（模型发现）+ 模块 C（准入测试）
+
+### 模块 B1：模型发现
+
+#### B1.1 模型列表页
+- [ ] **任务**：实现模型列表页路由 `/supply/dashboard/models`
+- [ ] **任务**：模型列表每行显示：模型ID / 所属供应商 / 当前状态(活跃/草稿/已下线/发现中/测试失败) / 发现时间 / 来源
+- [ ] **任务**：状态筛选 Tab：全部 / 发现中 / 待测试 / 活跃 / 已下线
+- [ ] **任务**：模型列表支持按供应商筛选
+- [ ] **任务**：模型列表支持按发现时间范围筛选
+- [ ] **任务**：模型行点击"查看详情"进入模型详情页
+
+#### B1.2 模型发现后端
+- [ ] **任务**：实现模型发现调度任务（基于主仓既有调度能力或轻量本地调度器），每 1 小时触发一次扫描
+- [ ] **任务**：实现模型列表抓取器：调用各供应商适配器的 `GetModels()` 方法
+- [ ] **任务**：实现模型比对逻辑：将抓取的模型列表与 `supply_packages` 中 active/paused/draft 记录去重
+- [ ] **任务**：发现新模型时，写入 `supply_intelligence_model_candidates` 表，status = discovered
+- [ ] **任务**：发现模型下架时（平台仍有该模型的 active 记录，但其 ID 已从供应商模型列表中消失），写入运营告警，不改变 package 状态
+- [ ] **任务**：实现模型来源记录：discovery_source 字段（official_api / manual_import）
+
+### 模块 C1：准入测试
+
+#### C1.1 准入测试配置
+- [ ] **任务**：实现测试用例管理页路由 `/supply/dashboard/tests/cases`
+- [ ] **任务**：测试用例列表每行显示：用例ID / 所属模型类型 / 测试目标(endpoint) / 状态(启用/禁用)
+- [ ] **任务**：渲染"新增用例"按钮，点击弹出用例创建表单（endpoint地址 / 请求方法 / 预期响应格式 / 超时时间）
+- [ ] **任务**：测试用例表单支持选择模板（chat/completion/embedding）
+- [ ] **任务**：实现每个模型类型默认测试用例集（≥ 5 个用例）
+
+#### C1.2 准入测试执行
+- [ ] **任务**：实现准入测试任务流，接收 candidate_id 参数并由主仓既有调度能力或轻量任务执行器驱动
+- [ ] **任务**：`AdmissionTestWorkflow` 从 `model_candidates` 加载 discovered 状态的候选模型
+- [ ] **任务**：按顺序执行所有启用的测试用例，记录每条的 HTTP 状态/延迟/响应格式/Token 计数
+- [ ] **任务**：所有用例返回 HTTP 200 + 格式正确 → 更新 candidate status = test_passed，生成 supply_package 草稿
+- [ ] **任务**：任意用例返回非 200 或格式错误 → 更新 candidate status = test_failed，写入 failure_reason
+- [ ] **任务**：单个用例超时（60 秒）→ 标记为 timeout，整体判定失败
+- [ ] **任务**：准入测试完成后，发送飞书通知给运营人员
+
+#### C1.3 草稿生成
+- [ ] **任务**：准入测试通过后，自动生成 `supply_packages` 草稿记录（status = draft）
+- [ ] **任务**：草稿字段：platform / model_id / model_name / price_per_1m_input（默认值）/ price_per_1m_output（默认值）/ suggested_by = si_auto
+- [ ] **任务**：草稿生成后，在工作台"待上架"列表中显示该草稿
+
+---
+
+## Phase 3：模块 D（受控自动补给）+ 模块 E（工作台完整干预）
+
+### 模块 D1：受控自动补给配置
+
+#### D1.1 自动补给设置页
+- [ ] **任务**：实现自动补给设置页路由 `/supply/dashboard/auto-supply/settings`
+- [ ] **任务**：页面渲染供应商列表，每行显示：供应商名称 / 是否开启受控自动补给（开关）/ 可用账号阈值（数字输入）/ 状态
+- [ ] **任务**：点击供应商行"配置"按钮，弹出自动补给配置弹窗
+- [ ] **任务**：配置弹窗字段：启用自动补给（开关）/ 白名单供应商标记 / 触发阈值（账号数）/ 补给方式（任务化/人工补录入口）/ 审批要求
+- [ ] **任务**：弹窗保存后，按主仓既有配置方式持久化并生效，不引入 Redis 首期前置依赖
+- [ ] **任务**：配置页顶部渲染"通知/补给受理链路测试"按钮，点击后发送测试通知或验证受理接口可达
+
+#### D1.2 自动补给执行后端
+- [ ] **任务**：实现受控自动补给任务流，监控白名单供应商可用账号数 < 阈值时触发
+- [ ] **任务**：按供应商配置创建补给任务或调用受控补给受理接口，禁止默认走浏览器自动化注册深链路
+- [ ] **任务**：补给成功后写入待验证/待启用记录，不允许绕过验证直接进入 active
+- [ ] **任务**：若涉及凭证写入，则将密钥发送至 KMS 加密，密文存入 `supply_accounts`
+- [ ] **任务**：触发验证或人工审核链路，验证通过后再进入可用状态
+- [ ] **任务**：补给失败时，写入 `supply_intelligence_auto_supply_tasks` 或等价任务表，status = failed，记录失败原因
+
+### 模块 D2：Fail-closed 安全机制
+
+- [ ] **任务**：补给流程中，若通知网关/补给受理接口返回 503 或超时，任务立即标记为 failed，不执行虚假成功写操作
+- [ ] **任务**：补给流程中，若 KMS 加密超时（60 秒），任务立即标记为 failed
+- [ ] **任务**：明文凭证在内存中的存活时间不超过 60 秒，超时自动清除
+- [ ] **任务**：审计日志中记录补给请求/响应（脱敏后：隐藏敏感标识、隐藏凭证）
+
+### 模块 E2：工作台完整干预
+
+#### E2.1 候选模型处理
+- [ ] **任务**：工作台"待上架模型"列表，每行显示：模型ID / 供应商 / 发现时间 / 测试结果摘要 / 来源
+- [ ] **任务**：模型行渲染"查看测试详情"按钮，点击展开显示所有测试用例结果（每条：通过/失败/超时）
+- [ ] **任务**：模型行渲染"确认上架"绿色按钮，点击后弹出确认框（显示将生成的 package 草稿内容）
+- [ ] **任务**：模型行渲染"忽略"按钮，点击后该模型 7 天内不出现（写入 ignored_until 字段）
+- [ ] **任务**：模型行渲染"手动强制上架"橙色按钮（仅测试失败时可见），点击后需填写强制上架理由（必填）
+
+#### E2.2 草稿确认上架
+- [ ] **任务**：点击"确认上架"后，PUT `supply_packages/{id}` status = active
+- [ ] **任务**：同时更新 `model_candidates` 对应记录 status = published
+- [ ] **任务**：写入 gateway package change event，等待首期消费方按决议链路拉取并 ack
+- [ ] **任务**：完成后显示成功提示："模型已上架，已生成待消费变更事件；是否进入路由以消费方 ack 为准"
+
+#### E2.3 工单与通知
+- [ ] **任务**：模型下架告警 → 自动生成运营工单（类型 = model_deprecated），推送到运营工作台
+- [ ] **任务**：受控自动补给失败 → 自动生成运营工单（类型 = auto_supply_failed），推送飞书通知
+- [ ] **任务**：连续 3 次探针失败账号 → 生成运营工单（类型 = account_risk），推送飞书通知
+
+---
+
+## 全局模块
+
+### 模块 G1：供应商配置管理
+
+- [ ] **任务**：实现供应商列表页路由 `/supply/dashboard/settings/suppliers`
+- [ ] **任务**：供应商列表每行显示：供应商ID / 名称 / 适配器类型 / 账号数量 / 接口状态 / 操作
+- [ ] **任务**：渲染"添加供应商"按钮，点击弹出供应商创建表单
+- [ ] **任务**：供应商表单字段：名称 / 适配器类型(下拉) / API Base URL / API Key（加密存储）/ 探针周期(默认5min) / 是否启用
+- [ ] **任务**：实现供应商"测试连通性"按钮，点击后执行一次 probe 并显示结果
+- [ ] **任务**：供应商配置变更后，刷新当前集成运行实例中的适配器装配或调度配置（不得以 Temporal Worker 作为首期前置依赖）
+
+### 模块 G2：配置热更新
+
+- [ ] **任务**：关键配置项（探针周期/扫描周期/阈值）按主仓既有配置方式存储与生效，避免把 Redis 作为首期前置依赖
+- [ ] **任务**：实现 `GET /api/v1/supply-intelligence/config` 接口，返回当前生效配置
+- [ ] **任务**：实现 `PUT /api/v1/supply-intelligence/config` 接口，修改配置后 60 秒内生效
+- [ ] **任务**：配置变更生成审计日志记录（action = config_update）
+- [ ] **任务**：不支持的配置项修改返回 400 错误码
+
+### 模块 G3：OpenAPI + 健康检查
+
+- [ ] **任务**：实现 `GET /actuator/health` / `/actuator/health/live` / `/actuator/health/ready`
+- [ ] **任务**：实现 Swagger UI 路由 `/docs`
+- [ ] **任务**：实现 OpenAPI 3.0 spec 端点 `/openapi.json`
+- [ ] **任务**：实现关键后台任务执行链路健康检查，调度/执行链路不可用时 `/actuator/health/ready` 返回 503
+
+### 模块 G4：权限与认证
+
+- [ ] **任务**：实现 JWT 认证中间件（与立交桥统一认证打通）
+- [ ] **任务**：实现角色权限：运营人员（观测 + 部分操作）/ 管理员（全部操作）
+- [ ] **任务**：权限不足返回 HTTP 403，错误码 `SUP_INT_AUTH_1001`
+
+---
+
+## 技术基础设施
+
+### T1：项目骨架
+- [ ] **任务**：初始化或挂载到主仓中的 Go module / 子模块边界，保持与 supply-api 一致的技术栈约束
+- [ ] **任务**：创建集成运行入口；如保留独立运行，也仅作为轻量可选形态，不以双进程 `api`/`worker` 为首期强依赖
+- [ ] **任务**：创建 `internal/` 目录结构（domain/service/handler/infrastructure/repository）
+- [ ] **任务**：配置 Viper 读取 `config.yaml`，支持环境变量覆盖
+- [ ] **任务**：配置 `log/slog` 结构化日志，输出 JSON 格式
+- [ ] **任务**：创建 PostgreSQL schema migration（使用 golang-migrate），表前缀 `supply_intelligence_`
+- [ ] **任务**：按主仓既有能力接入配置、调度、审计与内部路由，不额外引入 Redis 作为首期前置依赖
+- [ ] **任务**：配置 Dockerfile 和最小部署说明，优先支持主仓集成部署
+- [ ] **任务**：如需部署文档，仅按当前真源补充最小启动命令，不再回写旧 `DEPLOYMENT.md` 为实现依据
+
+### T2：单元测试骨架
+- [ ] **任务**：为每个 domain 层函数编写单元测试，覆盖率 >= 70%
+- [ ] **任务**：为每个 service 层函数编写单元测试，覆盖率 >= 80%
+- [ ] **任务**：配置 CI（GitHub Actions），PR 必须通过全部测试和覆盖率检查
+
+### T3：IntegrationPlugin 接口
+- [ ] **任务**：实现 `IntegrationPlugin` 接口（`Init() error` / `Serve() error` / `Shutdown() error`）
+- [ ] **任务**：实现插件模式下各模块的开关配置（`viper` 读取 `supply_intelligence.enabled_modules`）
+- [ ] **任务**：实现内部/外部路径前缀可配置，并区分 `/internal/supply-intelligence/` 与对外暴露路径
+- [ ] **任务**：编写集成测试：插件模式启动，关键探针/发现/发布事件接口与内部消费接口正常运作
+
+---
+
+## 任务估算汇总
+
+| Phase | 模块 | 任务数 | 估计工时 |
+|-------|------|--------|---------|
+| Phase 1 | A1 探针管理 + A2 适配层 + E1 工作台只读 | 35 | 3 人天 |
+| Phase 2 | B1 模型发现 + C1 准入测试 | 27 | 3 人天 |
+| Phase 3 | D1/D2 受控自动补给 + E2 工作台干预 | 28 | 3 人天 |
+| 全局 | G1 供应商配置 + G2 配置热更新 + G3 OpenAPI + G4 权限认证 | 18 | 2 人天 |
+| 技术基础设施 | T1 骨架 + T2 测试 + T3 插件 | 16 | 2 人天 |
+| **合计** | | **124** | **~13 人天** |
--- a/specs/竞品分析.md
+++ b/specs/竞品分析.md
@@ -0,0 +1,124 @@
+# Supply Intelligence 竞品深度分析
+
+> 版本：v1.0
+> 日期：2026-04-27
+> 内容：8 个竞品全景矩阵、功能逐项对比、技术分析、市场定位
+
+---
+
+## 一、市场概览
+
+- 归并到 LLM API Gateway 市场：2025 年约 **$15-25 亿**，高速增长
+- 多供应商运营复杂度急剧上升：中等规模团队可能接入 10+ 供应商，20+ 账号
+- 供应商 API Key 失效/额度耗尽是高频线上事故根因，单次事故损失 $5000-50000
+- 新模型发布速度：每月 50+ 新模型，人工录入无法跟上
+- **市场空白**：供应链运营自动化（供应商账号健康、模型发现、准入测试）几乎无成熟方案
+
+---
+
+## 二、竞品全景矩阵（8 个）
+
+| 竞品 | 类型 | 供应商账号健康探针 | 新模型自动发现 | 准入测试自动化 | 账号自动注册 | 运营工作台 | 定价 |
+|------|------|-----------------|-------------|-------------|------------|----------|------|
+| **LiteLLM** | 开源 | ❌ 手动录入 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单管理 | 免费（自部署） |
+| **Helicone** | SaaS/开源 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 免费+$0.05/请求 |
+| **Portkey** | SaaS | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | $49/月起 |
+| **OpenRouter** | SaaS | ❌ 手动管理 | ⚠️ 手动 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 5% 手续费 |
+| **Kong AI Gateway** | 企业 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ API 管理 | 面议 |
+| **One API / NewAPI** | 开源 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 免费 |
+| **RapidAPI Enterprise Hub** | SaaS | ⚠️ 入驻流程 | ⚠️ 手动 | ❌ 无 | ⚠️ 部分 | ✅ | $2-10 万/年 |
+| **内部自建（现状）** | — | ❌ 无监控 | ❌ 无 | ❌ 无 | ❌ 无 | ❌ 无 | 人力成本 |
+| **立交桥 supply-intelligence** | 内部工具 | ✅ 分钟级探针 | ✅ 自动发现 | ✅ 自动化流水线 | ✅ 白名单供应商 | ✅ 完整工作台 | 内部成本 |
+
+---
+
+## 三、功能逐项对比（12 项）
+
+```
+功能项                          LiteLLM  Helicone  Portkey  OpenRouter  NewAPI  RapidAPI  supply-intel
+供应商账号健康探针                ❌       ❌        ❌       ❌          ❌       ⚠️        ✅
+新模型自动发现                   ❌       ❌        ❌       ⚠️          ❌       ⚠️        ✅
+模型准入测试                     ❌       ❌        ❌       ❌          ❌       ❌        ✅
+supply_package 草稿生成           ❌       ❌        ❌       ❌          ❌       ❌        ✅
+账号自动注册                     ❌       ❌        ❌       ❌          ❌       ⚠️        ✅
+运营工作台                       ⚠️       ⚠️        ⚠️       ⚠️          ⚠️       ✅        ✅
+KMS 凭证加密                     ❌       ❌        ⚠️       ❌          ❌       ✅        ✅
+审计日志                         ⚠️       ✅        ✅       ⚠️          ⚠️       ✅        ✅
+供应商状态 API 对外提供           ❌       ❌        ❌       ❌          ❌       ❌        ✅
+Fail-closed 降级                 ❌       ❌        ⚠️       ❌          ❌       ⚠️        ✅
+SFI 指标追踪                     ❌       ❌        ❌       ❌          ❌       ❌        ✅
+独立/集成双模式                  ❌       ❌        ❌       ⚠️          ⚠️       ❌        ✅
+```
+
+---
+
+## 四、市场定位结论
+
+### 4.1 竞品空白
+
+**所有 LLM Gateway 竞品（LiteLLM/Helicone/Portkey/OpenRouter）只做：**
+- 统一 API 路由
+- 用量计量和计费
+- 基础监控和日志
+
+**没有任何竞品提供：**
+1. 供应商账号健康度的分钟级自动探针（额度/密钥/TOS）
+2. 新模型发布的自动发现（对接供应商模型列表 API）
+3. 模型准入测试的自动化（功能验证 + supply_package 草稿生成）
+4. 供应商账号的自动注册（针对支持公开注册的供应商）
+
+### 4.2 supply-intelligence 差异化定位
+
+```
+LLM Gateway（LiteLLM/Helicone/Portkey/OpenRouter）
+  └─ 能力边界：路由 + 计量 + 监控
+  └─ 缺失：供应商运营能力
+
+API Marketplaces（RapidAPI）
+  └─ 能力边界：供应商入驻 + 文档 + 货币化
+  └─ 缺失：自动化运营工具
+
+内部自建（现状）
+  └─ 能力边界：手动维护
+  └─ 缺失：自动化 + 监控 + 实时性
+
+───────────────────────────────────
+立交桥 supply-intelligence = 供应链运营自动化
+  ✅ 供应商健康探针（分钟级）
+  ✅ 新模型自动发现（对接供应商 API）
+  ✅ 准入测试自动化（功能验证）
+  ✅ 运营工作台（待办 + 一键上架）
+  ✅ 账号自动注册（白名单供应商）
+```
+
+---
+
+## 五、关键技术差异
+
+### 5.1 探针方案对比
+
+| 方案 | 代表竞品 | 频率 | 自动化程度 |
+|------|---------|------|----------|
+| 手动检查 | 内部自建 | 天级 | ❌ |
+| 被动监控 | LLM Gateway 竞品 | 被动 | ⚠️ 有限 |
+| 主动探针 | **supply-intelligence** | 分钟级 | ✅ 完整 |
+
+### 5.2 模型发现方案对比
+
+| 方案 | 代表竞品 | 延迟 | 自动化程度 |
+|------|---------|------|----------|
+| 人工录入 | 内部自建 | 天级 | ❌ |
+| 供应商通知 | RapidAPI | 小时级 | ⚠️ 被动 |
+| 自动扫描 | **supply-intelligence** | 分钟级 | ✅ 主动 |
+
+---
+
+## 六、技术选型建议
+
+| 组件 | 推荐方案 | 理由 |
+|------|---------|------|
+| 探针调度 | Temporal | 分布式友好，exponential backoff，dead letter queue 内置 |
+| 供应商 API 对接 | 配置化 adapter | 供应商数量多，接口差异大，需可扩展 |
+| 凭证加密 | KMS（主）+ AES-256-GCM（兜底） | 符合安全审计要求 |
+| 模型发现 | 轮询为主 | 多数供应商无 Webhook，轮询更通用 |
+| 准入测试 | 异步任务队列 | 测试可能耗时长，不能阻塞扫描周期 |
--- a/tech/BASELINE_TECHLEAD_V2.md
+++ b/tech/BASELINE_TECHLEAD_V2.md
@@ -0,0 +1,609 @@
+> 真源索引：当前文档受 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md` 纳管。
+> 若阅读顺序、真源优先级或跨文档冲突存在疑问，先看该索引，再回到本基线文档。
+
+1. 设计范围：本次覆盖 / 明确不做 / 与 PRD 对应关系
+
+1.1 本次覆盖
+- 覆盖模块 A 供应商品质探针，但收敛为“账号健康探测 + 状态写回 + 审计 + gateway 可消费状态快照”。
+- 覆盖模块 B 全网模型发现，但收敛为“已接入供应商的模型列表扫描 + candidate 生成 + 下架告警”，不做广义全网情报平台。
+- 覆盖模块 C 模型准入测试，但收敛为“对 discovered candidate 做标准兼容性验证 + 生成 supply_package 草稿 + 发布闭环”。
+- 模块 E 仅覆盖与主链路直接相关的最小运营干预：手动触发探针、忽略 candidate、确认上架、查看失败原因、查看审计。
+- 覆盖与立交桥主项目的优先集成运行方案。
+- 覆盖对 NewAPI / Sub2API 的最小适配边界：状态读取、模型列表消费、可选发布回调；不反向侵入其内部实现。
+- 覆盖五个 QA 阻塞的显式修复：
+  1) 设计范围蔓延
+  2) 探针误判规则冲突
+  3) candidate 状态机不闭环
+  4) 模块关闭一致性缺失
+  5) gateway 消费链路未闭环
+
+1.2 明确不做
+- 不做独立平台化、多服务拆分、专用 API Gateway、专用消息总线、专用控制台集群。
+- 不做 vector / embedding 检索 / 向量库。
+- 不做 pricing 数据库、模型比价主链路、自动定价、家族回退定价。
+- 不做 predictions / 预测分析 / 广义开放平台 / 社区情报源。
+- 不做 WebSocket 实时推送作为本期前提；工作台可先走普通 HTTP 拉取。
+- 不做 Playwright 浏览器自动化注册主路径；本期仅保留受控自动补给的最小边界：白名单供应商、阈值触发、任务化补给、待验证/待启用，不把浏览器自动化注册链路作为首期上线门槛。
+- 不要求独立 Redis/Temporal/Milvus/Qdrant 等新增重基础设施；优先复用立交桥现有 DB、现有 scheduler、现有审计、现有配置热更新机制。
+- 不自动直接操作 gateway 路由细节表；只提供 package 发布后的内部契约，由 gateway/supply-api 按既有主链路消费。
+
+1.3 与 PRD 对应关系
+- AC-01/02/03：保留，落在探针执行、判定、状态迁移、审计与降级策略。
+- AC-04/05：保留，落在扫描、去重、新增 candidate、下架告警。
+- AC-06/07：保留，落在 admission runner、candidate 流转、draft package 生成。
+- AC-08/09：本期不做深自动注册链路，但保留“受控自动补给”的最小产品/技术边界：仅允许白名单供应商、仅允许阈值触发、仅允许生成待补给任务或进入待验证/待启用，不允许把注册浏览器自动化、验证码编排、自动激活作为首期硬门槛。
+- AC-10/11/12：保留，但只保留支撑主链路的最小实现，不扩展成独立大盘平台。
+- PRD 中与商业化、SFI、预测分析、比价报表相关内容不作为本次技术控制面主路径。
+
+2. 架构与模块：模块划分、文件/目录落点、关键调用链路、关键依赖与降级边界
+
+2.1 总体架构
+基线采用“立交桥主项目内集成模块”模式，而不是独立平台。推荐以 supply-api 内部模块形式落地，原因：
+- 直接复用 supply_accounts / supply_packages / audit / verify / config / scheduler。
+- 避免再造服务间调用、鉴权、部署、监控、迁移复杂度。
+- 更符合立交桥现有 net/http + pgx + PostgreSQL 的简洁架构。
+
+独立运行能力保留为简单可选形态：
+- 仅在确有外部项目需要时，封装为同仓内单进程启动入口。
+- 独立运行不得要求新增专用基础设施；仍使用 PostgreSQL + 现有 scheduler 抽象。
+- 不额外设计独立控制台、独立 worker 集群、独立 API 网关。
+
+2.2 模块划分
+建议收敛为 6 个模块，均为最小必要：
+
+A. probe
+- 读取待探测账号
+- 执行标准探针
+- 依据统一判定规则生成 outcome
+- 驱动 account 状态迁移
+- 写审计与探针日志
+
+B. discovery
+- 读取已接入供应商适配器
+- 拉取模型列表
+- 与现有 supply_packages / candidate 去重
+- 创建 candidate
+- 生成模型下架告警待办
+
+C. admission
+- 消费 discovered / retry_pending candidate
+- 执行标准测试集
+- 更新 candidate 状态
+- 生成或更新 supply_package draft
+
+D. publish
+- 运营确认 package draft
+- 将 package 切到 active
+- 将 candidate 切到 published
+- 写入 gateway 可消费的发布事件/变更记录
+
+E. integration
+- 立交桥内部直接集成接口
+- gateway / supply-api 内部契约
+- NewAPI / Sub2API 适配边界
+
+F. control
+- 模块开关、停机、运行中任务收敛、配置热更新、幂等、审计
+
+2.3 文件/目录落点
+以下为建议落点，优先放入立交桥主项目既有模块内；若 supply-intelligence 仓先行设计，可按同名目录组织：
+- /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md
+- 建议实现落点参考：
+  - supply-api/internal/supplyintelligence/module.go
+  - supply-api/internal/supplyintelligence/probe/service.go
+  - supply-api/internal/supplyintelligence/probe/evaluator.go
+  - supply-api/internal/supplyintelligence/probe/state_machine.go
+  - supply-api/internal/supplyintelligence/discovery/service.go
+  - supply-api/internal/supplyintelligence/discovery/adapter_registry.go
+  - supply-api/internal/supplyintelligence/admission/service.go
+  - supply-api/internal/supplyintelligence/admission/runner.go
+  - supply-api/internal/supplyintelligence/publish/service.go
+  - supply-api/internal/supplyintelligence/integration/http_internal.go
+  - supply-api/internal/supplyintelligence/integration/newapi_adapter.go
+  - supply-api/internal/supplyintelligence/integration/sub2api_adapter.go
+  - supply-api/internal/supplyintelligence/control/shutdown.go
+  - supply-api/internal/supplyintelligence/repository/*.go
+  - supply-api/sql/*supply_intelligence*.sql
+
+2.4 关键调用链路
+
+链路 1：探针 -> 状态写回 -> gateway 消费闭环
+1) scheduler 触发 ProbeTick(platform/account batch)
+2) probe.Service.LoadProbeTargets()
+3) probe.Service.RunProbe(accountID)
+4) probe.Evaluator.Classify(response/error) => success / explicit_failure / inconclusive
+5) probe.StateMachine.Apply(account.current_status, recent_probe_window)
+6) repository.UpdateAccountHealthAndStatusTx(...)
+7) repository.AppendAuditLog(...)
+8) repository.UpsertGatewayAccountSnapshot(...)
+9) gateway 通过内部契约读取 snapshot 或随 package/account 查询一起读取可用状态
+
+链路 2：扫描 -> candidate -> admission
+1) scheduler 触发 DiscoveryTick(platform)
+2) discovery.Adapter.FetchModels()
+3) discovery.Service.DiffAgainstPackagesAndCandidates()
+4) repository.UpsertModelCandidate(status=discovered)
+5) scheduler enqueue AdmissionRun(candidateID)
+6) admission.Runner.Execute(candidateID)
+7) repository.UpdateCandidateStatus(...)
+8) repository.UpsertDraftPackage(...)
+9) repository.AppendAuditLog(...)
+
+链路 3：运营确认上架 -> gateway 消费闭环
+1) ops POST confirm publish
+2) publish.Service.PublishDraft(candidateID, actor)
+3) tx: lock candidate + package draft
+4) package draft -> active
+5) candidate test_passed -> published
+6) append internal event supply_package_published
+7) append audit
+8) gateway/supply-api 既有主链路消费 active package 或发布事件刷新内存路由
+
+链路 4：模块关闭闭环
+1) operator/config 将 module.enabled=false
+2) control.ModuleGate.MarkClosing(module)
+3) 新任务拒绝入队/拒绝手动触发
+4) 运行中任务继续到安全提交点或超时中断
+5) 当 inflight=0 时写入 runtime_state=closed
+6) 后续 scheduler tick 直接跳过
+
+2.5 关键依赖与降级边界
+- PostgreSQL：强依赖。不可用时所有自动写操作 fail-closed，不做假成功。
+- scheduler：中强依赖。不可用时自动任务暂停，但手动接口可保留。记录告警。
+- supplier adapter：弱依赖。单供应商异常不影响其他供应商。
+- gateway：首期默认事件型消费方。发布链路不等待 gateway 成功回调才提交 package active，但必须通过 package change + ack 保留可追踪消费记录，且必须存在真实消费入口。
+- NewAPI/Sub2API：可选适配依赖。未配置时不影响立交桥内部主链路。
+
+降级原则
+- 探针外部错误、429、5xx、DNS/TCP 异常：inconclusive，不推进惩罚性状态迁移。
+- admission 外部超时：candidate 转 retry_pending 或 test_failed，不能生成 active package。
+- gateway 消费延迟：package 可 active，但需要“未消费/待同步”状态位和审计，不可假定已生效。
+- 模块关闭中：新任务一律拒绝，运行中任务只允许安全收尾。
+
+3. 接口与数据模型：API/RPC/事件、数据模型/schema、错误码、安全/鉴权契约
+
+3.1 接口分类
+
+3.1.1 立交桥内部直接集成接口
+用途：供立交桥主项目内其他模块直接调用，优先 Go 接口，不先暴露额外网络跳。
+
+interface SupplyIntelligenceModule {
+    RunProbe(ctx context.Context, accountID int64, trigger string) (*ProbeOutcome, error)
+    ScanPlatform(ctx context.Context, platform string, trigger string) (*ScanOutcome, error)
+    RunAdmission(ctx context.Context, candidateID int64, trigger string) (*AdmissionOutcome, error)
+    PublishCandidate(ctx context.Context, candidateID int64, actor string) (*PublishOutcome, error)
+    GetAccountRoutingState(ctx context.Context, accountID int64) (*AccountRoutingState, error)
+}
+
+3.1.2 给 gateway / supply-api 使用的内部契约
+用途：形成真实消费闭环，避免“文档说 gateway 会用，但无真实契约”。
+
+HTTP internal 契约，前缀建议：/internal/supply-intelligence
+
+1) GET /internal/supply-intelligence/accounts/{account_id}/routing-state
+响应：
+{
+  "account_id": 123,
+  "platform": "openai",
+  "account_status": "active",
+  "routing_enabled": true,
+  "risk_score": 20,
+  "reason_code": "ok",
+  "last_probe_at": "2026-05-06T15:00:00Z",
+  "version": 17
+}
+
+2) GET /internal/supply-intelligence/models/{platform}/{model}/admission-state
+响应：
+{
+  "platform": "openai",
+  "model": "gpt-4.1-mini",
+  "candidate_status": "published",
+  "package_id": 456,
+  "package_status": "active",
+  "gateway_sync_status": "pending|applied|failed|not_required",
+  "version": 9
+}
+
+3) GET /internal/supply-intelligence/gateway/package-changes?cursor=...
+响应：
+{
+  "items": [
+    {
+      "event_id": "evt_001",
+      "event_type": "supply_package_published",
+      "package_id": 456,
+      "platform": "openai",
+      "model": "gpt-4.1-mini",
+      "occurred_at": "2026-05-06T15:00:00Z",
+      "version": 9
+    }
+  ],
+  "next_cursor": "..."
+}
+
+4) POST /internal/supply-intelligence/gateway/package-changes/{event_id}/ack
+请求：
+{
+  "consumer": "gateway",
+  "result": "applied|failed",
+  "detail": "optional"
+}
+响应：204
+
+闭环定义
+- 发布成功 != gateway 已消费。
+- 只有 gateway ack event_id 后，gateway_sync_status 才能从 pending -> applied/failed。
+- QA 必须验证 publish -> list changes -> ack 的真实链路。
+
+3.1.3 面向 NewAPI/Sub2API 的适配边界
+原则：只暴露最小必要只读/回调能力，不把本系统设计成它们的管理平台。
+
+适配边界 A：状态拉取
+- GET /adapter/v1/supply-status/accounts/{account_id}
+- 字段与 routing-state 对齐，但去掉内部实现细节。
+
+适配边界 B：模型拉取
+- GET /adapter/v1/models?status=published
+响应只返回已 published 且 package active 的模型。
+
+适配边界 C：可选发布回调下发
+- POST /adapter/v1/package-events
+仅在对方需要 webhook 模式时启用；默认不要求。
+
+适配边界约束
+- 不暴露审计明细。
+- 不暴露原始探针日志。
+- 不暴露账号凭证、测试账号信息、内部风险算法细节。
+- 仅允许配置白名单来源访问。
+
+3.2 数据模型/schema
+
+3.2.1 probe_execution_logs
+- id bigint pk
+- account_id bigint not null
+- platform varchar(64) not null
+- probe_result varchar(32) not null 取值: success | explicit_failure | inconclusive
+- failure_class varchar(64) null 取值: auth_invalid | quota_empty | timeout | tcp_error | dns_error | rate_limited | upstream_5xx | parse_error
+- http_status int null
+- latency_ms int null
+- risk_score int not null
+- evaluated_transition varchar(64) not null 取值: no_change | active_to_suspended | suspended_to_disabled | suspended_to_active
+- executed_at timestamptz not null
+- request_id varchar(64) not null
+- index(account_id, executed_at desc)
+
+3.2.2 model_candidates
+- id bigint pk
+- platform varchar(64) not null
+- model varchar(128) not null
+- status varchar(32) not null
+- discovery_source varchar(32) not null 取值: official_api | official_doc | manual_seed
+- last_scan_at timestamptz not null
+- discovered_at timestamptz not null
+- last_test_at timestamptz null
+- failure_reason_code varchar(64) null
+- failure_summary text null
+- ignored_until timestamptz null
+- package_id bigint null
+- version int not null default 1
+- unique(platform, model)
+
+candidate 最终闭环状态机
+- discovered：扫描新发现，可入测试
+- testing：测试执行中
+- test_passed：测试通过，已存在 draft package
+- test_failed：测试失败，允许人工重试或自动进入 retry_pending
+- retry_pending：等待下次重试
+- ignored：运营临时忽略，到 ignored_until 后自动回 discovered
+- published：运营已确认上架，package active
+- deprecated：供应商侧已消失，已产生运营待办，但历史保留
+- closed：不再处理的终态，仅用于模型被明确弃用/手工关闭
+
+合法迁移
+- discovered -> testing
+- testing -> test_passed | test_failed | retry_pending
+- test_failed -> retry_pending | closed
+- retry_pending -> testing | closed
+- discovered | test_failed | retry_pending -> ignored
+- ignored -> discovered
+- test_passed -> published | closed
+- published -> deprecated | closed
+- deprecated -> closed
+
+闭环修复点
+- 任何非终态都存在后继处理路径。
+- ignored 有自动回流。
+- published/deprecated 最终可归档到 closed。
+- 不再存在“只定义中间态、无出口”的 QA 阻塞。
+
+3.2.3 gateway_package_events
+- event_id varchar(64) pk
+- event_type varchar(64) not null
+- package_id bigint not null
+- candidate_id bigint null
+- payload jsonb not null
+- consumer varchar(64) null
+- consumer_status varchar(32) not null default 'pending'
+- consumer_detail text null
+- occurred_at timestamptz not null
+- acked_at timestamptz null
+- retry_count int not null default 0
+
+3.2.4 module_runtime_state
+- module_name varchar(64) pk
+- desired_state varchar(16) not null 取值: enabled | disabled
+- runtime_state varchar(16) not null 取值: starting | running | closing | closed
+- inflight_count int not null
+- updated_at timestamptz not null
+
+3.3 探针判定统一规则
+这是本轮必须修的 QA 阻塞之一，统一如下：
+
+明确失败 explicit_failure
+- HTTP 401/403
+- 供应商明确返回 key invalid / account suspended / quota exhausted 且可稳定识别
+
+不可判定 inconclusive
+- HTTP 429
+- HTTP 5xx
+- DNS 失败
+- TCP 连接失败
+- 超时
+- 响应体为空或格式突变
+
+成功 success
+- 返回 2xx 且最小校验通过
+
+状态迁移规则
+- active + 1 次 explicit_failure -> suspended
+- suspended + 最近连续 3 次 explicit_failure -> disabled
+- suspended + 1 次 success -> active
+- disabled 不自动恢复，只能人工恢复到 active 或 closed
+- inconclusive 永不计入 explicit failure 连续次数
+
+说明
+- 将 timeout/TCP/DNS 从“失败导致降级”统一修正为 inconclusive，消除 PRD/HLD 冲突。
+- 若未来某供应商能明确证明 timeout 即余额停用，也必须走供应商级覆盖配置，不改全局默认。
+
+3.4 错误码
+- SUP_INT_PROBE_NOT_FOUND 404
+- SUP_INT_PROBE_MODULE_DISABLED 409
+- SUP_INT_CANDIDATE_NOT_FOUND 404
+- SUP_INT_CANDIDATE_STATE_INVALID 409
+- SUP_INT_PUBLISH_PACKAGE_MISSING 409
+- SUP_INT_GATEWAY_ACK_CONFLICT 409
+- SUP_INT_ADAPTER_UNSUPPORTED 400
+- SUP_INT_AUTH_FORBIDDEN 403
+- SUP_INT_CONFIG_INVALID 400
+- SUP_INT_UPSTREAM_TEMPORARY 503
+
+3.5 安全/鉴权契约
+- 内部接口只允许立交桥内部服务身份访问，走现有 internal auth middleware。
+- NewAPI/Sub2API 适配接口必须使用独立 access key 或签名校验，按来源白名单限制。
+- 审计字段必须包含 object_type/object_id/action/result_code/before_state/after_state/request_id/actor。
+- 任何日志不得输出明文 API key、cookie、token、测试账号凭证。
+- 手动发布、手动恢复 disabled 账号、关闭 candidate 必须要求 operator 身份并审计。
+
+4. 任务拆解：每个任务必须有具体文件路径和函数名，粒度 2-5 分钟
+
+说明：以下为 Engineer 最小实现任务单，按设计拆到文件级与函数级。路径以优先集成到 supply-api 为准。
+
+4.1 模块骨架
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func RegisterModule(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func MountInternalRoutes(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func RegisterSchedulers(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/control/shutdown.go :: func BeginModuleClose(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/control/shutdown.go :: func FinishInflightTask(...) error
+
+4.2 probe
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/service.go :: func LoadProbeTargets(ctx context.Context, limit int) ([]Account, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/service.go :: func RunProbe(ctx context.Context, accountID int64, trigger string) (*ProbeOutcome, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator.go :: func ClassifyProbeResult(resp *http.Response, err error) ProbeClass
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator.go :: func CalculateRiskScore(class ProbeClass) int
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine.go :: func ApplyAccountTransition(current string, recent []ProbeClass) (next string, transition string)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine.go :: func CountRecentExplicitFailures(recent []ProbeClass) int
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/worker.go :: func HandleProbeTick(ctx context.Context) error
+
+4.3 discovery
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/adapter_registry.go :: func ResolveModelAdapter(platform string) (ModelAdapter, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func ScanPlatform(ctx context.Context, platform string, trigger string) (*ScanOutcome, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func DiffModels(current []string, packages []string, candidates []string) DiffResult
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func UpsertDiscoveredCandidates(ctx context.Context, platform string, models []string) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func MarkDeprecatedAlerts(ctx context.Context, platform string, missing []string) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/worker.go :: func HandleDiscoveryTick(ctx context.Context) error
+
+4.4 admission
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/service.go :: func EnqueueAdmission(ctx context.Context, candidateID int64) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/service.go :: func RunAdmission(ctx context.Context, candidateID int64, trigger string) (*AdmissionOutcome, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func LoadCandidateForTesting(ctx context.Context, candidateID int64) (*Candidate, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func ExecuteTestSuite(ctx context.Context, c *Candidate) (*SuiteResult, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func DecideCandidateNextState(result *SuiteResult) (string, string)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func UpsertDraftPackage(ctx context.Context, c *Candidate, result *SuiteResult) (int64, error)
+
+4.5 publish
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func PublishCandidate(ctx context.Context, candidateID int64, actor string) (*PublishOutcome, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func ValidatePublishable(ctx context.Context, candidateID int64) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func AppendGatewayPackageEvent(ctx context.Context, packageID int64, candidateID int64) error
+
+4.6 integration
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func GetAccountRoutingState(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func GetModelAdmissionState(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func ListGatewayPackageChanges(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func AckGatewayPackageChange(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/newapi_adapter.go :: func ListPublishedModels(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/newapi_adapter.go :: func GetExternalAccountStatus(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/sub2api_adapter.go :: func ListPublishedModels(w http.ResponseWriter, r *http.Request)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/sub2api_adapter.go :: func GetExternalAccountStatus(w http.ResponseWriter, r *http.Request)
+
+4.7 repository / sql
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/probe_repo.go :: func InsertProbeExecutionLog(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/probe_repo.go :: func UpdateAccountHealthAndStatusTx(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/candidate_repo.go :: func UpsertModelCandidate(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/candidate_repo.go :: func UpdateCandidateStateTx(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/package_repo.go :: func UpsertDraftPackageTx(...) (int64, error)
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go :: func InsertGatewayPackageEventTx(...) error
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go :: func AckGatewayPackageEventTx(...) error
+- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_probe_logs.sql :: migration create table
+- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_candidates.sql :: migration create table
+- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_gateway_events.sql :: migration create table
+- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_module_runtime.sql :: migration create table
+
+4.8 测试与校验
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine_test.go :: func TestApplyAccountTransition()
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator_test.go :: func TestClassifyProbeResult()
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner_test.go :: func TestDecideCandidateNextState()
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service_test.go :: func TestPublishCandidate_AppendsGatewayEvent()
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal_test.go :: func TestGatewayEventAckFlow()
+
+5. 风险与保护：风险清单（概率/影响/缓解）、降级策略、威胁建模结果
+
+5.1 风险清单
+1) 探针误判导致错误下线
+- 概率：中
+- 影响：高
+- 缓解：只允许 explicit_failure 触发惩罚状态；429/超时/网络错误全部 inconclusive；disabled 仅连续 3 次明确失败；生产初期可只告警不自动 disabled。
+
+2) candidate 重复创建或状态乱序
+- 概率：中
+- 影响：中
+- 缓解：unique(platform, model)；version 乐观锁；状态迁移函数集中校验；测试任务拿行锁。
+
+3) gateway 未真实消费已上架 package
+- 概率：高
+- 影响：高
+- 缓解：新增 gateway_package_events + ack 契约；区分 published 与 gateway applied；监控 pending backlog。
+
+4) 模块关闭时仍有脏写入
+- 概率：中
+- 影响：中
+- 缓解：runtime_state=closing 时拒绝新任务；inflight 计数；安全提交点；超时取消 ctx。
+
+5) 适配器变更影响扫描质量
+- 概率：中
+- 影响：中
+- 缓解：按供应商隔离；单平台失败不扩散；保留 last_successful_scan 基线；失败仅告警不删数据。
+
+6) NewAPI/Sub2API 适配越权暴露内部数据
+- 概率：低
+- 影响：高
+- 缓解：适配接口单独 DTO；白名单认证；不复用内部 debug 输出。
+
+5.2 降级策略
+- probe 模块关闭：gateway 继续依赖现有 account/package 状态；新鲜度下降但主链路可运行。
+- discovery 模块关闭：不再发现新模型；已上架模型不受影响。
+- admission 模块关闭：candidate 可积压，但不会误上架。
+- publish 后 gateway 未消费：保留 pending，运营可见；不回滚 package active，但不得宣称“已进路由”。
+- NewAPI/Sub2API 未配置：直接关闭适配路由，不影响内部主链路。
+
+5.3 威胁建模结果
+输入边界
+- 供应商返回体属于不可信输入：必须限长、schema 校验、错误脱敏。
+- 运营手动接口属于高权限输入：必须鉴权、审计、幂等。
+- gateway ack 请求属于内部写接口：必须鉴权并校验 event_id/consumer 一致性。
+
+数据流
+- supplier -> adapter -> evaluator -> db
+- db -> internal route -> gateway
+- db -> adapter route -> NewAPI/Sub2API
+
+主要威胁与处置
+- 凭证泄漏：本期不纳入自动注册主路径；现有账号密钥仅走既有安全存储，不在本模块新增明文链路。
+- 重放/重复发布：publish 接口需幂等，published candidate 再次 publish 返回 409。
+- 伪造 gateway ack：只接受内部服务身份；event consumer 固定枚举。
+- 大响应体压垮解析：adapter 限制 body size，超限视为 inconclusive/scan_failed。
+- SQL 并发覆盖：关键状态表使用 version 或 select for update。
+
+6. QA 交接与实施约束：编码前设计审查要点、编码后漂移检查点、必查真实调用链路、禁止偏离的边界
+
+6.1 编码前设计审查要点
+- 是否明确“集成运行优先，独立运行可选且轻量”。
+- 是否删除 pricing/vector/predictions/开放平台化内容。
+- 探针默认规则是否统一为 explicit_failure 才触发状态惩罚。
+- candidate 状态机是否存在完整入口、出口、终态与回流。
+- gateway 是否存在 list change + ack 的真实闭环，而非只有查询接口。
+- 模块关闭是否存在 closing -> closed 收敛语义。
+- NewAPI/Sub2API 是否仅作为适配边界，而非反向牵引主架构。
+
+6.2 编码后漂移检查点
+- 是否出现新增 Redis/Temporal/Kafka/MQ/向量库等重基础设施前置依赖。
+- 是否出现额外独立服务、额外 API gateway、复杂事件总线。
+- 是否把自动注册重新抬回本期主路径。
+- 是否把 gateway 路由刷新实现成跨系统强耦合同步 RPC 必须成功。
+- 是否新增未在本基线定义的中间状态。
+- 是否把 timeout/TCP/DNS 再次当成 explicit_failure。
+
+6.3 QA 必查真实调用链路
+- probe tick -> evaluator -> state machine -> supply_accounts 写回 -> audit 写入
+- discovery tick -> candidate discovered -> admission run -> draft package
+- publish confirm -> package active -> candidate published -> gateway change event -> gateway ack
+- module disable -> closing -> reject new task -> inflight drain -> closed
+- adapter route -> NewAPI/Sub2API 只读返回，字段不泄露内部敏感信息
+
+6.4 禁止偏离的边界
+- 禁止把本期做成独立平台化部署前提。
+- 禁止把比价、预测、向量检索恢复为主链路。
+- 禁止未定义契约就声称“gateway 会消费”。
+- 禁止 candidate 状态直接跳 published，绕过 test_passed + draft package。
+- 禁止 disabled 自动恢复。
+- 禁止模块关闭时直接 kill 运行中事务而无收敛策略。
+
+7. Engineer 实施说明：文件级落点、最小验证项、需 PM 澄清项
+
+7.1 文件级落点
+优先实施目录：
+- /home/long/project/立交桥/supply-api/internal/supplyintelligence/
+- /home/long/project/立交桥/supply-api/sql/
+- /home/long/project/立交桥/supply-api/internal/http/internal/
+
+若 supply-intelligence 项目仓仅承载设计文档，则本文件作为交付基线，后续代码并入 supply-api 主仓。
+
+7.2 最小验证项
+- 单测：探针分类、账号状态迁移、candidate 状态迁移、publish 幂等。
+- 集成测：publish 后产生 gateway event，gateway ack 后状态更新 applied。
+- 集成测：module closing 时手动触发探针返回 409 module disabled/closing。
+- E2E 最小链路：
+  1) 一个 active 账号 401 -> suspended
+  2) 一个新模型 discovered -> test_passed -> draft -> published
+  3) gateway 拉取 package change 并 ack
+
+7.3 需 PM 澄清项
+- 本期是否允许 production 初期仅启用 active->suspended，暂不自动 disabled。
+- candidate ignored 的默认恢复期是否固定 7 天，或允许按供应商配置。
+- gateway 首期默认采用 pull package-changes + ack 作为事件型消费闭环；若后续证明已有内部刷新入口可复用，也必须保留等价 ack 语义与可审计消费状态。
+- NewAPI/Sub2API 本期需要只读拉取，还是还需要 webhook 模式；默认只做只读拉取。
+
+8. 阶段门控结论：可进入 QA 设计审查 / 需返回 PM / 需继续补设计
+
+结论：可进入 QA 设计审查
+
+理由
+- 五个 QA 阻塞已在本基线中逐项补洞并收敛。
+- 架构已回到立交桥一致的简洁集成模式。
+- 对立交桥 / NewAPI / Sub2API 的边界已最小化并显式分类。
+- 已删除明显超范围和重基础设施设计。
+
+附带条件
+- 不代表可直接开发放行。
+- 进入开发前仍需确认 PM 澄清项中的 gateway 消费方式与 production 初期自动 disabled 策略。
+
+9. 下游执行约束摘要：
+- Engineer 禁止偏离：不得新增独立平台化部署前提、不得恢复 pricing/vector/predictions 主路径、不得绕过 gateway event ack 闭环、不得新增未定义 candidate 状态。
+- QA 必查调用链路：probe->状态写回；discovery->candidate->admission->draft；publish->gateway event->ack；module disable->closing->drain->closed；NewAPI/Sub2API 只读适配边界。
+- XL 若继续推进需补的门控：确认 gateway 实际消费方式；确认生产首期自动 disabled 策略；确认代码最终并入 supply-api 主仓而非另起独立重部署。
+
+自检清单
+- [x] 架构设计覆盖 PRD 所有 AC
+- [x] 接口定义完整（请求/响应/错误）
+- [x] 每个任务 < 5分钟，有明确文件路径
+- [x] 依赖关系无循环
+- [x] 考虑了扩展点（未来可能的变化）
+- [x] 风险评估完整，有关键风险的缓解方案
+- [x] 符合项目现有技术栈和编码规范
+- [x] 降级策略已设计（熔断/限流/兜底）
+- [x] 威胁建模已完成（输入边界/鉴权/数据流）
+- [x] 实施漂移检测点已定义（可与 QA checklist 对接）
+- [x] 已明确标记是否可进入 QA 设计审查
+- [x] 已提供 QA 编码前审查与编码后漂移检测所需交接物
+- [x] 已给出 Engineer / QA / XL 的下游执行约束摘要
+- [x] 已纳入立交桥简洁架构与立交桥/NewAPI/Sub2API 集成边界
--- a/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md
+++ b/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md
@@ -0,0 +1,118 @@
+# Supply-Intelligence 当前实现真源索引（2026-05）
+
+> 状态：当前有效
+> 目的：为 Engineer / QA / PM 提供单一阅读入口，避免继续误读历史草案。
+> 适用范围：/home/long/project/立交桥/projects/supply-intelligence/
+
+## 1. 当前结论
+
+当前规划包已收敛到“可进入 Engineer 实现”状态。
+当前总门控结论：APPROVED。
+
+但 APPROVED 的前提是：实现、测试、评审都必须以本文件列出的“当前真源”解释，不得回退到旧 PRD/HLD/INTERFACE/DEPLOYMENT 的正文口径。
+
+## 2. 当前真源文件（按优先级）
+
+### 2.1 一级真源：必须优先遵循
+1. `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
+   - 作用：当前技术基线、状态机、模块边界、集成约束、最小生产闭环定义
+   - 适用问题：实现边界、状态迁移、部署形态、首期能力范围、风险与验证要求
+
+2. `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+   - 作用：首期 package/account 消费闭环决议
+   - 适用问题：published vs applied、gateway 是否默认消费方、package change + ack、真实调用链落点
+
+### 2.2 二级真源：必须按一级真源解释
+3. `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
+   - 作用：收敛后的测试门禁文档
+   - 使用规则：
+     - 只能按一级真源解释
+     - 当前阶段门控结论以其中已更新的 APPROVED 段落为准
+     - 若正文某处仍残留旧测试假设，以一级真源覆盖
+
+4. `/home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md`
+   - 作用：任务粒度参考清单
+   - 使用规则：
+     - 仅用于任务拆分、实现排程、UI/后端任务定位
+     - 若与一级真源冲突，一律以一级真源为准
+     - 不得把其中任何历史平台化/重基础设施/深自动注册项当作默认首期门槛
+
+## 3. 历史参考文件（禁止作为当前实现真源）
+
+以下文件只能用于理解历史上下文，不能再作为 Engineer/QA 的当前实现依据：
+
+1. `/home/long/project/立交桥/projects/supply-intelligence/prd/PRD.md`
+2. `/home/long/project/立交桥/projects/supply-intelligence/tech/HLD.md`
+3. `/home/long/project/立交桥/projects/supply-intelligence/tech/INTERFACE.md`
+4. `/home/long/project/立交桥/projects/supply-intelligence/tech/DEPLOYMENT.md`
+
+原因：这些文件虽已加失效声明，但正文仍保留大量旧设计，例如：
+- pricing / prediction / 向量检索 / 仪表盘扩张
+- 独立 API/worker/重基础设施默认前提
+- gateway 管理接口热更新主路径
+- 深自动注册 / 浏览器自动化主路径
+- published 与 gateway applied 语义混淆
+
+## 4. Engineer 必须先建立的统一理解
+
+### 4.1 首期能力边界
+首期目标不是独立平台化大系统，而是“立交桥延伸项目 + 简洁集成架构 + 最小生产闭环”。
+
+这意味着：
+- 优先并入 supply-api 主仓
+- 优先复用主仓已有配置、调度、审计、内部路由能力
+- 不把 Redis / Temporal / 向量数据库 / WebSocket / MQ 作为首期硬前置
+- 不做 pricing / prediction / recommendation / SFI 仪表盘扩张
+
+### 4.2 探针判定边界
+必须按 explicit_failure / inconclusive / success 三类解释，不能回退到旧的“timeout 直接惩罚性降级”口径。
+
+### 4.3 自动补给边界
+首期不是深自动注册主路径。
+首期仅保留“受控自动补给最小边界”：
+- 白名单供应商
+- 阈值触发
+- 任务化补给或受控补给受理接口
+- pending_verify / pending_enable 等受控中间态
+- fail-closed
+
+不得默认实现：
+- 浏览器自动化注册
+- 短信验证码编排主路径
+- 无审批直接自动激活
+
+### 4.4 gateway 消费闭环边界
+首期默认采用：
+- package 发布链路：event + ack
+- account 状态链路：查询型消费
+
+必须明确：
+- published != applied
+- active package != gateway 已消费成功
+- 没有真实 poll/apply/ack 入口，不得宣称 package 发布链路已完成
+
+## 5. QA 审查必须卡的四条红线
+
+1. 若实现重新引入 published/applied 混淆，直接打回
+2. 若把深自动注册重新膨胀成首期硬门槛，直接打回
+3. 若把旧独立平台化基础设施重新作为首期依赖，直接打回
+4. 若 gateway 只有接口定义、没有真实消费方入口与 ack 回写，直接打回
+
+## 6. 推荐阅读顺序
+
+Engineer / QA / PM 开始工作前，按以下顺序阅读：
+1. `tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
+2. `tech/BASELINE_TECHLEAD_V2.md`
+3. `tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+4. `tech/TEST_DESIGN.md`
+5. `specs/功能清单.md`
+
+禁止跳过前 3 份文件直接依据旧 PRD/HLD/INTERFACE/DEPLOYMENT 开始实现。
+
+## 7. 一句话执行规则
+
+如果某个设计点在文档间出现冲突：
+- 先看 `BASELINE_TECHLEAD_V2.md`
+- 再看 `GATEWAY_CONSUMER_DECISION_2026-05.md`
+- 然后用 `TEST_DESIGN.md` 和 `功能清单.md` 做验证与任务拆解
+- 不回退到旧草案正文做判断
--- a/tech/DEPLOYMENT.md
+++ b/tech/DEPLOYMENT.md
@@ -0,0 +1,168 @@
+# Supply-Intelligence 部署设计
+
+> 状态说明（2026-05 收敛修订）：本文件保留为旧版部署草案，已不再作为当前默认部署真源。
+> 当前默认部署真源应以“立交桥延伸项目 + 简洁集成架构”为准：优先并入 supply-api 主仓，独立运行仅为轻量可选形态。
+> 以下旧部署假设已废止，不得再作为首期落地前提：
+> - 独立 API Server + 多 Worker 集群默认部署
+> - Redis / 向量数据库 / WebSocket / 独立共享层作为首期前置依赖
+> - 以独立多组件容器拓扑替代主仓集成部署
+
+> 版本：v1.0 | 状态：初稿
+
+---
+
+## 1. 部署架构
+
+### 1.1 总体架构
+
+```
+├── Load Balancer (Nginx / 云 CLB)
+    │
+    ├── Supply-Intelligence API Server x 2
+    │   │
+    │   ├── HTTP API
+    │   └── WebSocket (健康大盘实时推送)
+    │
+    ├── Supply-Intelligence Worker x 3
+    │   │
+    │   ├── Probe Worker (探针任务)
+    │   ├── Discovery Worker (扫描任务)
+    │   ├── Admission Worker (准入测试任务)
+    │   ├── Auto-Reg Worker (自动注册任务)
+    │   └── Cleanup Worker (定期清理)
+    │
+    └── 共享层
+        │
+        ├── PostgreSQL 15+ (与 supply-api 共存或独立)
+        ├── Redis (缓存 + 锁 + 扫描结果缓存)
+        └── 向量数据库 (PGVector / Milvus / Qdrant)
+```
+
+### 1.2 容器化部署
+
+```yaml
+services:
+  supply-intel-api:
+    image: supply-intelligence:latest
+    command: ["./supply-intel", "api"]
+    replicas: 2
+    ports:
+      - "8081:8080"
+
+  supply-intel-probe:
+    image: supply-intelligence:latest
+    command: ["./supply-intel", "worker", "probe"]
+    replicas: 1
+
+  supply-intel-discovery:
+    image: supply-intelligence:latest
+    command: ["./supply-intel", "worker", "discovery"]
+    replicas: 1
+
+  supply-intel-admission:
+    image: supply-intelligence:latest
+    command: ["./supply-intel", "worker", "admission"]
+    replicas: 1
+
+  supply-intel-autoreg:
+    image: supply-intelligence:latest
+    command: ["./supply-intel", "worker", "autoreg"]
+    replicas: 1
+```
+
+---
+
+## 2. 资源需求
+
+### 2.1 API Server
+
+| 资源 | 需求 | 说明 |
+|------|------|------|
+| CPU | 1 核 | |
+| 内存 | 512 MB | |
+| 存储 | 无 | |
+
+### 2.2 Worker
+
+| Worker 类型 | CPU | 内存 | 说明 |
+|------------|-----|--------|------|
+| Probe | 1 核 | 512 MB | 同时发起多个 HTTP 请求 |
+| Discovery | 1 核 | 1 GB | 可能涉及 Playwright 爬取 |
+| Admission | 2 核 | 2 GB | 测试流水线调用 LLM API，CPU 与内存需求较高 |
+| Auto-Reg | 1 核 | 512 MB | |
+
+### 2.3 数据库
+
+| 资源 | 需求 | 说明 |
+|------|------|------|
+| CPU | 2 核 | |
+| 内存 | 4 GB | |
+| 存储 | 100 GB | 探针历史 + 审计日志 + 定价数据库 |
+
+### 2.4 向量数据库
+
+| 选型 | CPU | 内存 | 存储 | 说明 |
+|------|-----|--------|------|------|
+| PGVector | 与 PostgreSQL 共存 | 共存 | 共存 | 推荐，无需额外部署 |
+| Milvus | 2 核 | 4 GB | 50 GB | 高性能、分布式 |
+| Qdrant | 1 核 | 2 GB | 30 GB | 轻量、Cloud-native |
+
+---
+
+## 3. 监控与运维钩子
+
+### 3.1 健康检查
+
+| 端点 | 路径 | 预期响应 | 失败行为 |
+|------|------|----------|---------|
+| 存活检查 | `/actuator/health/live` | HTTP 200 | 容器重启 |
+| 就绪检查 | `/actuator/health/ready` | HTTP 200 | 从负载均衡移除 |
+| 综合检查 | `/actuator/health` | HTTP 200 + JSON | 触发告警 |
+
+### 3.2 启动/关闭顺序
+
+**启动顺序**:
+1. PostgreSQL 启动完成
+2. Redis 启动完成
+3. 向量数据库启动完成
+4. Worker 启动（执行 migration）
+5. API Server 启动
+
+**关闭顺序**:
+1. 停止接收新 HTTP 请求
+2. 等待现有请求处理完成（超时 30 秒）
+3. 停止各 Worker 定时器
+4. 关闭数据库连接池
+5. 退出进程
+
+### 3.3 配置管理
+
+- 配置文件 `config.yaml` + 环境变量覆盖。
+- 供应商 API Key 仅通过环境变量传入。
+- 探针周期、扫描周期、测试用例集路径等可热更新。
+
+---
+
+## 4. 灾备设计
+
+### 4.1 数据库灾备
+
+| 策略 | 方案 | RTO | RPO |
+|------|------|-----|-----|
+| 主库故障 | 自动切换至备库 | < 5 min | < 1 min |
+| 逻辑损坏 | 从备库恢复 + 审计日志回放 | < 30 min | < 1 min |
+
+### 4.2 扫描/测试任务灾备
+
+| 场景 | 处理 |
+|------|------|
+| Discovery Worker 故障 | 下一周期自动恢复，扫描任务无状态，不影响生产 |
+| Admission Worker 故障 | 测试任务缓存在 Redis，恢复后继续执行 |
+| Probe Worker 故障 | 探针任务缓存在 Redis，恢复后继续执行 |
+| 向量数据库故障 | 知识库检索降级为文本匹配，不影响核心探针功能 |
+
+### 4.3 多中心部署
+
+- 当前阶段为单中心部署。
+- 探针任务无状态，不依赖中心化调度。
+- 未来扩展至多中心时，需要解决 PostgreSQL 分布式写入和向量数据库的同步问题。
--- a/tech/GATEWAY_CONSUMER_DECISION_2026-05.md
+++ b/tech/GATEWAY_CONSUMER_DECISION_2026-05.md
@@ -0,0 +1,169 @@
+# Supply-Intelligence 首期消费闭环决议（2026-05）
+
+> 状态：当前有效决议
+> 作用：消除“只有接口定义，没有首期真实消费方与调用落点”的设计歧义。
+> 适用范围：/home/long/project/立交桥/projects/supply-intelligence/ 下当前收敛规划包。
+> 真源索引：本决议受 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md` 纳管；若与历史草案冲突，以真源索引定义的优先级解释。
+
+## 1. 结论
+
+首期默认消费闭环采用：
+- package 发布链路：gateway 作为首期默认消费方，使用 pull `package-changes` + `ack` 机制完成闭环
+- account 状态链路：立交桥 / supply-api 内部主链路直接读取 `routing-state` 或等价 snapshot，不通过 gateway event ack 闭环
+
+这意味着必须明确区分两类链路：
+1. 账号可路由状态链路：查询型消费
+2. package 发布生效链路：事件型消费
+
+不得混用以下错误口径：
+- `published = 已进入 gateway 路由`
+- `active package = 下游已消费成功`
+
+正确口径：
+- `published` 仅表示 supply-intelligence 侧已完成运营确认与 package 激活
+- 只有 gateway 对 package event 完成 `ack(result=applied)` 后，才能宣称“已被 gateway 消费生效”
+
+## 2. 首期默认路径
+
+### 2.1 账号状态链路
+
+生产主链路：
+1. probe 执行
+2. evaluator 分类为 success / explicit_failure / inconclusive
+3. state machine 生成状态迁移
+4. 写回 supply account 健康状态与审计
+5. 立交桥内部路由决策读取 `GET /internal/supply-intelligence/accounts/{account_id}/routing-state`
+
+说明：
+- 这是查询型读取，不需要 event ack。
+- 若调用方读取失败，不回滚 supply-intelligence 已落库状态，只记录消费侧问题。
+
+### 2.2 package 发布闭环
+
+生产主链路：
+1. 运营确认发布 candidate
+2. package draft -> active
+3. candidate `test_passed -> published`
+4. 写入 `gateway_package_events`
+5. gateway 拉取 `GET /internal/supply-intelligence/gateway/package-changes?cursor=...`
+6. gateway 应用变更到自身路由/缓存
+7. gateway 调用 `POST /internal/supply-intelligence/gateway/package-changes/{event_id}/ack`
+8. `gateway_sync_status` 变为 `applied` 或 `failed`
+
+说明：
+- 这是事件型闭环。
+- `pending` 表示 supply-intelligence 已发布，但 gateway 尚未确认消费。
+- `failed` 表示 gateway 已尝试消费但未成功，需要运营或工程介入。
+
+## 3. 为什么不用首期强耦合同步 RPC
+
+首期明确不采用：
+- “发布时同步调用 gateway 管理接口，成功后才算发布成功”
+
+原因：
+1. 这会把 supply-intelligence 与 gateway 强耦合在单次事务中
+2. 会把下游暂时不可用放大成上游发布不可用
+3. 不符合当前“立交桥延伸项目、简洁架构、最小生产闭环”的收敛目标
+
+因此首期选择：
+- 上游发布成功与下游消费成功解耦
+- 用 event + ack 明确消费状态
+
+## 4. 首期真实代码落点（实现约束）
+
+以下是首期必须存在的真实调用落点；只有接口定义不算完成。
+
+### 4.1 supply-intelligence / supply-api 侧
+- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go`
+  - `PublishCandidate(...)`
+  - `AppendGatewayPackageEvent(...)`
+- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go`
+  - `GetAccountRoutingState(...)`
+  - `ListGatewayPackageChanges(...)`
+  - `AckGatewayPackageChange(...)`
+- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go`
+  - `InsertGatewayPackageEventTx(...)`
+  - `AckGatewayPackageEventTx(...)`
+
+### 4.2 gateway 侧（首期必须由消费方实现的真实入口）
+- 必须存在一个实际消费入口，完成：
+  1. 周期拉取 package changes
+  2. 应用变更
+  3. 回写 ack
+- 若 gateway 已有内部刷新链路，可复用，但必须补齐 ack 回写
+- 若 gateway 无现成入口，则新增最小 poller；禁止为了这件事引入 MQ/Kafka/新总线
+
+## 5. QA 必查真实调用链路
+
+QA 编码后必须对以下每条链路至少验证“定义 → 装配 → 调用 → 入口”四层：
+
+### 链路 A：账号状态查询型消费
+- 定义：`GetAccountRoutingState`
+- 装配：internal route mounted
+- 调用：立交桥 / supply-api 实际路由决策点调用该接口或等价函数
+- 入口：真实请求/真实调用路径可达
+
+### 链路 B：package 事件发布
+- 定义：`AppendGatewayPackageEvent`
+- 装配：publish 流程内注入 repository
+- 调用：`PublishCandidate` 成功路径真实调用写事件
+- 入口：运营确认发布入口可真实触达该调用链
+
+### 链路 C：gateway 拉取消费
+- 定义：`ListGatewayPackageChanges`
+- 装配：internal route mounted
+- 调用：gateway 真实 poller / 既有刷新链调用
+- 入口：消费方真实任务/刷新入口存在，不是只留 TODO
+
+### 链路 D：gateway ack 回写
+- 定义：`AckGatewayPackageChange`
+- 装配：ack route mounted
+- 调用：gateway 应用成功/失败后真实回写
+- 入口：event 状态确实从 `pending -> applied|failed`
+
+## 6. published / applied 语义约束
+
+状态含义必须统一：
+- candidate `published`：上游已完成运营确认
+- package `active`：上游已允许被消费
+- gateway sync `pending`：下游尚未确认
+- gateway sync `applied`：下游已确认消费并应用
+- gateway sync `failed`：下游消费尝试失败
+
+禁止：
+- UI 文案把 `published` 写成“已进路由”
+- 测试把 `package active` 当成“下游已完成同步”
+- QA 把 event 表存在当成“消费闭环成立”
+
+## 7. 与 NewAPI / Sub2API 的边界
+
+首期不要求 NewAPI / Sub2API 实现 event ack 闭环。
+它们的首期边界为：
+- 只读拉取账号状态
+- 只读拉取已允许暴露的模型/结果
+
+即：
+- gateway 是首期必须闭环的事件型消费方
+- NewAPI / Sub2API 是首期只读适配消费方
+
+## 8. 门控要求
+
+在下一轮 QA 设计审查或编码后审查中，若以下任一项缺失，则不得给 APPROVED：
+1. 没有明确的首期默认消费方
+2. 没有明确区分查询型链路与事件型链路
+3. 没有明确 `published != applied`
+4. 没有真实代码落点要求
+5. 没有 ack 回写要求
+
+## 9. 对旧文档的覆盖关系
+
+本决议用于覆盖旧文档中以下错误或过时口径：
+- “调用 gateway 管理接口热更新即完成闭环”
+- “上架成功即下游已生效”
+- “gateway 会消费”但没有实际消费者与 ack 机制
+
+如与以下文件冲突，以本决议为准：
+- /home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md
+- /home/long/project/立交桥/projects/supply-intelligence/tech/INTERFACE.md
+- /home/long/project/立交桥/projects/supply-intelligence/tech/HLD.md
+- /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md（若后续未同步更新相应段落，应以本决议补充解释）
--- a/tech/HLD.md
+++ b/tech/HLD.md
--- a/tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md
+++ b/tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md
@@ -0,0 +1,251 @@
+# Supply-Intelligence 实现任务板 V1（2026-05）
+
+> 状态：当前有效
+> 目的：将当前真源收敛为可直接派工的 Engineer / QA 执行板。
+> 使用前提：必须先阅读 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`。
+> 当前总门控：APPROVED（允许进入实现）
+
+## 0. 使用规则
+
+1. 本任务板不是新的真源，只是执行板。
+2. 若任务板与以下文件冲突，以以下文件为准：
+   - `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
+   - `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+   - `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
+3. 禁止 Engineer 回退到旧 PRD/HLD/INTERFACE/DEPLOYMENT 取实现口径。
+4. 每个阶段完成后，必须由 QA 按“定义 → 装配 → 调用 → 入口”四层链路做复核。
+
+## 1. 当前最短闭环路径
+
+目标：先做出首个最小生产闭环，而不是并行铺开所有模块。
+
+推荐顺序：
+1. Phase A：探针与账号状态闭环
+2. Phase B：发现与 candidate 闭环
+3. Phase C：准入测试与 draft 生成闭环
+4. Phase D：发布与 gateway package event + ack 闭环
+5. Phase E：受控自动补给最小边界
+6. Phase F：工作台、配置、权限与完善性补齐
+
+## 2. 阶段任务板
+
+### Phase A：探针与账号状态闭环
+
+目标：先让 supply-intelligence 能真实地产生可消费的账号状态。
+
+#### A-1 数据与领域骨架
+- Owner：Engineer
+- 交付物：账号状态、探针日志、审计写入相关 domain/model/repository 基础结构
+- 完成标准：
+  - 存在 `supply_intelligence_` 前缀表迁移
+  - 探针结果、状态迁移、审计写入模型可落库
+- QA 验证：检查 schema、repo、service 调用链是否闭合
+
+#### A-2 探针执行与统一判定
+- Owner：Engineer
+- 交付物：probe runner + evaluator
+- 完成标准：
+  - 200 => success
+  - 401/403 => explicit_failure
+  - 429/5xx/timeout/格式突变 => inconclusive
+- QA 验证：检查 evaluator 定义、装配、调用与调度入口
+
+#### A-3 状态机与账号状态快照接口
+- Owner：Engineer
+- 交付物：状态迁移逻辑 + `routing-state` 查询接口
+- 完成标准：
+  - active -> suspended
+  - suspended -> disabled
+  - inconclusive 不触发惩罚性迁移
+  - 存在真实内部查询入口
+- QA 验证：必须验证 `GET /internal/supply-intelligence/accounts/{account_id}/routing-state` 或等价入口
+
+#### A-4 Phase A QA Gate
+- Owner：QA
+- 放行条件：
+  - 账号状态链路完成“定义 → 装配 → 调用 → 入口”四层验证
+  - 审计写入与状态写回可追踪
+  - 未引入 Redis / Temporal / WebSocket 作为首期硬依赖
+
+### Phase B：发现与 candidate 闭环
+
+目标：能够从已接入供应商拉模型，并产生 candidate。
+
+#### B-1 供应商适配器与模型拉取
+- Owner：Engineer
+- 交付物：SupplierAdapter、registry、GetModels 拉取链路
+- 完成标准：
+  - 至少支持首批目标供应商
+  - 具备健康探测与模型列表读取
+- QA 验证：检查 registry 注册、装配、实际调用点
+
+#### B-2 candidate 生成与去重
+- Owner：Engineer
+- 交付物：discovery service + candidate repository
+- 完成标准：
+  - 能与 `supply_packages` 去重
+  - 新模型生成 discovered candidate
+  - 下架只生成告警，不自动改 package
+- QA 验证：检查 candidate 创建与下架告警调用链
+
+#### B-3 Phase B QA Gate
+- Owner：QA
+- 放行条件：
+  - 至少一条真实发现链路打通
+  - candidate 状态初始落点正确
+  - 未扩张到 pricing / prediction / 向量检索
+
+### Phase C：准入测试与 draft 生成闭环
+
+目标：让 discovered candidate 可变成 test_passed/test_failed，并生成 draft。
+
+#### C-1 admission runner
+- Owner：Engineer
+- 交付物：标准测试执行器与结果记录
+- 完成标准：
+  - discovered / retry_pending 可消费
+  - 失败与超时原因可追踪
+- QA 验证：检查 admission 执行入口和结果写回
+
+#### C-2 draft package 生成
+- Owner：Engineer
+- 交付物：test_passed -> draft package 生成逻辑
+- 完成标准：
+  - 草稿字段完整
+  - candidate 状态流转闭环
+- QA 验证：检查 candidate -> draft 的真实调用链
+
+#### C-3 Phase C QA Gate
+- Owner：QA
+- 放行条件：
+  - 至少一条 candidate 完成 test_passed -> draft
+  - 至少一条 candidate 完成 test_failed -> failure_reason
+
+### Phase D：发布与 gateway package event + ack 闭环
+
+目标：打通首个 package 发布最小生产闭环。
+
+#### D-1 发布服务
+- Owner：Engineer
+- 交付物：运营确认发布逻辑
+- 完成标准：
+  - draft -> active
+  - candidate test_passed -> published
+- QA 验证：published 语义不得等于 applied
+
+#### D-2 gateway package events
+- Owner：Engineer
+- 交付物：`gateway_package_events` 写入、拉取、ack 回写接口
+- 完成标准：
+  - 存在 package-changes 列表接口
+  - 存在 ack 接口
+  - ack 后状态可区分 pending/applied/failed
+- QA 验证：检查 definition / assembly / call / entry 四层
+
+#### D-3 gateway 消费方最小入口
+- Owner：Engineer / 对接方
+- 交付物：真实 poll/apply/ack 入口
+- 完成标准：
+  - 不是只定义接口
+  - 至少有一个真实消费任务/入口
+- QA 验证：没有真实入口则本阶段不通过
+
+#### D-4 Phase D QA Gate
+- Owner：QA
+- 放行条件：
+  - published != applied 证据充分
+  - package event + ack 闭环真实存在
+  - 无“同步调用 gateway 管理接口才算发布成功”的回退实现
+
+### Phase E：受控自动补给最小边界
+
+目标：补齐首期最小自动补给能力，但不膨胀为深自动注册。
+
+#### E-1 自动补给配置与白名单约束
+- Owner：Engineer
+- 交付物：auto-supply 配置、阈值、白名单、审批边界
+- 完成标准：
+  - 非白名单供应商不自动补给
+  - 配置按主仓既有方式存储
+- QA 验证：检查 guardrail 是否真实生效
+
+#### E-2 自动补给任务流
+- Owner：Engineer
+- 交付物：补给任务创建 / 受理 / 待验证回写
+- 完成标准：
+  - 低于阈值触发任务
+  - 成功后进入 pending_verify / pending_enable
+  - 不允许直接 active
+- QA 验证：检查自动启用是否被阻断
+
+#### E-3 fail-closed
+- Owner：Engineer
+- 交付物：通知网关/补给受理/KMS 异常阻断逻辑
+- 完成标准：
+  - 失败不伪成功
+  - 明文不落日志/DB
+- QA 验证：检查失败证据和审计闭环
+
+#### E-4 Phase E QA Gate
+- Owner：QA
+- 放行条件：
+  - 未引入浏览器自动化注册主路径
+  - 未引入验证码编排主路径
+  - 未允许无审批直接自动激活
+
+### Phase F：工作台、配置、权限与完善性补齐
+
+目标：补足可操作性与交付完整性，但不得改变前述主链路口径。
+
+#### F-1 工作台最小读写能力
+- Owner：Engineer
+- 交付物：账号页、模型页、待处理页、确认上架、忽略、手动探针
+- QA 验证：检查关键操作真实连到主链路，不是空按钮
+
+#### F-2 配置与审计
+- Owner：Engineer
+- 交付物：配置读取/修改、审计日志
+- QA 验证：检查配置生效路径与审计记录
+
+#### F-3 权限与内部/外部路由边界
+- Owner：Engineer
+- 交付物：认证、角色权限、内部接口与外部接口分离
+- QA 验证：检查 `/internal/supply-intelligence/` 与外部暴露面的边界
+
+#### F-4 Phase F QA Gate
+- Owner：QA
+- 放行条件：
+  - 权限边界清楚
+  - OpenAPI 与真实路由一致
+  - 不新增超范围平台化能力
+
+## 3. 明确禁止的提前扩张
+
+以下事项在前述主链路未闭环前，禁止插队进入主开发路径：
+- pricing / prediction / recommendation
+- 向量数据库 / 向量检索
+- SFI 仪表盘
+- WebSocket 实时推送
+- 独立 API + worker 集群重部署
+- 浏览器自动化注册主路径
+- 验证码编排主路径
+- 以 Redis / Temporal 为首期硬前置
+
+## 4. QA 统一复核问题单
+
+每个阶段 QA 都必须回答：
+1. 定义是否存在？
+2. 装配是否存在？
+3. 调用点是否真实存在？
+4. 外部/内部入口是否真实挂载？
+5. 是否出现实施漂移？
+6. 是否回退到了历史草案口径？
+
+## 5. 工程启动建议阅读顺序
+
+1. `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
+2. `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
+3. `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+4. `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
+5. `/home/long/project/立交桥/projects/supply-intelligence/tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md`
+6. `/home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md`
--- a/tech/INTERFACE.md
+++ b/tech/INTERFACE.md
@@ -0,0 +1,275 @@
+# Supply-Intelligence 核心接口设计
+
+> 状态说明（2026-05 收敛修订）：本文件保留为旧版接口草案，已不再作为当前实现真源。
+> 当前接口真源以 /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md 为准。
+> 以下旧接口定义已废止，不得继续作为实现入口：
+> - pricing comparison / recommendations / predictions 相关接口
+> - 与新 candidate 状态机不一致的旧状态枚举
+> - 未区分 published 与 gateway applied 的旧消费口径
+
+> 版本：v1.0 | 状态：初稿
+
+---
+
+## 1. 内部模块间接口
+
+### 1.1 ProbeService
+
+```go
+type ProbeService interface {
+    // 执行单次探针
+    Probe(ctx context.Context, accountID string) (*ProbeResult, error)
+    // 批量探针（按供应商或全量）
+    ProbeBatch(ctx context.Context, filter ProbeFilter) (*BatchProbeResult, error)
+    // 获取探针结果历史
+    GetProbeHistory(ctx context.Context, accountID string, limit int) ([]ProbeResult, error)
+    // 手动触发探针（运营干预）
+    TriggerManualProbe(ctx context.Context, accountID string, actorID string) (*ProbeResult, error)
+}
+
+type ProbeResult struct {
+    AccountID     string
+    Status        string // active suspended disabled
+    RiskScore     int    // 0-100
+    RiskReason    string
+    LatencyMs     int
+    ResponseCode  int
+    CheckedAt     time.Time
+    NextCheckAt   time.Time
+}
+
+type ProbeFilter struct {
+    Platform      *string
+    Status        *string
+    RiskScoreMin  *int
+    RiskScoreMax  *int
+}
+```
+
+### 1.2 DiscoveryService
+
+```go
+type DiscoveryService interface {
+    // 执行单次全网扫描
+    Scan(ctx context.Context) (*ScanResult, error)
+    // 获取最近扫描结果
+    GetLastScan(ctx context.Context) (*ScanResult, error)
+    // 获取候选模型列表
+    ListCandidates(ctx context.Context, filter CandidateFilter) ([]ModelCandidate, error)
+    // 手动触发扫描
+    TriggerManualScan(ctx context.Context, actorID string) (*ScanResult, error)
+    // 忽略候选模型
+    IgnoreCandidate(ctx context.Context, candidateID string, reason string, actorID string) error
+}
+
+type ScanResult struct {
+    ScannedAt     time.Time
+    Platforms     []string
+    NewModels     int
+    RemovedModels int
+    Errors        []ScanError
+}
+
+type ModelCandidate struct {
+    ID            string
+    Platform      string
+    ModelID       string
+    Status        string // discovered queued testing test_passed test_failed ignored
+    DiscoveredAt  time.Time
+    TestedAt      *time.Time
+    TestResult    *TestResult
+}
+```
+
+### 1.3 AdmissionService
+
+```go
+type AdmissionService interface {
+    // 执行准入测试
+    RunTest(ctx context.Context, candidateID string) (*TestResult, error)
+    // 获取测试结果
+    GetTestResult(ctx context.Context, candidateID string) (*TestResult, error)
+    // 手动确认上架（运营干预）
+    Publish(ctx context.Context, candidateID string, actorID string) error
+    // 强制上架（测试失败但运营确认）
+    ForcePublish(ctx context.Context, candidateID string, reason string, actorID string) error
+}
+
+type TestResult struct {
+    CandidateID   string
+    Status        string // passed failed
+    Dimensions    []TestDimension
+    FailedReason  *string
+    ExecutedAt    time.Time
+    DurationMs    int
+}
+
+type TestDimension struct {
+    Name      string
+    Passed    bool
+    Detail    string
+}
+```
+
+### 1.4 AccountService
+
+```go
+type AccountService interface {
+    // 创建账号（手动或自动）
+    CreateAccount(ctx context.Context, req CreateAccountRequest) (*SupplyAccount, error)
+    // 获取账号信息
+    GetAccount(ctx context.Context, accountID string) (*SupplyAccount, error)
+    // 更新账号状态
+    UpdateStatus(ctx context.Context, accountID string, status string, reason string) error
+    // 轮换密钥
+    RotateKey(ctx context.Context, accountID string, actorID string) error
+    // 列表账号
+    ListAccounts(ctx context.Context, filter AccountFilter) ([]SupplyAccount, error)
+}
+
+type SupplyAccount struct {
+    ID          string
+    Platform    string
+    ProxyID     string
+    Status      string
+    RiskScore   int
+    APIKeyHint  string // 密钥前 4 后 4
+    CreatedAt   time.Time
+    UpdatedAt   time.Time
+}
+```
+
+### 1.5 HealthBoardService
+
+```go
+type HealthBoardService interface {
+    // 获取供应商健康大盘
+    GetBoard(ctx context.Context, scope BoardScope) (*HealthBoard, error)
+    // 获取模型比价报表
+    GetPricingComparison(ctx context.Context, modelID string) ([]PricingComparison, error)
+    // 获取供应链覆盖率
+    GetCoverage(ctx context.Context) (*CoverageReport, error)
+    // 获取预测分析
+    GetPredictions(ctx context.Context, minConfidence float64) ([]Prediction, error)
+}
+
+type HealthBoard struct {
+    Accounts      []AccountHealth
+    Candidates    []CandidateSummary
+    Coverage      float64
+    FreshnessIndex float64
+}
+```
+
+---
+
+## 2. 外部系统集成接口
+
+### 2.1 与 Bridge Gateway 集成
+
+| 方法 | 路径 | 请求 | 响应 | 说明 |
+|------|------|------|------|------|
+| 查询账号状态 | `GET /internal/supply-intelligence/accounts/{id}/health` | - | `ProbeResult` | Gateway 路由决策时查询 |
+| 查询模型定价 | `GET /internal/supply-intelligence/pricing/{model_id}` | - | `PricingInfo` | 动态定价参考 |
+| 获取推荐供应商 | `GET /internal/supply-intelligence/recommendations` | `?model={model_id}&strategy=cost` | `[]Recommendation` | 智能路由推荐 |
+
+### 2.2 与 supply-api 集成
+
+| 方法 | 路径 | 请求 | 响应 | 说明 |
+|------|------|------|------|------|
+| 读取账号列表 | `GET /internal/supply/accounts` | - | `[]SupplyAccount` | 探针器获取待检测账号 |
+| 更新账号状态 | `POST /internal/supply/accounts/{id}/status` | `{"status":"suspended","reason":""}` | `{"success":true}` | 探针结果写回 |
+| 读取模型列表 | `GET /internal/supply/packages` | - | `[]SupplyPackage` | 扫描比对基准 |
+| 创建模型 | `POST /internal/supply/packages` | `SupplyPackage` | `{"id":""}` | 准入测试通过后上架 |
+| 获取审计日志格式 | `GET /internal/supply/audit/schema` | - | `{"schema":{}}` | 审计事件格式一致 |
+
+---
+
+## 3. API 接口规范
+
+### 3.1 REST API 基础
+
+- **基础路径**: `/api/v1/supply-intelligence/`
+- **内部路径** (集成模式): `/internal/supply-intelligence/`
+- **内容类型**: `application/json`
+- **错误响应格式**:
+
+```json
+{
+  "error": {
+    "code": "SI_PRB_4001",
+    "message": "供应商账号不存在",
+    "details": {}
+  }
+}
+```
+
+### 3.2 核心端点
+
+#### 探针管理
+
+| 方法 | 路径 | 描述 |
+|------|------|------|
+| GET | `/api/v1/supply-intelligence/probes` | 列表探针结果 |
+| POST | `/api/v1/supply-intelligence/probes/{account_id}` | 手动触发探针 |
+| GET | `/api/v1/supply-intelligence/probes/{account_id}/history` | 探针历史 |
+
+#### 扫描与发现
+
+| 方法 | 路径 | 描述 |
+|------|------|------|
+| POST | `/api/v1/supply-intelligence/discovery/scan` | 手动触发全网扫描 |
+| GET | `/api/v1/supply-intelligence/discovery/candidates` | 列表候选模型 |
+| GET | `/api/v1/supply-intelligence/discovery/candidates/{id}` | 获取候选模型详情 |
+| POST | `/api/v1/supply-intelligence/discovery/candidates/{id}/ignore` | 忽略候选模型 |
+
+#### 准入测试
+
+| 方法 | 路径 | 描述 |
+|------|------|------|
+| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/test` | 手动执行准入测试 |
+| GET | `/api/v1/supply-intelligence/admission/{candidate_id}/result` | 获取测试结果 |
+| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/publish` | 确认上架 |
+| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/force-publish` | 强制上架 |
+
+#### 账号管理
+
+| 方法 | 路径 | 描述 |
+|------|------|------|
+| GET | `/api/v1/supply-intelligence/accounts` | 列表账号 |
+| POST | `/api/v1/supply-intelligence/accounts` | 创建账号 |
+| GET | `/api/v1/supply-intelligence/accounts/{id}` | 获取账号 |
+| POST | `/api/v1/supply-intelligence/accounts/{id}/rotate-key` | 轮换密钥 |
+| POST | `/api/v1/supply-intelligence/accounts/{id}/status` | 更新状态 |
+
+#### 健康大盘
+
+| 方法 | 路径 | 描述 |
+|------|------|------|
+| GET | `/api/v1/supply-intelligence/health-board` | 获取健康大盘 |
+| GET | `/api/v1/supply-intelligence/pricing/{model_id}/comparison` | 模型比价 |
+| GET | `/api/v1/supply-intelligence/coverage` | 供应链覆盖率 |
+| GET | `/api/v1/supply-intelligence/predictions` | 预测分析 |
+
+### 3.3 错误码定义
+
+| 错误码 | HTTP 状态 | 说明 |
+|---------|-----------|------|
+| `SI_PRB_4001` | 404 | 供应商账号不存在 |
+| `SI_PRB_4002` | 429 | 探针频率过高，请等待 |
+| `SI_DIS_4001` | 404 | 候选模型不存在 |
+| `SI_DIS_4002` | 409 | 候选模型状态不允许忽略 |
+| `SI_ADM_4001` | 404 | 准入测试任务不存在 |
+| `SI_ADM_4002` | 409 | 准入测试正在执行中 |
+| `SI_ADM_4003` | 400 | 测试未通过，无法上架 |
+| `SI_ACC_4001` | 404 | 账号不存在 |
+| `SI_ACC_4002` | 409 | 账号状态不允许此操作 |
+| `SI_ACC_4003` | 403 | 无权执行此操作 |
+| `SI_BRD_4001` | 400 | 查询参数无效 |
+
+### 3.4 WebSocket 接口
+
+**路径**: `/ws/v1/supply-intelligence/board`
+
+- 运营工作台订阅后，实时推送探针结果、候选模型变更、状态变更待办。
+- 心跳间隔 30 秒。
--- a/tech/TEST_DESIGN.md
+++ b/tech/TEST_DESIGN.md
@@ -0,0 +1,355 @@
+# Supply Intelligence 测试设计方案
+
+> 状态说明（2026-05 收敛修订）：本文件已转为“收敛后测试门禁文档”，必须按新基线解释。
+> 若与旧 PRD/HLD/INTERFACE 的测试口径冲突，以 `tech/BASELINE_TECHLEAD_V2.md` 与最新 PM 基线为准。
+> 以下旧测试口径不得继续作为放行依据：
+> - 以独立重部署、向量数据库、WebSocket、预测/比价能力为默认测试前提
+> - 将自动注册深链路视为本期不可降期的默认主路径
+> - 将 published 等同于 gateway 已消费生效
+
+> 版本：v1.0
+> 日期：2026-04-27
+> 状态：初稿
+> 覆盖：AC-01 ~ AC-12、异常/边缘流程 FP-01 ~ FP-10、场景 S1~S4
+
+---
+
+## 1. 测试策略
+
+### 1.1 测试分层模型
+
+```
+┌─────────────────────────────────────────────────┐
+│                   E2E Tests (黑盒)               │
+│  场景：从探针调度到状态变更、从发现到上架全链路   │
+│  工具：Go test + httptest + 自制 E2E runner     │
+└─────────────────────────────────────────────────┘
+                        ▲
+┌─────────────────────────────────────────────────┐
+│             Integration Tests (灰盒)             │
+│  场景：Service 间协作、异步任务队列、外部 API Mock│
+│  工具：Go test + testify + sqlmock + gock       │
+│  覆盖率门槛：service ≥ 80%, handler ≥ 80%       │
+└─────────────────────────────────────────────────┘
+                        ▲
+┌─────────────────────────────────────────────────┐
+│                Unit Tests (白盒)                 │
+│  场景：状态机逻辑、探针评估、风险评分计算          │
+│  工具：Go test + testify + gomock              │
+│  覆盖率门槛：domain ≥ 70%                       │
+└─────────────────────────────────────────────────┘
+```
+
+### 1.2 测试通过标准
+
+| 维度 | 标准 |
+|------|------|
+| 覆盖率 | domain ≥ 70%, service/handler ≥ 80% |
+| 模块 A（探针） | AC-01 ~ AC-03 全部通过 |
+| 模块 B（发现） | AC-04 ~ AC-05 全部通过 |
+| 模块 C（准入测试） | AC-06 ~ AC-07 全部通过 |
+| 模块 D（受控自动补给） | AC-08 ~ AC-09 全部通过（按首期最小边界解释） |
+| 模块 E（工作台） | AC-10 ~ AC-12 全部通过 |
+| 异常/边缘流程 | FP-01 ~ FP-10 全部有验证测试 |
+| 误报率 | 7 天连续运行 false positive ≤ 1% |
+
+### 1.3 外部依赖 Mock
+
+| 依赖 | Mock 方案 | 工具 |
+|------|---------|------|
+| **供应商 API（探针目标）** | Mock server 返回 200/401/403/429/500 | gock |
+| **供应商模型列表 API** | Mock 返回 JSON 模型列表 | gock |
+| **供应商补给接口 / 人工补录入口** | Mock 返回受理成功/400/500 | gock |
+| **通知网关（飞书/邮件）** | Mock server 接收通知或确认消息 | httptest |
+| **KMS 服务** | Mock 加密/解密逻辑 | 接口层 Mock |
+| **Job Scheduler / 主仓调度器** | 使用主仓调度抽象或本地调度测试桩 | go test + test double |
+| **supply-api 数据库** | sqlmock 拦截读写 | go-sqlmock |
+
+---
+
+## 2. 模块 A 测试用例（供应商品质探针）
+
+### AC-01 探针覆盖度
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TA-01-01 | 15 分钟内探针覆盖率 ≥99% | Functional | Given 100 条 active/suspended 账号 When 15min 后统计 Then ≥99 条被探针 |
+| TA-01-02 | suspended 账号同等探针 | Functional | Given suspended 账号 When 探针执行 Then 同样被覆盖 |
+| TA-01-03 | 暂停探针账号不被覆盖 | Edge | Given 账号设置 pause_probe=true When 探针执行 Then 该账号被跳过 |
+
+### AC-02 状态变更正确性
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TA-02-01 | active → suspended（1次401） | Happy Path | Given active 账号 When 连续 1 次返回 401 Then 60s 内状态变为 suspended |
+| TA-02-02 | suspended → disabled（连续3次401） | Happy Path | Given suspended 账号 When 连续 3 次返回 401 Then 60s 内状态变为 disabled |
+| TA-02-03 | 429 单次不改变状态 | Edge | Given active 账号 When 返回 429 一次 Then 15min 内状态保持 active |
+| TA-02-04 | 指数退避重试逻辑 | Functional | Given 返回 429 When 探针执行 Then 按 1→2→4min 退避重试 |
+| TA-02-05 | 状态机不允许 active→disabled 直变 | Edge | Given active 账号 When 连续 3 次失败 Then 不会直接变为 disabled（必须先 suspended） |
+| TA-02-06 | 手动暂停账号状态不自动变更 | Edge | Given 账号 pause_probe=true When 供应商返回异常 Then 状态不变 |
+
+### AC-03 误报率
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TA-03-01 | 7 天误报率 ≤1% | Long Run | Given 100 条正常账号 When 连续运行 7 天 Then 误变更次数 ≤7 |
+| TA-03-02 | 探针与手动操作并发 | Concurrency | Given 手动修改状态的同时 When 探针执行 Then 乐观锁冲突处理正确 |
+
+---
+
+## 3. 模块 B 测试用例（全网模型发现）
+
+### AC-04 新模型发现延迟
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TB-04-01 | 新模型在 2 个扫描周期内被发现 | Functional | Given 供应商新增 model_id When 扫描执行 Then 2h 内 model_candidates 出现 discovered 记录 |
+| TB-04-02 | 模型比对去重正确 | Functional | Given 已存在的 active model When 全网扫描 Then 不会重复创建 candidate |
+| TB-04-03 | 模型下架告警触发 | Functional | Given active package 对应的 model_id 从供应商列表消失 When 2 个扫描周期后 Then 运营工作台出现下架告警 |
+
+### AC-05 已下架模型告警
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TB-05-01 | 下架模型不自动变更 package 状态 | Edge | Given model_id 消失 When 扫描执行 Then package 状态保持 active，生成告警 |
+| TB-05-02 | 分页获取完整模型列表 | Functional | Given 供应商返回分页 When 扫描 Then 正确处理所有分页数据 |
+
+---
+
+## 4. 模块 C 测试用例（模型准入测试）
+
+### AC-06 准入测试通过
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TC-06-01 | discovered → test_passed + 草稿生成 | Happy Path | Given discovered candidate When 测试全部通过 Then 状态 test_passed，supply_package 草稿生成 |
+| TC-06-02 | 草稿字段完整性 | Functional | Given 草稿生成 When 检查字段 Then platform/model/price/suggested 正确 |
+| TC-06-03 | 准入测试 30 分钟内完成 | Performance | Given discovered candidate When 测试执行 Then ≤30min 完成 |
+
+### AC-07 准入测试失败
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TC-07-01 | discovered → test_failed | Negative | Given discovered candidate When 测试返回 500 Then 30min 内状态 test_failed，failure_reason 非空 |
+| TC-07-02 | 超时视为失败 | Edge | Given discovered candidate When 测试用例 60s 无响应 Then 整体标记为 test_failed，reason = timeout |
+| TC-07-03 | 测试账号 suspended 时任务失败 | Edge | Given 测试账号变为 suspended When 准入测试执行 Then 任务标记 test_failed，reason = test_account_unavailable |
+| TC-07-04 | 被 ignore 的候选模型 7 天内不重扫 | Edge | Given 运营标记 ignore When 7 天内扫描 Then 该 candidate 不出现 |
+
+---
+
+## 5. 模块 D 测试用例（受控自动补给）
+
+### AC-08 受控自动补给触发与落单
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TD-08-01 | 可用账号数 < 阈值时触发补给任务 | Functional | Given 白名单供应商的可用账号数 < 阈值 When 系统检测 Then 10min 内生成补给任务或补给申请 |
+| TD-08-02 | 非白名单供应商不自动补给 | Guardrail | Given 非白名单供应商账号不足 When 系统检测 Then 不自动触发补给，仅记录告警或人工待办 |
+| TD-08-03 | 补给结果进入待验证/待启用 | Happy Path | Given 补给流程受理成功 When 补给完成 Then 新账号或候选资源进入 pending_verify / pending_enable 等受控状态，而非直接 active |
+| TD-08-04 | 补给结果关联 task | Functional | Given 补给任务完成 When 检查任务记录 Then auto_supply_tasks 或等价任务状态为 completed/pending_verify |
+
+### AC-09 受控自动补给 fail-closed
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TD-09-01 | 通知/补给网关不可用时 fail-closed | Resilience | Given 通知网关或补给受理接口返回 503 When 补给执行 Then 60s 内任务 failed，审计日志完整，无虚假成功 |
+| TD-09-02 | 补给接口返回 400 | Edge | Given 补给请求参数非法或资源已存在 When 补给执行 Then 任务 failed，不重复盲目重试 |
+| TD-09-03 | KMS 不可用时 fail-closed | Resilience | Given KMS 超时 When 凭证加密步骤执行 Then 60s 内任务 failed，明文凭证不出现在日志/DB |
+| TD-09-04 | 无审批/越权配置时阻断自动启用 | Guardrail | Given 缺少审批或超出受控边界 When 补给结果回写 Then 保持 pending_verify / pending_enable，不允许直接进入 active |
+
+---
+
+## 6. 模块 E 测试用例（运营工作台）
+
+### AC-10 审计日志完整性
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TE-10-01 | 状态变更 5s 内写入审计 | Performance | Given 状态变更 When 执行完成 Then ≤5s 审计记录存在 |
+| TE-10-02 | 审计字段完整性 | Functional | Given 审计记录 When 检查 Then 包含 object_type/id/action/before_state/after_state/request_id |
+| TE-10-03 | 探针执行记录审计 | Functional | Given 探针执行 When 完成 Then probe_execution_logs 有记录 |
+
+### AC-11 运营工作台干预
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TE-11-01 | 确认上架 draft → active | Happy Path | Given draft package When 点击确认 Then 3s 内变为 active |
+| TE-11-02 | 忽略模型 7 天内不出现 | Edge | Given discovered candidate When 点击忽略 Then 7 天内 candidate 不出现在待处理列表 |
+| TE-11-03 | 手动触发单账号探针 | Functional | Given 已存在的账号 When 运营手动触发单账号探针 Then 立即执行探针，结果可见 |
+| TE-11-04 | 并发操作冲突处理 | Concurrency | Given 同一 candidate 待处理 When 同时点击确认和忽略 Then 返回 409，只一个生效 |
+
+### AC-12 配置热更新
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TE-12-01 | 探针周期修改 60s 内生效 | Functional | Given 修改探针周期 When 下发配置 Then 60s 后新周期生效 |
+
+---
+
+## 7. 异常/边缘流程测试（FP-01 ~ FP-10）
+
+| 用例 ID | 场景 | 验证点 | 预期行为 |
+|---------|------|-------|---------|
+| TFP-01 | 供应商探针 DNS/TCP 超时 | 状态不变 | 标记 inconclusive，指数退避，不触发状态变更 |
+| TFP-02 | 供应商返回空/格式突变 | 状态不变 | 解析失败标记 inconclusive，记录日志 |
+| TFP-03 | 探针与手动操作并发 | 乐观锁 | 更新失败，探针记录冲突日志，下次覆盖 |
+| TFP-04 | 准入测试期间测试账号 suspended | 任务标记失败 | 任务标记 test_failed，reason = test_account_unavailable |
+| TFP-05 | 补给接口返回 400 或资源冲突 | 任务失败 | 任务 failed，不重复盲目重试，审计记录完整 |
+| TFP-06 | 补给成功但验证/启用失败 | pending 不变 | 账号保持 pending_verify/pending_enable，任务标记 verify_failed，触发告警 |
+| TFP-07 | 供应商模型列表分页 500 | 整体不中断 | 已获取部分正常处理，失败页下次重试 |
+| TFP-08 | 探针期间数据库不可用 | 任务失败重试 | 探针任务失败，连续 5 次失败后暂停批次，触发系统告警 |
+| TFP-09 | 确认上架与忽略并发 | 409 冲突 | 只有一个生效，返回 409 |
+| TFP-10 | KMS 不可用时注册 | 明文不落盘 | 加密步骤阻塞/失败，明文凭证不出现 |
+
+---
+
+## 8. 灰度发布验证计划
+
+### 8.1 各 Phase 验证内容
+
+| Phase | 交付内容 | 通过标准 | 依赖项 |
+|-------|---------|---------|--------|
+| **Phase 1** | 模块 A（探针）+ 模块 E 只读视图 | AC-01~AC-03, AC-10~AC-11（只读部分） | 主仓调度能力或本地调度测试桩 |
+| **Phase 2** | 模块 B（发现）+ 模块 C（准入测试） | AC-04~AC-07 | Phase 1 + 供应商 API 清单 |
+| **Phase 3** | 模块 D（受控自动补给）+ 模块 E 完整 | AC-08~AC-12 | Phase 1+2 + KMS/通知与补给受理链路就绪 |
+
+### 8.2 灰度门禁
+
+每次 Phase 升级前：
+- [ ] 全部 AC 测试用例通过
+- [ ] 覆盖率达标
+- [ ] 灰度开关独立验证（每个开关可单独打开/关闭）
+- [ ] 回滚条件演练（误报率>5% / 状态变更导致错误率上升>2%）
+
+---
+
+## 9. 回归测试集
+
+### 9.1 快速回归（每次 PR，~10 分钟）
+
+```
+TA-01-01, TA-02-01, TA-02-02, TA-02-05,
+TB-04-01, TC-06-01, TC-07-01,
+TD-08-01, TD-09-01,
+TE-10-01, TE-11-01
+共 11 条
+```
+
+### 9.2 完整回归（Phase 升级，~45 分钟）
+
+```
+TA-01-01 ~ TA-03-02（全 11 条）
+TB-04-01 ~ TB-05-02（全 5 条）
+TC-06-01 ~ TC-07-04（全 7 条）
+TD-08-01 ~ TD-09-04（全 8 条）
+TE-10-01 ~ TE-12-01（全 8 条）
+TFP-01 ~ TFP-10（全 10 条）
+共 49 条
+```
+
+---
+
+## 10. 技术栈与集成约束验证
+
+### 10.1 统一技术栈与双运行模式验证
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TSI-RUN-01 | 独立运行模式启动 | Happy Path | Given 独立 `config.yaml` 与独立数据库/Redis When 启动 `cmd/supply-intelligence/main.go` Then `/actuator/health/ready` 返回 200，`/api/v1/supply-intelligence/*` 可访问 |
+| TSI-RUN-02 | 集成运行模式挂载 | Integration | Given supply-api 主进程加载 `IntegrationPlugin` When 启动 Then `/internal/supply-intelligence/*` 路由与后台任务注册成功 |
+| TSI-RUN-03 | 配置分离加载 | Functional | Given 独立模式与集成模式分别启动 When 读取配置 Then 独立模式只加载自身配置，集成模式合并主项目配置且不覆盖无关模块 |
+| TSI-RUN-04 | 数据库前缀隔离 | Structural | Given 执行迁移 When 检查 schema Then 仅创建 `supply_intelligence_` 前缀表 |
+
+### 10.2 独立运行与集成运行验证
+
+### 10.3 IntegrationPlugin 与模块挂载验证
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TSI-PLG-01 | IntegrationPlugin 注册 HTTP 路由 | Integration | Given 集成模式 When 插件注册 Then Probe/Discovery/Admission/AutoReg/OpsWorkBench 路由挂载成功 |
+| TSI-PLG-02 | 模块开关生效 | Functional | Given `enabled_modules` 关闭某模块 When 启动 Then 对应路由/worker 不注册，其他模块可用 |
+| TSI-PLG-03 | 集成模式共享资源 | Integration | Given supply-api 注入共享 DB/Redis/logger When 插件启动 Then 使用共享资源且不重复初始化冲突依赖 |
+
+### 10.4 OpenAPI 契约验证
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TSI-OAS-01 | OpenAPI 文档可访问 | Functional | Given 服务启动 When 请求 `/openapi.json` 或 `/docs` Then 返回 200 且包含探针、发现、准入测试、运营工作台接口 |
+| TSI-OAS-02 | 路由与 OpenAPI 一致 | Contract | Given 导出的 OpenAPI 文档 When 对照 HTTP 路由 Then 请求/响应/错误码与实现一致，无缺失公开接口 |
+| TSI-OAS-03 | 集成前缀可配置 | Contract | Given 集成模式配置内部前缀 When 导出文档 Then 文档反映 `/internal/supply-intelligence/` 前缀或明确区分暴露面 |
+
+### 10.5 NewAPI / Sub2API 适配层验证
+
+| 用例 ID | 描述 | 类型 | 验证条件 |
+|---------|------|------|---------|
+| TSI-ADP-01 | 供应商状态同步适配 | Contract | Given NewAPI/Sub2API 拉取供应商状态 When 调用标准化接口 Then 返回字段稳定、延迟满足约束、状态映射正确 |
+| TSI-ADP-02 | 模型列表推送适配 | Contract | Given 外部系统拉取模型列表 When 调用 `/models` Then 只返回已发现且允许暴露的数据，字段与约定一致 |
+| TSI-ADP-03 | 账号状态适配边界 | Contract | Given 外部系统读取账号状态 When 通过适配层执行 Then 仅返回允许暴露的状态字段，不暴露凭证/探针日志/内部风险细节 |
+
+---
+
+## 11. 发布门禁与阶段结论
+
+### 11.1 发布门禁检查表
+
+以下门禁项全部通过前，不得认定达到生产要求：
+
+- [ ] 独立运行 / 集成运行两种模式均完成启动验证，路由、worker、内部接口真实挂载
+- [ ] `IntegrationPlugin`、OpenAPI、NewAPI/Sub2API 适配层合同测试全部通过
+- [ ] 凭证保护经日志/DB/异常路径验证无明文，KMS 不可用时 fail-closed
+- [ ] 受控自动补给链路具备白名单限制、阈值触发、审批/待验证边界、重复提交阻断与审计留痕
+- [ ] 状态机迁移、审计写入、Gateway package event + ack、外部只读适配链路完成一致性验证
+- [ ] 首次生产放量场景遵循“只告警不自动变更状态”，并验证撤销与人工接管流程
+- [ ] 调度器失效、补给受理失败、外部适配越权、错误状态传播四类高风险回归通过
+- [ ] 至少一条探针、一条模型发现、一条准入测试、一条受控自动补给链路完成端到端验证
+
+### 11.2 阶段门控结论
+
+**当前结论：APPROVED（设计已可进入 Engineer 实现）**
+
+**结论解释：**
+- 本文档首页所述“收敛后测试门禁文档”口径已生效。
+- 当前放行依据不再是旧 HLD/PRD/INTERFACE/DEPLOYMENT，而是：
+  - `tech/BASELINE_TECHLEAD_V2.md`
+  - `tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
+- 因此，本节不再沿用历史性 `REQUEST_CHANGES` 作为当前总门控。
+
+**当前仍需在实现阶段持续验证的高风险项：**
+- 凭证保护必须能证明 fail-closed，且日志/审计/异常路径无明文泄漏。
+- 状态同步、审计写入、package event + ack 必须形成可追踪闭环。
+- 关键链路必须能完成“定义 → 装配 → 调用 → 入口”四层验证，不能只停留在接口存在。
+- 自动补给按首期最小边界解释：允许白名单供应商、阈值触发、任务化补给、待验证/待启用；不把浏览器自动化深链路作为首期阻断门槛。
+
+**实现前约束：**
+- 若实现与 `BASELINE_TECHLEAD_V2.md` 或 `GATEWAY_CONSUMER_DECISION_2026-05.md` 冲突，应以两者为准并回退旧测试假设。
+- 若下游消费方未落真实 poll/apply/ack 入口，不得宣称 package 发布链路已完成。
+- 若 NewAPI/Sub2API 适配超出“只读/受控暴露边界”，应判定为实施漂移。
+
+**重新转为 REQUEST_CHANGES / BLOCKED 的条件：**
+- 实现阶段发现 published/applied 再次混淆。
+- gateway 消费闭环缺少真实消费方入口或 ack 回写。
+- 自动补给被重新扩张为首期深自动注册硬门槛。
+- 核心链路无法提供四层调用链证据。
+
+---
+
+## 12. 性能与安全测试
+
+### 12.1 性能基准
+
+| 指标 | 目标值 | 测试方法 |
+|------|-------|---------|
+| 探针执行（单账号） | <2s | 计时 1000 次取 P99 |
+| 全网扫描（10 供应商） | <5min | 从调度触发到完成计时 |
+| 准入测试（5 用例） | <30min P99 | 从 discovered 到 test_passed/failed |
+| 供应商状态查询 API | <50ms P99 | 并发 100 请求 |
+| 审计日志写入 | <1s P99 | 单次变更后计时 |
+
+### 12.2 安全测试
+
+| 测试项 | 方法 | 验证 |
+|-------|------|------|
+| 凭证明文保护 | 检查日志/DB/内存 dump | 无明文凭证 |
+| KMS 密钥轮换 | Mock KMS 不可用 | fail-closed，不暴露明文 |
+| 供应商 API 限流绕过 | 连续探针超限 | 正确触发 rate limit |
+| 注册接口重复提交 | 并发同一邮箱注册 | 只有一次成功，其余 failed |
--- a/test/CASES.md
+++ b/test/CASES.md
@@ -0,0 +1,98 @@
+# Supply-Intelligence 测试用例
+
+> 版本：v1.0 | 状态：初稿
+
+---
+
+## AC-01 探针覆盖度
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-01.1 | 99% 覆盖率达标 | 插入 100 条测试账号 | 1. 等待 15 分钟 2. 统计探针日志 | 探针覆盖率 ≥ 99% | P0 |
+| TC-01.2 | 探针周期可配置 | 已配置探针任务 | 1. 修改探针周期为 3 分钟 2. 等待 60 秒 | 周期在 60 秒内生效 | P1 |
+
+## AC-02 状态变更正确性
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-02.1 | active → suspended | 账号为 active | 1. Mock 返回 401 | 60s 内状态变为 suspended | P0 |
+| TC-02.2 | suspended → disabled | 账号为 suspended | 1. Mock 连续 3 次返回 401 | 60s 内状态变为 disabled | P0 |
+| TC-02.3 | 429 不变更 | 账号为 active | 1. Mock 返回 429 | 15 分钟内状态保持 active | P0 |
+| TC-02.4 | 状态机违规 | 账号为 active | 1. 尝试直接变更为 disabled | 被拒绝，返回错误码 | P0 |
+| TC-02.5 | 状态恢复 | 账号为 suspended | 1. Mock 返回 200 | 60s 内状态变为 active | P1 |
+
+## AC-03 误报率
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-03.1 | 7 天误报率 | 全部账号正常 | 1. 运行 7 天 2. 统计状态误变更次数 | 误报率 ≤ 1% | P0 |
+
+## AC-04 新模型发现延迟
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-04.1 | 2h 内发现 | 已对接供应商 | 1. T0 在 Mock 响应中新增 model_id 2. T0+2h 查询数据库 | candidate 存在，status=discovered | P0 |
+
+## AC-05 已下架模型告警
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-05.1 | 不自动下架 | package 为 active | 1. 从 Mock 中移除 model_id 2. 等待 2h | package 状态保持 active | P0 |
+| TC-05.2 | 生成告警待办 | package 为 active | 1. 从 Mock 中移除 model_id 2. 等待 2h | 运营工作台出现告警 | P0 |
+
+## AC-06 准入测试通过
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-06.1 | 测试通过 | candidate 为 discovered | 1. 触发准入测试 2. 等待 30min | 状态变为 test_passed，生成 package 草稿 | P0 |
+| TC-06.2 | 草稿字段完整 | 测试通过后 | 1. 查询生成的 package 草稿 | 包含 platform、model、price 字段 | P1 |
+
+## AC-07 准入测试失败
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-07.1 | 接口返回 500 | candidate 为 discovered | 1. Mock 返回 500 2. 等待测试完成 | 状态变为 test_failed，failure_reason 非空 | P0 |
+| TC-07.2 | 前端展示 | candidate 为 test_failed | 1. 访问运营工作台 | 展示失败详情 | P1 |
+
+## AC-08 自动注册成功
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-08.1 | 注册流程 | 已配置白名单 | 1. 触发自动注册 2. 等待 30min | 新增 active 账号 | P0 |
+| TC-08.2 | 密钥加密 | 注册完成后 | 1. 查询数据库 | API Key 已加密存储 | P1 |
+
+## AC-09 自动注册 fail-closed
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-09.1 | 网关不可用 | 配置启用 | 1. Mock 邮件网关返回 503 2. 等待 60s | 任务状态为 failed，审计日志记录失败 | P0 |
+| TC-09.2 | 不返回成功 | 注册失败后 | 1. 检查对上游响应 | 不返回成功状态码 | P0 |
+
+## AC-10 审计日志完整性
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-10.1 | 字段完整性 | 触发操作后 | 1. 5s 内查询审计日志 | 包含所有必要字段 | P0 |
+| TC-10.2 | 自动化操作审计 | 自动化操作后 | 1. 查询审计日志 | 存在对应记录 | P0 |
+
+## AC-11 运营工作台干预
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-11.1 | 一键上架 | package 为 draft | 1. 点击确认上架 2. 等待 3s | 状态变为 active | P0 |
+| TC-11.2 | 忽略模型 | candidate 为 discovered | 1. 点击忽略 | 不在待处理列表中，7 天后恢复 | P0 |
+
+## AC-12 配置热更新
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-12.1 | 探针周期热更新 | 已运行 | 1. 修改配置 2. 观察调度行为 | 60s 内生效 | P1 |
+
+## 边缘场景 / 失败路径
+
+| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
+|---------|------|---------|---------|---------|--------|
+| TC-E1 | DNS 失败 | 探针任务已配置 | 1. 模拟 DNS 解析失败 | 状态不变更，记录日志 | P1 |
+| TC-E2 | 空响应体 | 探针任务已配置 | 1. Mock 返回空 JSON | 状态不变更，记录日志 | P1 |
+| TC-E3 | 并发乐观锁 | 探针任务已配置 | 1. 同时触发手动更新和探针 | 乐观锁冲突，探针记录失败 | P1 |
+| TC-E4 | 测试账号不可用 | 准入测试进行中 | 1. 将测试账号标记为 suspended | 测试标记为 failed，原因为 test_account_unavailable | P1 |
--- a/test/STRATEGY.md
+++ b/test/STRATEGY.md
@@ -0,0 +1,75 @@
+# Supply-Intelligence 测试策略
+
+> 版本：v1.0 | 状态：初稿
+
+---
+
+## 1. 测试目标
+
+| 目标 | 指标 | 验证方式 |
+|------|------|---------|
+| 功能正确性 | 所有 AC 通过率 100% | 每个 AC 至少 1 正向 + 1 负向测试用例 |
+| 状态机正确性 | 状态迁移符合状态图 | 所有状态转换路径覆盖 |
+| 安全性 | 无越权、审计日志完整 | 渗透测试 + 审计追溯 |
+| 性能 | 探针 P99 < 50ms，扫描完成 < 30min | 负载测试 |
+
+## 2. 测试层级
+
+```
+├── 单元测试 (Unit Test)
+│   ├── 状态机转换逻辑
+│   ├── 探针策略逻辑
+│   ├── 扫描比对算法
+│   └── 准入测试判定逻辑
+│
+├── 集成测试 (Integration Test)
+│   ├── 数据库交互（状态变更、审计日志）
+│   ├── Redis 缓存交互
+│   ├── 供应商 API Mock
+│   ├── 邮件/短信网关 Mock
+│   └── 向量数据库检索
+│
+├── E2E 测试 (End-to-End Test)
+│   ├── 探针到状态变更整条链路
+│   ├── 扫描到候选模型整条链路
+│   ├── 准入测试到上架整条链路
+│   └── 账号注册整条链路
+│
+└── 稳定性测试 (Stability Test)
+    ├── 7 天连续探针运行
+    └── 高并发扫描/测试
+```
+
+## 3. 测试工具
+
+| 层级 | 工具 | 说明 |
+|------|------|------|
+| 单元测试 | Go testing + testify + mockery | 覆盖率门槛 domain ≥ 70%、service ≥ 80% |
+| 数据库测试 | testcontainers-go (PostgreSQL) | 每次测试启动独立容器 |
+| 缓存测试 | miniredis | 轻量级 Redis Mock |
+| 供应商 Mock | gock / httptest | 模拟供应商 API 响应 |
+| E2E 测试 | 自定义 Go E2E 框架 | 启动完整服务 + 数据库 |
+| 稳定性测试 | 自定义脚本 | 7 天连续运行监控 |
+
+## 4. 测试环境
+
+| 环境 | 用途 | 数据 |
+|------|------|------|
+| 本地开发 | 单元 + 快速集成测试 | 测试数据生成 |
+| CI | 自动化单元 + 集成测试 | 测试数据生成 |
+| 测试环境 | E2E + 性能基准 | 模拟生产数据 |
+| 生产前 | 稳定性验证 | 生产数据副本（脱敏） |
+| 生产环境 | 灰度监控 | 真实数据 |
+
+## 5. 测试数据管理
+
+- 供应商 API 响应使用 `test/fixtures/supplier_responses/` 下的 JSON 文件管理。
+- 测试用例集使用 `test/fixtures/test_cases/` 下的 YAML 文件管理。
+- 每个测试用例自洁，启动前加载固定数据集，结束后清理。
+
+## 6. 特殊测试要求
+
+- **探针测试**：必须覆盖 429、401、403、500、503、超时、空响应、DNS 失败、TCP 超时等所有常见异常场景。
+- **状态机测试**：必须覆盖所有状态转换路径，特别是 `active` → `disabled` 的违规路径必须被拒绝。
+- **审计测试**：所有自动化操作必须在 5 秒内生成审计记录，且字段完整。
+- **并发测试**：探针任务与运营人员手动操作的并发场景必须测试，验证乐观锁机制。