feat: bootstrap supply intelligence baseline
This commit is contained in:
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
bin/
|
||||
.coverprofile
|
||||
coverage.out
|
||||
*.log
|
||||
*.tmp
|
||||
.DS_Store
|
||||
21
README.md
Normal file
21
README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# supply-intelligence
|
||||
|
||||
Supply-Intelligence 项目代码仓。
|
||||
|
||||
当前阶段目标:先实现首个最小生产闭环,包括:
|
||||
1. 账号探针与状态写回
|
||||
2. 模型发现与 candidate 闭环
|
||||
3. 准入测试与 draft package 生成
|
||||
4. package 发布与 gateway package event + ack
|
||||
|
||||
实现真源文档:
|
||||
- `tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
|
||||
- `tech/BASELINE_TECHLEAD_V2.md`
|
||||
- `tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
- `tech/TEST_DESIGN.md`
|
||||
- `tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md`
|
||||
|
||||
运行约束:
|
||||
- 首期不把 Redis / Temporal / WebSocket / 向量数据库作为硬前置
|
||||
- 首期不做深自动注册主路径
|
||||
- 首期默认 package 发布链路采用 event + ack
|
||||
31
cmd/supply-intelligence/main.go
Normal file
31
cmd/supply-intelligence/main.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/app"
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
func main() {
|
||||
application := app.New()
|
||||
application.Repo.UpsertRoutingState(domain.AccountRoutingState{
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
AccountStatus: domain.AccountStatusActive,
|
||||
RoutingEnabled: true,
|
||||
RiskScore: 10,
|
||||
ReasonCode: "ok",
|
||||
LastProbeAt: time.Now().UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
application.StartBackground(context.Background())
|
||||
defer application.StopBackground()
|
||||
log.Println("supply-intelligence listening on :8080")
|
||||
if err := http.ListenAndServe(":8080", application.Server.Routes()); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
20
docs/POLLER_RUNTIME_BOUNDARY.md
Normal file
20
docs/POLLER_RUNTIME_BOUNDARY.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# Gateway poller runtime boundary
|
||||
|
||||
Current repository stage:
|
||||
- package publish writes a pending gateway event
|
||||
- gateway consumer can poll, apply, and ack that event
|
||||
- the repository implementation in this repo is still in-memory only
|
||||
|
||||
This means:
|
||||
- `published` means the upstream package event was recorded
|
||||
- `applied` / `failed` mean the in-process consumer flow handled the event during the current process lifetime
|
||||
- this repo does not yet claim durable database persistence for gateway event ack state
|
||||
|
||||
Runtime shape in the current repo:
|
||||
- HTTP debug/manual endpoint: `POST /internal/supply-intelligence/gateway/consume-once`
|
||||
- background runtime path: application startup wires a minimal ticker-driven poller loop
|
||||
|
||||
Non-goals for the current stage:
|
||||
- no MQ / Redis / external scheduler
|
||||
- no claim that a full durable publish state machine is complete
|
||||
- no claim that in-memory ack state survives restart
|
||||
5
go.mod
Normal file
5
go.mod
Normal file
@@ -0,0 +1,5 @@
|
||||
module supply-intelligence
|
||||
|
||||
go 1.22.2
|
||||
|
||||
require github.com/google/uuid v1.6.0 // indirect
|
||||
2
go.sum
Normal file
2
go.sum
Normal file
@@ -0,0 +1,2 @@
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
27
internal/admission/repository.go
Normal file
27
internal/admission/repository.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package admission
|
||||
|
||||
import "context"
|
||||
|
||||
// CandidateRepository defines the persistence layer for candidates
|
||||
type CandidateRepository interface {
|
||||
GetCandidateByIDContext(ctx context.Context, candidateID string) (Candidate, bool)
|
||||
UpdateCandidateStatus(ctx context.Context, candidateID string, status CandidateStatus, failureCode, failureSummary string) error
|
||||
ListCandidatesByStatus(ctx context.Context, status CandidateStatus) []Candidate
|
||||
}
|
||||
|
||||
// SupplyPackageRepository defines the persistence layer for supply packages
|
||||
type SupplyPackageRepository interface {
|
||||
UpsertDraftPackage(ctx context.Context, platform, model string, source string) (packageID int64, err error)
|
||||
GetDraftPackage(ctx context.Context, platform, model string) (DraftPackage, bool)
|
||||
}
|
||||
|
||||
// DraftPackage is the supply package record produced for a platform/model
// pair once its candidate has passed admission.
type DraftPackage struct {
	PackageID int64  `json:"package_id"`
	Platform  string `json:"platform"`
	Model     string `json:"model"`
	// Status is one of: draft, active, deprecated.
	Status    string `json:"status"`
	Source    string `json:"source"`
	CreatedAt string `json:"created_at"`
	Version   int64  `json:"version"`
}
|
||||
131
internal/admission/runner.go
Normal file
131
internal/admission/runner.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package admission
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// HTTPTestRunner is the TestRunner implementation that executes admission
// test cases as real HTTP requests.
type HTTPTestRunner struct {
	// client performs the outgoing requests.
	client *http.Client
	// now is a clock hook; NewHTTPTestRunner sets it to the current UTC time.
	now func() time.Time
}
|
||||
|
||||
// NewHTTPTestRunner creates a runner that makes real HTTP calls
|
||||
func NewHTTPTestRunner() *HTTPTestRunner {
|
||||
return &HTTPTestRunner{
|
||||
client: &http.Client{
|
||||
Timeout: 60 * time.Second,
|
||||
},
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
// Run executes a single test case via HTTP
|
||||
func (r *HTTPTestRunner) Run(ctx context.Context, tc TestCase) TestCaseResult {
|
||||
var body io.Reader
|
||||
if tc.Body != "" {
|
||||
body = bytes.NewBufferString(tc.Body)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, tc.Method, tc.Endpoint, body)
|
||||
if err != nil {
|
||||
return TestCaseResult{Error: err.Error()}
|
||||
}
|
||||
|
||||
for k, v := range tc.Headers {
|
||||
req.Header.Set(k, v)
|
||||
}
|
||||
if req.Header.Get("Content-Type") == "" {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
resp, err := r.client.Do(req)
|
||||
latencyMs := int(time.Since(start).Milliseconds())
|
||||
|
||||
if err != nil {
|
||||
return TestCaseResult{
|
||||
Error: err.Error(),
|
||||
LatencyMs: latencyMs,
|
||||
}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Read response (up to 4KB for validation)
|
||||
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
|
||||
passed := resp.StatusCode >= 200 && resp.StatusCode < 300
|
||||
|
||||
return TestCaseResult{
|
||||
Passed: passed,
|
||||
StatusCode: resp.StatusCode,
|
||||
LatencyMs: latencyMs,
|
||||
ResponseLen: len(respBody),
|
||||
Error: "",
|
||||
}
|
||||
}
|
||||
|
||||
// BuildTestSuiteForPlatform creates a standard test suite for a platform
|
||||
func BuildTestSuiteForPlatform(platform, baseURL, apiKey string) TestSuite {
|
||||
switch platform {
|
||||
case "openai":
|
||||
return buildOpenAITestSuite(baseURL, apiKey)
|
||||
case "anthropic":
|
||||
return buildAnthropicTestSuite(baseURL, apiKey)
|
||||
default:
|
||||
return TestSuite{Platform: platform, Cases: []TestCase{}}
|
||||
}
|
||||
}
|
||||
|
||||
func buildOpenAITestSuite(baseURL, apiKey string) TestSuite {
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.openai.com"
|
||||
}
|
||||
endpoint := baseURL + "/v1/models"
|
||||
return TestSuite{
|
||||
Platform: "openai",
|
||||
Cases: []TestCase{
|
||||
{
|
||||
ID: "openai-models-list",
|
||||
Name: "List Models",
|
||||
Endpoint: endpoint,
|
||||
Method: http.MethodGet,
|
||||
Headers: map[string]string{"Authorization": "Bearer " + apiKey},
|
||||
TimeoutSecs: 30,
|
||||
},
|
||||
{
|
||||
ID: "openai-chat-completion",
|
||||
Name: "Chat Completion",
|
||||
Endpoint: baseURL + "/v1/chat/completions",
|
||||
Method: http.MethodPost,
|
||||
Headers: map[string]string{"Authorization": "Bearer " + apiKey, "Content-Type": "application/json"},
|
||||
Body: `{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hello"}],"max_tokens":10}`,
|
||||
TimeoutSecs: 30,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func buildAnthropicTestSuite(baseURL, apiKey string) TestSuite {
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.anthropic.com"
|
||||
}
|
||||
return TestSuite{
|
||||
Platform: "anthropic",
|
||||
Cases: []TestCase{
|
||||
{
|
||||
ID: "anthropic-messages",
|
||||
Name: "Claude Messages",
|
||||
Endpoint: baseURL + "/v1/messages",
|
||||
Method: http.MethodPost,
|
||||
Headers: map[string]string{"x-api-key": apiKey, "anthropic-version": "2023-06-01", "Content-Type": "application/json"},
|
||||
Body: `{"model":"claude-3-5-haiku-20241022","messages":[{"role":"user","content":"hello"}],"max_tokens":10}`,
|
||||
TimeoutSecs: 30,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
169
internal/admission/runner_test.go
Normal file
169
internal/admission/runner_test.go
Normal file
@@ -0,0 +1,169 @@
|
||||
package admission
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHTTPTestRunner_Run_Success(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"id":"model-1"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
runner := NewHTTPTestRunner()
|
||||
result := runner.Run(context.Background(), TestCase{
|
||||
ID: "test-1",
|
||||
Name: "Test Case",
|
||||
Endpoint: server.URL,
|
||||
Method: http.MethodGet,
|
||||
TimeoutSecs: 30,
|
||||
})
|
||||
|
||||
if !result.Passed {
|
||||
t.Fatalf("expected pass, got failed: status=%d", result.StatusCode)
|
||||
}
|
||||
if result.StatusCode != http.StatusOK {
|
||||
t.Fatalf("expected 200, got: %d", result.StatusCode)
|
||||
}
|
||||
if result.LatencyMs < 0 {
|
||||
t.Fatalf("expected latency >= 0, got: %d", result.LatencyMs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPTestRunner_Run_Non2xx_Fails(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
runner := NewHTTPTestRunner()
|
||||
result := runner.Run(context.Background(), TestCase{
|
||||
ID: "test-2",
|
||||
Name: "Test 500",
|
||||
Endpoint: server.URL,
|
||||
Method: http.MethodGet,
|
||||
TimeoutSecs: 30,
|
||||
})
|
||||
|
||||
if result.Passed {
|
||||
t.Fatal("expected failure for 500")
|
||||
}
|
||||
if result.StatusCode != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500, got: %d", result.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPTestRunner_Run_Timeout(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
runner := NewHTTPTestRunner()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
result := runner.Run(ctx, TestCase{
|
||||
ID: "test-3",
|
||||
Name: "Test Timeout",
|
||||
Endpoint: server.URL,
|
||||
Method: http.MethodGet,
|
||||
TimeoutSecs: 1, // but context is 50ms
|
||||
})
|
||||
|
||||
if result.Error == "" {
|
||||
t.Fatal("expected error on timeout")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPTestRunner_Run_ContextCanceled(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(5 * time.Second)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
runner := NewHTTPTestRunner()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel() // cancel immediately
|
||||
|
||||
result := runner.Run(ctx, TestCase{
|
||||
ID: "test-4",
|
||||
Name: "Test Cancel",
|
||||
Endpoint: server.URL,
|
||||
Method: http.MethodGet,
|
||||
TimeoutSecs: 30,
|
||||
})
|
||||
|
||||
if result.Error == "" {
|
||||
t.Fatal("expected error on context cancel")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTestSuiteForPlatform_OpenAI(t *testing.T) {
|
||||
suite := BuildTestSuiteForPlatform("openai", "https://api.openai.com", "sk-test")
|
||||
if suite.Platform != "openai" {
|
||||
t.Fatalf("expected openai, got: %s", suite.Platform)
|
||||
}
|
||||
if len(suite.Cases) == 0 {
|
||||
t.Fatal("expected at least 1 test case")
|
||||
}
|
||||
if suite.Cases[0].Method != http.MethodGet {
|
||||
t.Fatalf("expected GET for models list, got: %s", suite.Cases[0].Method)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTestSuiteForPlatform_Anthropic(t *testing.T) {
|
||||
suite := BuildTestSuiteForPlatform("anthropic", "https://api.anthropic.com", "sk-ant-test")
|
||||
if suite.Platform != "anthropic" {
|
||||
t.Fatalf("expected anthropic, got: %s", suite.Platform)
|
||||
}
|
||||
if len(suite.Cases) == 0 {
|
||||
t.Fatal("expected at least 1 test case")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildTestSuiteForPlatform_Unknown(t *testing.T) {
|
||||
suite := BuildTestSuiteForPlatform("unknown", "", "")
|
||||
if len(suite.Cases) != 0 {
|
||||
t.Fatal("expected 0 cases for unknown platform")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPTestRunner_Run_PostWithJSONBody(t *testing.T) {
|
||||
var receivedBody string
|
||||
var receivedContentType string
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
receivedContentType = r.Header.Get("Content-Type")
|
||||
body := make([]byte, 1024)
|
||||
n, _ := r.Body.Read(body)
|
||||
receivedBody = string(body[:n])
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
runner := NewHTTPTestRunner()
|
||||
result := runner.Run(context.Background(), TestCase{
|
||||
ID: "test-post",
|
||||
Name: "POST JSON",
|
||||
Endpoint: server.URL,
|
||||
Method: http.MethodPost,
|
||||
Headers: map[string]string{"Authorization": "Bearer token"},
|
||||
Body: `{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}`,
|
||||
TimeoutSecs: 30,
|
||||
})
|
||||
|
||||
if !result.Passed {
|
||||
t.Fatalf("expected pass: %+v", result)
|
||||
}
|
||||
if receivedContentType != "application/json" {
|
||||
t.Fatalf("expected application/json, got: %s", receivedContentType)
|
||||
}
|
||||
_ = receivedBody // validated via status code pass check
|
||||
}
|
||||
166
internal/admission/service.go
Normal file
166
internal/admission/service.go
Normal file
@@ -0,0 +1,166 @@
|
||||
package admission
|
||||
|
||||
import (
	"context"
	"errors"
	"strconv"
	"strings"
	"time"
)
|
||||
|
||||
// Sentinel errors of the admission service; compare with errors.Is.
var (
	// ErrInvalidCandidateID signals an invalid (for example empty) candidate ID.
	ErrInvalidCandidateID = errors.New("invalid candidate id")
	// ErrCandidateNotFound signals that no candidate exists for the given ID.
	ErrCandidateNotFound = errors.New("candidate not found")
	// ErrCandidateNotRunnable signals that the candidate is not in a state
	// from which admission can run.
	ErrCandidateNotRunnable = errors.New("candidate not in runnable state")
	// ErrTestTimeout signals that an admission test timed out.
	ErrTestTimeout = errors.New("admission test timed out")
)
|
||||
|
||||
// TestRunner executes a single test case
|
||||
type TestRunner interface {
|
||||
Run(ctx context.Context, tc TestCase) TestCaseResult
|
||||
}
|
||||
|
||||
// TestCaseResult describes how a single test case execution went.
type TestCaseResult struct {
	// Passed reports whether the case counts as successful.
	Passed bool
	// StatusCode is the HTTP status of the response, when one was received.
	StatusCode int
	// LatencyMs is the measured request latency in milliseconds.
	LatencyMs int
	// Error holds the error text when execution failed; empty otherwise.
	Error string
	// ResponseLen is the number of response body bytes that were read.
	ResponseLen int
}
|
||||
|
||||
// Service orchestrates the admission testing workflow
|
||||
type Service struct {
|
||||
candidateRepo CandidateRepository
|
||||
packageRepo SupplyPackageRepository
|
||||
testSuites map[string]TestSuite // key = platform
|
||||
runner TestRunner
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// NewService creates a new admission service
|
||||
func NewService(candidateRepo CandidateRepository, packageRepo SupplyPackageRepository, suites []TestSuite, runner TestRunner) *Service {
|
||||
suiteMap := make(map[string]TestSuite)
|
||||
for _, s := range suites {
|
||||
suiteMap[s.Platform] = s
|
||||
}
|
||||
return &Service{
|
||||
candidateRepo: candidateRepo,
|
||||
packageRepo: packageRepo,
|
||||
testSuites: suiteMap,
|
||||
runner: runner,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
// RunAdmission executes the full admission test for a candidate
|
||||
func (s *Service) RunAdmission(ctx context.Context, candidateID string) (*TestResult, error) {
|
||||
if candidateID == "" {
|
||||
return nil, ErrInvalidCandidateID
|
||||
}
|
||||
|
||||
candidate, ok := s.candidateRepo.GetCandidateByIDContext(ctx, candidateID)
|
||||
if !ok {
|
||||
return nil, ErrCandidateNotFound
|
||||
}
|
||||
|
||||
// Candidate must be in pending_admission state to run
|
||||
if candidate.Status != CandidateStatusPendingAdmission {
|
||||
return nil, ErrCandidateNotRunnable
|
||||
}
|
||||
|
||||
suite, ok := s.testSuites[candidate.Platform]
|
||||
if !ok {
|
||||
// No test suite for this platform — auto-pass (no known test cases)
|
||||
s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusAdmitted, "", "")
|
||||
return &TestResult{
|
||||
CandidateID: candidateID,
|
||||
Status: CandidateStatusAdmitted,
|
||||
TestedAt: s.now(),
|
||||
Passed: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Execute all test cases
|
||||
var failedCases []string
|
||||
var failureCode string
|
||||
var failureSummary string
|
||||
|
||||
for _, tc := range suite.Cases {
|
||||
timeoutCtx, cancel := context.WithTimeout(ctx, time.Duration(tc.TimeoutSecs)*time.Second)
|
||||
result := s.runner.Run(timeoutCtx, tc)
|
||||
cancel()
|
||||
|
||||
if !result.Passed {
|
||||
failedCases = append(failedCases, tc.Name)
|
||||
if failureCode == "" {
|
||||
failureCode = classifyFailure(result, tc)
|
||||
failureSummary = formatFailure(result, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
testedAt := s.now()
|
||||
|
||||
if len(failedCases) > 0 {
|
||||
// Test failed
|
||||
err := s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusRejected, failureCode, failureSummary)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &TestResult{
|
||||
CandidateID: candidateID,
|
||||
Status: CandidateStatusRejected,
|
||||
TestedAt: testedAt,
|
||||
FailureCode: failureCode,
|
||||
FailureSummary: failureSummary,
|
||||
Passed: false,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// All cases passed — generate draft package
|
||||
_, err := s.packageRepo.UpsertDraftPackage(ctx, candidate.Platform, candidate.Model, candidate.Source)
|
||||
if err != nil {
|
||||
// Draft generation failed — still mark as admitted but record the error
|
||||
failureCode = "draft_generation_failed"
|
||||
failureSummary = err.Error()
|
||||
_ = s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusAdmitted, failureCode, failureSummary)
|
||||
} else {
|
||||
_ = s.candidateRepo.UpdateCandidateStatus(ctx, candidateID, CandidateStatusAdmitted, "", "")
|
||||
}
|
||||
|
||||
return &TestResult{
|
||||
CandidateID: candidateID,
|
||||
Status: CandidateStatusAdmitted,
|
||||
TestedAt: testedAt,
|
||||
Passed: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// classifyFailure determines the failure code from a failed test case result
|
||||
func classifyFailure(result TestCaseResult, tc TestCase) string {
|
||||
if result.Error != "" {
|
||||
if result.Error == "context deadline exceeded" {
|
||||
return "timeout"
|
||||
}
|
||||
return "execution_error"
|
||||
}
|
||||
if result.StatusCode >= 500 {
|
||||
return "upstream_error"
|
||||
}
|
||||
if result.StatusCode >= 400 {
|
||||
return "client_error"
|
||||
}
|
||||
return "unknown_failure"
|
||||
}
|
||||
|
||||
// formatFailure creates a human-readable failure summary
|
||||
func formatFailure(result TestCaseResult, tc TestCase) string {
|
||||
if result.Error != "" {
|
||||
return tc.Name + ": " + result.Error
|
||||
}
|
||||
return tc.Name + ": status=" + string(rune(result.StatusCode))
|
||||
}
|
||||
|
||||
// GetRunnableCandidates returns all candidates eligible for admission testing
|
||||
func (s *Service) GetRunnableCandidates(ctx context.Context) []Candidate {
|
||||
return s.candidateRepo.ListCandidatesByStatus(ctx, CandidateStatusPendingAdmission)
|
||||
}
|
||||
201
internal/admission/service_test.go
Normal file
201
internal/admission/service_test.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package admission
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
type mockCandidateRepo struct {
|
||||
candidates map[string]Candidate
|
||||
}
|
||||
|
||||
func (r *mockCandidateRepo) GetCandidateByIDContext(ctx context.Context, candidateID string) (Candidate, bool) {
|
||||
c, ok := r.candidates[candidateID]
|
||||
return c, ok
|
||||
}
|
||||
|
||||
func (r *mockCandidateRepo) UpdateCandidateStatus(ctx context.Context, candidateID string, status CandidateStatus, failureCode, failureSummary string) error {
|
||||
if c, ok := r.candidates[candidateID]; ok {
|
||||
c.Status = status
|
||||
c.ReasonCode = failureCode
|
||||
c.UpdatedAt = time.Now().UTC()
|
||||
r.candidates[candidateID] = c
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *mockCandidateRepo) ListCandidatesByStatus(ctx context.Context, status CandidateStatus) []Candidate {
|
||||
var result []Candidate
|
||||
for _, c := range r.candidates {
|
||||
if status == "" || c.Status == status {
|
||||
result = append(result, c)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
type mockPackageRepo struct {
|
||||
drafts map[string]DraftPackage
|
||||
nextID int64
|
||||
}
|
||||
|
||||
func (r *mockPackageRepo) UpsertDraftPackage(ctx context.Context, platform, model, source string) (int64, error) {
|
||||
r.nextID++
|
||||
id := r.nextID
|
||||
r.drafts[platform+"/"+model] = DraftPackage{
|
||||
PackageID: id,
|
||||
Platform: platform,
|
||||
Model: model,
|
||||
Status: "draft",
|
||||
Source: source,
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *mockPackageRepo) GetDraftPackage(ctx context.Context, platform, model string) (DraftPackage, bool) {
|
||||
d, ok := r.drafts[platform+"/"+model]
|
||||
return d, ok
|
||||
}
|
||||
|
||||
type mockTestRunner struct {
|
||||
results map[string]TestCaseResult
|
||||
}
|
||||
|
||||
func (r *mockTestRunner) Run(ctx context.Context, tc TestCase) TestCaseResult {
|
||||
if res, ok := r.results[tc.ID]; ok {
|
||||
return res
|
||||
}
|
||||
return TestCaseResult{Passed: true, StatusCode: 200}
|
||||
}
|
||||
|
||||
func TestRunAdmission_PassesAllCases(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
|
||||
"cand-1": {CandidateID: "cand-1", Platform: "openai", Model: "gpt-4", Status: CandidateStatusPendingAdmission},
|
||||
}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{results: map[string]TestCaseResult{}}
|
||||
|
||||
suites := []TestSuite{{
|
||||
Platform: "openai",
|
||||
Cases: []TestCase{
|
||||
{ID: "case-1", Name: "models endpoint", Endpoint: "/v1/models", Method: "GET", TimeoutSecs: 30},
|
||||
},
|
||||
}}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, suites, runner)
|
||||
result, err := svc.RunAdmission(context.Background(), "cand-1")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !result.Passed {
|
||||
t.Fatalf("expected pass, got failed: %+v", result)
|
||||
}
|
||||
if result.Status != CandidateStatusAdmitted {
|
||||
t.Fatalf("expected admitted status, got: %s", result.Status)
|
||||
}
|
||||
if len(packageRepo.drafts) != 1 {
|
||||
t.Fatalf("expected 1 draft package, got %d", len(packageRepo.drafts))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAdmission_FailsOneCase(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
|
||||
"cand-2": {CandidateID: "cand-2", Platform: "openai", Model: "gpt-4", Status: CandidateStatusPendingAdmission},
|
||||
}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{results: map[string]TestCaseResult{
|
||||
"case-1": {Passed: false, StatusCode: 500, Error: ""},
|
||||
}}
|
||||
|
||||
suites := []TestSuite{{
|
||||
Platform: "openai",
|
||||
Cases: []TestCase{
|
||||
{ID: "case-1", Name: "models endpoint", Endpoint: "/v1/models", Method: "GET", TimeoutSecs: 30},
|
||||
},
|
||||
}}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, suites, runner)
|
||||
result, err := svc.RunAdmission(context.Background(), "cand-2")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if result.Passed {
|
||||
t.Fatalf("expected failure, got pass")
|
||||
}
|
||||
if result.Status != CandidateStatusRejected {
|
||||
t.Fatalf("expected rejected status, got: %s", result.Status)
|
||||
}
|
||||
if result.FailureCode == "" {
|
||||
t.Fatalf("expected failure code to be set")
|
||||
}
|
||||
if len(packageRepo.drafts) != 0 {
|
||||
t.Fatalf("expected 0 draft packages on failure, got %d", len(packageRepo.drafts))
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAdmission_CandidateNotFound(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{results: map[string]TestCaseResult{}}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
|
||||
_, err := svc.RunAdmission(context.Background(), "nonexistent")
|
||||
|
||||
if !errors.Is(err, ErrCandidateNotFound) {
|
||||
t.Fatalf("expected ErrCandidateNotFound, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAdmission_CandidateNotRunnable(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
|
||||
"cand-3": {CandidateID: "cand-3", Platform: "openai", Model: "gpt-4", Status: CandidateStatusAdmitted},
|
||||
}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{results: map[string]TestCaseResult{}}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
|
||||
_, err := svc.RunAdmission(context.Background(), "cand-3")
|
||||
|
||||
if !errors.Is(err, ErrCandidateNotRunnable) {
|
||||
t.Fatalf("expected ErrCandidateNotRunnable, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunAdmission_NoTestSuite_AutoPass(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
|
||||
"cand-4": {CandidateID: "cand-4", Platform: "unknown-platform", Model: "some-model", Status: CandidateStatusPendingAdmission},
|
||||
}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{results: map[string]TestCaseResult{}}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner) // no suites
|
||||
result, err := svc.RunAdmission(context.Background(), "cand-4")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !result.Passed {
|
||||
t.Fatalf("expected auto-pass for unknown platform, got: %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRunnableCandidates(t *testing.T) {
|
||||
candidateRepo := &mockCandidateRepo{candidates: map[string]Candidate{
|
||||
"cand-1": {CandidateID: "cand-1", Status: CandidateStatusPendingAdmission},
|
||||
"cand-2": {CandidateID: "cand-2", Status: CandidateStatusAdmitted},
|
||||
"cand-3": {CandidateID: "cand-3", Status: CandidateStatusPendingAdmission},
|
||||
}}
|
||||
packageRepo := &mockPackageRepo{drafts: map[string]DraftPackage{}}
|
||||
runner := &mockTestRunner{}
|
||||
|
||||
svc := NewService(candidateRepo, packageRepo, []TestSuite{}, runner)
|
||||
candidates := svc.GetRunnableCandidates(context.Background())
|
||||
|
||||
if len(candidates) != 2 {
|
||||
t.Fatalf("expected 2 pending candidates, got %d", len(candidates))
|
||||
}
|
||||
}
|
||||
62
internal/admission/types.go
Normal file
62
internal/admission/types.go
Normal file
@@ -0,0 +1,62 @@
|
||||
package admission
|
||||
|
||||
import "time"
|
||||
|
||||
// ProbeClassification is the admission package's own copy of
// domain.ProbeClassification.
type ProbeClassification string

// Probe classification values.
const (
	ProbeClassificationSuccess         ProbeClassification = "success"
	ProbeClassificationExplicitFailure ProbeClassification = "explicit_failure"
	ProbeClassificationInconclusive    ProbeClassification = "inconclusive"
)
|
||||
|
||||
// CandidateStatus is the admission package's own copy of
// domain.DiscoveryCandidateStatus.
type CandidateStatus string

// Candidate status values used by the admission workflow.
const (
	CandidateStatusPendingAdmission CandidateStatus = "pending_admission"
	CandidateStatusAdmitted         CandidateStatus = "admitted"
	CandidateStatusRejected         CandidateStatus = "rejected"
)
|
||||
|
||||
// Candidate represents a discovered model waiting for admission testing
|
||||
type Candidate struct {
|
||||
CandidateID string `json:"candidate_id"`
|
||||
AccountID int64 `json:"account_id"`
|
||||
Platform string `json:"platform"`
|
||||
Model string `json:"model"`
|
||||
Status CandidateStatus `json:"status"`
|
||||
Source string `json:"source"`
|
||||
ReasonCode string `json:"reason_code,omitempty"`
|
||||
DiscoveredAt time.Time `json:"discovered_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Version int64 `json:"version"`
|
||||
}
|
||||
|
||||
// TestResult records the outcome of an admission test run
|
||||
type TestResult struct {
|
||||
CandidateID string `json:"candidate_id"`
|
||||
Status CandidateStatus `json:"status"` // admitted or rejected
|
||||
TestedAt time.Time `json:"tested_at"`
|
||||
FailureCode string `json:"failure_code,omitempty"`
|
||||
FailureSummary string `json:"failure_summary,omitempty"`
|
||||
Passed bool `json:"passed"`
|
||||
}
|
||||
|
||||
// TestCase describes one HTTP request that is issued during an admission run.
type TestCase struct {
	ID       string `json:"id"`
	Name     string `json:"name"`
	Endpoint string `json:"endpoint"`
	Method   string `json:"method"`
	// Headers and Body are optional and left out of the JSON form when empty.
	Headers map[string]string `json:"headers,omitempty"`
	Body    string            `json:"body,omitempty"`
	// TimeoutSecs is the per-case timeout in seconds.
	TimeoutSecs int `json:"timeout_secs"`
}
|
||||
|
||||
// TestSuite defines a collection of test cases for a model type
|
||||
type TestSuite struct {
|
||||
Platform string `json:"platform"`
|
||||
Cases []TestCase `json:"cases"`
|
||||
}
|
||||
160
internal/app/app.go
Normal file
160
internal/app/app.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/admission"
|
||||
"supply-intelligence/internal/discovery"
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
"supply-intelligence/internal/httpapi"
|
||||
"supply-intelligence/internal/poller"
|
||||
"supply-intelligence/internal/probe"
|
||||
"supply-intelligence/internal/publish"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
type Application struct {
|
||||
Repo *repository.MemoryRepository
|
||||
ProbeService *probe.Service
|
||||
PublishService *publish.Service
|
||||
DiscoveryService *discovery.Service
|
||||
GatewayConsumerService *gatewayconsumer.Service
|
||||
GatewayPoller *poller.GatewayPackagePoller
|
||||
GatewayRuntime *poller.Runtime
|
||||
AdmissionService *admission.Service
|
||||
Server *httpapi.Server
|
||||
}
|
||||
|
||||
func New() *Application {
|
||||
repo := repository.NewMemoryRepository()
|
||||
probeService := probe.NewService(repo)
|
||||
publishService := publish.NewService(repo)
|
||||
discoveryService := discovery.NewService(repo)
|
||||
gatewayConsumerService := gatewayconsumer.NewService(repo)
|
||||
gatewayPoller := poller.NewGatewayPackagePoller(gatewayConsumerService)
|
||||
gatewayRuntime := poller.NewRuntime(gatewayPoller, time.Second)
|
||||
|
||||
// Wire MemoryRepository as admission's CandidateRepository
|
||||
candidateRepo := &admissionMemoryRepoAdapter{repo: repo}
|
||||
packageRepo := &admissionSupplyPackageAdapter{repo: repo}
|
||||
runner := admission.NewHTTPTestRunner()
|
||||
|
||||
// Build test suites for known platforms (in real use, loaded from config)
|
||||
suites := []admission.TestSuite{
|
||||
admission.BuildTestSuiteForPlatform("openai", "https://api.openai.com", ""),
|
||||
admission.BuildTestSuiteForPlatform("anthropic", "https://api.anthropic.com", ""),
|
||||
}
|
||||
|
||||
admissionService := admission.NewService(candidateRepo, packageRepo, suites, runner)
|
||||
|
||||
return &Application{
|
||||
Repo: repo,
|
||||
ProbeService: probeService,
|
||||
PublishService: publishService,
|
||||
DiscoveryService: discoveryService,
|
||||
GatewayConsumerService: gatewayConsumerService,
|
||||
GatewayPoller: gatewayPoller,
|
||||
GatewayRuntime: gatewayRuntime,
|
||||
AdmissionService: admissionService,
|
||||
Server: httpapi.NewServer(repo, probeService, publishService, gatewayConsumerService, discoveryService, admissionService),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Application) StartBackground(ctx context.Context) {
|
||||
if a == nil || a.GatewayRuntime == nil {
|
||||
return
|
||||
}
|
||||
a.GatewayRuntime.Start(ctx)
|
||||
}
|
||||
|
||||
func (a *Application) StopBackground() {
|
||||
if a == nil || a.GatewayRuntime == nil {
|
||||
return
|
||||
}
|
||||
a.GatewayRuntime.Stop()
|
||||
}
|
||||
|
||||
func (a *Application) IsInMemoryGatewayState() bool {
|
||||
return a != nil && a.Repo != nil
|
||||
}
|
||||
|
||||
// --- Adapters that bridge MemoryRepository to admission.Repository interfaces ---
|
||||
|
||||
// admissionMemoryRepoAdapter adapts MemoryRepository to admission.CandidateRepository
|
||||
type admissionMemoryRepoAdapter struct {
|
||||
repo *repository.MemoryRepository
|
||||
}
|
||||
|
||||
func (a *admissionMemoryRepoAdapter) GetCandidateByIDContext(ctx context.Context, candidateID string) (admission.Candidate, bool) {
|
||||
c, ok := a.repo.GetDiscoveryCandidateByIDContext(ctx, candidateID)
|
||||
if !ok {
|
||||
return admission.Candidate{}, false
|
||||
}
|
||||
return toAdmissionCandidate(c), true
|
||||
}
|
||||
|
||||
func (a *admissionMemoryRepoAdapter) UpdateCandidateStatus(ctx context.Context, candidateID string, status admission.CandidateStatus, failureCode, failureSummary string) error {
|
||||
return a.repo.UpdateCandidateStatus(ctx, candidateID, domain.DiscoveryCandidateStatus(status), failureCode, failureSummary)
|
||||
}
|
||||
|
||||
func (a *admissionMemoryRepoAdapter) ListCandidatesByStatus(ctx context.Context, status admission.CandidateStatus) []admission.Candidate {
|
||||
candidates := a.repo.ListDiscoveryCandidatesContext(ctx, domain.DiscoveryCandidateStatus(status))
|
||||
result := make([]admission.Candidate, len(candidates))
|
||||
for i, c := range candidates {
|
||||
result[i] = toAdmissionCandidate(c)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func toAdmissionCandidate(c domain.DiscoveryCandidate) admission.Candidate {
|
||||
return admission.Candidate{
|
||||
CandidateID: c.CandidateID,
|
||||
AccountID: c.AccountID,
|
||||
Platform: c.Platform,
|
||||
Model: c.Model,
|
||||
Status: admission.CandidateStatus(c.Status),
|
||||
Source: c.Source,
|
||||
ReasonCode: c.ReasonCode,
|
||||
DiscoveredAt: c.DiscoveredAt,
|
||||
UpdatedAt: c.UpdatedAt,
|
||||
Version: c.Version,
|
||||
}
|
||||
}
|
||||
|
||||
// admissionSupplyPackageAdapter adapts MemoryRepository to admission.SupplyPackageRepository
|
||||
type admissionSupplyPackageAdapter struct {
|
||||
repo *repository.MemoryRepository
|
||||
}
|
||||
|
||||
func (a *admissionSupplyPackageAdapter) UpsertDraftPackage(ctx context.Context, platform, model, source string) (int64, error) {
|
||||
if existing, ok := a.repo.GetSupplyPackage(platform, model); ok {
|
||||
return existing.PackageID, nil
|
||||
}
|
||||
pkg := domain.SupplyPackage{
|
||||
Platform: platform,
|
||||
Model: model,
|
||||
Status: "draft",
|
||||
Source: source,
|
||||
}
|
||||
a.repo.UpsertSupplyPackage(pkg)
|
||||
if newPkg, ok := a.repo.GetSupplyPackage(platform, model); ok {
|
||||
return newPkg.PackageID, nil
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (a *admissionSupplyPackageAdapter) GetDraftPackage(ctx context.Context, platform, model string) (admission.DraftPackage, bool) {
|
||||
pkg, ok := a.repo.GetSupplyPackage(platform, model)
|
||||
if !ok {
|
||||
return admission.DraftPackage{}, false
|
||||
}
|
||||
return admission.DraftPackage{
|
||||
PackageID: pkg.PackageID,
|
||||
Platform: pkg.Platform,
|
||||
Model: pkg.Model,
|
||||
Status: pkg.Status,
|
||||
Source: pkg.Source,
|
||||
}, true
|
||||
}
|
||||
85
internal/app/app_test.go
Normal file
85
internal/app/app_test.go
Normal file
@@ -0,0 +1,85 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
func TestNewApplication(t *testing.T) {
|
||||
application := New()
|
||||
if application == nil {
|
||||
t.Fatalf("expected application")
|
||||
}
|
||||
if application.Repo == nil {
|
||||
t.Fatalf("expected repository")
|
||||
}
|
||||
if application.ProbeService == nil {
|
||||
t.Fatalf("expected probe service")
|
||||
}
|
||||
if application.PublishService == nil {
|
||||
t.Fatalf("expected publish service")
|
||||
}
|
||||
if application.DiscoveryService == nil {
|
||||
t.Fatalf("expected discovery service")
|
||||
}
|
||||
if application.GatewayConsumerService == nil {
|
||||
t.Fatalf("expected gateway consumer service")
|
||||
}
|
||||
if application.GatewayPoller == nil {
|
||||
t.Fatalf("expected gateway poller")
|
||||
}
|
||||
if application.GatewayRuntime == nil {
|
||||
t.Fatalf("expected gateway runtime")
|
||||
}
|
||||
if application.Server == nil {
|
||||
t.Fatalf("expected server")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplicationStartBackgroundPollsEvents(t *testing.T) {
|
||||
application := New()
|
||||
application.Repo.AppendPackageEvent(domain.PackageChangeEvent{
|
||||
EventID: "evt-app-runtime-1",
|
||||
EventType: "supply_package_published",
|
||||
PackageID: 11,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
OccurredAt: time.Unix(2, 0).UTC(),
|
||||
Version: 1,
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
})
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
application.StartBackground(ctx)
|
||||
defer application.StopBackground()
|
||||
|
||||
deadline := time.Now().Add(1500 * time.Millisecond)
|
||||
for time.Now().Before(deadline) {
|
||||
items, _ := application.Repo.ListPackageEventsAfter("")
|
||||
if len(items) == 1 && items[0].GatewaySyncStatus == domain.GatewaySyncStatusApplied {
|
||||
return
|
||||
}
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
}
|
||||
items, _ := application.Repo.ListPackageEventsAfter("")
|
||||
t.Fatalf("expected background runtime to apply event, got %+v", items)
|
||||
}
|
||||
|
||||
func TestApplicationStartBackgroundHandlesNilRuntime(t *testing.T) {
|
||||
application := New()
|
||||
application.GatewayRuntime = nil
|
||||
application.StartBackground(context.Background())
|
||||
if application.GatewayRuntime != nil {
|
||||
t.Fatalf("expected nil runtime guard to keep runtime nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplicationReportsInMemoryGatewayState(t *testing.T) {
|
||||
application := New()
|
||||
if !application.IsInMemoryGatewayState() {
|
||||
t.Fatalf("expected in-memory gateway state")
|
||||
}
|
||||
}
|
||||
150
internal/control/module.go
Normal file
150
internal/control/module.go
Normal file
@@ -0,0 +1,150 @@
|
||||
package control
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ModuleState represents the lifecycle state of a module.
type ModuleState string

// Lifecycle states of a module gate.
const (
	// ModuleStateActive is the initial state set by NewModuleGate.
	ModuleStateActive ModuleState = "active"
	// ModuleStateClosing is entered by Close.
	ModuleStateClosing ModuleState = "closing"
	// ModuleStateClosed is entered by MarkClosed.
	ModuleStateClosed ModuleState = "closed"
)

// ModuleGate controls the enable/disable/close lifecycle of a module.
// All methods are guarded by an internal RWMutex; a ModuleGate must not be
// copied after first use.
type ModuleGate struct {
	mu      sync.RWMutex
	enabled bool
	state   ModuleState
	// closedAt records when the gate first left the active state. It is nil
	// while the gate is active.
	closedAt *time.Time
}

// NewModuleGate returns a gate in the active state with the given enabled flag.
func NewModuleGate(enabled bool) *ModuleGate {
	return &ModuleGate{enabled: enabled, state: ModuleStateActive}
}

// IsEnabled returns whether the module is accepting new tasks, i.e. the gate
// is enabled and still in the active state.
func (g *ModuleGate) IsEnabled() bool {
	g.mu.RLock()
	defer g.mu.RUnlock()
	return g.enabled && g.state == ModuleStateActive
}

// Close signals the module to stop accepting new tasks. It moves an active
// gate to the closing state and stamps closedAt; calling it in any other
// state has no effect.
func (g *ModuleGate) Close() {
	g.mu.Lock()
	defer g.mu.Unlock()
	if g.state != ModuleStateActive {
		return
	}
	g.state = ModuleStateClosing
	g.stampClosedAtLocked()
}

// MarkClosed marks the module as fully closed (no in-flight tasks) and
// disables it. When the gate is closed without a prior Close, closedAt is
// stamped here so the shutdown time is never lost; a timestamp recorded by
// Close is kept.
func (g *ModuleGate) MarkClosed() {
	g.mu.Lock()
	defer g.mu.Unlock()
	g.stampClosedAtLocked()
	g.state = ModuleStateClosed
	g.enabled = false
}

// State returns the current module state.
func (g *ModuleGate) State() ModuleState {
	g.mu.RLock()
	defer g.mu.RUnlock()
	return g.state
}

// stampClosedAtLocked sets closedAt to the current UTC time if it has not
// been set yet. The caller must hold g.mu for writing.
func (g *ModuleGate) stampClosedAtLocked() {
	if g.closedAt != nil {
		return
	}
	now := time.Now().UTC()
	g.closedAt = &now
}
|
||||
|
||||
// ModuleController manages all module gates
|
||||
type ModuleController struct {
|
||||
probes *ModuleGate
|
||||
discovery *ModuleGate
|
||||
admission *ModuleGate
|
||||
publish *ModuleGate
|
||||
}
|
||||
|
||||
func NewModuleController(enabled bool) *ModuleController {
|
||||
return &ModuleController{
|
||||
probes: NewModuleGate(enabled),
|
||||
discovery: NewModuleGate(enabled),
|
||||
admission: NewModuleGate(enabled),
|
||||
publish: NewModuleGate(enabled),
|
||||
}
|
||||
}
|
||||
|
||||
// ShutdownInitiate closes all modules (stop accepting new tasks)
|
||||
func (c *ModuleController) ShutdownInitiate() {
|
||||
c.probes.Close()
|
||||
c.discovery.Close()
|
||||
c.admission.Close()
|
||||
c.publish.Close()
|
||||
}
|
||||
|
||||
// ShutdownComplete marks all modules as fully closed
|
||||
func (c *ModuleController) ShutdownComplete() {
|
||||
c.probes.MarkClosed()
|
||||
c.discovery.MarkClosed()
|
||||
c.admission.MarkClosed()
|
||||
c.publish.MarkClosed()
|
||||
}
|
||||
|
||||
// IsInflight returns true if any module still has in-flight tasks
|
||||
func (c *ModuleController) IsInflight() bool {
|
||||
return c.probes.State() == ModuleStateClosing ||
|
||||
c.discovery.State() == ModuleStateClosing ||
|
||||
c.admission.State() == ModuleStateClosing ||
|
||||
c.publish.State() == ModuleStateClosing
|
||||
}
|
||||
|
||||
// GetModuleState returns the state of a specific module
|
||||
func (c *ModuleController) GetModuleState(name string) ModuleState {
|
||||
switch name {
|
||||
case "probes":
|
||||
return c.probes.State()
|
||||
case "discovery":
|
||||
return c.discovery.State()
|
||||
case "admission":
|
||||
return c.admission.State()
|
||||
case "publish":
|
||||
return c.publish.State()
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Status returns a snapshot of all module states
|
||||
type ModuleStatus struct {
|
||||
Probes ModuleState `json:"probes"`
|
||||
Discovery ModuleState `json:"discovery"`
|
||||
Admission ModuleState `json:"admission"`
|
||||
Publish ModuleState `json:"publish"`
|
||||
}
|
||||
|
||||
func (c *ModuleController) Status() ModuleStatus {
|
||||
return ModuleStatus{
|
||||
Probes: c.probes.State(),
|
||||
Discovery: c.discovery.State(),
|
||||
Admission: c.admission.State(),
|
||||
Publish: c.publish.State(),
|
||||
}
|
||||
}
|
||||
|
||||
// RejectIfNotEnabled returns an error if the module is not enabled
|
||||
func (g *ModuleGate) RejectIfNotEnabled(moduleName string) error {
|
||||
if !g.IsEnabled() {
|
||||
return ErrModuleClosed
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var ErrModuleClosed = &ModuleClosedError{}
|
||||
|
||||
type ModuleClosedError struct{}
|
||||
|
||||
func (e *ModuleClosedError) Error() string {
|
||||
return "module is not accepting new tasks"
|
||||
}
|
||||
124
internal/control/module_test.go
Normal file
124
internal/control/module_test.go
Normal file
@@ -0,0 +1,124 @@
|
||||
package control
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestModuleGate_IsEnabled(t *testing.T) {
|
||||
g := NewModuleGate(true)
|
||||
if !g.IsEnabled() {
|
||||
t.Fatal("expected enabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleGate_IsDisabled(t *testing.T) {
|
||||
g := NewModuleGate(false)
|
||||
if g.IsEnabled() {
|
||||
t.Fatal("expected disabled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleGate_Close(t *testing.T) {
|
||||
g := NewModuleGate(true)
|
||||
g.Close()
|
||||
if g.State() != ModuleStateClosing {
|
||||
t.Fatalf("expected closing, got: %s", g.State())
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleGate_MarkClosed(t *testing.T) {
|
||||
g := NewModuleGate(true)
|
||||
g.Close()
|
||||
g.MarkClosed()
|
||||
if g.State() != ModuleStateClosed {
|
||||
t.Fatalf("expected closed, got: %s", g.State())
|
||||
}
|
||||
if g.IsEnabled() {
|
||||
t.Fatal("expected not enabled after closed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleGate_RejectIfNotEnabled(t *testing.T) {
|
||||
g := NewModuleGate(true)
|
||||
err := g.RejectIfNotEnabled("test")
|
||||
if err != nil {
|
||||
t.Fatal("expected no error when enabled")
|
||||
}
|
||||
|
||||
g.Close()
|
||||
err = g.RejectIfNotEnabled("test")
|
||||
if err == nil {
|
||||
t.Fatal("expected error when closing")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleController_ShutdownInitiate(t *testing.T) {
|
||||
c := NewModuleController(true)
|
||||
c.ShutdownInitiate()
|
||||
|
||||
if c.probes.State() != ModuleStateClosing {
|
||||
t.Fatalf("probes should be closing, got: %s", c.probes.State())
|
||||
}
|
||||
if c.discovery.State() != ModuleStateClosing {
|
||||
t.Fatalf("discovery should be closing, got: %s", c.discovery.State())
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleController_ShutdownComplete(t *testing.T) {
|
||||
c := NewModuleController(true)
|
||||
c.ShutdownInitiate()
|
||||
c.ShutdownComplete()
|
||||
|
||||
if c.probes.State() != ModuleStateClosed {
|
||||
t.Fatalf("probes should be closed, got: %s", c.probes.State())
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleController_IsInflight(t *testing.T) {
|
||||
c := NewModuleController(true)
|
||||
c.ShutdownInitiate()
|
||||
|
||||
if !c.IsInflight() {
|
||||
t.Fatal("expected in-flight during closing")
|
||||
}
|
||||
|
||||
c.ShutdownComplete()
|
||||
|
||||
if c.IsInflight() {
|
||||
t.Fatal("expected not in-flight after closed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleController_GetModuleState(t *testing.T) {
|
||||
c := NewModuleController(true)
|
||||
|
||||
if c.GetModuleState("probes") != ModuleStateActive {
|
||||
t.Fatalf("expected active, got: %s", c.GetModuleState("probes"))
|
||||
}
|
||||
if c.GetModuleState("unknown") != "" {
|
||||
t.Fatalf("expected empty for unknown module")
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleController_Status(t *testing.T) {
|
||||
c := NewModuleController(true)
|
||||
status := c.Status()
|
||||
|
||||
if status.Probes != ModuleStateActive {
|
||||
t.Fatalf("expected active, got: %s", status.Probes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestModuleGate_ClosedAt(t *testing.T) {
|
||||
g := NewModuleGate(true)
|
||||
g.Close()
|
||||
|
||||
if g.State() != ModuleStateClosing {
|
||||
t.Fatal("expected closing state")
|
||||
}
|
||||
|
||||
// closedAt should be set when entering closing state
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
_ = g.closedAt // not nil when closing
|
||||
}
|
||||
161
internal/discovery/scheduler.go
Normal file
161
internal/discovery/scheduler.go
Normal file
@@ -0,0 +1,161 @@
|
||||
package discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/integration"
|
||||
)
|
||||
|
||||
// SchedulerTrigger identifies what caused a discovery run.
type SchedulerTrigger int

// Known triggers, numbered from zero in declaration order.
const (
	// TriggerManual has the value 0.
	TriggerManual SchedulerTrigger = iota
	// TriggerScheduled has the value 1.
	TriggerScheduled
	// TriggerNewAccount has the value 2.
	TriggerNewAccount
)
|
||||
|
||||
// SupplierAdapterRegistry holds all registered platform adapters
|
||||
type SupplierAdapterRegistry struct {
|
||||
adapters map[string]integration.SupplierAdapter
|
||||
}
|
||||
|
||||
func NewSupplierAdapterRegistry() *SupplierAdapterRegistry {
|
||||
return &SupplierAdapterRegistry{adapters: make(map[string]integration.SupplierAdapter)}
|
||||
}
|
||||
|
||||
func (r *SupplierAdapterRegistry) Register(adapter integration.SupplierAdapter) {
|
||||
r.adapters[adapter.Platform()] = adapter
|
||||
}
|
||||
|
||||
func (r *SupplierAdapterRegistry) Get(platform string) (integration.SupplierAdapter, bool) {
|
||||
adapter, ok := r.adapters[platform]
|
||||
return adapter, ok
|
||||
}
|
||||
|
||||
func (r *SupplierAdapterRegistry) ListPlatforms() []string {
|
||||
platforms := make([]string, 0, len(r.adapters))
|
||||
for p := range r.adapters {
|
||||
platforms = append(platforms, p)
|
||||
}
|
||||
return platforms
|
||||
}
|
||||
|
||||
// ScanResult summarizes one discovery scan of a single platform.
type ScanResult struct {
	// Platform is the platform that was scanned.
	Platform string
	// NewModels counts the candidates that were newly created by the scan.
	NewModels int
	// RemovedModels lists models that were in candidates but not in the
	// supplier list.
	RemovedModels []string
	// Errors collects the messages of errors encountered during the scan.
	Errors []string
}
|
||||
|
||||
// DiscoveryScheduler orchestrates periodic and on-demand discovery scans
|
||||
type DiscoveryScheduler struct {
|
||||
service *Service
|
||||
registry *SupplierAdapterRegistry
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
func NewDiscoveryScheduler(service *Service, registry *SupplierAdapterRegistry) *DiscoveryScheduler {
|
||||
return &DiscoveryScheduler{
|
||||
service: service,
|
||||
registry: registry,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
// ScanAllPlatforms runs discovery across all registered platforms
|
||||
func (s *DiscoveryScheduler) ScanAllPlatforms(ctx context.Context) ([]ScanResult, error) {
|
||||
platforms := s.registry.ListPlatforms()
|
||||
results := make([]ScanResult, 0, len(platforms))
|
||||
|
||||
for _, platform := range platforms {
|
||||
result, err := s.ScanPlatform(ctx, platform)
|
||||
if err != nil {
|
||||
results = append(results, ScanResult{Platform: platform, Errors: []string{err.Error()}})
|
||||
continue
|
||||
}
|
||||
results = append(results, *result)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// ScanPlatform runs discovery for a single platform
|
||||
func (s *DiscoveryScheduler) ScanPlatform(ctx context.Context, platform string) (*ScanResult, error) {
|
||||
adapter, ok := s.registry.Get(platform)
|
||||
if !ok {
|
||||
return nil, ErrPlatformNotSupported
|
||||
}
|
||||
|
||||
result := &ScanResult{Platform: platform}
|
||||
|
||||
// Get models from the platform
|
||||
// In production these accounts come from the database; here we accept a map for injection
|
||||
accounts := s.loadAccountsForPlatform(ctx, platform)
|
||||
if len(accounts) == 0 {
|
||||
log.Printf("[discovery] no accounts registered for platform %s, skipping", platform)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// Use the first account as the source of models (in production would fan out)
|
||||
account := accounts[0]
|
||||
models, err := adapter.GetModels(ctx, account)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, "GetModels: "+err.Error())
|
||||
return result, err
|
||||
}
|
||||
|
||||
log.Printf("[discovery] platform=%s found %d models", platform, len(models))
|
||||
|
||||
// Record each model as a candidate
|
||||
for _, model := range models {
|
||||
candidateInput := RecordCandidateInput{
|
||||
CandidateID: platform + "-" + model.ModelID,
|
||||
AccountID: account.AccountID,
|
||||
Platform: platform,
|
||||
Model: model.ModelID,
|
||||
Source: "official_api",
|
||||
DiscoveredAt: s.now(),
|
||||
}
|
||||
out, err := s.service.RecordCandidate(ctx, candidateInput)
|
||||
if err != nil {
|
||||
result.Errors = append(result.Errors, "RecordCandidate: "+err.Error())
|
||||
continue
|
||||
}
|
||||
if out.Created {
|
||||
result.NewModels++
|
||||
log.Printf("[discovery] new candidate: platform=%s model=%s", platform, model.ModelID)
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// loadAccountsForPlatform returns supplier accounts for a platform
|
||||
// In production this queries the accounts table; here it returns a seeded default
|
||||
func (s *DiscoveryScheduler) loadAccountsForPlatform(ctx context.Context, platform string) []integration.SupplierAccount {
|
||||
// Production: query supply_accounts where platform = X and status = active
|
||||
// For now: return a placeholder that will work with adapter.GetModels
|
||||
return []integration.SupplierAccount{
|
||||
{
|
||||
AccountID: 1,
|
||||
Platform: platform,
|
||||
APIKey: "",
|
||||
BaseURL: defaultBaseURL(platform),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// defaultBaseURL returns the built-in API base URL for the "openai" and
// "anthropic" platforms and the empty string for any other name. The match
// is exact and case-sensitive.
func defaultBaseURL(platform string) string {
	if platform == "openai" {
		return "https://api.openai.com"
	}
	if platform == "anthropic" {
		return "https://api.anthropic.com"
	}
	return ""
}
|
||||
99
internal/discovery/service.go
Normal file
99
internal/discovery/service.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
// ErrInvalidCandidateInput is returned by RecordCandidate when required input
// is missing or the service has no repository.
var ErrInvalidCandidateInput = errors.New("invalid candidate input")

// ErrPlatformNotSupported is returned when no adapter is registered for the
// requested platform.
var ErrPlatformNotSupported = errors.New("platform not supported in registry")
|
||||
|
||||
type CandidateRepository interface {
|
||||
GetDiscoveryCandidateByIDContext(ctx context.Context, candidateID string) (domain.DiscoveryCandidate, bool)
|
||||
FindDiscoveryCandidateContext(ctx context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool)
|
||||
UpsertDiscoveryCandidateContext(ctx context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate
|
||||
ListDiscoveryCandidatesContext(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
repo CandidateRepository
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// RecordCandidateInput is the request accepted by Service.RecordCandidate.
type RecordCandidateInput struct {
	// CandidateID, Platform, Model and Source are required after trimming
	// surrounding whitespace; AccountID must be positive.
	CandidateID string
	AccountID   int64
	Platform    string
	Model       string
	Source      string

	// ReasonCode is optional.
	ReasonCode string
	// DiscoveredAt is optional; the zero value makes the service use its own
	// clock.
	DiscoveredAt time.Time
}
|
||||
|
||||
type RecordCandidateOutput struct {
|
||||
Candidate domain.DiscoveryCandidate `json:"candidate"`
|
||||
Created bool `json:"created"`
|
||||
}
|
||||
|
||||
func NewService(repo CandidateRepository) *Service {
|
||||
return &Service{
|
||||
repo: repo,
|
||||
now: func() time.Time {
|
||||
return time.Now().UTC()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Service) RecordCandidate(ctx context.Context, input RecordCandidateInput) (RecordCandidateOutput, error) {
|
||||
if s == nil || s.repo == nil {
|
||||
return RecordCandidateOutput{}, ErrInvalidCandidateInput
|
||||
}
|
||||
candidateID := strings.TrimSpace(input.CandidateID)
|
||||
platform := strings.TrimSpace(input.Platform)
|
||||
model := strings.TrimSpace(input.Model)
|
||||
source := strings.TrimSpace(input.Source)
|
||||
reasonCode := strings.TrimSpace(input.ReasonCode)
|
||||
if candidateID == "" || input.AccountID <= 0 || platform == "" || model == "" || source == "" {
|
||||
return RecordCandidateOutput{}, ErrInvalidCandidateInput
|
||||
}
|
||||
if existing, ok := s.repo.GetDiscoveryCandidateByIDContext(ctx, candidateID); ok {
|
||||
return RecordCandidateOutput{Candidate: existing, Created: false}, nil
|
||||
}
|
||||
at := input.DiscoveredAt.UTC()
|
||||
if at.IsZero() {
|
||||
at = s.now()
|
||||
}
|
||||
if existing, ok := s.repo.FindDiscoveryCandidateContext(ctx, input.AccountID, platform, model); ok {
|
||||
existing.Source = source
|
||||
existing.ReasonCode = reasonCode
|
||||
existing.UpdatedAt = at
|
||||
existing.Version++
|
||||
return RecordCandidateOutput{Candidate: s.repo.UpsertDiscoveryCandidateContext(ctx, existing), Created: false}, nil
|
||||
}
|
||||
candidate := domain.DiscoveryCandidate{
|
||||
CandidateID: candidateID,
|
||||
AccountID: input.AccountID,
|
||||
Platform: platform,
|
||||
Model: model,
|
||||
Source: source,
|
||||
Status: domain.DiscoveryCandidateStatusPendingAdmission,
|
||||
ReasonCode: reasonCode,
|
||||
DiscoveredAt: at,
|
||||
UpdatedAt: at,
|
||||
Version: 1,
|
||||
}
|
||||
return RecordCandidateOutput{Candidate: s.repo.UpsertDiscoveryCandidateContext(ctx, candidate), Created: true}, nil
|
||||
}
|
||||
|
||||
func (s *Service) ListCandidates(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate {
|
||||
if s == nil || s.repo == nil {
|
||||
return nil
|
||||
}
|
||||
return s.repo.ListDiscoveryCandidatesContext(ctx, status)
|
||||
}
|
||||
160
internal/discovery/service_test.go
Normal file
160
internal/discovery/service_test.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package discovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestRecordCandidateCreatesPendingAdmissionCandidate(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
at := time.Unix(100, 0).UTC()
|
||||
|
||||
out, err := service.RecordCandidate(context.Background(), RecordCandidateInput{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 10,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "manual_seed",
|
||||
ReasonCode: "new_model",
|
||||
DiscoveredAt: at,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if !out.Created {
|
||||
t.Fatalf("expected created candidate")
|
||||
}
|
||||
if out.Candidate.Status != domain.DiscoveryCandidateStatusPendingAdmission {
|
||||
t.Fatalf("unexpected status: %q", out.Candidate.Status)
|
||||
}
|
||||
if out.Candidate.Version != 1 {
|
||||
t.Fatalf("unexpected version: %d", out.Candidate.Version)
|
||||
}
|
||||
if !out.Candidate.DiscoveredAt.Equal(at) || !out.Candidate.UpdatedAt.Equal(at) {
|
||||
t.Fatalf("unexpected timestamps: %+v", out.Candidate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordCandidateIsIdempotentByCandidateID(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
|
||||
first, err := service.RecordCandidate(context.Background(), RecordCandidateInput{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 10,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "manual_seed",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected first error: %v", err)
|
||||
}
|
||||
second, err := service.RecordCandidate(context.Background(), RecordCandidateInput{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 99,
|
||||
Platform: "other",
|
||||
Model: "other-model",
|
||||
Source: "other_source",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected second error: %v", err)
|
||||
}
|
||||
if second.Created {
|
||||
t.Fatalf("expected idempotent replay")
|
||||
}
|
||||
if second.Candidate.AccountID != first.Candidate.AccountID || second.Candidate.Platform != first.Candidate.Platform || second.Candidate.Model != first.Candidate.Model {
|
||||
t.Fatalf("expected original candidate to be preserved: %+v", second.Candidate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordCandidateDeduplicatesByBusinessKey(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
firstAt := time.Unix(100, 0).UTC()
|
||||
secondAt := time.Unix(200, 0).UTC()
|
||||
|
||||
_, err := service.RecordCandidate(context.Background(), RecordCandidateInput{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 10,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "manual_seed",
|
||||
ReasonCode: "first",
|
||||
DiscoveredAt: firstAt,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected first error: %v", err)
|
||||
}
|
||||
out, err := service.RecordCandidate(context.Background(), RecordCandidateInput{
|
||||
CandidateID: "cand-2",
|
||||
AccountID: 10,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "scan",
|
||||
ReasonCode: "second",
|
||||
DiscoveredAt: secondAt,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected second error: %v", err)
|
||||
}
|
||||
if out.Created {
|
||||
t.Fatalf("expected business-key dedupe")
|
||||
}
|
||||
if out.Candidate.CandidateID != "cand-1" {
|
||||
t.Fatalf("expected original candidate id to be retained: %+v", out.Candidate)
|
||||
}
|
||||
if out.Candidate.Source != "scan" || out.Candidate.ReasonCode != "second" {
|
||||
t.Fatalf("expected metadata update: %+v", out.Candidate)
|
||||
}
|
||||
if out.Candidate.Version != 2 {
|
||||
t.Fatalf("expected version bump, got %d", out.Candidate.Version)
|
||||
}
|
||||
if !out.Candidate.UpdatedAt.Equal(secondAt) {
|
||||
t.Fatalf("expected updated timestamp to change: %+v", out.Candidate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordCandidateRejectsInvalidInput(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
_, err := service.RecordCandidate(context.Background(), RecordCandidateInput{})
|
||||
if err == nil {
|
||||
t.Fatalf("expected invalid input error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestListCandidatesFiltersByStatus(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.UpsertDiscoveryCandidateContext(context.Background(), domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 10,
|
||||
Platform: "openai",
|
||||
Model: "a",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusPendingAdmission,
|
||||
DiscoveredAt: time.Unix(100, 0).UTC(),
|
||||
UpdatedAt: time.Unix(100, 0).UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
repo.UpsertDiscoveryCandidateContext(context.Background(), domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-2",
|
||||
AccountID: 11,
|
||||
Platform: "openai",
|
||||
Model: "b",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusAdmitted,
|
||||
DiscoveredAt: time.Unix(200, 0).UTC(),
|
||||
UpdatedAt: time.Unix(200, 0).UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
service := NewService(repo)
|
||||
items := service.ListCandidates(context.Background(), domain.DiscoveryCandidateStatusPendingAdmission)
|
||||
if len(items) != 1 || items[0].CandidateID != "cand-1" {
|
||||
t.Fatalf("unexpected filtered items: %+v", items)
|
||||
}
|
||||
}
|
||||
132
internal/domain/types.go
Normal file
132
internal/domain/types.go
Normal file
@@ -0,0 +1,132 @@
|
||||
package domain
|
||||
|
||||
import "time"
|
||||
|
||||
// AccountStatus is the string-valued status of a supplier account.
type AccountStatus string

// Known account statuses.
const (
	AccountStatusActive        = AccountStatus("active")
	AccountStatusSuspended     = AccountStatus("suspended")
	AccountStatusDisabled      = AccountStatus("disabled")
	AccountStatusPendingVerify = AccountStatus("pending_verify")
	AccountStatusPendingEnable = AccountStatus("pending_enable")
)
|
||||
|
||||
// ProbeClassification is the string-typed outcome category of a probe.
type ProbeClassification string

// Known ProbeClassification values.
const (
	ProbeClassificationSuccess         ProbeClassification = "success"
	ProbeClassificationExplicitFailure ProbeClassification = "explicit_failure"
	ProbeClassificationInconclusive    ProbeClassification = "inconclusive"
)
|
||||
|
||||
// DiscoveryCandidateStatus is the string-typed status of a discovery candidate.
type DiscoveryCandidateStatus string

// Known DiscoveryCandidateStatus values.
const (
	DiscoveryCandidateStatusPendingAdmission DiscoveryCandidateStatus = "pending_admission"
	DiscoveryCandidateStatusAdmitted         DiscoveryCandidateStatus = "admitted"
	DiscoveryCandidateStatusRejected         DiscoveryCandidateStatus = "rejected"
)
|
||||
|
||||
// GatewaySyncStatus is the string-typed gateway synchronisation state carried
// on a package change event.
type GatewaySyncStatus string

// Known GatewaySyncStatus values.
const (
	GatewaySyncStatusPending GatewaySyncStatus = "pending"
	GatewaySyncStatusApplied GatewaySyncStatus = "applied"
	GatewaySyncStatusFailed  GatewaySyncStatus = "failed"
)

// GatewayAckResult is the string-typed result a consumer reports when it
// acknowledges a package change event.
type GatewayAckResult string

// Known GatewayAckResult values.
const (
	GatewayAckResultApplied GatewayAckResult = "applied"
	GatewayAckResultFailed  GatewayAckResult = "failed"
)

// SyncStatus maps an ack result onto the corresponding GatewaySyncStatus.
// Any value other than the two known results maps to GatewaySyncStatusPending.
func (r GatewayAckResult) SyncStatus() GatewaySyncStatus {
	if r == GatewayAckResultApplied {
		return GatewaySyncStatusApplied
	}
	if r == GatewayAckResultFailed {
		return GatewaySyncStatusFailed
	}
	return GatewaySyncStatusPending
}
|
||||
|
||||
type ProbeResult struct {
|
||||
AccountID int64
|
||||
Classification ProbeClassification
|
||||
ReasonCode string
|
||||
ObservedAt time.Time
|
||||
}
|
||||
|
||||
type AccountRoutingState struct {
|
||||
AccountID int64 `json:"account_id"`
|
||||
Platform string `json:"platform"`
|
||||
AccountStatus AccountStatus `json:"account_status"`
|
||||
RoutingEnabled bool `json:"routing_enabled"`
|
||||
RiskScore int `json:"risk_score"`
|
||||
ReasonCode string `json:"reason_code"`
|
||||
LastProbeAt time.Time `json:"last_probe_at"`
|
||||
Version int64 `json:"version"`
|
||||
}
|
||||
|
||||
type PackageChangeEvent struct {
|
||||
EventID string `json:"event_id"`
|
||||
EventType string `json:"event_type"`
|
||||
PackageID int64 `json:"package_id"`
|
||||
Platform string `json:"platform"`
|
||||
Model string `json:"model"`
|
||||
OccurredAt time.Time `json:"occurred_at"`
|
||||
Version int64 `json:"version"`
|
||||
GatewaySyncStatus GatewaySyncStatus `json:"gateway_sync_status"`
|
||||
Consumer string `json:"consumer,omitempty"`
|
||||
ConsumerDetail string `json:"consumer_detail,omitempty"`
|
||||
AckedAt *time.Time `json:"acked_at,omitempty"`
|
||||
}
|
||||
|
||||
type PackageChangeAck struct {
|
||||
EventID string `json:"event_id"`
|
||||
Consumer string `json:"consumer"`
|
||||
Result GatewayAckResult `json:"result"`
|
||||
Detail string `json:"detail,omitempty"`
|
||||
AckedAt time.Time `json:"acked_at"`
|
||||
SyncState GatewaySyncStatus `json:"gateway_sync_status"`
|
||||
}
|
||||
|
||||
// GatewayAppliedSnapshot holds, for one consumer, the identifying fields of
// the last event recorded in the snapshot (event, package, platform, model,
// version, result) and the time the snapshot was updated.
type GatewayAppliedSnapshot struct {
	Consumer           string    `json:"consumer"`
	LastEventID        string    `json:"last_event_id"`
	LastPackageID      int64     `json:"last_package_id"`
	LastPlatform       string    `json:"last_platform"`
	LastModel          string    `json:"last_model"`
	LastAppliedVersion int64     `json:"last_applied_version"`
	LastResult         string    `json:"last_result"`
	UpdatedAt          time.Time `json:"updated_at"`
}
|
||||
|
||||
type DiscoveryCandidate struct {
|
||||
CandidateID string `json:"candidate_id"`
|
||||
AccountID int64 `json:"account_id"`
|
||||
Platform string `json:"platform"`
|
||||
Model string `json:"model"`
|
||||
Source string `json:"source"`
|
||||
Status DiscoveryCandidateStatus `json:"status"`
|
||||
ReasonCode string `json:"reason_code,omitempty"`
|
||||
DiscoveredAt time.Time `json:"discovered_at"`
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
Version int64 `json:"version"`
|
||||
}
|
||||
|
||||
// SupplyPackage is the JSON-serialisable record of a supply package: its ID,
// platform, model, status, source, timestamps and version.
type SupplyPackage struct {
	PackageID int64     `json:"package_id"`
	Platform  string    `json:"platform"`
	Model     string    `json:"model"`
	Status    string    `json:"status"` // draft, active, deprecated
	Source    string    `json:"source"`
	CreatedAt time.Time `json:"created_at"`
	UpdatedAt time.Time `json:"updated_at"`
	Version   int64     `json:"version"`
}
|
||||
110
internal/gatewayconsumer/service.go
Normal file
110
internal/gatewayconsumer/service.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package gatewayconsumer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
// ErrInvalidConsumeInput is the sentinel error ConsumeOnce returns when the
// service is missing a dependency or the applier reports an unrecognised
// result.
var ErrInvalidConsumeInput = errors.New("invalid consume input")
|
||||
|
||||
type PackageChangeRepository interface {
|
||||
ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string)
|
||||
AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt time.Time) (domain.PackageChangeEvent, error)
|
||||
UpsertGatewayAppliedSnapshot(snapshot domain.GatewayAppliedSnapshot) domain.GatewayAppliedSnapshot
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
repo PackageChangeRepository
|
||||
now func() time.Time
|
||||
applier func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string)
|
||||
consumer string
|
||||
}
|
||||
|
||||
// ConsumeOnceInput carries the parameters for one ConsumeOnce pass.
type ConsumeOnceInput struct {
	// Consumer names the acknowledging consumer; blank means the service default.
	Consumer string
	// Cursor is handed to the repository to select the events to read.
	Cursor string
}
|
||||
|
||||
type ConsumeOnceOutput struct {
|
||||
Consumer string `json:"consumer"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
Items []ConsumedPackageChangeItem `json:"items"`
|
||||
}
|
||||
|
||||
type ConsumedPackageChangeItem struct {
|
||||
EventID string `json:"event_id"`
|
||||
PackageID int64 `json:"package_id"`
|
||||
GatewaySyncStatus domain.GatewaySyncStatus `json:"gateway_sync_status"`
|
||||
Result domain.GatewayAckResult `json:"result"`
|
||||
Detail string `json:"detail,omitempty"`
|
||||
}
|
||||
|
||||
func NewService(repo PackageChangeRepository) *Service {
|
||||
return &Service{
|
||||
repo: repo,
|
||||
now: func() time.Time {
|
||||
return time.Now().UTC()
|
||||
},
|
||||
consumer: "gateway",
|
||||
applier: func(_ context.Context, event domain.PackageChangeEvent) (domain.GatewayAckResult, string) {
|
||||
if strings.Contains(strings.ToLower(event.Model), "fail") {
|
||||
return domain.GatewayAckResultFailed, "simulated apply failure"
|
||||
}
|
||||
return domain.GatewayAckResultApplied, "applied to gateway snapshot"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Service) SetApplier(applier func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string)) {
|
||||
s.applier = applier
|
||||
}
|
||||
|
||||
func (s *Service) ConsumeOnce(ctx context.Context, input ConsumeOnceInput) (ConsumeOnceOutput, error) {
|
||||
if s == nil || s.repo == nil || s.applier == nil {
|
||||
return ConsumeOnceOutput{}, ErrInvalidConsumeInput
|
||||
}
|
||||
consumer := strings.TrimSpace(input.Consumer)
|
||||
if consumer == "" {
|
||||
consumer = s.consumer
|
||||
}
|
||||
items, nextCursor := s.repo.ListPackageEventsAfter(strings.TrimSpace(input.Cursor))
|
||||
result := ConsumeOnceOutput{Consumer: consumer, NextCursor: nextCursor, Items: make([]ConsumedPackageChangeItem, 0, len(items))}
|
||||
for _, event := range items {
|
||||
if event.GatewaySyncStatus != domain.GatewaySyncStatusPending {
|
||||
continue
|
||||
}
|
||||
ackResult, detail := s.applier(ctx, event)
|
||||
if ackResult != domain.GatewayAckResultApplied && ackResult != domain.GatewayAckResultFailed {
|
||||
return ConsumeOnceOutput{}, ErrInvalidConsumeInput
|
||||
}
|
||||
ackedAt := s.now()
|
||||
if ackResult == domain.GatewayAckResultApplied {
|
||||
s.repo.UpsertGatewayAppliedSnapshot(domain.GatewayAppliedSnapshot{
|
||||
Consumer: consumer,
|
||||
LastEventID: event.EventID,
|
||||
LastPackageID: event.PackageID,
|
||||
LastPlatform: event.Platform,
|
||||
LastModel: event.Model,
|
||||
LastAppliedVersion: event.Version,
|
||||
LastResult: string(ackResult),
|
||||
UpdatedAt: ackedAt,
|
||||
})
|
||||
}
|
||||
updated, err := s.repo.AckPackageEvent(event.EventID, consumer, ackResult, detail, ackedAt)
|
||||
if err != nil {
|
||||
return ConsumeOnceOutput{}, err
|
||||
}
|
||||
result.Items = append(result.Items, ConsumedPackageChangeItem{
|
||||
EventID: updated.EventID,
|
||||
PackageID: updated.PackageID,
|
||||
GatewaySyncStatus: updated.GatewaySyncStatus,
|
||||
Result: ackResult,
|
||||
Detail: detail,
|
||||
})
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
89
internal/gatewayconsumer/service_test.go
Normal file
89
internal/gatewayconsumer/service_test.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package gatewayconsumer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestServiceConsumeOnceAppliedAndFailed(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{
|
||||
EventID: "evt-applied",
|
||||
EventType: "supply_package_published",
|
||||
PackageID: 101,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Version: 3,
|
||||
OccurredAt: time.Unix(10, 0).UTC(),
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
})
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{
|
||||
EventID: "evt-failed",
|
||||
EventType: "supply_package_published",
|
||||
PackageID: 102,
|
||||
Platform: "openai",
|
||||
Model: "gpt-fail-model",
|
||||
Version: 4,
|
||||
OccurredAt: time.Unix(20, 0).UTC(),
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
})
|
||||
|
||||
service := NewService(repo)
|
||||
service.now = func() time.Time { return time.Unix(30, 0).UTC() }
|
||||
|
||||
out, err := service.ConsumeOnce(context.Background(), ConsumeOnceInput{Consumer: "gateway"})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(out.Items) != 2 {
|
||||
t.Fatalf("unexpected item count: %d", len(out.Items))
|
||||
}
|
||||
if out.Items[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
|
||||
t.Fatalf("unexpected first status: %+v", out.Items[0])
|
||||
}
|
||||
if out.Items[1].GatewaySyncStatus != domain.GatewaySyncStatusFailed {
|
||||
t.Fatalf("unexpected second status: %+v", out.Items[1])
|
||||
}
|
||||
|
||||
events := repo.ListPackageEvents()
|
||||
if events[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
|
||||
t.Fatalf("expected applied event, got %+v", events[0])
|
||||
}
|
||||
if events[1].GatewaySyncStatus != domain.GatewaySyncStatusFailed {
|
||||
t.Fatalf("expected failed event, got %+v", events[1])
|
||||
}
|
||||
snapshot, ok := repo.GetGatewayAppliedSnapshot("gateway")
|
||||
if !ok {
|
||||
t.Fatal("expected applied snapshot")
|
||||
}
|
||||
if snapshot.LastEventID != "evt-applied" || snapshot.LastPackageID != 101 {
|
||||
t.Fatalf("unexpected snapshot: %+v", snapshot)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServiceConsumeOnceRejectsInvalidApplierResult(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{
|
||||
EventID: "evt-1",
|
||||
EventType: "supply_package_published",
|
||||
PackageID: 101,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Version: 3,
|
||||
OccurredAt: time.Unix(10, 0).UTC(),
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
})
|
||||
service := NewService(repo)
|
||||
service.SetApplier(func(context.Context, domain.PackageChangeEvent) (domain.GatewayAckResult, string) {
|
||||
return domain.GatewayAckResult("unknown"), "bad"
|
||||
})
|
||||
|
||||
_, err := service.ConsumeOnce(context.Background(), ConsumeOnceInput{})
|
||||
if err != ErrInvalidConsumeInput {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
12
internal/httpapi/parse.go
Normal file
12
internal/httpapi/parse.go
Normal file
@@ -0,0 +1,12 @@
|
||||
package httpapi
|
||||
|
||||
import "strconv"
|
||||
|
||||
// parseInt64 parses input as a base-10 signed 64-bit integer.
//
// On success the value is written to *target and also returned. On failure
// *target is left untouched and the strconv error is returned with 0.
// target must be non-nil.
func parseInt64(input string, target *int64) (int64, error) {
	parsed, parseErr := strconv.ParseInt(input, 10, 64)
	if parseErr == nil {
		*target = parsed
		return parsed, nil
	}
	return 0, parseErr
}
|
||||
415
internal/httpapi/server.go
Normal file
415
internal/httpapi/server.go
Normal file
@@ -0,0 +1,415 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/admission"
|
||||
"supply-intelligence/internal/discovery"
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
"supply-intelligence/internal/probe"
|
||||
"supply-intelligence/internal/publish"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
type Server struct {
|
||||
repo *repository.MemoryRepository
|
||||
probeService *probe.Service
|
||||
publishService *publish.Service
|
||||
gatewayConsumerService *gatewayconsumer.Service
|
||||
discoveryService *discovery.Service
|
||||
admissionService *admission.Service
|
||||
}
|
||||
|
||||
type packageChangesResponse struct {
|
||||
Items []domain.PackageChangeEvent `json:"items"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
}
|
||||
|
||||
type discoveryCandidatesResponse struct {
|
||||
Items []domain.DiscoveryCandidate `json:"items"`
|
||||
}
|
||||
|
||||
func NewServer(repo *repository.MemoryRepository, probeService *probe.Service, publishService *publish.Service, gatewayConsumerService *gatewayconsumer.Service, discoveryService *discovery.Service, admissionService *admission.Service) *Server {
|
||||
return &Server{repo: repo, probeService: probeService, publishService: publishService, gatewayConsumerService: gatewayConsumerService, discoveryService: discoveryService, admissionService: admissionService}
|
||||
}
|
||||
|
||||
func (s *Server) Routes() http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/healthz", s.handleHealth)
|
||||
mux.HandleFunc("/internal/supply-intelligence/accounts/", s.handleGetRoutingState)
|
||||
mux.HandleFunc("/internal/supply-intelligence/probe/evaluate", s.handleEvaluateProbe)
|
||||
mux.HandleFunc("/internal/supply-intelligence/publish/package-event", s.handlePublishPackageEvent)
|
||||
mux.HandleFunc("/internal/supply-intelligence/discovery/candidates", s.handleDiscoveryCandidates)
|
||||
mux.HandleFunc("/internal/supply-intelligence/gateway/package-changes", s.handleListPackageChanges)
|
||||
mux.HandleFunc("/internal/supply-intelligence/gateway/package-changes/", s.handleAckPackageChange)
|
||||
mux.HandleFunc("/internal/supply-intelligence/gateway/consume-once", s.handleConsumeOnce)
|
||||
mux.HandleFunc("/internal/supply-intelligence/admission/run", s.handleAdmissionRun)
|
||||
mux.HandleFunc("/internal/supply-intelligence/admission/candidates", s.handleAdmissionCandidates)
|
||||
return mux
|
||||
}
|
||||
|
||||
func (s *Server) handleHealth(w http.ResponseWriter, _ *http.Request) {
|
||||
writeJSON(w, http.StatusOK, map[string]string{"status": "ok"})
|
||||
}
|
||||
|
||||
func (s *Server) handleGetRoutingState(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
prefix := "/internal/supply-intelligence/accounts/"
|
||||
path := strings.TrimPrefix(r.URL.Path, prefix)
|
||||
if !strings.HasSuffix(path, "/routing-state") {
|
||||
writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
|
||||
return
|
||||
}
|
||||
accountIDPart := strings.TrimSuffix(path, "/routing-state")
|
||||
var accountID int64
|
||||
if _, err := parseInt64(accountIDPart, &accountID); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_account_id"})
|
||||
return
|
||||
}
|
||||
state, ok := s.repo.GetRoutingState(accountID)
|
||||
if !ok {
|
||||
writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, state)
|
||||
}
|
||||
|
||||
func (s *Server) handleEvaluateProbe(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
if s.probeService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "probe_service_unavailable"})
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
AccountID int64 `json:"account_id"`
|
||||
Platform string `json:"platform"`
|
||||
CurrentStatus string `json:"current_status"`
|
||||
StatusCode int `json:"status_code"`
|
||||
TransportError string `json:"transport_error"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
if payload.AccountID <= 0 {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_account_id"})
|
||||
return
|
||||
}
|
||||
if payload.Platform == "" {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_platform"})
|
||||
return
|
||||
}
|
||||
if payload.CurrentStatus == "" {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_current_status"})
|
||||
return
|
||||
}
|
||||
|
||||
var transportErr error
|
||||
if payload.TransportError != "" {
|
||||
transportErr = errors.New(payload.TransportError)
|
||||
}
|
||||
|
||||
result, err := s.probeService.EvaluateHTTPResult(context.Background(), probe.EvaluateInput{
|
||||
AccountID: payload.AccountID,
|
||||
Platform: payload.Platform,
|
||||
CurrentStatus: domainAccountStatus(payload.CurrentStatus),
|
||||
StatusCode: payload.StatusCode,
|
||||
TransportError: transportErr,
|
||||
})
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, result)
|
||||
}
|
||||
|
||||
func (s *Server) handlePublishPackageEvent(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
if s.publishService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "publish_service_unavailable"})
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
EventID string `json:"event_id"`
|
||||
PackageID int64 `json:"package_id"`
|
||||
Platform string `json:"platform"`
|
||||
Model string `json:"model"`
|
||||
Version int64 `json:"version"`
|
||||
OccurredAt string `json:"occurred_at"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
|
||||
var occurredAt time.Time
|
||||
if payload.OccurredAt != "" {
|
||||
parsed, err := time.Parse(time.RFC3339, payload.OccurredAt)
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_occurred_at"})
|
||||
return
|
||||
}
|
||||
occurredAt = parsed
|
||||
}
|
||||
|
||||
event, err := s.publishService.RecordPackagePublished(r.Context(), publish.RecordPackagePublishedInput{
|
||||
EventID: payload.EventID,
|
||||
PackageID: payload.PackageID,
|
||||
Platform: payload.Platform,
|
||||
Model: payload.Model,
|
||||
Version: payload.Version,
|
||||
OccurredAt: occurredAt,
|
||||
})
|
||||
if err != nil {
|
||||
if errors.Is(err, publish.ErrInvalidPublishInput) {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_publish_input"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, event)
|
||||
}
|
||||
|
||||
func (s *Server) handleDiscoveryCandidates(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.Method {
|
||||
case http.MethodPost:
|
||||
s.handleCreateDiscoveryCandidate(w, r)
|
||||
case http.MethodGet:
|
||||
s.handleListDiscoveryCandidates(w, r)
|
||||
default:
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) handleCreateDiscoveryCandidate(w http.ResponseWriter, r *http.Request) {
|
||||
if s.discoveryService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "discovery_service_unavailable"})
|
||||
return
|
||||
}
|
||||
var payload struct {
|
||||
CandidateID string `json:"candidate_id"`
|
||||
AccountID int64 `json:"account_id"`
|
||||
Platform string `json:"platform"`
|
||||
Model string `json:"model"`
|
||||
Source string `json:"source"`
|
||||
ReasonCode string `json:"reason_code"`
|
||||
DiscoveredAt string `json:"discovered_at"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
var discoveredAt time.Time
|
||||
if strings.TrimSpace(payload.DiscoveredAt) != "" {
|
||||
parsed, err := time.Parse(time.RFC3339, payload.DiscoveredAt)
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_discovered_at"})
|
||||
return
|
||||
}
|
||||
discoveredAt = parsed
|
||||
}
|
||||
out, err := s.discoveryService.RecordCandidate(r.Context(), discovery.RecordCandidateInput{
|
||||
CandidateID: payload.CandidateID,
|
||||
AccountID: payload.AccountID,
|
||||
Platform: payload.Platform,
|
||||
Model: payload.Model,
|
||||
Source: payload.Source,
|
||||
ReasonCode: payload.ReasonCode,
|
||||
DiscoveredAt: discoveredAt,
|
||||
})
|
||||
if err != nil {
|
||||
if errors.Is(err, discovery.ErrInvalidCandidateInput) {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_candidate_input"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
func (s *Server) handleListDiscoveryCandidates(w http.ResponseWriter, r *http.Request) {
|
||||
if s.discoveryService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "discovery_service_unavailable"})
|
||||
return
|
||||
}
|
||||
status, ok := parseDiscoveryCandidateStatus(strings.TrimSpace(r.URL.Query().Get("status")))
|
||||
if !ok {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_status"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, discoveryCandidatesResponse{Items: s.discoveryService.ListCandidates(r.Context(), status)})
|
||||
}
|
||||
|
||||
func parseDiscoveryCandidateStatus(raw string) (domain.DiscoveryCandidateStatus, bool) {
|
||||
if raw == "" {
|
||||
return "", true
|
||||
}
|
||||
status := domain.DiscoveryCandidateStatus(raw)
|
||||
switch status {
|
||||
case domain.DiscoveryCandidateStatusPendingAdmission, domain.DiscoveryCandidateStatusAdmitted, domain.DiscoveryCandidateStatusRejected:
|
||||
return status, true
|
||||
default:
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) handleListPackageChanges(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
items, nextCursor := s.repo.ListPackageEventsAfter(strings.TrimSpace(r.URL.Query().Get("cursor")))
|
||||
writeJSON(w, http.StatusOK, packageChangesResponse{Items: items, NextCursor: nextCursor})
|
||||
}
|
||||
|
||||
func (s *Server) handleAckPackageChange(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
prefix := "/internal/supply-intelligence/gateway/package-changes/"
|
||||
path := strings.TrimPrefix(r.URL.Path, prefix)
|
||||
if !strings.HasSuffix(path, "/ack") {
|
||||
writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
|
||||
return
|
||||
}
|
||||
eventID := strings.TrimSuffix(path, "/ack")
|
||||
var payload struct {
|
||||
Consumer string `json:"consumer"`
|
||||
Result string `json:"result"`
|
||||
Detail string `json:"detail"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
ackResult := domain.GatewayAckResult(payload.Result)
|
||||
if !repository.IsGatewayAckResult(ackResult) {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_result"})
|
||||
return
|
||||
}
|
||||
consumer := strings.TrimSpace(payload.Consumer)
|
||||
if consumer == "" {
|
||||
consumer = "gateway"
|
||||
}
|
||||
_, err := s.repo.AckPackageEvent(eventID, consumer, ackResult, payload.Detail, time.Now().UTC())
|
||||
if err != nil {
|
||||
if errors.Is(err, repository.ErrEventNotFound) {
|
||||
writeJSON(w, http.StatusNotFound, map[string]string{"error": "not_found"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "internal_error"})
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}
|
||||
|
||||
func (s *Server) handleConsumeOnce(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
if s.gatewayConsumerService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "gateway_consumer_unavailable"})
|
||||
return
|
||||
}
|
||||
var payload struct {
|
||||
Consumer string `json:"consumer"`
|
||||
Cursor string `json:"cursor"`
|
||||
}
|
||||
if r.Body != nil {
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil && err.Error() != "EOF" {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
}
|
||||
out, err := s.gatewayConsumerService.ConsumeOnce(r.Context(), gatewayconsumer.ConsumeOnceInput{Consumer: payload.Consumer, Cursor: payload.Cursor})
|
||||
if err != nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "consume_failed"})
|
||||
return
|
||||
}
|
||||
writeJSON(w, http.StatusOK, out)
|
||||
}
|
||||
|
||||
// writeJSON sends body as a JSON response with the given status code.
//
// The body is marshalled before anything is written, so a value that cannot
// be encoded yields a 500 with {"error":"internal_error"} instead of the
// requested status with an empty body. For encodable values the bytes on the
// wire are unchanged: the JSON encoding followed by a newline, with
// Content-Type set to application/json.
func writeJSON(w http.ResponseWriter, status int, body any) {
	payload, err := json.Marshal(body)
	if err != nil {
		status = http.StatusInternalServerError
		payload = []byte(`{"error":"internal_error"}`)
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(status)
	// The status line is already committed; a failed write means the client
	// went away, so there is nothing further to report.
	_, _ = w.Write(append(payload, '\n'))
}
|
||||
|
||||
// handleAdmissionRun runs admission test for a specific candidate
|
||||
func (s *Server) handleAdmissionRun(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
if s.admissionService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_service_unavailable"})
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
CandidateID string `json:"candidate_id"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "invalid_json"})
|
||||
return
|
||||
}
|
||||
if strings.TrimSpace(payload.CandidateID) == "" {
|
||||
writeJSON(w, http.StatusBadRequest, map[string]string{"error": "missing_candidate_id"})
|
||||
return
|
||||
}
|
||||
|
||||
result, err := s.admissionService.RunAdmission(r.Context(), payload.CandidateID)
|
||||
if err != nil {
|
||||
switch {
|
||||
case errors.Is(err, admission.ErrCandidateNotFound):
|
||||
writeJSON(w, http.StatusNotFound, map[string]string{"error": "candidate_not_found"})
|
||||
case errors.Is(err, admission.ErrCandidateNotRunnable):
|
||||
writeJSON(w, http.StatusConflict, map[string]string{"error": "candidate_not_runnable"})
|
||||
default:
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_run_failed"})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, http.StatusOK, result)
|
||||
}
|
||||
|
||||
// handleAdmissionCandidates lists candidates pending admission testing
|
||||
func (s *Server) handleAdmissionCandidates(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
writeJSON(w, http.StatusMethodNotAllowed, map[string]string{"error": "method_not_allowed"})
|
||||
return
|
||||
}
|
||||
if s.admissionService == nil {
|
||||
writeJSON(w, http.StatusInternalServerError, map[string]string{"error": "admission_service_unavailable"})
|
||||
return
|
||||
}
|
||||
|
||||
candidates := s.admissionService.GetRunnableCandidates(r.Context())
|
||||
writeJSON(w, http.StatusOK, map[string]any{"items": candidates})
|
||||
}
|
||||
|
||||
func domainAccountStatus(raw string) domain.AccountStatus {
|
||||
return domain.AccountStatus(raw)
|
||||
}
|
||||
149
internal/httpapi/server_integration_test.go
Normal file
149
internal/httpapi/server_integration_test.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package httpapi_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"supply-intelligence/internal/app"
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/probe"
|
||||
)
|
||||
|
||||
func TestApplicationServerRoutes(t *testing.T) {
|
||||
application := app.New()
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/probe/evaluate", bytes.NewBufferString(`{"account_id":7,"platform":"openai","current_status":"active","status_code":401}`))
|
||||
rr := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
|
||||
var result probe.EvaluateOutput
|
||||
if err := json.NewDecoder(rr.Body).Decode(&result); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if result.RoutingState.AccountID != 7 || result.RoutingState.AccountStatus != "suspended" {
|
||||
t.Fatalf("unexpected state: %+v", result.RoutingState)
|
||||
}
|
||||
|
||||
getReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/accounts/7/routing-state", nil)
|
||||
getRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(getRR, getReq)
|
||||
if getRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected get status: %d body=%s", getRR.Code, getRR.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestPublishConsumeOnceListAppliedIntegration(t *testing.T) {
|
||||
application := app.New()
|
||||
|
||||
publishReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", bytes.NewBufferString(`{"event_id":"evt-integration-1","package_id":501,"platform":"openai","model":"gpt-4.1-mini","version":9,"occurred_at":"2026-05-06T20:30:00Z"}`))
|
||||
publishRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(publishRR, publishReq)
|
||||
if publishRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected publish status: %d body=%s", publishRR.Code, publishRR.Body.String())
|
||||
}
|
||||
|
||||
consumeReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", bytes.NewBufferString(`{"consumer":"gateway"}`))
|
||||
consumeRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(consumeRR, consumeReq)
|
||||
if consumeRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected consume status: %d body=%s", consumeRR.Code, consumeRR.Body.String())
|
||||
}
|
||||
|
||||
listReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil)
|
||||
listRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(listRR, listReq)
|
||||
if listRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected list status: %d body=%s", listRR.Code, listRR.Body.String())
|
||||
}
|
||||
var listResp struct {
|
||||
Items []domain.PackageChangeEvent `json:"items"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
}
|
||||
if err := json.NewDecoder(listRR.Body).Decode(&listResp); err != nil {
|
||||
t.Fatalf("decode list error: %v", err)
|
||||
}
|
||||
if len(listResp.Items) != 1 || listResp.Items[0].EventID != "evt-integration-1" {
|
||||
t.Fatalf("unexpected list items: %+v", listResp.Items)
|
||||
}
|
||||
if listResp.NextCursor != "1" {
|
||||
t.Fatalf("unexpected next cursor: %+v", listResp)
|
||||
}
|
||||
if listResp.Items[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
|
||||
t.Fatalf("unexpected sync status: %+v", listResp.Items[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestPublishConsumeOnceListFailedIntegration(t *testing.T) {
|
||||
application := app.New()
|
||||
|
||||
publishReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", bytes.NewBufferString(`{"event_id":"evt-integration-failed","package_id":502,"platform":"openai","model":"gpt-fail-model","version":10,"occurred_at":"2026-05-06T20:31:00Z"}`))
|
||||
publishRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(publishRR, publishReq)
|
||||
if publishRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected publish status: %d body=%s", publishRR.Code, publishRR.Body.String())
|
||||
}
|
||||
|
||||
consumeReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", bytes.NewBufferString(`{"consumer":"gateway"}`))
|
||||
consumeRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(consumeRR, consumeReq)
|
||||
if consumeRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected consume status: %d body=%s", consumeRR.Code, consumeRR.Body.String())
|
||||
}
|
||||
|
||||
listReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil)
|
||||
listRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(listRR, listReq)
|
||||
if listRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected list status: %d body=%s", listRR.Code, listRR.Body.String())
|
||||
}
|
||||
var listResp struct {
|
||||
Items []domain.PackageChangeEvent `json:"items"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
}
|
||||
if err := json.NewDecoder(listRR.Body).Decode(&listResp); err != nil {
|
||||
t.Fatalf("decode list error: %v", err)
|
||||
}
|
||||
if len(listResp.Items) != 1 || listResp.Items[0].EventID != "evt-integration-failed" {
|
||||
t.Fatalf("unexpected list items: %+v", listResp.Items)
|
||||
}
|
||||
if listResp.NextCursor != "1" {
|
||||
t.Fatalf("unexpected next cursor: %+v", listResp)
|
||||
}
|
||||
if listResp.Items[0].GatewaySyncStatus != domain.GatewaySyncStatusFailed {
|
||||
t.Fatalf("unexpected sync status: %+v", listResp.Items[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDiscoveryCandidateCreateAndListIntegration(t *testing.T) {
|
||||
application := app.New()
|
||||
|
||||
createReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", bytes.NewBufferString(`{"candidate_id":"cand-int-1","account_id":701,"platform":"openai","model":"gpt-4.1-mini","source":"manual_seed","reason_code":"new_model","discovered_at":"2026-05-06T20:30:00Z"}`))
|
||||
createRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(createRR, createReq)
|
||||
if createRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected create status: %d body=%s", createRR.Code, createRR.Body.String())
|
||||
}
|
||||
|
||||
listReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/discovery/candidates?status=pending_admission", nil)
|
||||
listRR := httptest.NewRecorder()
|
||||
application.Server.Routes().ServeHTTP(listRR, listReq)
|
||||
if listRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected list status: %d body=%s", listRR.Code, listRR.Body.String())
|
||||
}
|
||||
var listResp struct {
|
||||
Items []domain.DiscoveryCandidate `json:"items"`
|
||||
}
|
||||
if err := json.NewDecoder(listRR.Body).Decode(&listResp); err != nil {
|
||||
t.Fatalf("decode list error: %v", err)
|
||||
}
|
||||
if len(listResp.Items) != 1 || listResp.Items[0].CandidateID != "cand-int-1" {
|
||||
t.Fatalf("unexpected discovery list items: %+v", listResp.Items)
|
||||
}
|
||||
}
|
||||
266
internal/httpapi/server_test.go
Normal file
266
internal/httpapi/server_test.go
Normal file
@@ -0,0 +1,266 @@
|
||||
package httpapi
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/discovery"
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
"supply-intelligence/internal/probe"
|
||||
"supply-intelligence/internal/publish"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestServerRoutingStateEndpoint(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.UpsertRoutingState(domain.AccountRoutingState{
|
||||
AccountID: 101,
|
||||
Platform: "openai",
|
||||
AccountStatus: domain.AccountStatusActive,
|
||||
RoutingEnabled: true,
|
||||
RiskScore: 10,
|
||||
ReasonCode: "ok",
|
||||
LastProbeAt: time.Unix(100, 0).UTC(),
|
||||
Version: 3,
|
||||
})
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/accounts/101/routing-state", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got domain.AccountRoutingState
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if got.AccountID != 101 || got.AccountStatus != domain.AccountStatusActive {
|
||||
t.Fatalf("unexpected payload: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerProbeEvaluateEndpointPaths(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
body string
|
||||
wantStatus int
|
||||
wantClassification domain.ProbeClassification
|
||||
wantAccountStatus domain.AccountStatus
|
||||
wantReasonCode string
|
||||
wantRoutingEnabled bool
|
||||
}{
|
||||
{
|
||||
name: "success",
|
||||
body: `{"account_id":201,"platform":"openai","current_status":"suspended","status_code":200}`,
|
||||
wantStatus: http.StatusOK,
|
||||
wantClassification: domain.ProbeClassificationSuccess,
|
||||
wantAccountStatus: domain.AccountStatusActive,
|
||||
wantReasonCode: "ok",
|
||||
wantRoutingEnabled: true,
|
||||
},
|
||||
{
|
||||
name: "explicit_failure",
|
||||
body: `{"account_id":202,"platform":"openai","current_status":"active","status_code":401}`,
|
||||
wantStatus: http.StatusOK,
|
||||
wantClassification: domain.ProbeClassificationExplicitFailure,
|
||||
wantAccountStatus: domain.AccountStatusSuspended,
|
||||
wantReasonCode: "auth_rejected",
|
||||
wantRoutingEnabled: false,
|
||||
},
|
||||
{
|
||||
name: "inconclusive",
|
||||
body: `{"account_id":203,"platform":"openai","current_status":"suspended","transport_error":"dial tcp timeout"}`,
|
||||
wantStatus: http.StatusOK,
|
||||
wantClassification: domain.ProbeClassificationInconclusive,
|
||||
wantAccountStatus: domain.AccountStatusSuspended,
|
||||
wantReasonCode: "transport_error",
|
||||
wantRoutingEnabled: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/probe/evaluate", bytes.NewBufferString(tt.body))
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
if rr.Code != tt.wantStatus {
|
||||
t.Fatalf("unexpected status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var got probe.EvaluateOutput
|
||||
if err := json.NewDecoder(rr.Body).Decode(&got); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if got.Classification != tt.wantClassification {
|
||||
t.Fatalf("unexpected classification: %q", got.Classification)
|
||||
}
|
||||
if got.RoutingState.AccountStatus != tt.wantAccountStatus {
|
||||
t.Fatalf("unexpected account status: %q", got.RoutingState.AccountStatus)
|
||||
}
|
||||
if got.RoutingState.ReasonCode != tt.wantReasonCode {
|
||||
t.Fatalf("unexpected reason code: %q", got.RoutingState.ReasonCode)
|
||||
}
|
||||
if got.RoutingState.RoutingEnabled != tt.wantRoutingEnabled {
|
||||
t.Fatalf("unexpected routing enabled: %v", got.RoutingState.RoutingEnabled)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerPublishPackageEventEndpoint(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
body := bytes.NewBufferString(`{"event_id":"evt-1","package_id":1001,"platform":"openai","model":"gpt-4.1-mini","version":7,"occurred_at":"2026-05-06T20:30:00Z"}`)
|
||||
req := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/publish/package-event", body)
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected publish status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
|
||||
var event domain.PackageChangeEvent
|
||||
if err := json.NewDecoder(rr.Body).Decode(&event); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if event.EventID != "evt-1" || event.EventType != publish.PackagePublishedEventType {
|
||||
t.Fatalf("unexpected event: %+v", event)
|
||||
}
|
||||
if event.GatewaySyncStatus != domain.GatewaySyncStatusPending {
|
||||
t.Fatalf("unexpected sync status: %q", event.GatewaySyncStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerPackageChangeListAndAck(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-1", EventType: publish.PackagePublishedEventType, PackageID: 1001, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(5, 0).UTC(), Version: 7, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
listReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes", nil)
|
||||
listRR := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(listRR, listReq)
|
||||
if listRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected list status: %d body=%s", listRR.Code, listRR.Body.String())
|
||||
}
|
||||
var listResp struct {
|
||||
Items []domain.PackageChangeEvent `json:"items"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
}
|
||||
if err := json.NewDecoder(listRR.Body).Decode(&listResp); err != nil {
|
||||
t.Fatalf("decode list error: %v", err)
|
||||
}
|
||||
if len(listResp.Items) != 1 || listResp.NextCursor != "1" {
|
||||
t.Fatalf("unexpected list response: %+v", listResp)
|
||||
}
|
||||
|
||||
ackReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/package-changes/evt-1/ack", bytes.NewBufferString(`{"consumer":"gateway","result":"applied","detail":"ok"}`))
|
||||
ackRR := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(ackRR, ackReq)
|
||||
if ackRR.Code != http.StatusNoContent {
|
||||
t.Fatalf("unexpected ack status: %d body=%s", ackRR.Code, ackRR.Body.String())
|
||||
}
|
||||
updated, _ := repo.ListPackageEventsAfter("")
|
||||
if len(updated) != 1 || updated[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied {
|
||||
t.Fatalf("unexpected ack state: %+v", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerPackageChangeListWithCursor(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-1", EventType: publish.PackagePublishedEventType, PackageID: 1001, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(5, 0).UTC(), Version: 7, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-2", EventType: publish.PackagePublishedEventType, PackageID: 1002, Platform: "openai", Model: "gpt-4.1", OccurredAt: time.Unix(6, 0).UTC(), Version: 8, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/gateway/package-changes?cursor=1", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var resp struct {
|
||||
Items []domain.PackageChangeEvent `json:"items"`
|
||||
NextCursor string `json:"next_cursor"`
|
||||
}
|
||||
if err := json.NewDecoder(rr.Body).Decode(&resp); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if len(resp.Items) != 1 || resp.Items[0].EventID != "evt-2" || resp.NextCursor != "2" {
|
||||
t.Fatalf("unexpected cursor response: %+v", resp)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerConsumeOnceEndpoint(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-apply", EventType: publish.PackagePublishedEventType, PackageID: 1001, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(5, 0).UTC(), Version: 7, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-fail", EventType: publish.PackagePublishedEventType, PackageID: 1002, Platform: "openai", Model: "gpt-fail-model", OccurredAt: time.Unix(6, 0).UTC(), Version: 8, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/gateway/consume-once", bytes.NewBufferString(`{"consumer":"gateway"}`))
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected consume status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
var out gatewayconsumer.ConsumeOnceOutput
|
||||
if err := json.NewDecoder(rr.Body).Decode(&out); err != nil {
|
||||
t.Fatalf("decode error: %v", err)
|
||||
}
|
||||
if len(out.Items) != 2 {
|
||||
t.Fatalf("unexpected consume output length: %+v", out)
|
||||
}
|
||||
if out.Items[0].Result != domain.GatewayAckResultApplied || out.Items[0].GatewaySyncStatus != domain.GatewaySyncStatusApplied || out.Items[0].Detail == "" {
|
||||
t.Fatalf("unexpected first consume item: %+v", out.Items[0])
|
||||
}
|
||||
if out.Items[1].Result != domain.GatewayAckResultFailed || out.Items[1].GatewaySyncStatus != domain.GatewaySyncStatusFailed || out.Items[1].Detail == "" {
|
||||
t.Fatalf("unexpected second consume item: %+v", out.Items[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerDiscoveryCandidateCreateAndList(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
createReq := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", bytes.NewBufferString(`{"candidate_id":"cand-1","account_id":301,"platform":"openai","model":"gpt-4.1-mini","source":"manual_seed","reason_code":"new_model","discovered_at":"2026-05-06T20:30:00Z"}`))
|
||||
createRR := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(createRR, createReq)
|
||||
if createRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected create status: %d body=%s", createRR.Code, createRR.Body.String())
|
||||
}
|
||||
|
||||
listReq := httptest.NewRequest(http.MethodGet, "/internal/supply-intelligence/discovery/candidates?status=pending_admission", nil)
|
||||
listRR := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(listRR, listReq)
|
||||
if listRR.Code != http.StatusOK {
|
||||
t.Fatalf("unexpected list status: %d body=%s", listRR.Code, listRR.Body.String())
|
||||
}
|
||||
var listResp struct {
|
||||
Items []domain.DiscoveryCandidate `json:"items"`
|
||||
}
|
||||
if err := json.NewDecoder(listRR.Body).Decode(&listResp); err != nil {
|
||||
t.Fatalf("decode list error: %v", err)
|
||||
}
|
||||
if len(listResp.Items) != 1 || listResp.Items[0].CandidateID != "cand-1" || listResp.Items[0].Status != domain.DiscoveryCandidateStatusPendingAdmission {
|
||||
t.Fatalf("unexpected discovery list response: %+v", listResp.Items)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServerDiscoveryCandidateRejectsInvalidInput(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
server := NewServer(repo, probe.NewService(repo), publish.NewService(repo), gatewayconsumer.NewService(repo), discovery.NewService(repo), nil)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/internal/supply-intelligence/discovery/candidates", bytes.NewBufferString(`{"candidate_id":"","account_id":0}`))
|
||||
rr := httptest.NewRecorder()
|
||||
server.Routes().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusBadRequest {
|
||||
t.Fatalf("unexpected status: %d body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
}
|
||||
67
internal/integration/adapter.go
Normal file
67
internal/integration/adapter.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
// AccountStateReader defines the interface for reading account routing state
|
||||
// from the supply-api repository layer
|
||||
type AccountStateReader interface {
|
||||
GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool)
|
||||
}
|
||||
|
||||
// CandidateStore defines the interface for persisting model candidates
|
||||
type CandidateStore interface {
|
||||
GetDiscoveryCandidateByIDContext(ctx context.Context, candidateID string) (domain.DiscoveryCandidate, bool)
|
||||
FindDiscoveryCandidateContext(ctx context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool)
|
||||
UpsertDiscoveryCandidateContext(ctx context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate
|
||||
ListDiscoveryCandidatesContext(ctx context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate
|
||||
}
|
||||
|
||||
// PackageEventStore defines the interface for persisting package change events
|
||||
type PackageEventStore interface {
|
||||
AppendPackageEventContext(ctx context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error)
|
||||
ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string)
|
||||
AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt interface{}) (domain.PackageChangeEvent, error)
|
||||
}
|
||||
|
||||
// ProbeLogStore defines the interface for persisting probe execution logs
|
||||
type ProbeLogStore interface {
|
||||
AppendProbeLog(ctx context.Context, log ProbeExecutionLog) error
|
||||
ListProbeLogsByAccount(ctx context.Context, accountID int64, limit int) ([]ProbeExecutionLog, error)
|
||||
}
|
||||
|
||||
// ProbeExecutionLog represents a single probe execution record
|
||||
type ProbeExecutionLog struct {
|
||||
LogID int64
|
||||
AccountID int64
|
||||
Platform string
|
||||
ProbeResult domain.ProbeClassification
|
||||
FailureClass string
|
||||
HTTPStatus int
|
||||
LatencyMs int
|
||||
RiskScore int
|
||||
EvaluatedTransition string
|
||||
ExecutedAt interface{} // time.Time or string
|
||||
RequestID string
|
||||
Version int64
|
||||
}
|
||||
|
||||
// NewAccountStateAdapter creates an adapter that connects to supply-api's account store
|
||||
// For now, returns nil — actual implementation requires supply-api repo access
|
||||
func NewAccountStateAdapter(repo interface{}) *AccountStateAdapter {
|
||||
return &AccountStateAdapter{repo: repo}
|
||||
}
|
||||
|
||||
// AccountStateAdapter is the AccountStateReader implementation intended to sit
// on top of the supply-api repository.
type AccountStateAdapter struct {
	// repo is the untyped handle to the backing repository.
	repo any
}
|
||||
|
||||
func (a *AccountStateAdapter) GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool) {
|
||||
// TODO: implement when supply-api integration is ready
|
||||
// This will call into supply-api's account repository
|
||||
return domain.AccountRoutingState{}, false
|
||||
}
|
||||
242
internal/integration/platform.go
Normal file
242
internal/integration/platform.go
Normal file
@@ -0,0 +1,242 @@
|
||||
package integration
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"io"
	"net/http"
)
|
||||
|
||||
// SupplierAdapter defines the interface for interacting with a supplier platform
|
||||
type SupplierAdapter interface {
|
||||
// Platform returns the platform name (e.g., "openai", "anthropic")
|
||||
Platform() string
|
||||
|
||||
// ProbeAccount sends a health check request to the supplier API
|
||||
// Returns the HTTP response details needed for probe classification
|
||||
ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult
|
||||
|
||||
// GetModels fetches the list of available models from the supplier
|
||||
GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error)
|
||||
|
||||
// HealthCheck verifies connectivity to the supplier API
|
||||
HealthCheck(ctx context.Context, account SupplierAccount) error
|
||||
}
|
||||
|
||||
// SupplierAccount carries the credentials and connection settings for one
// supplier account.
type SupplierAccount struct {
	AccountID int64
	Platform  string
	APIKey    string
	// BaseURL is the API host; when empty the adapters fall back to the
	// supplier's public endpoint.
	BaseURL string
	// Endpoint is a custom endpoint override.
	Endpoint string
}
|
||||
|
||||
// ProbeResult is the unclassified outcome of a single probe request.
type ProbeResult struct {
	// StatusCode is the HTTP status of the response; it stays zero when the
	// request failed before a response arrived.
	StatusCode int
	// TransportError is set when the request could not be built or sent.
	TransportError error
	// ResponseBody holds the leading part of the response body.
	ResponseBody string
}
|
||||
|
||||
// ModelInfo is the adapter-neutral description of one supplier model.
type ModelInfo struct {
	// ModelID is the supplier's own identifier for the model.
	ModelID string
	// ModelName is the human-readable display name.
	ModelName string
	// ContextLength is the maximum context length in tokens.
	ContextLength int
	// IsActive reports whether the model is currently available.
	IsActive bool
}
|
||||
|
||||
// NewOpenAIAdapter creates an adapter for OpenAI-compatible APIs
|
||||
func NewOpenAIAdapter(httpClient HTTPClient) SupplierAdapter {
|
||||
return &OpenAIAdapter{httpClient: httpClient}
|
||||
}
|
||||
|
||||
// OpenAIAdapter implements SupplierAdapter for OpenAI and OpenAI-compatible APIs
|
||||
type OpenAIAdapter struct {
|
||||
httpClient HTTPClient
|
||||
}
|
||||
|
||||
func (a *OpenAIAdapter) Platform() string { return "openai" }
|
||||
|
||||
func (a *OpenAIAdapter) ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult {
|
||||
baseURL := account.BaseURL
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.openai.com"
|
||||
}
|
||||
endpoint := account.Endpoint
|
||||
if endpoint == "" {
|
||||
endpoint = baseURL + "/v1/models"
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return ProbeResult{TransportError: err}
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+account.APIKey)
|
||||
req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
|
||||
|
||||
resp, err := a.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return ProbeResult{TransportError: err}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body := make([]byte, 1024)
|
||||
n, _ := resp.Body.Read(body)
|
||||
|
||||
return ProbeResult{
|
||||
StatusCode: resp.StatusCode,
|
||||
ResponseBody: string(body[:n]),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *OpenAIAdapter) GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error) {
|
||||
baseURL := account.BaseURL
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.openai.com"
|
||||
}
|
||||
endpoint := baseURL + "/v1/models"
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+account.APIKey)
|
||||
|
||||
resp, err := a.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Parse the OpenAI models list response
|
||||
// {"object": "list", "data": [{"id": "gpt-4", "object": "model", ...}, ...]}
|
||||
var raw struct {
|
||||
Data []struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
Context int `json:"context_window,omitempty"`
|
||||
} `json:"data"`
|
||||
}
|
||||
if err := decodeJSON(resp, &raw); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
models := make([]ModelInfo, 0, len(raw.Data))
|
||||
for _, m := range raw.Data {
|
||||
if m.Object == "model" {
|
||||
models = append(models, ModelInfo{
|
||||
ModelID: m.ID,
|
||||
ModelName: m.ID,
|
||||
ContextLength: m.Context,
|
||||
IsActive: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
return models, nil
|
||||
}
|
||||
|
||||
func (a *OpenAIAdapter) HealthCheck(ctx context.Context, account SupplierAccount) error {
|
||||
result := a.ProbeAccount(ctx, account)
|
||||
if result.TransportError != nil {
|
||||
return result.TransportError
|
||||
}
|
||||
if result.StatusCode == http.StatusOK || result.StatusCode == http.StatusUnauthorized {
|
||||
return nil
|
||||
}
|
||||
return ErrHealthCheckFailed
|
||||
}
|
||||
|
||||
// NewAnthropicAdapter creates an adapter for Anthropic APIs
|
||||
func NewAnthropicAdapter(httpClient HTTPClient) SupplierAdapter {
|
||||
return &AnthropicAdapter{httpClient: httpClient}
|
||||
}
|
||||
|
||||
// AnthropicAdapter implements SupplierAdapter for Anthropic Claude API
|
||||
type AnthropicAdapter struct {
|
||||
httpClient HTTPClient
|
||||
}
|
||||
|
||||
func (a *AnthropicAdapter) Platform() string { return "anthropic" }
|
||||
|
||||
func (a *AnthropicAdapter) ProbeAccount(ctx context.Context, account SupplierAccount) ProbeResult {
|
||||
baseURL := account.BaseURL
|
||||
if baseURL == "" {
|
||||
baseURL = "https://api.anthropic.com"
|
||||
}
|
||||
endpoint := baseURL + "/v1/models"
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return ProbeResult{TransportError: err}
|
||||
}
|
||||
req.Header.Set("x-api-key", account.APIKey)
|
||||
req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
|
||||
req.Header.Set("anthropic-version", "2023-06-01")
|
||||
|
||||
resp, err := a.httpClient.Do(req)
|
||||
if err != nil {
|
||||
return ProbeResult{TransportError: err}
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body := make([]byte, 1024)
|
||||
n, _ := resp.Body.Read(body)
|
||||
|
||||
return ProbeResult{
|
||||
StatusCode: resp.StatusCode,
|
||||
ResponseBody: string(body[:n]),
|
||||
}
|
||||
}
|
||||
|
||||
func (a *AnthropicAdapter) GetModels(ctx context.Context, account SupplierAccount) ([]ModelInfo, error) {
|
||||
// Anthropic doesn't have a public models list endpoint in the same way OpenAI does.
|
||||
// We return a known static list for Claude models.
|
||||
// In production this would be fetched from configuration or a dynamic source.
|
||||
return []ModelInfo{
|
||||
{ModelID: "claude-3-5-sonnet-20241022", ModelName: "Claude 3.5 Sonnet", ContextLength: 200000, IsActive: true},
|
||||
{ModelID: "claude-3-5-haiku-20241022", ModelName: "Claude 3.5 Haiku", ContextLength: 200000, IsActive: true},
|
||||
{ModelID: "claude-3-opus-20240229", ModelName: "Claude 3 Opus", ContextLength: 200000, IsActive: true},
|
||||
{ModelID: "claude-3-sonnet-20240229", ModelName: "Claude 3 Sonnet", ContextLength: 200000, IsActive: true},
|
||||
{ModelID: "claude-3-haiku-20240307", ModelName: "Claude 3 Haiku", ContextLength: 200000, IsActive: true},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (a *AnthropicAdapter) HealthCheck(ctx context.Context, account SupplierAccount) error {
|
||||
result := a.ProbeAccount(ctx, account)
|
||||
if result.TransportError != nil {
|
||||
return result.TransportError
|
||||
}
|
||||
// Anthropic returns 200 on success, 401 on auth failure
|
||||
if result.StatusCode == http.StatusOK || result.StatusCode == http.StatusUnauthorized {
|
||||
return nil
|
||||
}
|
||||
return ErrHealthCheckFailed
|
||||
}
|
||||
|
||||
// HTTPClient is the minimal request-sending contract the platform adapters
// depend on; it exists so tests can substitute a fake transport.
type HTTPClient interface {
	Do(req *http.Request) (*http.Response, error)
}

// DefaultHTTPClient is the standard HTTPClient used by the platform adapters.
// It carries no state and delegates to http.DefaultClient.
type DefaultHTTPClient struct{}

// NewDefaultHTTPClient returns a DefaultHTTPClient as an HTTPClient.
func NewDefaultHTTPClient() HTTPClient {
	client := new(DefaultHTTPClient)
	return client
}

// Do sends req through http.DefaultClient and returns its result unchanged.
// NOTE(review): http.DefaultClient has no timeout of its own, so callers
// presumably bound requests through the request context — confirm.
func (c *DefaultHTTPClient) Do(req *http.Request) (*http.Response, error) {
	resp, err := http.DefaultClient.Do(req)
	return resp, err
}
|
||||
|
||||
// HealthCheckError is the error type returned when a health check gets a
// response whose status is not accepted as healthy.
type HealthCheckError struct{}

// Error implements the error interface with a fixed message.
func (e *HealthCheckError) Error() string {
	const message = "health check failed"
	return message
}

// ErrHealthCheckFailed is the shared *HealthCheckError value returned by the
// adapters' HealthCheck methods.
var ErrHealthCheckFailed = &HealthCheckError{}
|
||||
|
||||
// decodeJSON decodes the first JSON value in resp.Body into v and returns the
// decoder's error, if any. It does not close the body.
func decodeJSON(resp *http.Response, v interface{}) error {
	decoder := json.NewDecoder(resp.Body)
	return decoder.Decode(v)
}
|
||||
38
internal/poller/gateway_package_poller.go
Normal file
38
internal/poller/gateway_package_poller.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package poller
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
)
|
||||
|
||||
type GatewayPackagePoller struct {
|
||||
consumer *gatewayconsumer.Service
|
||||
cursor string
|
||||
}
|
||||
|
||||
func NewGatewayPackagePoller(consumer *gatewayconsumer.Service) *GatewayPackagePoller {
|
||||
return &GatewayPackagePoller{consumer: consumer}
|
||||
}
|
||||
|
||||
func (p *GatewayPackagePoller) PollOnce(ctx context.Context) (gatewayconsumer.ConsumeOnceOutput, error) {
|
||||
if p == nil || p.consumer == nil {
|
||||
return gatewayconsumer.ConsumeOnceOutput{}, gatewayconsumer.ErrInvalidConsumeInput
|
||||
}
|
||||
out, err := p.consumer.ConsumeOnce(ctx, gatewayconsumer.ConsumeOnceInput{
|
||||
Consumer: "gateway",
|
||||
Cursor: p.cursor,
|
||||
})
|
||||
if err != nil {
|
||||
return gatewayconsumer.ConsumeOnceOutput{}, err
|
||||
}
|
||||
p.cursor = out.NextCursor
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (p *GatewayPackagePoller) Cursor() string {
|
||||
if p == nil {
|
||||
return ""
|
||||
}
|
||||
return p.cursor
|
||||
}
|
||||
28
internal/poller/gateway_package_poller_test.go
Normal file
28
internal/poller/gateway_package_poller_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package poller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestGatewayPackagePollerPollOnce(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-1", EventType: "supply_package_published", PackageID: 1, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(1, 0).UTC(), Version: 1, GatewaySyncStatus: domain.GatewaySyncStatusPending})
|
||||
poller := NewGatewayPackagePoller(gatewayconsumer.NewService(repo))
|
||||
|
||||
out, err := poller.PollOnce(context.Background())
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected poll error: %v", err)
|
||||
}
|
||||
if len(out.Items) != 1 || out.Items[0].EventID != "evt-1" {
|
||||
t.Fatalf("unexpected output: %+v", out)
|
||||
}
|
||||
if poller.Cursor() != out.NextCursor {
|
||||
t.Fatalf("expected cursor to advance: poller=%q out=%q", poller.Cursor(), out.NextCursor)
|
||||
}
|
||||
}
|
||||
53
internal/poller/runtime.go
Normal file
53
internal/poller/runtime.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package poller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Runtime struct {
|
||||
poller *GatewayPackagePoller
|
||||
interval time.Duration
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func NewRuntime(poller *GatewayPackagePoller, interval time.Duration) *Runtime {
|
||||
if interval <= 0 {
|
||||
interval = time.Second
|
||||
}
|
||||
return &Runtime{poller: poller, interval: interval}
|
||||
}
|
||||
|
||||
func (r *Runtime) Start(parent context.Context) bool {
|
||||
if r == nil || r.poller == nil || r.cancel != nil {
|
||||
return false
|
||||
}
|
||||
ctx, cancel := context.WithCancel(parent)
|
||||
r.cancel = cancel
|
||||
r.wg.Add(1)
|
||||
go func() {
|
||||
defer r.wg.Done()
|
||||
ticker := time.NewTicker(r.interval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
_, _ = r.poller.PollOnce(ctx)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}()
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *Runtime) Stop() {
|
||||
if r == nil || r.cancel == nil {
|
||||
return
|
||||
}
|
||||
r.cancel()
|
||||
r.wg.Wait()
|
||||
r.cancel = nil
|
||||
}
|
||||
54
internal/poller/runtime_test.go
Normal file
54
internal/poller/runtime_test.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package poller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/gatewayconsumer"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestRuntimeStartsBackgroundPolling(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{
|
||||
EventID: "evt-runtime-1",
|
||||
EventType: "supply_package_published",
|
||||
PackageID: 1,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
OccurredAt: time.Unix(1, 0).UTC(),
|
||||
Version: 1,
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
})
|
||||
|
||||
service := gatewayconsumer.NewService(repo)
|
||||
poller := NewGatewayPackagePoller(service)
|
||||
runtime := NewRuntime(poller, 10*time.Millisecond)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
if !runtime.Start(ctx) {
|
||||
t.Fatalf("expected runtime to start")
|
||||
}
|
||||
defer runtime.Stop()
|
||||
|
||||
deadline := time.Now().Add(500 * time.Millisecond)
|
||||
for time.Now().Before(deadline) {
|
||||
items, _ := repo.ListPackageEventsAfter("")
|
||||
if len(items) == 1 && items[0].GatewaySyncStatus == domain.GatewaySyncStatusApplied {
|
||||
return
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
items, _ := repo.ListPackageEventsAfter("")
|
||||
t.Fatalf("expected background polling to apply event, got %+v", items)
|
||||
}
|
||||
|
||||
func TestRuntimeStartRequiresPoller(t *testing.T) {
|
||||
if (&Runtime{}).Start(context.Background()) {
|
||||
t.Fatalf("expected runtime without poller to refuse start")
|
||||
}
|
||||
}
|
||||
138
internal/probe/driver.go
Normal file
138
internal/probe/driver.go
Normal file
@@ -0,0 +1,138 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/integration"
|
||||
)
|
||||
|
||||
// ProbeLogRepository defines where probe execution logs are persisted
|
||||
type ProbeLogRepository interface {
|
||||
AppendProbeLog(ctx context.Context, outcome ProbeOutcome) error
|
||||
}
|
||||
|
||||
// Driver orchestrates a full probe run: load targets → execute → evaluate → persist state
|
||||
type Driver struct {
|
||||
executor *ProbeExecutor
|
||||
evaluator *Service // reuse the existing probe.Service as evaluator
|
||||
logRepo ProbeLogRepository
|
||||
adapters map[string]integration.SupplierAdapter
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// NewDriver creates a probe driver with all dependencies wired together
|
||||
func NewDriver(
|
||||
repo RoutingStateRepository,
|
||||
logRepo ProbeLogRepository,
|
||||
adapters map[string]integration.SupplierAdapter,
|
||||
) *Driver {
|
||||
return &Driver{
|
||||
executor: NewProbeExecutor(integration.NewDefaultHTTPClient()),
|
||||
evaluator: NewService(repo),
|
||||
logRepo: logRepo,
|
||||
adapters: adapters,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
// RunProbeForAccount probes a single account and persists the result through the full chain
|
||||
func (d *Driver) RunProbeForAccount(ctx context.Context, account integration.SupplierAccount) error {
|
||||
var outcome ProbeOutcome
|
||||
|
||||
if adapter, ok := d.adapters[account.Platform]; ok {
|
||||
// Use platform-specific adapter
|
||||
result := adapter.ProbeAccount(ctx, account)
|
||||
outcome = ProbeOutcome{
|
||||
AccountID: account.AccountID,
|
||||
Platform: account.Platform,
|
||||
StatusCode: result.StatusCode,
|
||||
TransportError: result.TransportError,
|
||||
ResponseBody: result.ResponseBody,
|
||||
RequestID: "prb-" + uuid.New().String(),
|
||||
ExecutedAt: d.now(),
|
||||
}
|
||||
} else {
|
||||
// Fall back to generic HTTP probe
|
||||
target := ProbeTarget{
|
||||
AccountID: account.AccountID,
|
||||
Platform: account.Platform,
|
||||
Endpoint: account.Endpoint,
|
||||
AuthHeader: "Bearer " + account.APIKey,
|
||||
}
|
||||
if target.Endpoint == "" {
|
||||
target.Endpoint = account.BaseURL
|
||||
}
|
||||
|
||||
var err error
|
||||
outcome, err = d.executor.ExecuteProbe(ctx, target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return d.persistOutcome(ctx, account.AccountID, account.Platform, outcome)
|
||||
}
|
||||
|
||||
// persistOutcome drives the outcome through: load current state → evaluate → state machine → persist
|
||||
func (d *Driver) persistOutcome(ctx context.Context, accountID int64, platform string, outcome ProbeOutcome) error {
|
||||
// 1. Load current routing state
|
||||
currentState, _ := d.evaluator.repo.GetRoutingStateContext(ctx, accountID)
|
||||
|
||||
// 2. Build evaluate input
|
||||
var transportErr error
|
||||
if outcome.TransportError != nil {
|
||||
transportErr = outcome.TransportError
|
||||
}
|
||||
|
||||
input := EvaluateInput{
|
||||
AccountID: accountID,
|
||||
Platform: platform,
|
||||
CurrentStatus: currentState.AccountStatus,
|
||||
StatusCode: outcome.StatusCode,
|
||||
TransportError: transportErr,
|
||||
}
|
||||
|
||||
// 3. Evaluate (uses the existing Service.EvaluateHTTPResult)
|
||||
evalOutput, err := d.evaluator.EvaluateHTTPResult(ctx, input)
|
||||
if err != nil {
|
||||
log.Printf("[probe] failed to evaluate outcome for account %d: %v", accountID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// 4. Log the probe execution
|
||||
if d.logRepo != nil {
|
||||
logEntry := ProbeOutcome{
|
||||
AccountID: accountID,
|
||||
Platform: platform,
|
||||
StatusCode: outcome.StatusCode,
|
||||
TransportError: outcome.TransportError,
|
||||
LatencyMs: outcome.LatencyMs,
|
||||
RequestID: outcome.RequestID,
|
||||
ExecutedAt: outcome.ExecutedAt,
|
||||
}
|
||||
_ = d.logRepo.AppendProbeLog(ctx, logEntry)
|
||||
}
|
||||
|
||||
// 5. Log state transition
|
||||
transition := describeTransition(currentState.AccountStatus, evalOutput.RoutingState.AccountStatus)
|
||||
log.Printf("[probe] account=%d platform=%s %s->%s classification=%s risk=%d transition=%s",
|
||||
accountID, platform,
|
||||
currentState.AccountStatus, evalOutput.RoutingState.AccountStatus,
|
||||
evalOutput.Classification, evalOutput.RoutingState.RiskScore,
|
||||
transition)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// describeTransition returns a human-readable transition description
|
||||
func describeTransition(from, to domain.AccountStatus) string {
|
||||
if from == to {
|
||||
return "no_change"
|
||||
}
|
||||
return string(from) + "_to_" + string(to)
|
||||
}
|
||||
44
internal/probe/evaluator.go
Normal file
44
internal/probe/evaluator.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
var ErrUnknownStatusCode = errors.New("unknown probe status code")
|
||||
|
||||
func ClassifyHTTPResult(statusCode int, transportErr error) (domain.ProbeClassification, string, error) {
|
||||
if transportErr != nil {
|
||||
return domain.ProbeClassificationInconclusive, "transport_error", nil
|
||||
}
|
||||
|
||||
switch statusCode {
|
||||
case http.StatusOK:
|
||||
return domain.ProbeClassificationSuccess, "ok", nil
|
||||
case http.StatusUnauthorized:
|
||||
fallthrough
|
||||
case http.StatusForbidden:
|
||||
return domain.ProbeClassificationExplicitFailure, "auth_rejected", nil
|
||||
case http.StatusTooManyRequests:
|
||||
fallthrough
|
||||
case http.StatusInternalServerError:
|
||||
fallthrough
|
||||
case http.StatusBadGateway:
|
||||
fallthrough
|
||||
case http.StatusServiceUnavailable:
|
||||
fallthrough
|
||||
case http.StatusGatewayTimeout:
|
||||
return domain.ProbeClassificationInconclusive, "upstream_unstable", nil
|
||||
default:
|
||||
if statusCode >= 500 {
|
||||
return domain.ProbeClassificationInconclusive, "upstream_unstable", nil
|
||||
}
|
||||
if statusCode >= 400 {
|
||||
return domain.ProbeClassificationInconclusive, "unexpected_client_error", nil
|
||||
}
|
||||
return "", "", fmt.Errorf("%w: %d", ErrUnknownStatusCode, statusCode)
|
||||
}
|
||||
}
|
||||
47
internal/probe/evaluator_test.go
Normal file
47
internal/probe/evaluator_test.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
func TestClassifyHTTPResult(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
statusCode int
|
||||
err error
|
||||
wantClass domain.ProbeClassification
|
||||
wantReason string
|
||||
wantErr bool
|
||||
}{
|
||||
{name: "200 success", statusCode: 200, wantClass: domain.ProbeClassificationSuccess, wantReason: "ok"},
|
||||
{name: "401 explicit failure", statusCode: 401, wantClass: domain.ProbeClassificationExplicitFailure, wantReason: "auth_rejected"},
|
||||
{name: "403 explicit failure", statusCode: 403, wantClass: domain.ProbeClassificationExplicitFailure, wantReason: "auth_rejected"},
|
||||
{name: "429 inconclusive", statusCode: 429, wantClass: domain.ProbeClassificationInconclusive, wantReason: "upstream_unstable"},
|
||||
{name: "503 inconclusive", statusCode: 503, wantClass: domain.ProbeClassificationInconclusive, wantReason: "upstream_unstable"},
|
||||
{name: "transport error inconclusive", err: errors.New("timeout"), wantClass: domain.ProbeClassificationInconclusive, wantReason: "transport_error"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotClass, gotReason, err := ClassifyHTTPResult(tt.statusCode, tt.err)
|
||||
if tt.wantErr {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if gotClass != tt.wantClass {
|
||||
t.Fatalf("classification mismatch: got %q want %q", gotClass, tt.wantClass)
|
||||
}
|
||||
if gotReason != tt.wantReason {
|
||||
t.Fatalf("reason mismatch: got %q want %q", gotReason, tt.wantReason)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
125
internal/probe/executor.go
Normal file
125
internal/probe/executor.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// HTTPClient defines the interface for making HTTP requests during probing.
type HTTPClient interface {
	Do(req *http.Request) (*http.Response, error)
}

// DefaultHTTPClient wraps the standard http.Client.
type DefaultHTTPClient struct {
	client *http.Client
}

// NewDefaultHTTPClient creates a client with a 30 second overall request
// timeout.
func NewDefaultHTTPClient() *DefaultHTTPClient {
	return &DefaultHTTPClient{
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
	}
}

// Do sends req with the wrapped client.
//
// A nil receiver or a zero-value DefaultHTTPClient has no wrapped client;
// instead of dereferencing a nil *http.Client (which panics) the request is
// sent with a client configured like the one NewDefaultHTTPClient builds.
func (c *DefaultHTTPClient) Do(req *http.Request) (*http.Response, error) {
	if c == nil || c.client == nil {
		return NewDefaultHTTPClient().client.Do(req)
	}
	return c.client.Do(req)
}
|
||||
|
||||
// ProbeTarget describes one account to be probed.
type ProbeTarget struct {
	AccountID  int64
	Platform   string
	Endpoint   string // URL the probe request is sent to
	AuthHeader string // full Authorization header value (Bearer token or API key); omitted from the request when empty
}

// ProbeOutcome is the result of executing a probe against a target.
type ProbeOutcome struct {
	AccountID      int64
	Platform       string
	StatusCode     int       // HTTP status of the response; left at 0 when no response was received
	TransportError error     // error returned by the HTTP client, if any
	LatencyMs      int       // duration of the HTTP call in milliseconds
	ResponseBody   string    // truncated, for debugging
	RequestID      string    // identifier generated for this probe
	ExecutedAt     time.Time // when the probe was executed
}
|
||||
|
||||
// ProbeExecutor sends HTTP requests to supplier endpoints and classifies results
|
||||
type ProbeExecutor struct {
|
||||
httpClient HTTPClient
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
// NewProbeExecutor creates a probe executor with the given HTTP client.
|
||||
// If client is nil, uses http.DefaultClient.
|
||||
func NewProbeExecutor(client HTTPClient) *ProbeExecutor {
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
return &ProbeExecutor{
|
||||
httpClient: client,
|
||||
now: func() time.Time { return time.Now().UTC() },
|
||||
}
|
||||
}
|
||||
|
||||
// ExecuteProbe runs a single probe against the target account
|
||||
// It makes an HTTP GET request to the platform's health endpoint
|
||||
func (e *ProbeExecutor) ExecuteProbe(ctx context.Context, target ProbeTarget) (ProbeOutcome, error) {
|
||||
requestID := uuid.New().String()
|
||||
executedAt := e.now()
|
||||
|
||||
if target.Endpoint == "" {
|
||||
return ProbeOutcome{}, ErrInvalidProbeTarget
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.Endpoint, nil)
|
||||
if err != nil {
|
||||
return ProbeOutcome{}, fmt.Errorf("%w: %v", ErrInvalidProbeTarget, err)
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", "supply-intelligence-probe/1.0")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
if target.AuthHeader != "" {
|
||||
req.Header.Set("Authorization", target.AuthHeader)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
resp, err := e.httpClient.Do(req)
|
||||
latencyMs := int(time.Since(start).Milliseconds())
|
||||
|
||||
outcome := ProbeOutcome{
|
||||
AccountID: target.AccountID,
|
||||
Platform: target.Platform,
|
||||
LatencyMs: latencyMs,
|
||||
RequestID: requestID,
|
||||
ExecutedAt: executedAt,
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
outcome.TransportError = err
|
||||
return outcome, nil // return outcome with transport error set
|
||||
}
|
||||
|
||||
if resp != nil {
|
||||
defer resp.Body.Close()
|
||||
outcome.StatusCode = resp.StatusCode
|
||||
|
||||
// Read truncated body for debugging (max 1KB)
|
||||
bodyBytes, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
|
||||
outcome.ResponseBody = string(bodyBytes)
|
||||
}
|
||||
|
||||
return outcome, nil
|
||||
}
|
||||
|
||||
var ErrInvalidProbeTarget = errors.New("invalid probe target")
|
||||
219
internal/probe/executor_test.go
Normal file
219
internal/probe/executor_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// mockHTTPClient is a stub HTTPClient that returns a preconfigured response
// and error for every request.
type mockHTTPClient struct {
	Resp *http.Response
	Err  error
}

// Do returns the request context's error when that context is already done,
// simulating cancellation; otherwise it returns the configured Resp and Err.
func (m *mockHTTPClient) Do(req *http.Request) (*http.Response, error) {
	if ctxErr := req.Context().Err(); ctxErr != nil {
		return nil, ctxErr
	}
	return m.Resp, m.Err
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_Success(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(`{"status":"ok"}`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil) // nil → uses real http.Client
|
||||
|
||||
outcome, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
AuthHeader: "Bearer test-key",
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if outcome.StatusCode != http.StatusOK {
|
||||
t.Fatalf("expected 200, got: %d", outcome.StatusCode)
|
||||
}
|
||||
if outcome.LatencyMs < 0 {
|
||||
t.Fatalf("expected latency >= 0, got: %d", outcome.LatencyMs)
|
||||
}
|
||||
if outcome.RequestID == "" {
|
||||
t.Fatal("expected request_id to be set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_ExplicitFailure(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusUnauthorized)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
outcome, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 2,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if outcome.StatusCode != http.StatusUnauthorized {
|
||||
t.Fatalf("expected 401, got: %d", outcome.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_Inconclusive_429(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusTooManyRequests)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
outcome, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 3,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if outcome.StatusCode != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected 429, got: %d", outcome.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_TransportError(t *testing.T) {
|
||||
client := &mockHTTPClient{
|
||||
Err: errors.New("connection refused"),
|
||||
}
|
||||
executor := NewProbeExecutor(client)
|
||||
|
||||
outcome, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 4,
|
||||
Platform: "openai",
|
||||
Endpoint: "http://localhost:9999",
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if outcome.TransportError == nil {
|
||||
t.Fatal("expected transport error to be set")
|
||||
}
|
||||
if outcome.StatusCode != 0 {
|
||||
t.Fatalf("expected status 0 on transport error, got: %d", outcome.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_InvalidTarget(t *testing.T) {
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
_, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 5,
|
||||
Platform: "openai",
|
||||
Endpoint: "", // empty endpoint
|
||||
})
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty endpoint")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_ContextCanceled(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(5 * time.Second) // delay longer than context
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
|
||||
defer cancel()
|
||||
|
||||
outcome, err := executor.ExecuteProbe(ctx, ProbeTarget{
|
||||
AccountID: 6,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if outcome.TransportError == nil {
|
||||
t.Fatal("expected context deadline exceeded transport error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_ExecuteProbe_ResponseBodyTruncated(t *testing.T) {
|
||||
largeBody := strings.Repeat("x", 10*1024) // 10KB
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(largeBody))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
outcome, err := executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 7,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if len(outcome.ResponseBody) > 1024 {
|
||||
t.Fatalf("expected body truncated to <=1024, got: %d", len(outcome.ResponseBody))
|
||||
}
|
||||
}
|
||||
|
||||
func TestProbeExecutor_SetsUserAgentAndAcceptHeader(t *testing.T) {
|
||||
var receivedHeaders http.Header
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
receivedHeaders = r.Header.Clone()
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
executor := NewProbeExecutor(nil)
|
||||
|
||||
_, _ = executor.ExecuteProbe(context.Background(), ProbeTarget{
|
||||
AccountID: 8,
|
||||
Platform: "openai",
|
||||
Endpoint: server.URL,
|
||||
AuthHeader: "Bearer my-key",
|
||||
})
|
||||
|
||||
if receivedHeaders == nil {
|
||||
t.Fatal("server handler was not called — check test setup")
|
||||
}
|
||||
if receivedHeaders.Get("User-Agent") == "" {
|
||||
t.Fatal("expected User-Agent header to be set")
|
||||
}
|
||||
if receivedHeaders.Get("Accept") != "application/json" {
|
||||
t.Fatalf("expected Accept: application/json, got: %s", receivedHeaders.Get("Accept"))
|
||||
}
|
||||
if receivedHeaders.Get("Authorization") != "Bearer my-key" {
|
||||
t.Fatalf("expected Authorization header to be set")
|
||||
}
|
||||
}
|
||||
95
internal/probe/service.go
Normal file
95
internal/probe/service.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
type RoutingStateRepository interface {
|
||||
GetRoutingStateContext(ctx context.Context, accountID int64) (domain.AccountRoutingState, bool)
|
||||
UpsertRoutingStateContext(ctx context.Context, state domain.AccountRoutingState) domain.AccountRoutingState
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
repo RoutingStateRepository
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
type EvaluateInput struct {
|
||||
AccountID int64
|
||||
Platform string
|
||||
CurrentStatus domain.AccountStatus
|
||||
StatusCode int
|
||||
TransportError error
|
||||
}
|
||||
|
||||
type EvaluateOutput struct {
|
||||
Classification domain.ProbeClassification `json:"classification"`
|
||||
ReasonCode string `json:"reason_code"`
|
||||
RoutingState domain.AccountRoutingState `json:"routing_state"`
|
||||
}
|
||||
|
||||
func NewService(repo RoutingStateRepository) *Service {
|
||||
return &Service{
|
||||
repo: repo,
|
||||
now: func() time.Time {
|
||||
return time.Now().UTC()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Service) EvaluateHTTPResult(ctx context.Context, input EvaluateInput) (EvaluateOutput, error) {
|
||||
classification, reasonCode, err := ClassifyHTTPResult(input.StatusCode, input.TransportError)
|
||||
if err != nil {
|
||||
return EvaluateOutput{}, err
|
||||
}
|
||||
|
||||
observedAt := s.now()
|
||||
nextStatus := NextAccountStatus(input.CurrentStatus, classification)
|
||||
state := domain.AccountRoutingState{
|
||||
AccountID: input.AccountID,
|
||||
Platform: input.Platform,
|
||||
AccountStatus: nextStatus,
|
||||
RoutingEnabled: nextStatus == domain.AccountStatusActive,
|
||||
RiskScore: riskScoreFor(nextStatus, classification),
|
||||
ReasonCode: reasonCode,
|
||||
LastProbeAt: observedAt,
|
||||
Version: 1,
|
||||
}
|
||||
|
||||
if previous, ok := s.repo.GetRoutingStateContext(ctx, input.AccountID); ok {
|
||||
state.Version = previous.Version + 1
|
||||
if state.Platform == "" {
|
||||
state.Platform = previous.Platform
|
||||
}
|
||||
}
|
||||
|
||||
persisted := s.repo.UpsertRoutingStateContext(ctx, state)
|
||||
return EvaluateOutput{
|
||||
Classification: classification,
|
||||
ReasonCode: reasonCode,
|
||||
RoutingState: persisted,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func riskScoreFor(status domain.AccountStatus, classification domain.ProbeClassification) int {
|
||||
switch classification {
|
||||
case domain.ProbeClassificationSuccess:
|
||||
return 20
|
||||
case domain.ProbeClassificationExplicitFailure:
|
||||
switch status {
|
||||
case domain.AccountStatusDisabled:
|
||||
return 100
|
||||
case domain.AccountStatusSuspended:
|
||||
return 90
|
||||
default:
|
||||
return 80
|
||||
}
|
||||
case domain.ProbeClassificationInconclusive:
|
||||
return 60
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
115
internal/probe/service_test.go
Normal file
115
internal/probe/service_test.go
Normal file
@@ -0,0 +1,115 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestServiceEvaluateHTTPResultSuccess(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
service.now = func() time.Time { return time.Unix(1000, 0).UTC() }
|
||||
|
||||
result, err := service.EvaluateHTTPResult(context.Background(), EvaluateInput{
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
CurrentStatus: domain.AccountStatusSuspended,
|
||||
StatusCode: 200,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if result.Classification != domain.ProbeClassificationSuccess {
|
||||
t.Fatalf("unexpected classification: %q", result.Classification)
|
||||
}
|
||||
if result.RoutingState.AccountStatus != domain.AccountStatusActive {
|
||||
t.Fatalf("unexpected account status: %q", result.RoutingState.AccountStatus)
|
||||
}
|
||||
if !result.RoutingState.RoutingEnabled {
|
||||
t.Fatalf("expected routing enabled")
|
||||
}
|
||||
if result.RoutingState.ReasonCode != "ok" {
|
||||
t.Fatalf("unexpected reason code: %q", result.RoutingState.ReasonCode)
|
||||
}
|
||||
if result.RoutingState.Version != 1 {
|
||||
t.Fatalf("unexpected version: %d", result.RoutingState.Version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServiceEvaluateHTTPResultExplicitFailure(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
service.now = func() time.Time { return time.Unix(1001, 0).UTC() }
|
||||
|
||||
repo.UpsertRoutingState(domain.AccountRoutingState{
|
||||
AccountID: 2,
|
||||
Platform: "openai",
|
||||
AccountStatus: domain.AccountStatusActive,
|
||||
RoutingEnabled: true,
|
||||
RiskScore: 20,
|
||||
ReasonCode: "ok",
|
||||
LastProbeAt: time.Unix(999, 0).UTC(),
|
||||
Version: 4,
|
||||
})
|
||||
|
||||
result, err := service.EvaluateHTTPResult(context.Background(), EvaluateInput{
|
||||
AccountID: 2,
|
||||
Platform: "openai",
|
||||
CurrentStatus: domain.AccountStatusActive,
|
||||
StatusCode: 401,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if result.Classification != domain.ProbeClassificationExplicitFailure {
|
||||
t.Fatalf("unexpected classification: %q", result.Classification)
|
||||
}
|
||||
if result.RoutingState.AccountStatus != domain.AccountStatusSuspended {
|
||||
t.Fatalf("unexpected account status: %q", result.RoutingState.AccountStatus)
|
||||
}
|
||||
if result.RoutingState.RoutingEnabled {
|
||||
t.Fatalf("expected routing disabled")
|
||||
}
|
||||
if result.RoutingState.ReasonCode != "auth_rejected" {
|
||||
t.Fatalf("unexpected reason code: %q", result.RoutingState.ReasonCode)
|
||||
}
|
||||
if result.RoutingState.Version != 5 {
|
||||
t.Fatalf("unexpected version: %d", result.RoutingState.Version)
|
||||
}
|
||||
}
|
||||
|
||||
func TestServiceEvaluateHTTPResultInconclusive(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
service.now = func() time.Time { return time.Unix(1002, 0).UTC() }
|
||||
|
||||
result, err := service.EvaluateHTTPResult(context.Background(), EvaluateInput{
|
||||
AccountID: 3,
|
||||
Platform: "openai",
|
||||
CurrentStatus: domain.AccountStatusSuspended,
|
||||
TransportError: errors.New("dial tcp timeout"),
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if result.Classification != domain.ProbeClassificationInconclusive {
|
||||
t.Fatalf("unexpected classification: %q", result.Classification)
|
||||
}
|
||||
if result.RoutingState.AccountStatus != domain.AccountStatusSuspended {
|
||||
t.Fatalf("unexpected account status: %q", result.RoutingState.AccountStatus)
|
||||
}
|
||||
if result.RoutingState.RoutingEnabled {
|
||||
t.Fatalf("expected routing disabled for suspended account")
|
||||
}
|
||||
if result.RoutingState.ReasonCode != "transport_error" {
|
||||
t.Fatalf("unexpected reason code: %q", result.RoutingState.ReasonCode)
|
||||
}
|
||||
if result.RoutingState.RiskScore != 60 {
|
||||
t.Fatalf("unexpected risk score: %d", result.RoutingState.RiskScore)
|
||||
}
|
||||
}
|
||||
23
internal/probe/state_machine.go
Normal file
23
internal/probe/state_machine.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package probe
|
||||
|
||||
import "supply-intelligence/internal/domain"
|
||||
|
||||
func NextAccountStatus(current domain.AccountStatus, classification domain.ProbeClassification) domain.AccountStatus {
|
||||
switch classification {
|
||||
case domain.ProbeClassificationSuccess:
|
||||
return domain.AccountStatusActive
|
||||
case domain.ProbeClassificationExplicitFailure:
|
||||
switch current {
|
||||
case domain.AccountStatusActive:
|
||||
return domain.AccountStatusSuspended
|
||||
case domain.AccountStatusSuspended:
|
||||
return domain.AccountStatusDisabled
|
||||
default:
|
||||
return current
|
||||
}
|
||||
case domain.ProbeClassificationInconclusive:
|
||||
fallthrough
|
||||
default:
|
||||
return current
|
||||
}
|
||||
}
|
||||
30
internal/probe/state_machine_test.go
Normal file
30
internal/probe/state_machine_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package probe
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
func TestNextAccountStatus(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
current domain.AccountStatus
|
||||
classification domain.ProbeClassification
|
||||
want domain.AccountStatus
|
||||
}{
|
||||
{name: "success keeps active", current: domain.AccountStatusActive, classification: domain.ProbeClassificationSuccess, want: domain.AccountStatusActive},
|
||||
{name: "explicit failure active to suspended", current: domain.AccountStatusActive, classification: domain.ProbeClassificationExplicitFailure, want: domain.AccountStatusSuspended},
|
||||
{name: "explicit failure suspended to disabled", current: domain.AccountStatusSuspended, classification: domain.ProbeClassificationExplicitFailure, want: domain.AccountStatusDisabled},
|
||||
{name: "inconclusive keeps active", current: domain.AccountStatusActive, classification: domain.ProbeClassificationInconclusive, want: domain.AccountStatusActive},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := NextAccountStatus(tt.current, tt.classification)
|
||||
if got != tt.want {
|
||||
t.Fatalf("status mismatch: got %q want %q", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
16
internal/publish/README.md
Normal file
16
internal/publish/README.md
Normal file
@@ -0,0 +1,16 @@
|
||||
# Publish semantics boundary
|
||||
|
||||
This package only records package-published events and emits gateway-consumable change records.
|
||||
It does not implement a full publish state machine, admission workflow, or downstream routing synchronization.
|
||||
|
||||
Current repository boundary:
|
||||
- `published` means the upstream package event has been recorded
|
||||
- `pending` means the downstream gateway consumer has not yet confirmed handling
|
||||
- `applied` / `failed` mean the current repository's consumer flow updated the event state during the running process
|
||||
- current gateway event state in this repo is in-memory only, not durable across restart
|
||||
|
||||
Current runtime shape:
|
||||
- manual/debug entry: `POST /internal/supply-intelligence/gateway/consume-once`
|
||||
- minimal background path: application startup also runs a ticker-driven gateway poller
|
||||
|
||||
This avoids claiming that `published = applied`, and also avoids claiming that the current in-memory repository is a durable production persistence layer.
|
||||
59
internal/publish/service.go
Normal file
59
internal/publish/service.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package publish
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
// PackagePublishedEventType is the EventType stamped on every event created by
// Service.RecordPackagePublished.
const PackagePublishedEventType = "supply_package_published"

// ErrInvalidPublishInput is returned by Service.RecordPackagePublished when the
// service is not usable (nil receiver or repository) or the input fails
// validation.
var ErrInvalidPublishInput = errors.New("invalid publish input")
|
||||
|
||||
type PackageEventRepository interface {
|
||||
AppendPackageEventContext(ctx context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error)
|
||||
}
|
||||
|
||||
type Service struct {
|
||||
repo PackageEventRepository
|
||||
}
|
||||
|
||||
// RecordPackagePublishedInput carries the caller-supplied fields of a
// package-published event. EventID, Platform and Model must be non-blank and
// PackageID and Version must be positive; a zero OccurredAt is replaced with
// the current time by RecordPackagePublished.
type RecordPackagePublishedInput struct {
	EventID    string
	PackageID  int64
	Platform   string
	Model      string
	Version    int64
	OccurredAt time.Time
}
|
||||
|
||||
func NewService(repo PackageEventRepository) *Service {
|
||||
return &Service{repo: repo}
|
||||
}
|
||||
|
||||
func (s *Service) RecordPackagePublished(ctx context.Context, input RecordPackagePublishedInput) (domain.PackageChangeEvent, error) {
|
||||
if s == nil || s.repo == nil {
|
||||
return domain.PackageChangeEvent{}, ErrInvalidPublishInput
|
||||
}
|
||||
if strings.TrimSpace(input.EventID) == "" || input.PackageID <= 0 || strings.TrimSpace(input.Platform) == "" || strings.TrimSpace(input.Model) == "" || input.Version <= 0 {
|
||||
return domain.PackageChangeEvent{}, ErrInvalidPublishInput
|
||||
}
|
||||
|
||||
event := domain.PackageChangeEvent{
|
||||
EventID: strings.TrimSpace(input.EventID),
|
||||
EventType: PackagePublishedEventType,
|
||||
PackageID: input.PackageID,
|
||||
Platform: strings.TrimSpace(input.Platform),
|
||||
Model: strings.TrimSpace(input.Model),
|
||||
OccurredAt: input.OccurredAt.UTC(),
|
||||
Version: input.Version,
|
||||
GatewaySyncStatus: domain.GatewaySyncStatusPending,
|
||||
}
|
||||
if event.OccurredAt.IsZero() {
|
||||
event.OccurredAt = time.Now().UTC()
|
||||
}
|
||||
return s.repo.AppendPackageEventContext(ctx, event)
|
||||
}
|
||||
66
internal/publish/service_test.go
Normal file
66
internal/publish/service_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package publish
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
"supply-intelligence/internal/repository"
|
||||
)
|
||||
|
||||
func TestServiceRecordPackagePublished(t *testing.T) {
|
||||
repo := repository.NewMemoryRepository()
|
||||
service := NewService(repo)
|
||||
occurredAt := time.Unix(1715000000, 0)
|
||||
|
||||
event, err := service.RecordPackagePublished(context.Background(), RecordPackagePublishedInput{
|
||||
EventID: "evt-publish-1",
|
||||
PackageID: 1001,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Version: 3,
|
||||
OccurredAt: occurredAt,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if event.EventID != "evt-publish-1" || event.EventType != PackagePublishedEventType {
|
||||
t.Fatalf("unexpected event: %+v", event)
|
||||
}
|
||||
if !event.OccurredAt.Equal(occurredAt.UTC()) {
|
||||
t.Fatalf("unexpected occurred_at: %s", event.OccurredAt)
|
||||
}
|
||||
if event.GatewaySyncStatus != domain.GatewaySyncStatusPending {
|
||||
t.Fatalf("unexpected sync status: %q", event.GatewaySyncStatus)
|
||||
}
|
||||
|
||||
items := repo.ListPackageEvents()
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("unexpected items length: %d", len(items))
|
||||
}
|
||||
if items[0].EventID != event.EventID || items[0].Version != 3 {
|
||||
t.Fatalf("unexpected stored event: %+v", items[0])
|
||||
}
|
||||
if items[0].GatewaySyncStatus != domain.GatewaySyncStatusPending {
|
||||
t.Fatalf("unexpected stored sync status: %+v", items[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestServiceRecordPackagePublishedRejectsInvalidInput(t *testing.T) {
|
||||
service := NewService(repository.NewMemoryRepository())
|
||||
|
||||
_, err := service.RecordPackagePublished(context.Background(), RecordPackagePublishedInput{
|
||||
EventID: " ",
|
||||
PackageID: 0,
|
||||
Platform: "",
|
||||
Model: "",
|
||||
Version: 0,
|
||||
})
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
}
|
||||
if err != ErrInvalidPublishInput {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
278
internal/repository/memory.go
Normal file
278
internal/repository/memory.go
Normal file
@@ -0,0 +1,278 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sort"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"supply-intelligence/internal/domain"
|
||||
)
|
||||
|
||||
// ErrEventNotFound is returned by AckPackageEvent when no package event with
// the given ID is stored.
var ErrEventNotFound = errors.New("event not found")
|
||||
|
||||
func IsGatewayAckResult(result domain.GatewayAckResult) bool {
|
||||
return result == domain.GatewayAckResultApplied || result == domain.GatewayAckResultFailed
|
||||
}
|
||||
|
||||
type MemoryRepository struct {
|
||||
mu sync.RWMutex
|
||||
routingStates map[int64]domain.AccountRoutingState
|
||||
packageEvents map[string]domain.PackageChangeEvent
|
||||
appliedSnapshot map[string]domain.GatewayAppliedSnapshot
|
||||
discoveryCandidates map[string]domain.DiscoveryCandidate
|
||||
supplyPackages map[string]domain.SupplyPackage // key: platform+"_"+model
|
||||
}
|
||||
|
||||
func NewMemoryRepository() *MemoryRepository {
|
||||
return &MemoryRepository{
|
||||
routingStates: map[int64]domain.AccountRoutingState{},
|
||||
packageEvents: map[string]domain.PackageChangeEvent{},
|
||||
appliedSnapshot: map[string]domain.GatewayAppliedSnapshot{},
|
||||
discoveryCandidates: map[string]domain.DiscoveryCandidate{},
|
||||
supplyPackages: map[string]domain.SupplyPackage{},
|
||||
}
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) UpsertRoutingState(state domain.AccountRoutingState) {
|
||||
r.upsertRoutingState(state)
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) UpsertRoutingStateContext(_ context.Context, state domain.AccountRoutingState) domain.AccountRoutingState {
|
||||
return r.upsertRoutingState(state)
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) upsertRoutingState(state domain.AccountRoutingState) domain.AccountRoutingState {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.routingStates[state.AccountID] = state
|
||||
return state
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) GetRoutingState(accountID int64) (domain.AccountRoutingState, bool) {
|
||||
return r.getRoutingState(accountID)
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) GetRoutingStateContext(_ context.Context, accountID int64) (domain.AccountRoutingState, bool) {
|
||||
return r.getRoutingState(accountID)
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) getRoutingState(accountID int64) (domain.AccountRoutingState, bool) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
state, ok := r.routingStates[accountID]
|
||||
return state, ok
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) AppendPackageEvent(evt domain.PackageChangeEvent) {
|
||||
_, _ = r.AppendPackageEventContext(context.Background(), evt)
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) AppendPackageEventContext(_ context.Context, evt domain.PackageChangeEvent) (domain.PackageChangeEvent, error) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if evt.OccurredAt.IsZero() {
|
||||
evt.OccurredAt = time.Now().UTC()
|
||||
}
|
||||
if evt.GatewaySyncStatus == "" {
|
||||
evt.GatewaySyncStatus = domain.GatewaySyncStatusPending
|
||||
}
|
||||
r.packageEvents[evt.EventID] = evt
|
||||
return evt, nil
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) ListPackageEvents() []domain.PackageChangeEvent {
|
||||
items, _ := r.ListPackageEventsAfter("")
|
||||
return items
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) ListPackageEventsAfter(cursor string) ([]domain.PackageChangeEvent, string) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
items := make([]domain.PackageChangeEvent, 0, len(r.packageEvents))
|
||||
for _, evt := range r.packageEvents {
|
||||
items = append(items, evt)
|
||||
}
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
if items[i].OccurredAt.Equal(items[j].OccurredAt) {
|
||||
return items[i].EventID < items[j].EventID
|
||||
}
|
||||
return items[i].OccurredAt.Before(items[j].OccurredAt)
|
||||
})
|
||||
if cursor == "" {
|
||||
return items, nextCursorFor(items)
|
||||
}
|
||||
start := 0
|
||||
if idx, err := strconv.Atoi(cursor); err == nil {
|
||||
if idx < 0 {
|
||||
idx = 0
|
||||
}
|
||||
if idx > len(items) {
|
||||
idx = len(items)
|
||||
}
|
||||
start = idx
|
||||
} else {
|
||||
for i, evt := range items {
|
||||
if evt.EventID == cursor {
|
||||
start = i + 1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if start >= len(items) {
|
||||
return []domain.PackageChangeEvent{}, ""
|
||||
}
|
||||
filtered := append([]domain.PackageChangeEvent(nil), items[start:]...)
|
||||
return filtered, nextCursorFor(items)
|
||||
}
|
||||
|
||||
func nextCursorFor(items []domain.PackageChangeEvent) string {
|
||||
if len(items) == 0 {
|
||||
return ""
|
||||
}
|
||||
return strconv.Itoa(len(items))
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) AckPackageEvent(eventID, consumer string, result domain.GatewayAckResult, detail string, ackedAt time.Time) (domain.PackageChangeEvent, error) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
evt, ok := r.packageEvents[eventID]
|
||||
if !ok {
|
||||
return domain.PackageChangeEvent{}, ErrEventNotFound
|
||||
}
|
||||
if ackedAt.IsZero() {
|
||||
ackedAt = time.Now().UTC()
|
||||
}
|
||||
evt.Consumer = consumer
|
||||
evt.ConsumerDetail = detail
|
||||
evt.GatewaySyncStatus = result.SyncStatus()
|
||||
evt.AckedAt = &ackedAt
|
||||
r.packageEvents[eventID] = evt
|
||||
return evt, nil
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) UpsertGatewayAppliedSnapshot(snapshot domain.GatewayAppliedSnapshot) domain.GatewayAppliedSnapshot {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if snapshot.UpdatedAt.IsZero() {
|
||||
snapshot.UpdatedAt = time.Now().UTC()
|
||||
}
|
||||
r.appliedSnapshot[snapshot.Consumer] = snapshot
|
||||
return snapshot
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) GetGatewayAppliedSnapshot(consumer string) (domain.GatewayAppliedSnapshot, bool) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
snapshot, ok := r.appliedSnapshot[consumer]
|
||||
return snapshot, ok
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) GetDiscoveryCandidateByIDContext(_ context.Context, candidateID string) (domain.DiscoveryCandidate, bool) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
candidate, ok := r.discoveryCandidates[candidateID]
|
||||
return candidate, ok
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) FindDiscoveryCandidateContext(_ context.Context, accountID int64, platform, model string) (domain.DiscoveryCandidate, bool) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
for _, candidate := range r.discoveryCandidates {
|
||||
if candidate.AccountID == accountID && candidate.Platform == platform && candidate.Model == model {
|
||||
return candidate, true
|
||||
}
|
||||
}
|
||||
return domain.DiscoveryCandidate{}, false
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) UpsertDiscoveryCandidateContext(_ context.Context, candidate domain.DiscoveryCandidate) domain.DiscoveryCandidate {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if candidate.DiscoveredAt.IsZero() {
|
||||
candidate.DiscoveredAt = time.Now().UTC()
|
||||
}
|
||||
if candidate.UpdatedAt.IsZero() {
|
||||
candidate.UpdatedAt = candidate.DiscoveredAt
|
||||
}
|
||||
r.discoveryCandidates[candidate.CandidateID] = candidate
|
||||
return candidate
|
||||
}
|
||||
|
||||
func (r *MemoryRepository) ListDiscoveryCandidatesContext(_ context.Context, status domain.DiscoveryCandidateStatus) []domain.DiscoveryCandidate {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
items := make([]domain.DiscoveryCandidate, 0, len(r.discoveryCandidates))
|
||||
for _, candidate := range r.discoveryCandidates {
|
||||
if status != "" && candidate.Status != status {
|
||||
continue
|
||||
}
|
||||
items = append(items, candidate)
|
||||
}
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
if items[i].DiscoveredAt.Equal(items[j].DiscoveredAt) {
|
||||
return items[i].CandidateID < items[j].CandidateID
|
||||
}
|
||||
return items[i].DiscoveredAt.Before(items[j].DiscoveredAt)
|
||||
})
|
||||
return items
|
||||
}
|
||||
|
||||
// --- SupplyPackage methods ---
|
||||
|
||||
// UpsertSupplyPackage creates or updates a supply package
|
||||
func (r *MemoryRepository) UpsertSupplyPackage(pkg domain.SupplyPackage) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
key := pkg.Platform + "_" + pkg.Model
|
||||
if existing, ok := r.supplyPackages[key]; ok {
|
||||
pkg.PackageID = existing.PackageID
|
||||
pkg.Version = existing.Version + 1
|
||||
pkg.CreatedAt = existing.CreatedAt
|
||||
}
|
||||
if pkg.CreatedAt.IsZero() {
|
||||
pkg.CreatedAt = time.Now().UTC()
|
||||
}
|
||||
pkg.UpdatedAt = time.Now().UTC()
|
||||
r.supplyPackages[key] = pkg
|
||||
}
|
||||
|
||||
// GetSupplyPackage retrieves a supply package by platform and model
|
||||
func (r *MemoryRepository) GetSupplyPackage(platform, model string) (domain.SupplyPackage, bool) {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
key := platform + "_" + model
|
||||
pkg, ok := r.supplyPackages[key]
|
||||
return pkg, ok
|
||||
}
|
||||
|
||||
// ListSupplyPackages returns all supply packages, optionally filtered by status
|
||||
func (r *MemoryRepository) ListSupplyPackages(status string) []domain.SupplyPackage {
|
||||
r.mu.RLock()
|
||||
defer r.mu.RUnlock()
|
||||
items := make([]domain.SupplyPackage, 0, len(r.supplyPackages))
|
||||
for _, pkg := range r.supplyPackages {
|
||||
if status == "" || pkg.Status == status {
|
||||
items = append(items, pkg)
|
||||
}
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// UpdateCandidateStatus updates a candidate's status (used by admission service)
|
||||
func (r *MemoryRepository) UpdateCandidateStatus(ctx context.Context, candidateID string, status domain.DiscoveryCandidateStatus, failureCode, failureSummary string) error {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if _, ok := r.discoveryCandidates[candidateID]; !ok {
|
||||
return errors.New("candidate not found")
|
||||
}
|
||||
c := r.discoveryCandidates[candidateID]
|
||||
c.Status = status
|
||||
c.ReasonCode = failureCode
|
||||
c.UpdatedAt = time.Now().UTC()
|
||||
c.Version++
|
||||
r.discoveryCandidates[candidateID] = c
|
||||
return nil
|
||||
}
|
||||
136
internal/repository/memory_test.go
Normal file
136
internal/repository/memory_test.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package repository
|
||||
|
||||
import (
	"context"
	"testing"
	"time"

	"supply-intelligence/internal/domain"
)
|
||||
|
||||
func TestMemoryRepositoryRoutingState(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
state := domain.AccountRoutingState{AccountID: 1, Platform: "openai", AccountStatus: domain.AccountStatusActive, RoutingEnabled: true, Version: 1}
|
||||
repo.UpsertRoutingState(state)
|
||||
|
||||
got, ok := repo.GetRoutingState(1)
|
||||
if !ok {
|
||||
t.Fatalf("expected routing state")
|
||||
}
|
||||
if got.AccountStatus != domain.AccountStatusActive {
|
||||
t.Fatalf("unexpected status: %q", got.AccountStatus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryRepositoryPackageEventsAndAck(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
evt := domain.PackageChangeEvent{EventID: "evt-1", EventType: "supply_package_published", PackageID: 1, Platform: "openai", Model: "gpt-4.1-mini", OccurredAt: time.Unix(10, 0).UTC(), Version: 2}
|
||||
repo.AppendPackageEvent(evt)
|
||||
|
||||
items := repo.ListPackageEvents()
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("expected 1 event, got %d", len(items))
|
||||
}
|
||||
ackedAt := time.Unix(20, 0).UTC()
|
||||
updated, err := repo.AckPackageEvent("evt-1", "gateway", domain.GatewayAckResultApplied, "ok", ackedAt)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected ack error: %v", err)
|
||||
}
|
||||
if updated.GatewaySyncStatus != domain.GatewaySyncStatusApplied {
|
||||
t.Fatalf("unexpected ack status: %+v", updated)
|
||||
}
|
||||
if updated.Consumer != "gateway" || updated.ConsumerDetail != "ok" {
|
||||
t.Fatalf("unexpected consumer metadata: %+v", updated)
|
||||
}
|
||||
if updated.AckedAt == nil || !updated.AckedAt.Equal(ackedAt) {
|
||||
t.Fatalf("unexpected ack time: %+v", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryRepositoryListPackageEventsAfterCursor(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-1", EventType: "supply_package_published", PackageID: 1, Platform: "openai", Model: "a", OccurredAt: time.Unix(10, 0).UTC(), Version: 1})
|
||||
repo.AppendPackageEvent(domain.PackageChangeEvent{EventID: "evt-2", EventType: "supply_package_published", PackageID: 2, Platform: "openai", Model: "b", OccurredAt: time.Unix(20, 0).UTC(), Version: 2})
|
||||
|
||||
items, nextCursor := repo.ListPackageEventsAfter("")
|
||||
if len(items) != 2 || nextCursor != "2" {
|
||||
t.Fatalf("unexpected initial page: len=%d next=%q", len(items), nextCursor)
|
||||
}
|
||||
|
||||
items, nextCursor = repo.ListPackageEventsAfter("1")
|
||||
if len(items) != 1 || items[0].EventID != "evt-2" || nextCursor != "2" {
|
||||
t.Fatalf("unexpected cursor page: items=%+v next=%q", items, nextCursor)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryRepositoryDiscoveryCandidateCRUD(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
candidate := domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusPendingAdmission,
|
||||
DiscoveredAt: time.Unix(10, 0).UTC(),
|
||||
UpdatedAt: time.Unix(10, 0).UTC(),
|
||||
Version: 1,
|
||||
}
|
||||
repo.UpsertDiscoveryCandidateContext(nil, candidate)
|
||||
got, ok := repo.GetDiscoveryCandidateByIDContext(nil, "cand-1")
|
||||
if !ok || got.CandidateID != "cand-1" {
|
||||
t.Fatalf("expected candidate, got %+v ok=%v", got, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryRepositoryFindDiscoveryCandidateByBusinessKey(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
repo.UpsertDiscoveryCandidateContext(nil, domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
Model: "gpt-4.1-mini",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusPendingAdmission,
|
||||
DiscoveredAt: time.Unix(10, 0).UTC(),
|
||||
UpdatedAt: time.Unix(10, 0).UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
got, ok := repo.FindDiscoveryCandidateContext(nil, 1, "openai", "gpt-4.1-mini")
|
||||
if !ok || got.CandidateID != "cand-1" {
|
||||
t.Fatalf("expected candidate by business key, got %+v ok=%v", got, ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryRepositoryListDiscoveryCandidatesByStatusAndOrder(t *testing.T) {
|
||||
repo := NewMemoryRepository()
|
||||
repo.UpsertDiscoveryCandidateContext(nil, domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-2",
|
||||
AccountID: 2,
|
||||
Platform: "openai",
|
||||
Model: "b",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusAdmitted,
|
||||
DiscoveredAt: time.Unix(20, 0).UTC(),
|
||||
UpdatedAt: time.Unix(20, 0).UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
repo.UpsertDiscoveryCandidateContext(nil, domain.DiscoveryCandidate{
|
||||
CandidateID: "cand-1",
|
||||
AccountID: 1,
|
||||
Platform: "openai",
|
||||
Model: "a",
|
||||
Source: "seed",
|
||||
Status: domain.DiscoveryCandidateStatusPendingAdmission,
|
||||
DiscoveredAt: time.Unix(10, 0).UTC(),
|
||||
UpdatedAt: time.Unix(10, 0).UTC(),
|
||||
Version: 1,
|
||||
})
|
||||
items := repo.ListDiscoveryCandidatesContext(nil, domain.DiscoveryCandidateStatusPendingAdmission)
|
||||
if len(items) != 1 || items[0].CandidateID != "cand-1" {
|
||||
t.Fatalf("unexpected filtered items: %+v", items)
|
||||
}
|
||||
all := repo.ListDiscoveryCandidatesContext(nil, "")
|
||||
if len(all) != 2 || all[0].CandidateID != "cand-1" || all[1].CandidateID != "cand-2" {
|
||||
t.Fatalf("unexpected ordering: %+v", all)
|
||||
}
|
||||
}
|
||||
21
migrations/0001_init.sql
Normal file
21
migrations/0001_init.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- Current routing state per account: one row per account_id.
-- routing_enabled defaults to TRUE, risk_score to 0, version to 1.
CREATE TABLE IF NOT EXISTS supply_intelligence_account_routing_states (
|
||||
account_id BIGINT PRIMARY KEY,
|
||||
platform TEXT NOT NULL,
|
||||
account_status TEXT NOT NULL,
|
||||
routing_enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
risk_score INTEGER NOT NULL DEFAULT 0,
|
||||
reason_code TEXT NOT NULL DEFAULT '',
|
||||
last_probe_at TIMESTAMPTZ NOT NULL,
|
||||
version BIGINT NOT NULL DEFAULT 1
|
||||
);
|
||||
|
||||
-- Package change events, keyed by event_id; ack_status starts as 'pending'.
-- NOTE(review): this table has no columns for the acknowledging consumer, its
-- detail message, or the ack time; confirm whether they are needed here.
CREATE TABLE IF NOT EXISTS supply_intelligence_package_change_events (
|
||||
event_id TEXT PRIMARY KEY,
|
||||
event_type TEXT NOT NULL,
|
||||
package_id BIGINT NOT NULL,
|
||||
platform TEXT NOT NULL,
|
||||
model TEXT NOT NULL,
|
||||
occurred_at TIMESTAMPTZ NOT NULL,
|
||||
version BIGINT NOT NULL,
|
||||
ack_status TEXT NOT NULL DEFAULT 'pending'
|
||||
);
|
||||
69
migrations/0002_admission.sql
Normal file
69
migrations/0002_admission.sql
Normal file
@@ -0,0 +1,69 @@
|
||||
-- Migration 0002: Admission Testing & Model Candidates
|
||||
-- Adds model_candidates table and supply_packages draft support
|
||||
|
||||
-- Discovered model candidates, keyed by candidate_id. New rows default to
-- status 'pending_admission' and source 'official_api'.
-- NOTE(review): uniqueness is enforced on (platform, model) only, so two
-- accounts cannot each hold a candidate for the same model; confirm this is
-- intended, or whether account_id belongs in the constraint.
CREATE TABLE IF NOT EXISTS supply_intelligence_model_candidates (
|
||||
candidate_id TEXT PRIMARY KEY,
|
||||
account_id BIGINT NOT NULL,
|
||||
platform TEXT NOT NULL,
|
||||
model TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'pending_admission',
|
||||
source TEXT NOT NULL DEFAULT 'official_api',
|
||||
reason_code TEXT DEFAULT '',
|
||||
failure_summary TEXT DEFAULT '',
|
||||
discovered_at TIMESTAMPTZ NOT NULL,
|
||||
last_test_at TIMESTAMPTZ,
|
||||
updated_at TIMESTAMPTZ NOT NULL,
|
||||
version BIGINT NOT NULL DEFAULT 1,
|
||||
UNIQUE(platform, model)
|
||||
);
|
||||
|
||||
-- Lookup indexes for the candidates table. IF NOT EXISTS keeps the migration
-- re-runnable, matching the CREATE TABLE IF NOT EXISTS statements around it.
CREATE INDEX IF NOT EXISTS idx_candidates_status ON supply_intelligence_model_candidates(status);
CREATE INDEX IF NOT EXISTS idx_candidates_platform ON supply_intelligence_model_candidates(platform);
CREATE INDEX IF NOT EXISTS idx_candidates_discovered ON supply_intelligence_model_candidates(discovered_at DESC);
|
||||
|
||||
-- The sequence must exist before the table: the nextval('...') default is
-- resolved when CREATE TABLE runs, so creating it afterwards fails on a fresh
-- database.
CREATE SEQUENCE IF NOT EXISTS admission_test_id_seq;

-- One row per admission test executed against a model candidate.
CREATE TABLE IF NOT EXISTS supply_intelligence_admission_test_logs (
    test_id BIGINT PRIMARY KEY DEFAULT nextval('admission_test_id_seq'),
    candidate_id TEXT NOT NULL REFERENCES supply_intelligence_model_candidates(candidate_id),
    status TEXT NOT NULL,
    failure_code TEXT,
    failure_summary TEXT,
    tested_at TIMESTAMPTZ NOT NULL,
    version BIGINT NOT NULL DEFAULT 1
);
|
||||
|
||||
-- The sequence must exist before the table: the nextval('...') default is
-- resolved when CREATE TABLE runs, so creating it afterwards fails on a fresh
-- database.
CREATE SEQUENCE IF NOT EXISTS supply_package_id_seq;

-- Supply packages, one per (platform, model); new rows start as 'draft'.
CREATE TABLE IF NOT EXISTS supply_intelligence_supply_packages (
    package_id BIGINT PRIMARY KEY DEFAULT nextval('supply_package_id_seq'),
    platform TEXT NOT NULL,
    model TEXT NOT NULL,
    status TEXT NOT NULL DEFAULT 'draft',
    source TEXT NOT NULL DEFAULT 'si_auto',
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
    version BIGINT NOT NULL DEFAULT 1,
    UNIQUE(platform, model)
);
|
||||
|
||||
-- New fields to extend routing states (via migration, not replacement)
-- routing_states already has account_id as PK; add probe_execution_logs

-- The sequence must exist before the table: the nextval('...') default is
-- resolved when CREATE TABLE runs, so creating it afterwards fails on a fresh
-- database.
CREATE SEQUENCE IF NOT EXISTS probe_log_id_seq;

-- One row per probe execution against an account.
CREATE TABLE IF NOT EXISTS supply_intelligence_probe_execution_logs (
    log_id BIGINT PRIMARY KEY DEFAULT nextval('probe_log_id_seq'),
    account_id BIGINT NOT NULL,
    platform TEXT NOT NULL,
    probe_result TEXT NOT NULL,
    failure_class TEXT,
    http_status INTEGER,
    latency_ms INTEGER,
    risk_score INTEGER NOT NULL,
    evaluated_transition TEXT NOT NULL,
    executed_at TIMESTAMPTZ NOT NULL,
    request_id TEXT NOT NULL,
    version BIGINT NOT NULL DEFAULT 1
);
|
||||
|
||||
-- Supports "latest probes for an account" lookups. IF NOT EXISTS keeps the
-- migration re-runnable, like the table definitions in this file.
CREATE INDEX IF NOT EXISTS idx_probe_logs_account_time ON supply_intelligence_probe_execution_logs(account_id, executed_at DESC);
|
||||
568
prd/PRD.md
Normal file
568
prd/PRD.md
Normal file
@@ -0,0 +1,568 @@
|
||||
# 供应链智能增强系统(Supply Intelligence)PRD
|
||||
|
||||
> 状态说明(2026-05 收敛修订):本文件保留为历史版本参考,已不再作为当前实现真源。
|
||||
> 当前产品真源以“2026-05 新 PM 基线 + tech/BASELINE_TECHLEAD_V2.md + 已收敛的测试/部署/任务决议文档”为准。
|
||||
> 若本文件与上述新真源冲突,以新真源为准,尤其是以下方面不得再按本文件旧口径执行:
|
||||
> 1. pricing / prediction / 向量检索 / 广义开放平台能力
|
||||
> 2. 探针 timeout / TCP / DNS 触发惩罚性降级
|
||||
> 3. 自动发布 / 自动下架 / disabled 自动恢复
|
||||
> 4. gateway 强耦合同步热更新主路径
|
||||
> 5. 以独立平台化重部署作为默认落地方式
|
||||
|
||||
> 文档版本:v1.0
|
||||
> 撰写日期:2026-04-27
|
||||
> 撰写人:PM(产品经理)
|
||||
> 评审状态:待 TechLead 评审
|
||||
|
||||
---
|
||||
|
||||
## 1. 概述
|
||||
|
||||
### 1.1 一句话价值
|
||||
通过自动化探针、全网扫描与准入测试,让平台供应链中的供应商账号、可用模型列表始终保持最新且可路由,消除人工维护滞后导致的可用性黑洞。
|
||||
|
||||
### 1.2 用户问题
|
||||
- 运营团队每日需要人工检查数十个供应商账号的状态(额度、密钥失效、TOS 变更),遗漏率高。
|
||||
- 新模型上线后,平台未能及时感知,导致流量无法路由到新模型,竞争力下降。
|
||||
- 供应商账号过期或密钥失效后,网关仍在尝试路由请求,直接引发用户端报错。
|
||||
- 手动在各运营商后台注册账号、申请 API Key 的周期以天计,阻碍新供应商的快速接入。
|
||||
|
||||
### 1.3 业务意义
|
||||
- 将供应链信息的保鲜周期从“人工天级”缩短到“自动分钟级”。
|
||||
- 降低因供应商/模型失效导致的用户可见错误率。
|
||||
- 缩短新模型上线到平台可售卖之间的上市时间(Time-to-Market)。
|
||||
- 为后续动态定价、智能路由提供实时、准确的供应链数据底座。
|
||||
|
||||
---
|
||||
|
||||
## 2. 目标
|
||||
|
||||
### 2.1 业务目标
|
||||
| 目标编号 | 目标描述 | 度量方式 |
|
||||
|---------|---------|---------|
|
||||
| BG-01 | 供应商账号异常状态从发生到被标记的平均时间 ≤ 15 分钟 | 从供应商侧异常发生到本系统将其 status 改为 `suspended` 或 `disabled` 的时间差 |
|
||||
| BG-02 | 全网新模型从发布到进入平台可售卖列表的平均时间 ≤ 4 小时 | 从模型在官方文档/接口中出现到本系统将其对应的 supply_package 状态置为 `active` 的时间差 |
|
||||
| BG-03 | 因供应商账号失效导致的用户可见错误率下降 80% | 对比上线前 30 天与上线后 30 天,网关返回 502/503 且根因指向供应商失效的请求占比 |
|
||||
| BG-04 | 人工维护供应商基础信息的工作量减少 70% | 运营团队每周在供应商信息维护上投入的小时数对比 |
|
||||
|
||||
### 2.2 用户目标
|
||||
- **平台运营团队**:在一个界面看到所有供应商账号的健康度、模型覆盖度、待处理事项,不再需要逐家登录供应商后台确认。
|
||||
- **供应链管理人员**:新供应商或新模型的接入流程从“人工申请-测试-录入”变为“自动发现-自动测试-人工确认一键上架”。
|
||||
- **技术负责人**:系统具备明确的熔断、降级、审计能力,自动化操作不引入新的稳定性风险。
|
||||
- **商务负责人**:新模型上架速度成为可量化指标,可用于对外商务承诺。
|
||||
|
||||
### 2.3 成功定义
|
||||
项目被判定为成功的条件是:
|
||||
1. BG-01、BG-03、BG-04 三项指标在正式上线后 30 天内全部达成。
|
||||
2. 系统在连续 7 天内未出现因本系统自身故障导致的供应商状态误标记(false positive 率 ≤ 1%)。
|
||||
3. 所有自动化操作(状态变更、模型录入、账号注册)具备完整审计日志,且日志保留 ≥ 90 天。
|
||||
|
||||
---
|
||||
|
||||
## 3. 范围
|
||||
|
||||
### 3.1 In Scope
|
||||
|
||||
#### 模块 A:供应商品质探针(Supply Health Probe)
|
||||
- 对已录入 `supply_accounts` 的账号,按配置周期发起连通性、额度、密钥有效性探针。
|
||||
- 根据探针结果,自动将账号状态在 `active`、`suspended`、`disabled` 之间迁移(需满足状态机规则,不允许直接 `active` → `disabled`,必须经过 `suspended`)。
|
||||
- 对探针结果生成风险评分,写入 `supply_accounts.risk_score` 与 `risk_reason`。
|
||||
- 对状态变更事件写入审计日志。
|
||||
|
||||
#### 模块 B:全网模型发现(Model Discovery)
|
||||
- 对接各供应商官方 API / 文档 / 变更源,扫描其已发布的模型列表。
|
||||
- 将扫描到的模型与平台现有 `supply_packages` 中的 `platform` + `model` 组合进行比对,识别“新增模型”。
|
||||
- 对新增模型创建候选记录(`supply_intelligence_model_candidates` 表,状态为 `discovered`),等待准入测试。
|
||||
- 对已从官方列表下架但平台仍有 `active` 套餐的模型,标记为 `deprecated`,触发告警通知运营团队。
|
||||
|
||||
#### 模块 C:模型准入测试(Model Admission Test)
|
||||
- 对状态为 `discovered` 的候选模型,使用标准化测试用例集(覆盖 chat/completion/embedding 等 endpoint)进行功能验证。
|
||||
- 测试维度包括:接口可用性、响应格式合规性、延迟 P50/P99、token 计数一致性、错误码映射正确性。
|
||||
- 测试通过后,候选模型状态迁移为 `test_passed`,并自动生成一份 `supply_package` 草稿(`draft` 状态),等待运营团队确认后发布。
|
||||
- 测试失败的模型状态迁移为 `test_failed`,记录失败原因与日志,保留 30 天后自动清理。
|
||||
|
||||
#### 模块 D:账号自动注册(Account Auto-Registration)
|
||||
- 针对支持自动化注册流程的供应商(需配置化白名单),系统通过其公开注册接口或模拟浏览器流程完成账号注册。
|
||||
- 注册成功后,自动申请 API Key,将凭证加密后写入 `supply_accounts`,状态置为 `pending`。
|
||||
- 注册过程中涉及的手机/邮箱验证,接入平台已集成的 SMS/邮件网关;若 SMS/邮件网关未就绪,该供应商的自动注册能力必须 fail-closed(拒绝启动,不静默降级)。
|
||||
- 注册行为必须写入审计日志,凭证指纹写入 `credential_fingerprint`。
|
||||
|
||||
#### 模块 E:运营工作台(Operations Dashboard)
|
||||
- 展示待处理候选模型列表、待确认供应商状态变更、自动注册任务队列。
|
||||
- 提供“一键确认上架”、“忽略此模型”、“手动触发探针”三个人工干预入口。
|
||||
- 展示供应链覆盖率(平台已上架模型数 / 全网可发现模型数)。
|
||||
|
||||
### 3.2 Out of Scope
|
||||
| 编号 | 内容 | 原因 |
|
||||
|-----|------|------|
|
||||
| OOS-01 | 供应商侧计费系统对接与自动充值 | 属于财务结算域,不在供应链智能范围内 |
|
||||
| OOS-02 | 基于发现结果的动态定价算法 | 属于 pricing-engine 项目,本系统只生成 package 草稿中的建议价 |
|
||||
| OOS-03 | 供应商账号的 TOS 法律合规性自动审查 | 法律文本语义分析超出当前工程边界,本系统只做“TOS 变更标记” |
|
||||
| OOS-04 | 不支持公开注册接口的供应商(如需要企业资质审核、线下合同)的自动注册 | 无法工程化闭环,保留人工注册入口 |
|
||||
| OOS-05 | 对供应商内部模型版本迭代(如从 gpt-4-turbo 到 gpt-4-turbo-2024-04-09)的语义级差异分析 | 成本过高,只识别模型 ID 维度的新增/下架 |
|
||||
| OOS-06 | 跨供应商的模型能力等价性判定(如“模型 A 是否等价于模型 B”) | 属于模型评估平台,非供应链基础能力 |
|
||||
|
||||
### 3.3 假设与依赖
|
||||
| 编号 | 假设/依赖 | 影响 |
|
||||
|-----|----------|------|
|
||||
| ASP-01 | 各供应商均提供可公开访问的模型列表接口或文档页面 | 若某供应商关闭列表接口,该供应商的模型发现能力降级为手动录入 |
|
||||
| ASP-02 | 账号自动注册仅针对已签署技术合作框架协议、允许自动化注册的供应商 | 法律合规问题由商务团队前置解决 |
|
||||
| ASP-03 | `supply-api` 现有的 `supply_accounts` 表结构在上线前不做破坏性变更 | 本系统的新增表需通过标准 migration 脚本创建 |
|
||||
| ASP-04 | 平台已具备 SMS/邮件网关的运行时能力,或本模块的自动注册可被条件关闭 | 参照 `supply-api/CLAUDE.md` 中“条件能力必须 fail-closed”原则 |
|
||||
| ASP-05 | 探针任务调度依赖平台统一的 job scheduler(如内部 cron 或 Temporal),不重新造调度器 | 若 scheduler 不可用,探针模块延迟启动 |
|
||||
| ASP-06 | 测试用例集的维护由 QA 团队负责,本系统负责调度执行与结果收集 | 测试用例本身不在本系统代码库内管理 |
|
||||
|
||||
---
|
||||
|
||||
## 4. 用户场景
|
||||
|
||||
### 4.1 主流程
|
||||
|
||||
#### 场景 S1:供应商账号自动探针与状态变更
|
||||
```
|
||||
1. 调度器按配置周期(默认 5 分钟)触发对供应商账号 A 的探针任务。
|
||||
2. 探针模块调用供应商健康检查端点(或发送一条低成本测试请求)。
|
||||
3. 供应商返回 401/403 或超时 > 10 秒,探针判定为“密钥失效或账号异常”。
|
||||
4. 系统检查该账号当前状态:
|
||||
a. 若为 active → 改为 suspended,risk_score 设为 80,risk_reason 写入“密钥失效”。
|
||||
b. 若为 suspended 且连续 3 次探针失败 → 改为 disabled。
|
||||
5. 状态变更事件写入审计日志(object_type=supply_account, action=auto_suspend)。
|
||||
6. 向运营团队发送告警通知(钉钉/企业微信),包含账号、供应商、原因、时间。
|
||||
```
|
||||
|
||||
#### 场景 S2:全网扫描发现新模型
|
||||
```
|
||||
1. 调度器每 1 小时触发一次全网扫描任务。
|
||||
2. 扫描模块向各供应商的模型列表接口发起请求,解析出当前所有 model_id。
|
||||
3. 与 supply_packages 中 status ∈ {active, paused, draft} 的记录去重比对。
|
||||
4. 发现供应商 X 新增模型 "new-model-v1",平台暂无记录。
|
||||
5. 在 model_candidates 表中插入一条记录:
|
||||
- platform = X, model_id = "new-model-v1"
|
||||
- status = discovered
|
||||
- discovered_at = NOW()
|
||||
6. 触发准入测试流水线(异步任务)。
|
||||
```
|
||||
|
||||
#### 场景 S3:新模型准入测试通过并上架
|
||||
```
|
||||
1. 准入测试模块从 model_candidates 取出 status = discovered 的记录。
|
||||
2. 使用对应供应商的测试账号,发送标准化测试请求集(≥ 5 个不同用例)。
|
||||
3. 所有用例返回 HTTP 200,响应体符合 OpenAI-compatible schema,延迟 P99 < 30 秒。
|
||||
4. 将 candidate 状态更新为 test_passed,并生成 supply_package 草稿:
|
||||
- platform = X, model = "new-model-v1"
|
||||
- status = draft
|
||||
- price_per_1m_input / price_per_1m_output 使用预设默认值(可配置)
|
||||
5. 运营工作台出现“待上架新模型”卡片。
|
||||
6. 运营人员点击“确认上架”,package 状态改为 active,进入 gateway 路由表。
|
||||
```
|
||||
|
||||
#### 场景 S4:供应商账号自动注册
|
||||
```
|
||||
1. 运营人员在后台勾选“启用供应商 Y 的自动注册”,并配置注册参数(如邮箱域名、账号前缀规则)。
|
||||
2. 系统检测到供应商 Y 的可用账号数 < 配置阈值(如 < 2 个 active 账号)。
|
||||
3. 触发自动注册任务:
|
||||
a. 调用供应商 Y 的注册接口,提交随机生成的用户名、密码、企业邮箱。
|
||||
b. 等待并解析注册确认邮件,点击确认链接(或输入邮件验证码)。
|
||||
c. 登录账号后台,申请 API Key。
|
||||
4. 将 API Key 经 KMS 加密后写入 supply_accounts,status = pending。
|
||||
5. 触发自动验证(复用现有 Verify 流程),验证通过后 status 改为 active。
|
||||
```
|
||||
|
||||
### 4.2 异常流程
|
||||
|
||||
#### 场景 E1:探针遭遇供应商 Rate Limit
|
||||
```
|
||||
1. 探针请求返回 429。
|
||||
2. 该次探针标记为 inconclusive,不计入连续失败次数。
|
||||
3. 调度器在指数退避后(1min → 2min → 4min)重试,最多重试 3 次。
|
||||
4. 若 3 次后仍为 429,本次探针周期跳过该账号,记录日志,不触发状态变更。
|
||||
```
|
||||
|
||||
#### 场景 E2:模型准入测试超时
|
||||
```
|
||||
1. 某测试用例在 60 秒内未收到响应。
|
||||
2. 该用例标记为 timeout,测试流水线整体标记为 test_failed。
|
||||
3. candidate 状态更新为 test_failed,失败原因写入 "admission_test_timeout"。
|
||||
4. 运营工作台展示失败详情,运营人员可选择:
|
||||
a. 手动重新触发测试;
|
||||
b. 标记为 ignored,该 model_id 在 7 天内不再自动扫描。
|
||||
```
|
||||
|
||||
#### 场景 E3:自动注册时 SMS/邮件网关不可用
|
||||
```
|
||||
1. 注册流程进行到验证码接收步骤。
|
||||
2. 调用 SMS/邮件网关返回 503 或超时。
|
||||
3. 该注册任务整体失败,写入审计日志(action=auto_register_failed)。
|
||||
4. 依据 fail-closed 原则,不向用户或上游返回任何“注册成功”的虚假状态。
|
||||
5. 任务进入死信队列,24 小时后由人工或系统重试。
|
||||
```
|
||||
|
||||
### 4.3 边缘流程
|
||||
|
||||
#### 场景 B1:供应商模型 ID 变更(非新增/下架,而是重命名)
|
||||
- 扫描模块发现旧 model_id 消失、新 model_id 出现,但模型能力描述高度相似。
|
||||
- 系统无法自动判定为“重命名”还是“旧模型下架+新模型上线”。
|
||||
- 生成一条运营待办事项,由运营人员人工确认关系,系统不做自动关联。
|
||||
|
||||
#### 场景 B2:运营人员手动暂停自动探针
|
||||
- 运营人员可在后台对单个供应商账号勾选“暂停自动探针”。
|
||||
- 该账号的探针任务在调度器中被跳过,但运营人员仍可手动触发单次探针。
|
||||
- 暂停状态写入 `supply_accounts` 的扩展字段(或通过新增 `auto_probe_enabled` 字段),探针模块读取该字段后决定是否执行。
|
||||
|
||||
#### 场景 B3:账号处于 suspended 期间收到用户请求
|
||||
- 本系统不直接处理流量路由,但需向 gateway 提供实时供应商状态查询接口。
|
||||
- gateway 在路由决策时查询该接口,若账号为 suspended/disabled,则将该账号从候选池移除。
|
||||
- 该接口的 SLA:P99 延迟 < 50ms,可用性 ≥ 99.9%。
|
||||
|
||||
### 4.4 用户故事
|
||||
|
||||
| 编号 | 角色 | 故事 | 验收对应 |
|
||||
|-----|------|------|---------|
|
||||
| US-01 | 运营人员 | 我想在一个页面看到所有供应商账号最近一次探针的时间和结果,以便快速定位异常账号 | AC-01, AC-02 |
|
||||
| US-02 | 供应链管理员 | 我想在新模型被系统发现后收到通知,并在工作台一键确认上架,以便缩短上市时间 | AC-04, AC-06, AC-11 |
|
||||
| US-03 | 技术负责人 | 我想所有自动化状态变更都有审计日志和回滚记录,以便在误操作时追溯和恢复 | AC-10 |
|
||||
| US-04 | 商务负责人 | 我想看到平台模型覆盖率与竞品对比的报表,以便对外展示平台能力 | 暂无对应 AC(第 5 节未覆盖覆盖率报表,待补充) |
|
||||
| US-05 | 运营人员 | 我想对特定账号暂停自动探针,以便在供应商维护窗口期避免误报警 | 暂无对应 AC(见场景 B2,第 5 节待补充) |
|
||||
| US-06 | 供应链管理员 | 我想对支持自动注册的供应商配置自动补货策略,以便在可用账号不足时自动补充 | AC-08, AC-09 |
|
||||
|
||||
---
|
||||
|
||||
## 5. 验收标准(AC)
|
||||
|
||||
> 以下每条 AC 均为可测试、无模糊词的要求。QA 可直接据此编写测试用例。
|
||||
|
||||
### 模块 A:供应商品质探针
|
||||
|
||||
**AC-01 探针覆盖度**
|
||||
- 给定 `supply_accounts` 中 `status` 为 `active` 或 `suspended` 的账号数量 N,系统在任意时刻 T,最近 15 分钟内被探针覆盖的账号数量 M 必须满足 M/N ≥ 99%。
|
||||
- 测试方法:插入 100 条测试账号记录,观察 15 分钟窗口内探针日志条数是否 ≥ 99。
|
||||
|
||||
**AC-02 状态变更正确性**
|
||||
- 给定一个 `status=active` 的账号,模拟其探针返回 1 次 401,系统在 60 秒内将其 `status` 更新为 `suspended`。
|
||||
- 给定一个 `status=suspended` 的账号,模拟其连续 3 次探针返回 401,系统在 60 秒内将其 `status` 更新为 `disabled`。
|
||||
- 给定一个 `status=active` 的账号,模拟其探针返回 1 次 429,其 `status` 在 15 分钟内保持 `active` 不变。
|
||||
- 测试方法:Mock 供应商响应,查询数据库字段值。
|
||||
|
||||
**AC-03 误报率**
|
||||
- 在 7 天连续运行测试中,探针将实际正常的账号标记为 `suspended` 或 `disabled` 的次数 ≤ 总探针次数的 1%。
|
||||
- 测试方法:使用全部正常的测试账号运行 7 天,统计状态误变更次数。
|
||||
|
||||
### 模块 B:全网模型发现
|
||||
|
||||
**AC-04 新模型发现延迟**
|
||||
- 给定一个已对接的供应商,在其模型列表中新增一个 model_id,系统在 2 个扫描周期(默认 2 小时)内将该 model_id 写入 `model_candidates` 且 `status=discovered`。
|
||||
- 测试方法:Mock 供应商模型列表接口,在 T0 新增 model_id,T0+2h 查询数据库验证。
|
||||
|
||||
**AC-05 已下架模型告警**
|
||||
- 给定一个 `supply_packages` 中 `status=active` 的 model_id,在供应商侧该 model_id 消失后,系统在 2 个扫描周期内:
|
||||
- 将该 package 的 `status` 保持 `active` 不变(不自动下架,避免误伤);
|
||||
- 在运营工作台生成一条“模型已下架”告警待办;
|
||||
- 向运营人员发送通知。
|
||||
- 测试方法:Mock 供应商模型列表,移除 model_id,验证告警产生与 package 状态未变。
|
||||
|
||||
### 模块 C:模型准入测试
|
||||
|
||||
**AC-06 准入测试通过**
|
||||
- 给定一个 `status=discovered` 的 candidate,其供应商测试账号正常,系统在 30 分钟内完成全部测试用例执行,candidate 状态变为 `test_passed`,且自动生成一条 `supply_packages` 记录(`status=draft`)。
|
||||
- 测试方法:使用真实或 Mock 供应商响应,验证数据库状态与 package 草稿字段完整性。
|
||||
|
||||
**AC-07 准入测试失败**
|
||||
- 给定一个 `status=discovered` 的 candidate,模拟其接口返回 500 或响应格式不合法,系统在 30 分钟内将 candidate 状态更新为 `test_failed`,`failure_reason` 字段非空,且运营工作台展示失败详情。
|
||||
- 测试方法:Mock 供应商返回 500,验证数据库字段与前端展示。
|
||||
|
||||
### 模块 D:账号自动注册
|
||||
|
||||
**AC-08 自动注册成功**
|
||||
- 给定一个已配置自动注册白名单的供应商,配置其可用账号数阈值为 2,当前可用账号数为 1,系统在 10 分钟内触发注册流程,在 30 分钟内完成注册、密钥申请、凭证加密存储,最终 `supply_accounts` 中新增一条 `status=active` 的记录。
|
||||
- 测试方法:使用供应商沙箱环境或高保真 Mock,验证端到端流程与数据库记录。
|
||||
|
||||
**AC-09 自动注册 fail-closed**
|
||||
- 给定自动注册配置 `enabled=true`,但 SMS/邮件网关返回 503 或超时,系统在 60 秒内将注册任务标记为 `failed`,不向任何上游返回成功状态码,审计日志中包含 `action=auto_register_failed` 与错误详情。
|
||||
- 测试方法:Mock SMS 网关返回 503,验证接口响应、数据库状态、审计日志。
|
||||
|
||||
### 模块 E:运营工作台与通用
|
||||
|
||||
**AC-10 审计日志完整性**
|
||||
- 任意自动化操作(状态变更、candidate 状态迁移、自动注册、手动触发探针)发生后 5 秒内,审计存储中必须存在对应记录,字段包含:
|
||||
- `object_type`、`object_id`、`action`、`result_code`、`before_state`(变更前)、`after_state`(变更后)、`request_id`。
|
||||
- 测试方法:触发各项操作,查询审计存储验证字段完整性。
|
||||
|
||||
**AC-11 运营工作台干预**
|
||||
- 运营人员点击“一键确认上架”后,对应的 `supply_packages` 记录在 3 秒内从 `draft` 变为 `active`。
|
||||
- 运营人员点击“忽略此模型”后,该 candidate 在 7 天内不再出现在待处理列表中,且 7 天后自动恢复为 `discovered`。
|
||||
- 测试方法:E2E 测试或 UI 自动化测试。
|
||||
|
||||
**AC-12 配置热更新**
|
||||
- 探针周期、扫描周期、测试超时时间、自动注册阈值等配置项,在修改配置文件并下发后 60 秒内生效,不重启进程。
|
||||
- 测试方法:修改配置,观察调度器行为变化时间差。
|
||||
|
||||
---
|
||||
|
||||
## 6. 边缘情况与失败路径
|
||||
|
||||
| 编号 | 边缘/失败场景 | 系统行为 | 验证方式 |
|
||||
|-----|-------------|---------|---------|
|
||||
| FP-01 | 供应商探针接口完全不可用(DNS 失败、TCP 超时) | 标记为 inconclusive,按 429 退避逻辑处理,不直接变更状态 | 模拟 iptables DROP,验证状态不变 |
|
||||
| FP-02 | 供应商返回 200 但响应体为空或格式突变 | 解析失败视为 inconclusive,记录 error_log,不触发状态变更 | Mock 返回空 JSON,验证状态与日志 |
|
||||
| FP-03 | 同一账号在探针执行期间被运营人员手动变更状态 | 乐观锁冲突:探针更新时 version 不匹配,更新失败,探针记录冲突日志,由下次探针或运营人员覆盖 | 并发测试:手动 update 同时触发探针 |
|
||||
| FP-04 | 模型准入测试期间,测试账号被探针标记为 suspended | 准入测试流水线检测到测试账号不可用,任务标记为 `test_failed`,原因写为 `test_account_unavailable` | Mock 测试账号 suspended,验证流水线行为 |
|
||||
| FP-05 | 自动注册时供应商注册接口返回 400(如邮箱已被注册) | 任务标记为 `failed`,原因写入具体错误码,同一邮箱不再重复使用,审计日志记录完整请求/响应摘要(脱敏后) | Mock 注册接口返回 400,验证数据库与日志 |
|
||||
| FP-06 | 自动注册成功后,验证步骤发现密钥无效 | 账号状态保持 `pending`,自动注册任务标记为 `verify_failed`,触发告警,不进入 active | Mock verify 返回失败,验证状态机 |
|
||||
| FP-07 | 全网扫描时供应商模型列表分页异常(如页码越界返回 500) | 扫描任务记录分页失败,已获取的部分模型仍正常处理,失败页在下一周期重试 | Mock 分页接口第 3 页返回 500,验证整体任务不中断 |
|
||||
| FP-08 | 数据库在探针执行期间不可用 | 探针任务失败,记录错误,不触发状态变更;调度器按配置重试;连续失败 5 次后暂停该批次探针,触发系统级告警 | 模拟 PostgreSQL 断开,验证行为 |
|
||||
| FP-09 | 运营人员同时点击“确认上架”与“忽略此模型” | 乐观锁或幂等键保证只有一个操作生效,第二个操作返回 409 Conflict,界面提示“该模型已被处理” | 并发 UI 操作测试 |
|
||||
| FP-10 | 凭证加密 KMS 服务在自动注册期间不可用 | 注册流程在加密步骤阻塞,等待 KMS 恢复或超时(60 秒);超时后任务标记为 `failed`,明文凭证不得落盘 | Mock KMS 超时,验证明文不出现在日志/数据库 |
|
||||
|
||||
---
|
||||
|
||||
## 7. 上线与运营准备
|
||||
|
||||
### 7.1 发布策略
|
||||
- **阶段 1(灰度)**:选择 2 个非核心供应商(如测试环境专用供应商)开启自动探针与模型发现,观察 7 天。
|
||||
- **阶段 2(扩展)**:覆盖全部供应商的探针与发现能力,但自动状态变更仅对 `sandbox` 环境账号生效,生产环境账号的探针结果只生成告警,不自动改状态。
|
||||
- **阶段 3(全量)**:生产环境账号启用自动状态变更,模型准入测试与自动注册按需逐步开启。
|
||||
|
||||
### 7.2 灰度/回滚
|
||||
- 灰度开关通过配置中心控制,维度包括:
|
||||
- `probe.enabled`:全局探针开关
|
||||
- `probe.auto_transition.supplier_ids`:允许自动状态变更的供应商白名单
|
||||
- `discovery.enabled`:全网扫描开关
|
||||
- `admission_test.enabled`:准入测试开关
|
||||
- `auto_registration.enabled`:自动注册开关
|
||||
- 回滚条件(任一触发即全量关闭对应模块):
|
||||
- 1 小时内探针误报率 > 5%
|
||||
- 自动状态变更导致用户可见错误率上升(对比基线)> 2%
|
||||
- 自动注册任务连续失败率 > 50%(持续 1 小时)
|
||||
- 回滚操作:修改配置中心对应开关为 `false`,60 秒内生效,已变更的状态不自动回退,由运营人员人工审核。
|
||||
|
||||
### 7.3 埋点/监控/告警
|
||||
|
||||
#### 埋点事件
|
||||
| 事件名 | 触发时机 | 关键属性 |
|
||||
|-------|---------|---------|
|
||||
| `si_probe_executed` | 每次探针执行完成 | `platform`, `account_id`, `result`, `latency_ms` |
|
||||
| `si_state_transitioned` | 账号状态自动变更 | `platform`, `account_id`, `from_status`, `to_status`, `reason` |
|
||||
| `si_model_discovered` | 发现新模型 | `platform`, `model_id`, `discovery_source` |
|
||||
| `si_admission_test_completed` | 准入测试完成 | `platform`, `model_id`, `result`, `duration_sec` |
|
||||
| `si_auto_register_completed` | 自动注册完成 | `platform`, `result`, `duration_sec` |
|
||||
|
||||
#### 监控指标(Prometheus)
|
||||
| 指标名 | 类型 | 说明 |
|
||||
|-------|------|------|
|
||||
| `si_probe_latency_seconds` | Histogram | 探针请求延迟 |
|
||||
| `si_probe_result_total` | Counter | 探针结果分类(success/failure/inconclusive) |
|
||||
| `si_state_transition_total` | Counter | 状态变更次数 |
|
||||
| `si_discovery_models_total` | Gauge | 当前候选模型数量(按 status 分标签) |
|
||||
| `si_admission_test_duration_seconds` | Histogram | 准入测试耗时 |
|
||||
| `si_auto_register_result_total` | Counter | 自动注册结果分类 |
|
||||
|
||||
#### 告警规则
|
||||
| 告警名 | 条件 | 通知对象 | 级别 |
|
||||
|-------|------|---------|------|
|
||||
| 探针大面积失败 | 1 小时内探针失败率 > 20% | 技术负责人 | P1 |
|
||||
| 供应商账号全部失效 | 某供应商 active 账号数 = 0 持续 > 10 分钟 | 运营+技术 | P0 |
|
||||
| 自动注册连续失败 | 1 小时内自动注册失败率 > 50% | 供应链管理员 | P1 |
|
||||
| 新模型堆积未处理 | `status=discovered` 的候选模型数 > 20 且持续 > 24 小时 | 运营团队 | P2 |
|
||||
| 系统自身健康异常 | 本服务 `/actuator/health/ready` 返回非 200 持续 > 1 分钟 | 技术负责人 | P0 |
|
||||
|
||||
### 7.4 FAQ(预置)
|
||||
**Q1:自动状态变更会不会把正常的供应商误杀掉?**
|
||||
A:探针采用“明确失败才降级、逐级降级”策略:active → suspended 需 1 次明确失败(如 401/403),suspended → disabled 需连续 3 次明确失败,且不允许从 active 直接跳到 disabled;429 等不可判定(inconclusive)结果不计入连续失败次数,也不触发状态变更。运营人员可随时在后台暂停单个账号的自动探针。
|
||||
|
||||
**Q2:模型准入测试失败了,我还能手动上架吗?**
|
||||
A:可以。运营人员可以在工作台查看失败详情,选择“手动强制上架”,此时系统生成 package 草稿但标记为 `manually_forced`,并强制要求运营人员填写强制上架理由,该理由写入审计日志。
|
||||
|
||||
**Q3:自动注册生成的账号归属谁?**
|
||||
A:自动注册账号的 `user_id` / `supplier_user_id` 关联到平台运营系统账号(可配置),收益结算走平台统一账户。
|
||||
|
||||
---
|
||||
|
||||
## 8. 商业化与价值闭环
|
||||
|
||||
### 8.1 收益路径
|
||||
| 路径 | 描述 | 量化 |
|
||||
|-----|------|------|
|
||||
| 直接收益 | 新模型上架速度提升 → 平台可售模型数增加 → 订单量增长 | 每提前 1 天上架一个热点模型,预估带来 X 订单增量(需商务提供历史数据基线) |
|
||||
| 成本节省 | 运营人力减少 → 供应链维护 headcount 或工时下降 | 按 BG-04 目标,每周节省 70% 工时,折算年化人力成本 |
|
||||
| 质量溢价 | 供应商失效导致的客诉减少 → NPS 提升 → 客户续约率提升 | 减少的客诉数 × 单客诉处理成本 + 续约率提升带来的 LTV 增量 |
|
||||
|
||||
### 8.2 北极星指标
|
||||
- **供应链新鲜度指数(Supply Freshness Index, SFI)**
|
||||
- 定义:SFI = (过去 1 小时成功探针的账号数 / 应探针账号总数) × (过去 24 小时进入 active 的新模型数 / 过去 24 小时发现的新模型总数)
|
||||
- 目标值:SFI ≥ 0.95
|
||||
- 采集周期:每小时计算一次,写入时序数据库
|
||||
|
||||
### 8.3 失败判定线
|
||||
项目在以下任一条件触发时,判定为失败并启动止损:
|
||||
1. 上线后 30 天内,因本系统导致的供应商状态误变更(false positive)累计 > 50 次。
|
||||
2. 上线后 30 天内,因自动状态变更或自动注册导致用户可见支付/使用故障 > 3 次。
|
||||
3. SFI 连续 7 天 < 0.70,且技术团队无法给出明确修复排期。
|
||||
4. 自动注册模块因供应商接口变更导致连续 14 天成功率 < 30%,且无替代方案。
|
||||
|
||||
### 8.4 止损条件
|
||||
- 触发失败判定线后,PM 与 TechLead 在 24 小时内决定是否:
|
||||
- **降级**:关闭自动状态变更与自动注册,仅保留探针监控与模型发现(纯观测模式)。
|
||||
- **下线**:完全卸载本系统,回退至纯人工维护模式,保留审计日志备查。
|
||||
- 无论降级或下线,已生成的 supply_package 草稿和已注册的账号不受影响,由运营人员人工接管。
|
||||
|
||||
---
|
||||
|
||||
## 9. 依赖与风险
|
||||
|
||||
### 9.1 外部依赖
|
||||
| 依赖方 | 依赖内容 | 风险等级 | 缓解措施 |
|
||||
|-------|---------|---------|---------|
|
||||
| 各供应商 | 模型列表接口、注册接口、探针端点的稳定性与兼容性 | 高 | 接口变更监测;Mock 回归测试集;供应商接口版本锁定 |
|
||||
| SMS/邮件网关 | 自动注册验证码接收 | 中 | fail-closed;备用邮箱池;人工兜底流程 |
|
||||
| KMS 服务 | 新注册账号凭证加密 | 中 | 加密失败阻塞落盘,任务进死信队列 |
|
||||
| 平台 Job Scheduler | 定时任务调度 | 低 | 调度失败时探针/扫描延迟,不引入错误状态 |
|
||||
| supply-api 现有服务 | 复用 Verify、AccountStore、PackageStore、AuditStore | 低 | 接口契约冻结;变更需双方 CR |
|
||||
|
||||
### 9.2 技术风险
|
||||
| 风险编号 | 风险描述 | 概率 | 影响 | 应对 |
|
||||
|---------|---------|------|------|------|
|
||||
| R-01 | 探针频率过高导致供应商侧将我们视为攻击源,封禁平台 IP | 中 | 高 | 探针频率可配置;使用平台统一出口 IP 池;对每家供应商遵守其 rate limit 文档 |
|
||||
| R-02 | 供应商模型列表接口返回缓存旧数据,导致“已下架模型”误判 | 中 | 中 | 列表接口响应加 TTL 校验;结合官方文档 RSS/变更日志交叉验证 |
|
||||
| R-03 | 自动注册的浏览器自动化流程(如 Selenium/Playwright)因供应商前端改版失效 | 高 | 中 | 优先使用官方 API 注册;浏览器自动化作为 fallback;前端改版监控 |
|
||||
| R-04 | 准入测试用例不足以覆盖供应商实际兼容性问题,导致 test_passed 但上线后用户报错 | 中 | 高 | 测试用例由 QA 维护并定期评审;上线后 24h 内对新模型增加采样监控 |
|
||||
| R-05 | 数据库 model_candidates 表数据膨胀,影响查询性能 | 低 | 中 | 设置自动清理策略:test_failed 且超过 30 天未手动处理的记录自动删除 |
|
||||
|
||||
### 9.3 合规与隐私风险
|
||||
- 自动注册过程中收集的邮箱、手机号属于个人信息,需符合平台隐私政策与相关法律法规。
|
||||
- 凭证指纹(`credential_fingerprint`)仅存储哈希值,不得存储明文 API Key。
|
||||
- 审计日志中的请求/响应摘要需脱敏,不得包含完整 credential。
|
||||
|
||||
---
|
||||
|
||||
## 10. 技术栈与集成约束
|
||||
|
||||
### 统一技术栈
|
||||
本项目必须与立交桥主项目保持一致:
|
||||
- **语言**: Go 1.22+
|
||||
- **HTTP框架**: 标准库 `net/http` + 自定义中间件(禁止引入 Gin/Echo 等第三方框架,保持与 gateway/ 和 supply-api/ 的一致性)
|
||||
- **数据库**: PostgreSQL 15+ ,驱动 `jackc/pgx/v5`
|
||||
- **缓存**: Redis,客户端 `redis/go-redis/v9`
|
||||
- **配置**: YAML + Viper,环境变量覆盖敏感字段
|
||||
- **日志/审计**: 结构化日志,审计事件模型与 supply-api/ 一致
|
||||
- **错误码**: `{SOURCE}_{CATEGORY}_{CODE}` 格式,例如 `SUP_INT_4001`
|
||||
- **健康检查**: `/actuator/health` 、 `/actuator/health/live` 、 `/actuator/health/ready`
|
||||
- **测试**: Go testing + testify,覆盖率门槛 domain ≥ 70%、service/handler ≥ 80%
|
||||
|
||||
### 独立运行与集成运行
|
||||
本系统必须同时支持两种运行模式:
|
||||
|
||||
| 模式 | 特征 | 部署方式 | 适用场景 |
|
||||
|------|------|---------|---------|
|
||||
| **独立运行** | 自有 `cmd/supply-intelligence/main.go`,独立数据库 schema,独立 docker-compose | `docker-compose up` 或单独容器 | 外部用户只需要供应链管理能力,不想接入立交桥全套 |
|
||||
| **集成运行** | 作为 Go module 被 `supply-api/` 引入,共享数据库连接池和配置,通过内部接口注册 | 编译时作为子模块编译,运行时挂载到 supply-api 主进程 | 立交桥用户希望获得一体化供应链能力 |
|
||||
|
||||
**集成约束**:
|
||||
- 独立运行时,系统必须提供完整的 HTTP API 和运营工作台。
|
||||
- 集成运行时,系统必须提供 `IntegrationPlugin` 接口,允许主程序通过配置开关启用/禁用各模块。
|
||||
- 数据库 schema 必须使用独立的 `supply_intelligence_` 前缀,避免与主项目表名冲突。
|
||||
- 配置文件必须支持分离加载:独立运行时读取自己的 `config.yaml`,集成运行时合并到主项目配置。
|
||||
|
||||
### NewAPI / Sub2API 适配支持
|
||||
本系统的核心能力必须能够对接 NewAPI 和 Sub2API 系统:
|
||||
- **供应商状态同步**: 提供标准化的供应商健康状态接口,NewAPI/Sub2API 可定期获取供应商可用性状态。
|
||||
- **模型列表推送**: 提供 `/models` 接口返回平台已发现、已测试通过的模型列表,NewAPI/Sub2API 可消费此数据自动补充自己的模型库。
|
||||
- **账号注册适配**: 自动注册模块通过适配层支持 NewAPI/Sub2API 的账号管理 API,实现跨平台账号生命周期管理。
|
||||
- **独立部署时**: 通过配置文件指定 NewAPI/Sub2API 的管理端点地址和鉴权信息,本系统通过适配层(Adapter)与之交互。
|
||||
- **集成部署时**: 若立交桥 gateway/ 已接入 NewAPI/Sub2API,本系统通过 supply-api/ 的内部接口操作上游状态。
|
||||
|
||||
### 对外接口契约
|
||||
- 必须提供 OpenAPI 3.0 接口文档,确保 NewAPI/Sub2API 开发者可以独立接入。
|
||||
- 接口路径前缀默认为 `/api/v1/supply-intelligence/`,集成运行时可通过配置改为 `/internal/supply-intelligence/` 。
|
||||
|
||||
---
|
||||
|
||||
## 11. 阶段门控结论
|
||||
|
||||
### 11.1 当前状态
|
||||
**可进入 TechLead 评审,但需补充以下信息后方可进入开发排期:**
|
||||
|
||||
1. **供应商接口清单**:需由商务/技术团队提供 Phase 1 目标供应商的模型列表接口文档、注册接口文档(或明确标注哪些供应商不支持自动注册)。
|
||||
2. **测试用例集范围**:需 QA 团队确认准入测试用例集的初始版本(≥ 5 个用例/模型类型)及维护 SLA。
|
||||
3. **Job Scheduler 契约**:需明确平台统一调度器的接口契约(如任务提交格式、超时控制、死信策略)。
|
||||
4. **KMS 与 SMS 网关就绪状态**:生产环境 KMS 与 SMS/邮件网关当前不可用,需寻找合适的供应商并确认集成方案。若短期内无法就绪,自动注册模块(Phase 3)需明确为远期交付,当前 Phase 1/2 不受影响。
|
||||
|
||||
### 11.2 建议开发优先级
|
||||
| 阶段 | 内容 | 目标 |
|
||||
|-----|------|------|
|
||||
| Phase 1 | 供应商品质探针(模块 A)+ 运营工作台观测视图(模块 E 只读部分) | 解决最痛的可用性黑洞问题,7 天灰度验证 |
|
||||
| Phase 2 | 全网模型发现(模块 B)+ 模型准入测试(模块 C) | 解决新模型上市滞后问题 |
|
||||
| Phase 3 | 账号自动注册(模块 D)+ 运营工作台完整干预能力(模块 E 读写部分) | 解决供应商账号补充效率问题 |
|
||||
|
||||
### 11.3 门控决策
|
||||
- **不阻塞 TechLead 评审**:PRD 中需求边界、验收标准、失败路径已清晰。
|
||||
- **阻塞开发排期**:直到上述 4 项补充信息(供应商接口清单、测试用例集、Job Scheduler 契约、KMS/SMS 就绪状态)以文档形式补充到本 PRD 附录后,方可进入技术方案设计(HLD)阶段。
|
||||
- **技术栈与集成约束已明确**:统一 Go 标准库、独立/集成双模式、NewAPI/Sub2API 适配层已纳入范围。
|
||||
|
||||
---
|
||||
|
||||
## 附录 A:新增数据表草案(供 TechLead 参考,非最终 Schema)
|
||||
|
||||
> 本附录仅用于需求对齐,最终 Schema 由 TechLead 设计并通过标准 SQL migration 落地。
|
||||
|
||||
### A.1 model_candidates
|
||||
| 字段 | 类型 | 说明 |
|
||||
|-----|------|------|
|
||||
| id | BIGINT PK | 自增 |
|
||||
| platform | VARCHAR(50) | 供应商标识,与 supply_accounts.platform 同枚举 |
|
||||
| model_id | VARCHAR(100) | 模型标识 |
|
||||
| model_name | VARCHAR(200) | 可读的模型名称(从供应商接口获取) |
|
||||
| status | VARCHAR(20) | `discovered`, `testing`, `test_passed`, `test_failed`, `ignored`, `expired` |
|
||||
| discovered_at | TIMESTAMPTZ | 首次发现时间 |
|
||||
| tested_at | TIMESTAMPTZ | 最近一次测试时间 |
|
||||
| failure_reason | TEXT | 测试失败原因 |
|
||||
| ignored_until | TIMESTAMPTZ | 忽略有效期 |
|
||||
| created_at | TIMESTAMPTZ | |
|
||||
| updated_at | TIMESTAMPTZ | |
|
||||
|
||||
唯一约束:`(platform, model_id)`
|
||||
|
||||
### A.2 auto_registration_tasks
|
||||
| 字段 | 类型 | 说明 |
|
||||
|-----|------|------|
|
||||
| id | BIGINT PK | 自增 |
|
||||
| platform | VARCHAR(50) | 目标供应商 |
|
||||
| task_type | VARCHAR(20) | `register`, `verify`, `rotate_key` |
|
||||
| status | VARCHAR(20) | `pending`, `running`, `completed`, `failed`, `dead_letter` |
|
||||
| context | JSONB | 任务上下文(如申请的邮箱、注册步骤状态机) |
|
||||
| result_account_id | BIGINT | 成功后关联的 supply_accounts.id |
|
||||
| failure_reason | TEXT | |
|
||||
| retry_count | INT DEFAULT 0 | |
|
||||
| next_retry_at | TIMESTAMPTZ | |
|
||||
| created_at | TIMESTAMPTZ | |
|
||||
| updated_at | TIMESTAMPTZ | |
|
||||
|
||||
### A.3 probe_execution_logs
|
||||
| 字段 | 类型 | 说明 |
|
||||
|-----|------|------|
|
||||
| id | BIGINT PK | 自增 |
|
||||
| account_id | BIGINT FK | supply_accounts.id |
|
||||
| probe_type | VARCHAR(20) | `connectivity`, `quota`, `key_validity` |
|
||||
| result | VARCHAR(20) | `success`, `failure`, `inconclusive` |
|
||||
| http_status | INT | |
|
||||
| latency_ms | INT | |
|
||||
| error_code | VARCHAR(50) | 平台内部错误码 |
|
||||
| error_message | TEXT | |
|
||||
| executed_at | TIMESTAMPTZ | |
|
||||
|
||||
索引:`account_id + executed_at DESC`,保留策略 30 天。
|
||||
|
||||
---
|
||||
|
||||
## 自检清单
|
||||
|
||||
- [x] 已明确真实目标(降低供应商失效导致的错误率、缩短新模型上市时间、减少人工维护工时),不是只复述功能。
|
||||
- [x] 已写清 In Scope / Out of Scope,边界以模块和具体场景描述。
|
||||
- [x] 每个 AC 都可被 QA 或测试用例直接验证(含具体数值、时间、状态、测试方法)。
|
||||
- [x] 已覆盖异常流(Rate Limit、超时、网关不可用)、边缘流(模型 ID 变更、手动暂停探针、并发操作)与失败路径(共 10 条)。
|
||||
- [x] 已补齐上线、运营、监控、回滚要求(灰度三阶段、回滚条件、埋点、监控指标、告警规则、预置 FAQ)。
|
||||
- [x] 已定义商业化/价值闭环(直接收益、成本节省、质量溢价三条路径)。
|
||||
- [x] 已定义成功指标(BG-01/03/04 + SFI)与失败判定线(4 条止损条件)。
|
||||
- [x] 已明确当前是否可进入 TechLead 阶段:可进入 TechLead 评审,但需补充 4 项信息后方可进入开发排期。
|
||||
- [x] 没有使用"优化、支持、友好、尽量、快速"等模糊词替代明确要求;所有时间、比例、次数均为具体数值或明确公式。
|
||||
|
||||
---
|
||||
188
prd/competitor-analysis.md
Normal file
188
prd/competitor-analysis.md
Normal file
@@ -0,0 +1,188 @@
|
||||
# Supply-Intelligence 供应链智能增强 — 竞品分析报告
|
||||
|
||||
## 1. 竞品范围
|
||||
|
||||
| 竞品 | 项目地址 | 技术栈 | 相关能力 |
|
||||
|-------|---------|--------|---------|
|
||||
| **LiteLLM** | berriai/litellm | Python/FastAPI | 模型定价数据库、自动路由、新模型告警、部署冷却、容灾切换 |
|
||||
| **Sub2API** | Wei-Shaw/sub2api | Go/Gin/Ent | 模型定价镜像、代理管理、账号/订阅管理、用量统计、公告系统 |
|
||||
| **NewAPI / OneAPI** | Calcium-Ion/new-api | Go/Gin/GORM | 渠道管理、模型配置、上游状态监控 |
|
||||
|
||||
---
|
||||
|
||||
## 2. 核心能力对标
|
||||
|
||||
### 2.1 模型定价与供应商数据库
|
||||
|
||||
#### LiteLLM Model Prices Database
|
||||
LiteLLM 维护了行业内最完整的模型定价数据库 `model_prices_and_context_window_backup.json`:
|
||||
|
||||
**关键特征**:
|
||||
- 覆盖 100+ 供应商、1000+ 模型
|
||||
- 每个模型包含:input_cost_per_token, output_cost_per_token, context_window, max_tokens, supports_vision, supports_function_calling 等
|
||||
- 支持分层定价(tiered_pricing):如 >128k tokens 时使用不同单价
|
||||
- 支持批量定价(batch pricing)
|
||||
- 支持音频 token 定价
|
||||
- 支持自定义成本覆盖
|
||||
|
||||
**更新机制**:
|
||||
- 主数据库内置在代码中,通过版本发布更新
|
||||
- 支持远程拉取更新(可配置镜像源)
|
||||
- Sub2API 就是从 LiteLLM 上游镜像此文件
|
||||
|
||||
#### Sub2API Pricing Service
|
||||
Sub2API 的定价服务是被动消费型的(从上游获取):
|
||||
|
||||
**关键设计**:
|
||||
- 远程拉取 LiteLLM 镜像 `model_prices_and_context_window.json`
|
||||
- 本地 fallback 文件缓存
|
||||
- SHA256 hash 验证更新
|
||||
- 模型家族回退算法:未知模型按命名规则回退到已知模型
|
||||
- 例如:gpt-5.3 未知 → 回退到 gpt-5.1
|
||||
- 例如:claude-unknown → 回退到 claude-sonnet
|
||||
- 动态价格字段优先级配置
|
||||
|
||||
**缺陷**:
|
||||
- 被动获取,无主动发现新模型能力
|
||||
- 无模型质量探针(仅依赖定价数据)
|
||||
- 无自动测试和准入检查
|
||||
|
||||
### 2.2 供应商/渠道管理
|
||||
|
||||
#### Sub2API Proxy & Account Management
|
||||
Sub2API 提供了完整的上游管理能力:
|
||||
|
||||
**代理管理** (`Proxy` schema):
|
||||
```go
|
||||
type Proxy struct {
|
||||
name string // 代理名称
|
||||
protocol string // 协议
|
||||
host string // 主机
|
||||
port int // 端口
|
||||
username string // 用户名(可选)
|
||||
password string // 密码(可选)
|
||||
status string // active / inactive
|
||||
}
|
||||
```
|
||||
|
||||
**账号管理** (`Account` schema):
|
||||
- 支持多个上游供应商
|
||||
- 每个账号关联一个代理(Proxy)
|
||||
- 支持账号分组(AccountGroup)
|
||||
- 软删除机制
|
||||
|
||||
**用量统计** (`UsageLog`):
|
||||
- 详细记录每次请求的模型、token数、成本、时间戳
|
||||
- `UsageCleanupTask`: 定期清理过期用量数据
|
||||
|
||||
#### NewAPI/OneAPI 渠道管理
|
||||
- 支持多个上游渠道配置
|
||||
- 渠道状态监控(可用/不可用)
|
||||
- 支持渠道优先级和权重
|
||||
- 支持渠道购买次数限制
|
||||
|
||||
### 2.3 自动路由与容灾
|
||||
|
||||
#### LiteLLM Router & Auto-Router
|
||||
LiteLLM 的路由系统是其核心竞争力:
|
||||
|
||||
**路由策略**:
|
||||
- **lowest_latency**: 选择响应最快的部署
|
||||
- **lowest_cost**: 选择成本最低的部署
|
||||
- **lowest_tpm_rpm**: TPM/RPM 最低
|
||||
- **least_busy**: 负载最低
|
||||
- **auto_router**: 语义路由(基于请求内容匹配最适模型)
|
||||
- **budget_limiter**: 按 key/team 限制预算
|
||||
|
||||
**容灾机制**:
|
||||
- **Cooldown**: 连续失败的部署自动进入 cooldown,暂时从路由池移除
|
||||
- **Fallback**: 主模型失败时自动切换到备用模型
|
||||
- **Retries**: 可配置重试次数和策略
|
||||
|
||||
**新模型告警** (`new_model_added`):
|
||||
- 当新模型上线时发送 Slack 告警
|
||||
- 但仅限于通知,无结构化的准入测试流程
|
||||
|
||||
### 2.4 用户与订阅管理
|
||||
|
||||
#### Sub2API 用户体系
|
||||
- `User`: 基础用户信息
|
||||
- `UserSubscription`: 订阅计划、配额、到期时间
|
||||
- `UserAttributeDefinition` / `UserAttributeValue`: 用户自定义属性
|
||||
- `PromoCode` / `RedeemCode`: 营销代码系统
|
||||
- `SecuritySecret`: 安全凭证管理
|
||||
|
||||
---
|
||||
|
||||
## 3. 差距分析(我们的机会)
|
||||
|
||||
| 能力维度 | 竞品现状 | 我们的机会 |
|
||||
|---------|---------|---------|
|
||||
| **模型发现** | LiteLLM 被动维护定价库,Sub2API 被动镜像 | 主动全网扫描发现新模型(爬取供应商 API、HN、Twitter、官方文档) |
|
||||
| **准入测试** | 竞品均不具备 | 自动化准入测试流程,含功能、性能、成本、安全等维度 |
|
||||
| **质量探针** | LiteLLM 仅有基础 cooldown,无深度探针 | 多维度品质探针:连通性、配额、延迟、错误率、响应质量 |
|
||||
| **自动注册** | 竞品均不支持 | 自动在供应商后台注册账号、申请 API Key |
|
||||
| **账号生命周期** | Sub2API 有基础账号管理,无自动更新 | 自动轮换密钥、检测过期、自动补充账号 |
|
||||
| **供应商健康大盘** | Sub2API 有用量统计,无综合健康视图 | 统一供应商健康大盘,实时可视化 |
|
||||
| **模型比价** | LiteLLM 有定价库,但无比价能力 | 同类模型多供应商价格对比,智能推荐最优供应商 |
|
||||
| **运营工作台** | 竞品均为散点式管理 | 统一运营工作台,支持干预操作(暂停、强制切换、测试触发) |
|
||||
| **模型下线预测** | LiteLLM 有新模型告警,但无下线预测 | 基于用量趋势和供应商动态预测模型下线 |
|
||||
| **自动化闭环** | 竞品均为人工配置 | 发现 → 测试 → 准入 → 上线 → 监控 → 下线 全自动化 |
|
||||
|
||||
---
|
||||
|
||||
## 4. 对产品规划的影响
|
||||
|
||||
### 强化方向
|
||||
|
||||
1. **模型定价数据库参考 LiteLLM**:
|
||||
- 维护标准化的模型定价数据库,支持 input/output cost、context window、功能支持等字段
|
||||
- 支持远程更新和本地 fallback
|
||||
- 支持模型家族回退
|
||||
|
||||
2. **供应商账号管理参考 Sub2API**:
|
||||
- 代理(Proxy)管理:协议、主机、端口、状态
|
||||
- 账号分组:AccountGroup
|
||||
- 软删除机制
|
||||
- 安全凭证管理
|
||||
|
||||
3. **用量统计参考 Sub2API**:
|
||||
- 详细 UsageLog 记录
|
||||
- 定期清理机制
|
||||
- 用户-订阅-用量关联
|
||||
|
||||
4. **路由策略参考 LiteLLM**:
|
||||
- 多种路由策略(latency、cost、load、semantic)
|
||||
- 容灾切换机制
|
||||
- 部署冷却
|
||||
|
||||
### 新增差异化能力
|
||||
|
||||
5. **主动全网模型发现**:竞品均为被动维护,我们应主动扫描
|
||||
6. **自动准入测试**:竞品不具备,是核心差异化
|
||||
7. **自动账号注册**:竞品不支持,是核心差异化
|
||||
8. **智能推荐**:基于价格、质量、位置的供应商推荐
|
||||
9. **预测性分析**:模型下线预测、供应商变动预测
|
||||
|
||||
---
|
||||
|
||||
## 5. 对技术规划的影响
|
||||
|
||||
### 应引入的设计模式
|
||||
|
||||
| 设计模式 | 来源 | 应用场景 |
|
||||
|---------|------|---------|
|
||||
| **Model Prices Database** | LiteLLM | 模型定价数据库,支持远程更新和本地 fallback |
|
||||
| **SHA256 Hash 验证** | Sub2API | 定价数据更新的完整性验证 |
|
||||
| **模型家族回退** | Sub2API | 未知模型的智能回退 |
|
||||
| **Proxy + Account 关联** | Sub2API | 上游代理与账号的关联管理 |
|
||||
| **UsageLog + CleanupTask** | Sub2API | 用量记录与定期清理 |
|
||||
| **路由策略抽象** | LiteLLM | 支持多种路由策略的插件化设计 |
|
||||
| **Cooldown + Fallback** | LiteLLM | 故障部署的自动处理 |
|
||||
|
||||
### 技术避坑
|
||||
|
||||
1. **不重复造轮子**: 定价数据库可以直接复用 LiteLLM 的开源数据,不需要自己维护
|
||||
2. **发现与测试解耦**: 模型发现和准入测试应该解耦,支持独立触发和组合触发
|
||||
3. **注册模块的可扩展性**: 每个供应商的注册流程不同,需要抽象接口 + 具体实现
|
||||
4. **测试隔离**: 准入测试不得影响生产环境,必须使用独立账号或模拟环境
|
||||
243
specs/功能清单.md
Normal file
243
specs/功能清单.md
Normal file
@@ -0,0 +1,243 @@
|
||||
# Supply Intelligence 功能清单(按钮级任务版)
|
||||
|
||||
> 状态说明(2026-05 收敛修订):本文件为旧版按钮级任务清单,已不再作为当前实施真源。
|
||||
> 当前实施真源以“2026-05 新 PM 基线 + tech/BASELINE_TECHLEAD_V2.md + 首期消费闭环决议”为准。
|
||||
> 下列旧任务类型已明确废止或降期,不得继续直接派发给 Engineer:
|
||||
> - gateway 管理接口热更新主路径
|
||||
> - pricing / prediction / 向量检索 / SFI 仪表盘等超范围能力
|
||||
> - 自动注册深链路作为本期硬门槛
|
||||
> - 以 Temporal / 独立 worker / 独立平台骨架为默认落地前提
|
||||
|
||||
> 版本:v1.0
|
||||
> 日期:2026-04-27
|
||||
> 说明:每个任务 5 分钟可完成,可直接安排进任务管理
|
||||
|
||||
---
|
||||
|
||||
## Phase 1:模块 A(探针)+ 模块 E(工作台只读观测)
|
||||
|
||||
### 模块 A1:探针管理基础
|
||||
|
||||
#### A1.1 供应商账号列表页
|
||||
- [ ] **任务**:实现供应商账号列表页路由 `/supply/dashboard/accounts`
|
||||
- [ ] **任务**:在账号列表渲染数据表格,每行显示:账号ID / 供应商名称 / 账号标识(昵称) / 当前状态(徽章) / 风险评分 / 最近探针时间 / 操作
|
||||
- [ ] **任务**:账号状态徽章颜色:active=绿色 / suspended=黄色 / disabled=红色
|
||||
- [ ] **任务**:账号行风险评分显示为进度条(0-100),>80 显示红色
|
||||
- [ ] **任务**:账号行渲染"查看详情"按钮,点击展开显示最近 5 次探针结果
|
||||
- [ ] **任务**:账号列表支持分页,每页 50 条
|
||||
- [ ] **任务**:账号列表支持按供应商名称筛选(下拉框)
|
||||
- [ ] **任务**:账号列表支持按状态筛选(全部 / active / suspended / disabled)
|
||||
- [ ] **任务**:账号列表支持按风险评分范围筛选(滑块)
|
||||
|
||||
#### A1.2 账号详情页
|
||||
- [ ] **任务**:实现账号详情页路由 `/supply/dashboard/accounts/{account_id}`
|
||||
- [ ] **任务**:详情页渲染账号基本信息区块:账号ID / 供应商 / 状态 / 创建时间 / 最近探针时间
|
||||
- [ ] **任务**:详情页渲染探针历史时间线,每条显示:探针时间 / 结果(成功/失败/不可判定) / 延迟 / HTTP状态码 / 风险评分
|
||||
- [ ] **任务**:详情页渲染"手动触发探针"按钮,点击后立即执行一次探针,显示加载状态,完成后刷新时间线
|
||||
- [ ] **任务**:详情页渲染"暂停此账号探针"开关按钮(默认关闭),开启后该账号不参与自动探针
|
||||
- [ ] **任务**:详情页渲染"查看历史状态变更"按钮,点击展开状态变更记录(时间 / 从 → 到 / 原因)
|
||||
|
||||
#### A1.3 探针后端核心
|
||||
- [ ] **任务**:实现探针调度器(基于主仓既有调度能力或轻量本地调度器,每 5 分钟轮询所有 active/suspended 账号)
|
||||
- [ ] **任务**:实现探针执行器,对单个账号发起 HTTP GET/POST 请求,记录响应码/延迟/返回体
|
||||
- [ ] **任务**:实现探针结果评估逻辑:HTTP 200 = 成功 / 401/403 = 明确失败 / 429/5xx/超时/格式突变 = 不可判定
|
||||
- [ ] **任务**:实现状态机:`active` 收到 1 次 explicit_failure → `suspended`;`suspended` 连续 3 次 explicit_failure → `disabled`
|
||||
- [ ] **任务**:实现 429/暂时性错误指数退避:1min → 2min → 4min 重试,超 3 次则本次跳过并保留状态
|
||||
- [ ] **任务**:实现探针结果写入 `supply_intelligence_probe_logs` 表,保留 30 天
|
||||
|
||||
### 模块 A2:供应商适配层
|
||||
|
||||
#### A2.1 供应商适配器框架
|
||||
- [ ] **任务**:定义 `SupplierAdapter` 接口,包含两个方法:`Probe(ctx context.Context, account Account) ProbeResult` 与 `GetModels(ctx context.Context, account Account) ([]Model, error)`
|
||||
- [ ] **任务**:实现 `SupplierAdapterRegistry` map,按供应商名称注册适配器实例
|
||||
- [ ] **任务**:实现配置文件加载供应商适配器列表(`suppliers[].name` + `suppliers[].adapter`)
|
||||
- [ ] **任务**:每个适配器实现 health check 端点探测(发送测试请求验证连通性)
|
||||
|
||||
#### A2.2 Phase 1 目标供应商适配(2个)
|
||||
- [ ] **任务**:实现 OpenAI 供应商适配器(Probe:用 /v1/models 查询;获取模型列表:用 /v1/models)
|
||||
- [ ] **任务**:实现 Anthropic 供应商适配器(Probe:用 /v1/models 查询;获取模型列表:用 /v1/models)
|
||||
- [ ] **任务**:适配器配置项:API Base URL / API Key(加密存储)/ 是否允许受控自动补给 / Rate Limit 阈值
|
||||
|
||||
### 模块 E1:运营工作台(只读观测部分)
|
||||
|
||||
#### E1.1 工作台首页
|
||||
- [ ] **任务**:实现工作台首页路由 `/supply/dashboard`
|
||||
- [ ] **任务**:首页渲染 4 个统计卡片:账号总数(按状态颜色分段) / 本小时新发现模型数 / 待处理候选模型数 / 受控自动补给任务队列长度
|
||||
- [ ] **任务**:首页渲染候选处理与账号健康摘要(避免引入 SFI 仪表盘等超范围指标体系)
|
||||
- [ ] **任务**:首页渲染"探针健康度"简表,显示各供应商最后探针结果(绿色OK/黄色不可判定/红色明确失败/灰色未探)
|
||||
|
||||
#### E1.2 待处理事项列表
|
||||
- [ ] **任务**:在工作台首页渲染"待处理" Tab,展示以下待办项:
|
||||
- 风险评分 > 70 的账号(红色高亮)
|
||||
- 状态 = discovered 的候选模型(待准入测试)
|
||||
- 受控自动补给失败或待验证的任务(待人工介入)
|
||||
- 模型已下架告警(待确认)
|
||||
- [ ] **任务**:每项待办渲染"处理"按钮,点击进入对应详情页
|
||||
- [ ] **任务**:每项待办渲染"忽略"按钮,点击后该项从待办列表暂时移除(3小时后重现)
|
||||
|
||||
---
|
||||
|
||||
## Phase 2:模块 B(模型发现)+ 模块 C(准入测试)
|
||||
|
||||
### 模块 B1:模型发现
|
||||
|
||||
#### B1.1 模型列表页
|
||||
- [ ] **任务**:实现模型列表页路由 `/supply/dashboard/models`
|
||||
- [ ] **任务**:模型列表每行显示:模型ID / 所属供应商 / 当前状态(活跃/草稿/已下线/发现中/测试失败) / 发现时间 / 来源
|
||||
- [ ] **任务**:状态筛选 Tab:全部 / 发现中 / 待测试 / 活跃 / 已下线
|
||||
- [ ] **任务**:模型列表支持按供应商筛选
|
||||
- [ ] **任务**:模型列表支持按发现时间范围筛选
|
||||
- [ ] **任务**:模型行点击"查看详情"进入模型详情页
|
||||
|
||||
#### B1.2 模型发现后端
|
||||
- [ ] **任务**:实现模型发现调度任务(基于主仓既有调度能力或轻量本地调度器),每 1 小时触发一次扫描
|
||||
- [ ] **任务**:实现模型列表抓取器:调用各供应商适配器的 `GetModels()` 方法
|
||||
- [ ] **任务**:实现模型比对逻辑:将抓取的模型列表与 `supply_packages` 中 active/paused/draft 记录去重
|
||||
- [ ] **任务**:发现新模型时,写入 `supply_intelligence_model_candidates` 表,status = discovered
|
||||
- [ ] **任务**:发现模型下架时(供应商仍在列表中、平台存在对应 active 记录,但该模型 ID 已从供应商最新模型列表中消失),写入运营告警,不改变 package 状态
|
||||
- [ ] **任务**:实现模型来源记录:discovery_source 字段(official_api / manual_import)
|
||||
|
||||
### 模块 C1:准入测试
|
||||
|
||||
#### C1.1 准入测试配置
|
||||
- [ ] **任务**:实现测试用例管理页路由 `/supply/dashboard/tests/cases`
|
||||
- [ ] **任务**:测试用例列表每行显示:用例ID / 所属模型类型 / 测试目标(endpoint) / 状态(启用/禁用)
|
||||
- [ ] **任务**:渲染"新增用例"按钮,点击弹出用例创建表单(endpoint地址 / 请求方法 / 预期响应格式 / 超时时间)
|
||||
- [ ] **任务**:测试用例表单支持选择模板(chat/completion/embedding)
|
||||
- [ ] **任务**:实现每个模型类型默认测试用例集(≥ 5 个用例)
|
||||
|
||||
#### C1.2 准入测试执行
|
||||
- [ ] **任务**:实现准入测试任务流,接收 candidate_id 参数并由主仓既有调度能力或轻量任务执行器驱动
|
||||
- [ ] **任务**:`AdmissionTestWorkflow` 从 `model_candidates` 加载 discovered 状态的候选模型
|
||||
- [ ] **任务**:按顺序执行所有启用的测试用例,记录每条的 HTTP 状态/延迟/响应格式/Token 计数
|
||||
- [ ] **任务**:所有用例返回 HTTP 200 + 格式正确 → 更新 candidate status = test_passed,生成 supply_package 草稿
|
||||
- [ ] **任务**:任意用例返回非 200 或格式错误 → 更新 candidate status = test_failed,写入 failure_reason
|
||||
- [ ] **任务**:单个用例超时(60 秒)→ 标记为 timeout,整体判定失败
|
||||
- [ ] **任务**:准入测试完成后,发送飞书通知给运营人员
|
||||
|
||||
#### C1.3 草稿生成
|
||||
- [ ] **任务**:准入测试通过后,自动生成 `supply_packages` 草稿记录(status = draft)
|
||||
- [ ] **任务**:草稿字段:platform / model_id / model_name / price_per_1m_input(默认值)/ price_per_1m_output(默认值)/ suggested_by = si_auto
|
||||
- [ ] **任务**:草稿生成后,在工作台"待上架"列表中显示该草稿
|
||||
|
||||
---
|
||||
|
||||
## Phase 3:模块 D(受控自动补给)+ 模块 E(工作台完整干预)
|
||||
|
||||
### 模块 D1:受控自动补给配置
|
||||
|
||||
#### D1.1 自动补给设置页
|
||||
- [ ] **任务**:实现自动补给设置页路由 `/supply/dashboard/auto-supply/settings`
|
||||
- [ ] **任务**:页面渲染供应商列表,每行显示:供应商名称 / 是否开启受控自动补给(开关)/ 可用账号阈值(数字输入)/ 状态
|
||||
- [ ] **任务**:点击供应商行"配置"按钮,弹出自动补给配置弹窗
|
||||
- [ ] **任务**:配置弹窗字段:启用自动补给(开关)/ 白名单供应商标记 / 触发阈值(账号数)/ 补给方式(任务化/人工补录入口)/ 审批要求
|
||||
- [ ] **任务**:弹窗保存后,按主仓既有配置方式持久化并生效,不引入 Redis 首期前置依赖
|
||||
- [ ] **任务**:配置页顶部渲染"通知/补给受理链路测试"按钮,点击后发送测试通知或验证受理接口可达
|
||||
|
||||
#### D1.2 自动补给执行后端
|
||||
- [ ] **任务**:实现受控自动补给任务流,监控白名单供应商可用账号数 < 阈值时触发
|
||||
- [ ] **任务**:按供应商配置创建补给任务或调用受控补给受理接口,禁止默认走浏览器自动化注册深链路
|
||||
- [ ] **任务**:补给成功后写入待验证/待启用记录,不允许绕过验证直接进入 active
|
||||
- [ ] **任务**:若涉及凭证写入,则将密钥发送至 KMS 加密,密文存入 `supply_accounts`
|
||||
- [ ] **任务**:触发验证或人工审核链路,验证通过后再进入可用状态
|
||||
- [ ] **任务**:补给失败时,写入 `supply_intelligence_auto_supply_tasks` 或等价任务表,status = failed,记录失败原因
|
||||
|
||||
### 模块 D2:Fail-closed 安全机制
|
||||
|
||||
- [ ] **任务**:补给流程中,若通知网关/补给受理接口返回 503 或超时,任务立即标记为 failed,不执行虚假成功写操作
|
||||
- [ ] **任务**:补给流程中,若 KMS 加密超时(60 秒),任务立即标记为 failed
|
||||
- [ ] **任务**:明文凭证在内存中的存活时间不超过 60 秒,超时自动清除
|
||||
- [ ] **任务**:审计日志中记录补给请求/响应(脱敏后:隐藏敏感标识、隐藏凭证)
|
||||
|
||||
### 模块 E2:工作台完整干预
|
||||
|
||||
#### E2.1 候选模型处理
|
||||
- [ ] **任务**:工作台"待上架模型"列表,每行显示:模型ID / 供应商 / 发现时间 / 测试结果摘要 / 来源
|
||||
- [ ] **任务**:模型行渲染"查看测试详情"按钮,点击展开显示所有测试用例结果(每条:通过/失败/超时)
|
||||
- [ ] **任务**:模型行渲染"确认上架"绿色按钮,点击后弹出确认框(显示将生成的 package 草稿内容)
|
||||
- [ ] **任务**:模型行渲染"忽略"按钮,点击后该模型 7 天内不出现(写入 ignored_until 字段)
|
||||
- [ ] **任务**:模型行渲染"手动强制上架"橙色按钮(仅测试失败时可见),点击后需填写强制上架理由(必填)
|
||||
|
||||
#### E2.2 草稿确认上架
|
||||
- [ ] **任务**:点击"确认上架"后,PUT `supply_packages/{id}` status = active
|
||||
- [ ] **任务**:同时更新 `model_candidates` 对应记录 status = published
|
||||
- [ ] **任务**:写入 gateway package change event,等待首期消费方按决议链路拉取并 ack
|
||||
- [ ] **任务**:完成后显示成功提示:"模型已上架,已生成待消费变更事件;是否进入路由以消费方 ack 为准"
|
||||
|
||||
#### E2.3 工单与通知
|
||||
- [ ] **任务**:模型下架告警 → 自动生成运营工单(类型 = model_deprecated),推送到运营工作台
|
||||
- [ ] **任务**:受控自动补给失败 → 自动生成运营工单(类型 = auto_supply_failed),推送飞书通知
|
||||
- [ ] **任务**:连续 3 次探针失败账号 → 生成运营工单(类型 = account_risk),推送飞书通知
|
||||
|
||||
---
|
||||
|
||||
## 全局模块
|
||||
|
||||
### 模块 G1:供应商配置管理
|
||||
|
||||
- [ ] **任务**:实现供应商列表页路由 `/supply/dashboard/settings/suppliers`
|
||||
- [ ] **任务**:供应商列表每行显示:供应商ID / 名称 / 适配器类型 / 账号数量 / 接口状态 / 操作
|
||||
- [ ] **任务**:渲染"添加供应商"按钮,点击弹出供应商创建表单
|
||||
- [ ] **任务**:供应商表单字段:名称 / 适配器类型(下拉) / API Base URL / API Key(加密存储)/ 探针周期(默认5min) / 是否启用
|
||||
- [ ] **任务**:实现供应商"测试连通性"按钮,点击后执行一次 probe 并显示结果
|
||||
- [ ] **任务**:供应商配置变更后,刷新当前集成运行实例中的适配器装配或调度配置(不得以 Temporal Worker 作为首期前置依赖)
|
||||
|
||||
### 模块 G2:配置热更新
|
||||
|
||||
- [ ] **任务**:关键配置项(探针周期/扫描周期/阈值)按主仓既有配置方式存储与生效,避免把 Redis 作为首期前置依赖
|
||||
- [ ] **任务**:实现 `GET /api/v1/supply-intelligence/config` 接口,返回当前生效配置
|
||||
- [ ] **任务**:实现 `PUT /api/v1/supply-intelligence/config` 接口,修改配置后 60 秒内生效
|
||||
- [ ] **任务**:配置变更生成审计日志记录(action = config_update)
|
||||
- [ ] **任务**:不支持的配置项修改返回 400 错误码
|
||||
|
||||
### 模块 G3:OpenAPI + 健康检查
|
||||
|
||||
- [ ] **任务**:实现 `GET /actuator/health` / `/actuator/health/live` / `/actuator/health/ready`
|
||||
- [ ] **任务**:实现 Swagger UI 路由 `/docs`
|
||||
- [ ] **任务**:实现 OpenAPI 3.0 spec 端点 `/openapi.json`
|
||||
- [ ] **任务**:实现关键后台任务执行链路健康检查,调度/执行链路不可用时 `/actuator/health/ready` 返回 503
|
||||
|
||||
### 模块 G4:权限与认证
|
||||
|
||||
- [ ] **任务**:实现 JWT 认证中间件(与立连桥统一认证打通)
|
||||
- [ ] **任务**:实现角色权限:运营人员(观测 + 部分操作)/ 管理员(全部操作)
|
||||
- [ ] **任务**:权限不足返回 HTTP 403,错误码 `SUP_INT_AUTH_1001`
|
||||
|
||||
---
|
||||
|
||||
## 技术基础设施
|
||||
|
||||
### T1:项目骨架
|
||||
- [ ] **任务**:初始化或挂载到主仓中的 Go module / 子模块边界,保持与 supply-api 一致的技术栈约束
|
||||
- [ ] **任务**:创建集成运行入口;如保留独立运行,也仅作为轻量可选形态,不以双进程 `api`/`worker` 为首期强依赖
|
||||
- [ ] **任务**:创建 `internal/` 目录结构(domain/service/handler/infrastructure/repository)
|
||||
- [ ] **任务**:配置 Viper 读取 `config.yaml`,支持环境变量覆盖
|
||||
- [ ] **任务**:配置 `log/slog` 结构化日志,输出 JSON 格式
|
||||
- [ ] **任务**:创建 PostgreSQL schema migration(使用 golang-migrate),表前缀 `supply_intelligence_`
|
||||
- [ ] **任务**:按主仓既有能力接入配置、调度、审计与内部路由,不额外引入 Redis 作为首期前置依赖
|
||||
- [ ] **任务**:配置 Dockerfile 和最小部署说明,优先支持主仓集成部署
|
||||
- [ ] **任务**:如需部署文档,仅按当前真源补充最小启动命令,不再回写旧 `DEPLOYMENT.md` 为实现依据
|
||||
|
||||
### T2:单元测试骨架
|
||||
- [ ] **任务**:为每个 domain 层函数编写单元测试,覆盖率 >= 70%
|
||||
- [ ] **任务**:为每个 service 层函数编写单元测试,覆盖率 >= 80%
|
||||
- [ ] **任务**:配置 CI(GitHub Actions),PR 必须通过全部测试和覆盖率检查
|
||||
|
||||
### T3:IntegrationPlugin 接口
|
||||
- [ ] **任务**:实现 `IntegrationPlugin` 接口(`Init() error` / `Serve() error` / `Shutdown() error`)
|
||||
- [ ] **任务**:实现插件模式下各模块的开关配置(`viper` 读取 `supply_intelligence.enabled_modules`)
|
||||
- [ ] **任务**:实现内部/外部路径前缀可配置,并区分 `/internal/supply-intelligence/` 与对外暴露路径
|
||||
- [ ] **任务**:编写集成测试:插件模式启动,关键探针/发现/发布事件接口与内部消费接口正常运作
|
||||
|
||||
---
|
||||
|
||||
## 任务估算汇总
|
||||
|
||||
| Phase | 模块 | 任务数 | 估计工时 |
|
||||
|-------|------|--------|---------|
|
||||
| Phase 1 | A1 探针管理 + A2 适配层 + E1 工作台只读 | 34 | 3 人天 |
|
||||
| Phase 2 | B1 模型发现 + C1 准入测试 | 22 | 3 人天 |
|
||||
| Phase 3 | D1/D2 受控自动补给 + E2 工作台干预 | 24 | 3 人天 |
|
||||
| 全局 | G1 供应商配置 + G2 配置热更新 + G3 OpenAPI + G4 权限认证 | 18 | 2 人天 |
|
||||
| 技术基础设施 | T1 骨架 + T2 测试 + T3 插件 | 14 | 2 人天 |
|
||||
| **合计** | | **112** | **~13 人天** |
|
||||
124
specs/竞品分析.md
Normal file
124
specs/竞品分析.md
Normal file
@@ -0,0 +1,124 @@
|
||||
# Supply Intelligence 竞品深度分析
|
||||
|
||||
> 版本:v1.0
|
||||
> 日期:2026-04-27
|
||||
> 内容:8 个竞品全景矩阵、功能逐项对比、技术分析、市场定位
|
||||
|
||||
---
|
||||
|
||||
## 一、市场概览
|
||||
|
||||
- 所属市场归并到 LLM API Gateway:2025 年市场规模约 **15-25 亿美元**,高速增长
|
||||
- 多供应商运营复杂度急剧上升:中等规模团队可能接入 10+ 供应商,20+ 账号
|
||||
- 供应商 API Key 失效/额度耗尽是高频线上事故根因,单次事故损失 $5000-50000
|
||||
- 新模型发布速度:每月 50+ 新模型,人工录入无法跟上
|
||||
- **市场空白**:供应链运营自动化(供应商账号健康、模型发现、准入测试)几乎无成熟方案
|
||||
|
||||
---
|
||||
|
||||
## 二、竞品全景矩阵(8 个)
|
||||
|
||||
| 竞品 | 类型 | 供应商账号健康探针 | 新模型自动发现 | 准入测试自动化 | 账号自动注册 | 运营工作台 | 定价 |
|
||||
|------|------|-----------------|-------------|-------------|------------|----------|------|
|
||||
| **LiteLLM** | 开源 | ❌ 手动录入 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单管理 | 免费(自部署) |
|
||||
| **Helicone** | SaaS/开源 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 免费+$0.05/请求 |
|
||||
| **Portkey** | SaaS | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | $49/月起 |
|
||||
| **OpenRouter** | SaaS | ❌ 手动管理 | ⚠️ 手动 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 5% 手续费 |
|
||||
| **Kong AI Gateway** | 企业 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ API 管理 | 面议 |
|
||||
| **One API / NewAPI** | 开源 | ❌ 手动管理 | ❌ 无 | ❌ 无 | ❌ 无 | ⚠️ 简单 | 免费 |
|
||||
| **RapidAPI Enterprise Hub** | SaaS | ⚠️ 入驻流程 | ⚠️ 手动 | ❌ 无 | ⚠️ 部分 | ✅ | $2-10 万/年 |
|
||||
| **内部自建(现状)** | — | ❌ 无监控 | ❌ 无 | ❌ 无 | ❌ 无 | ❌ 无 | 人力成本 |
|
||||
| **立连桥 supply-intelligence** | 内部工具 | ✅ 分钟级探针 | ✅ 自动发现 | ✅ 自动化流水线 | ✅ 白名单供应商 | ✅ 完整工作台 | 内部成本 |
|
||||
|
||||
---
|
||||
|
||||
## 三、功能逐项对比(12 项)
|
||||
|
||||
```
|
||||
功能项 LiteLLM Helicone Portkey OpenRouter NewAPI RapidAPI supply-intel
|
||||
供应商账号健康探针 ❌ ❌ ❌ ❌ ❌ ⚠️ ✅
|
||||
新模型自动发现 ❌ ❌ ❌ ⚠️ ❌ ⚠️ ✅
|
||||
模型准入测试 ❌ ❌ ❌ ❌ ❌ ❌ ✅
|
||||
supply_package 草稿生成 ❌ ❌ ❌ ❌ ❌ ❌ ✅
|
||||
账号自动注册 ❌ ❌ ❌ ❌ ❌ ⚠️ ✅
|
||||
运营工作台 ⚠️ ⚠️ ⚠️ ⚠️ ⚠️ ✅ ✅
|
||||
KMS 凭证加密 ❌ ❌ ⚠️ ❌ ❌ ✅ ✅
|
||||
审计日志 ⚠️ ✅ ✅ ⚠️ ⚠️ ✅ ✅
|
||||
供应商状态 API 对外提供 ❌ ❌ ❌ ❌ ❌ ❌ ✅
|
||||
Fail-closed 降级 ❌ ❌ ⚠️ ❌ ❌ ⚠️ ✅
|
||||
SFI 指标追踪 ❌ ❌ ❌ ❌ ❌ ❌ ✅
|
||||
独立/集成双模式 ❌ ❌ ❌ ⚠️ ⚠️ ❌ ✅
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、市场定位结论
|
||||
|
||||
### 4.1 竞品空白
|
||||
|
||||
**所有 LLM Gateway 竞品(LiteLLM/Helicone/Portkey/OpenRouter)只做:**
|
||||
- 统一 API 路由
|
||||
- 用量计量和计费
|
||||
- 基础监控和日志
|
||||
|
||||
**没有任何竞品提供:**
|
||||
1. 供应商账号健康度的分钟级自动探针(额度/密钥/TOS)
|
||||
2. 新模型发布的自动发现(对接供应商模型列表 API)
|
||||
3. 模型准入测试的自动化(功能验证 + supply_package 草稿生成)
|
||||
4. 供应商账号的自动注册(针对支持公开注册的供应商)
|
||||
|
||||
### 4.2 supply-intelligence 差异化定位
|
||||
|
||||
```
|
||||
LLM Gateway(LiteLLM/Helicone/Portkey/OpenRouter)
|
||||
└─ 能力边界:路由 + 计量 + 监控
|
||||
└─ 缺失:供应商运营能力
|
||||
|
||||
API Marketplaces(RapidAPI)
|
||||
└─ 能力边界:供应商入驻 + 文档 + 货币化
|
||||
└─ 缺失:自动化运营工具
|
||||
|
||||
内部自建(现状)
|
||||
└─ 能力边界:手动维护
|
||||
└─ 缺失:自动化 + 监控 + 实时性
|
||||
|
||||
───────────────────────────────────
|
||||
立连桥 supply-intelligence = 供应链运营自动化
|
||||
✅ 供应商健康探针(分钟级)
|
||||
✅ 新模型自动发现(对接供应商 API)
|
||||
✅ 准入测试自动化(功能验证)
|
||||
✅ 运营工作台(待办 + 一键上架)
|
||||
✅ 账号自动注册(白名单供应商)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、关键技术差异
|
||||
|
||||
### 5.1 探针方案对比
|
||||
|
||||
| 方案 | 代表竞品 | 频率 | 自动化程度 |
|
||||
|------|---------|------|----------|
|
||||
| 手动检查 | 内部自建 | 天级 | ❌ |
|
||||
| 被动监控 | LLM Gateway 竞品 | 被动 | ⚠️ 有限 |
|
||||
| 主动探针 | **supply-intelligence** | 分钟级 | ✅ 完整 |
|
||||
|
||||
### 5.2 模型发现方案对比
|
||||
|
||||
| 方案 | 代表竞品 | 延迟 | 自动化程度 |
|
||||
|------|---------|------|----------|
|
||||
| 人工录入 | 内部自建 | 天级 | ❌ |
|
||||
| 供应商通知 | RapidAPI | 小时级 | ⚠️ 被动 |
|
||||
| 自动扫描 | **supply-intelligence** | 分钟级 | ✅ 主动 |
|
||||
|
||||
---
|
||||
|
||||
## 六、技术选型建议
|
||||
|
||||
| 组件 | 推荐方案 | 理由 |
|
||||
|------|---------|------|
|
||||
| 探针调度 | Temporal | 分布式友好,exponential backoff,dead letter queue 内置 |
|
||||
| 供应商 API 对接 | 配置化 adapter | 供应商数量多,接口差异大,需可扩展 |
|
||||
| 凭证加密 | KMS(主)+ AES-256-GCM(兜底) | 符合安全审计要求 |
|
||||
| 模型发现 | 轮询为主 | 多数供应商无 Webhook,轮询更通用 |
|
||||
| 准入测试 | 异步任务队列 | 测试可能耗时长,不能阻塞扫描周期 |
|
||||
609
tech/BASELINE_TECHLEAD_V2.md
Normal file
609
tech/BASELINE_TECHLEAD_V2.md
Normal file
@@ -0,0 +1,609 @@
|
||||
> 真源索引:当前文档受 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md` 纳管。
|
||||
> 若阅读顺序、真源优先级或跨文档冲突存在疑问,先看该索引,再回到本基线文档。
|
||||
|
||||
1. 设计范围:本次覆盖 / 明确不做 / 与 PRD 对应关系
|
||||
|
||||
1.1 本次覆盖
|
||||
- 覆盖模块 A 供应商品质探针,但收敛为“账号健康探测 + 状态写回 + 审计 + gateway 可消费状态快照”。
|
||||
- 覆盖模块 B 全网模型发现,但收敛为“已接入供应商的模型列表扫描 + candidate 生成 + 下架告警”,不做广义全网情报平台。
|
||||
- 覆盖模块 C 模型准入测试,但收敛为“对 discovered candidate 做标准兼容性验证 + 生成 supply_package 草稿 + 发布闭环”。
|
||||
- 模块 E 仅覆盖与主链路直接相关的最小运营干预:手动触发探针、忽略 candidate、确认上架、查看失败原因、查看审计。
|
||||
- 覆盖与立交桥主项目的优先集成运行方案。
|
||||
- 覆盖对 NewAPI / Sub2API 的最小适配边界:状态读取、模型列表消费、可选发布回调;不反向侵入其内部实现。
|
||||
- 覆盖五个 QA 阻塞的显式修复:
|
||||
1) 设计范围蔓延
|
||||
2) 探针误判规则冲突
|
||||
3) candidate 状态机不闭环
|
||||
4) 模块关闭一致性缺失
|
||||
5) gateway 消费链路未闭环
|
||||
|
||||
1.2 明确不做
|
||||
- 不做独立平台化、多服务拆分、专用 API Gateway、专用消息总线、专用控制台集群。
|
||||
- 不做 vector / embedding 检索 / 向量库。
|
||||
- 不做 pricing 数据库、模型比价主链路、自动定价、家族回退定价。
|
||||
- 不做 predictions / 预测分析 / 广义开放平台 / 社区情报源。
|
||||
- 不做 WebSocket 实时推送作为本期前提;工作台可先走普通 HTTP 拉取。
|
||||
- 不做 Playwright 浏览器自动化注册主路径;本期仅保留受控自动补给的最小边界:白名单供应商、阈值触发、任务化补给、待验证/待启用,不把浏览器自动化注册链路作为首期上线门槛。
|
||||
- 不要求独立 Redis/Temporal/Milvus/Qdrant 等新增重基础设施;优先复用立交桥现有 DB、现有 scheduler、现有审计、现有配置热更新机制。
|
||||
- 不自动直接操作 gateway 路由细节表;只提供 package 发布后的内部契约,由 gateway/supply-api 按既有主链路消费。
|
||||
|
||||
1.3 与 PRD 对应关系
|
||||
- AC-01/02/03:保留,落在探针执行、判定、状态迁移、审计与降级策略。
|
||||
- AC-04/05:保留,落在扫描、去重、新增 candidate、下架告警。
|
||||
- AC-06/07:保留,落在 admission runner、candidate 流转、draft package 生成。
|
||||
- AC-08/09:本期不做深自动注册链路,但保留“受控自动补给”的最小产品/技术边界:仅允许白名单供应商、仅允许阈值触发、仅允许生成待补给任务或进入待验证/待启用,不允许把注册浏览器自动化、验证码编排、自动激活作为首期硬门槛。
|
||||
- AC-10/11/12:保留,但只保留支撑主链路的最小实现,不扩展成独立大盘平台。
|
||||
- PRD 中与商业化、SFI、预测分析、比价报表相关内容不作为本次技术控制面主路径。
|
||||
|
||||
2. 架构与模块:模块划分、文件/目录落点、关键调用链路、关键依赖与降级边界
|
||||
|
||||
2.1 总体架构
|
||||
基线采用“立交桥主项目内集成模块”模式,而不是独立平台。推荐以 supply-api 内部模块形式落地,原因:
|
||||
- 直接复用 supply_accounts / supply_packages / audit / verify / config / scheduler。
|
||||
- 避免再造服务间调用、鉴权、部署、监控、迁移复杂度。
|
||||
- 更符合立交桥现有 net/http + pgx + PostgreSQL 的简洁架构。
|
||||
|
||||
独立运行能力保留为简单可选形态:
|
||||
- 仅在确有外部项目需要时,封装为同仓内单进程启动入口。
|
||||
- 独立运行不得要求新增专用基础设施;仍使用 PostgreSQL + 现有 scheduler 抽象。
|
||||
- 不额外设计独立控制台、独立 worker 集群、独立 API 网关。
|
||||
|
||||
2.2 模块划分
|
||||
建议收敛为 6 个模块,均为最小必要:
|
||||
|
||||
A. probe
|
||||
- 读取待探测账号
|
||||
- 执行标准探针
|
||||
- 依据统一判定规则生成 outcome
|
||||
- 驱动 account 状态迁移
|
||||
- 写审计与探针日志
|
||||
|
||||
B. discovery
|
||||
- 读取已接入供应商适配器
|
||||
- 拉取模型列表
|
||||
- 与现有 supply_packages / candidate 去重
|
||||
- 创建 candidate
|
||||
- 生成模型下架告警待办
|
||||
|
||||
C. admission
|
||||
- 消费 discovered / retry_pending candidate
|
||||
- 执行标准测试集
|
||||
- 更新 candidate 状态
|
||||
- 生成或更新 supply_package draft
|
||||
|
||||
D. publish
|
||||
- 运营确认 package draft
|
||||
- 将 package 切到 active
|
||||
- 将 candidate 切到 published
|
||||
- 写入 gateway 可消费的发布事件/变更记录
|
||||
|
||||
E. integration
|
||||
- 立交桥内部直接集成接口
|
||||
- gateway / supply-api 内部契约
|
||||
- NewAPI / Sub2API 适配边界
|
||||
|
||||
F. control
|
||||
- 模块开关、停机、运行中任务收敛、配置热更新、幂等、审计
|
||||
|
||||
2.3 文件/目录落点
|
||||
以下为建议落点,优先放入立交桥主项目既有模块内;若 supply-intelligence 仓先行设计,可按同名目录组织:
|
||||
- /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md
|
||||
- 建议实现落点参考:
|
||||
- supply-api/internal/supplyintelligence/module.go
|
||||
- supply-api/internal/supplyintelligence/probe/service.go
|
||||
- supply-api/internal/supplyintelligence/probe/evaluator.go
|
||||
- supply-api/internal/supplyintelligence/probe/state_machine.go
|
||||
- supply-api/internal/supplyintelligence/discovery/service.go
|
||||
- supply-api/internal/supplyintelligence/discovery/adapter_registry.go
|
||||
- supply-api/internal/supplyintelligence/admission/service.go
|
||||
- supply-api/internal/supplyintelligence/admission/runner.go
|
||||
- supply-api/internal/supplyintelligence/publish/service.go
|
||||
- supply-api/internal/supplyintelligence/integration/http_internal.go
|
||||
- supply-api/internal/supplyintelligence/integration/newapi_adapter.go
|
||||
- supply-api/internal/supplyintelligence/integration/sub2api_adapter.go
|
||||
- supply-api/internal/supplyintelligence/control/shutdown.go
|
||||
- supply-api/internal/supplyintelligence/repository/*.go
|
||||
- supply-api/sql/*supply_intelligence*.sql
|
||||
|
||||
2.4 关键调用链路
|
||||
|
||||
链路 1:探针 -> 状态写回 -> gateway 消费闭环
|
||||
1) scheduler 触发 ProbeTick(platform/account batch)
|
||||
2) probe.Service.LoadProbeTargets()
|
||||
3) probe.Service.RunProbe(accountID)
|
||||
4) probe.Evaluator.Classify(response/error) => success / explicit_failure / inconclusive
|
||||
5) probe.StateMachine.Apply(account.current_status, recent_probe_window)
|
||||
6) repository.UpdateAccountHealthAndStatusTx(...)
|
||||
7) repository.AppendAuditLog(...)
|
||||
8) repository.UpsertGatewayAccountSnapshot(...)
|
||||
9) gateway 通过内部契约读取 snapshot 或随 package/account 查询一起读取可用状态
|
||||
|
||||
链路 2:扫描 -> candidate -> admission
|
||||
1) scheduler 触发 DiscoveryTick(platform)
|
||||
2) discovery.Adapter.FetchModels()
|
||||
3) discovery.Service.DiffAgainstPackagesAndCandidates()
|
||||
4) repository.UpsertModelCandidate(status=discovered)
|
||||
5) scheduler enqueue AdmissionRun(candidateID)
|
||||
6) admission.Runner.Execute(candidateID)
|
||||
7) repository.UpdateCandidateStatus(...)
|
||||
8) repository.UpsertDraftPackage(...)
|
||||
9) repository.AppendAuditLog(...)
|
||||
|
||||
链路 3:运营确认上架 -> gateway 消费闭环
|
||||
1) ops POST confirm publish
|
||||
2) publish.Service.PublishCandidate(candidateID, actor)
|
||||
3) tx: lock candidate + package draft
|
||||
4) package draft -> active
|
||||
5) candidate test_passed -> published
|
||||
6) append internal event supply_package_published
|
||||
7) append audit
|
||||
8) gateway/supply-api 既有主链路消费 active package 或发布事件刷新内存路由
|
||||
|
||||
链路 4:模块关闭闭环
|
||||
1) operator/config 将 module.enabled=false
|
||||
2) control.ModuleGate.MarkClosing(module)
|
||||
3) 新任务拒绝入队/拒绝手动触发
|
||||
4) 运行中任务继续到安全提交点或超时中断
|
||||
5) 写 module_state=closed when inflight=0
|
||||
6) 后续 scheduler tick 直接跳过
|
||||
|
||||
2.5 关键依赖与降级边界
|
||||
- PostgreSQL:强依赖。不可用时所有自动写操作 fail-closed,不做假成功。
|
||||
- scheduler:中强依赖。不可用时自动任务暂停,但手动接口可保留。记录告警。
|
||||
- supplier adapter:弱依赖。单供应商异常不影响其他供应商。
|
||||
- gateway:首期默认事件型消费方。发布链路不等待 gateway 成功回调才提交 package active,但必须通过 package change + ack 保留可追踪消费记录,且必须存在真实消费入口。
|
||||
- NewAPI/Sub2API:可选适配依赖。未配置时不影响立交桥内部主链路。
|
||||
|
||||
降级原则
|
||||
- 探针外部错误、429、5xx、DNS/TCP 异常:inconclusive,不推进惩罚性状态迁移。
|
||||
- admission 外部超时:candidate 转 retry_pending 或 test_failed,不能生成 active package。
|
||||
- gateway 消费延迟:package 可 active,但需要“未消费/待同步”状态位和审计,不可假定已生效。
|
||||
- 模块关闭中:新任务一律拒绝,运行中任务只允许安全收尾。
|
||||
|
||||
3. 接口与数据模型:API/RPC/事件、数据模型/schema、错误码、安全/鉴权契约
|
||||
|
||||
3.1 接口分类
|
||||
|
||||
3.1.1 立交桥内部直接集成接口
|
||||
用途:供立交桥主项目内其他模块直接调用,优先 Go 接口,不先暴露额外网络跳。
|
||||
|
||||
type SupplyIntelligenceModule interface {
|
||||
RunProbe(ctx context.Context, accountID int64, trigger string) (*ProbeOutcome, error)
|
||||
ScanPlatform(ctx context.Context, platform string, trigger string) (*ScanOutcome, error)
|
||||
RunAdmission(ctx context.Context, candidateID int64, trigger string) (*AdmissionOutcome, error)
|
||||
PublishCandidate(ctx context.Context, candidateID int64, actor string) (*PublishOutcome, error)
|
||||
GetAccountRoutingState(ctx context.Context, accountID int64) (*AccountRoutingState, error)
|
||||
}
|
||||
|
||||
3.1.2 给 gateway / supply-api 使用的内部契约
|
||||
用途:形成真实消费闭环,避免“文档说 gateway 会用,但无真实契约”。
|
||||
|
||||
HTTP internal 契约,前缀建议:/internal/supply-intelligence
|
||||
|
||||
1) GET /internal/supply-intelligence/accounts/{account_id}/routing-state
|
||||
响应:
|
||||
{
|
||||
"account_id": 123,
|
||||
"platform": "openai",
|
||||
"account_status": "active",
|
||||
"routing_enabled": true,
|
||||
"risk_score": 20,
|
||||
"reason_code": "ok",
|
||||
"last_probe_at": "2026-05-06T15:00:00Z",
|
||||
"version": 17
|
||||
}
|
||||
|
||||
2) GET /internal/supply-intelligence/models/{platform}/{model}/admission-state
|
||||
响应:
|
||||
{
|
||||
"platform": "openai",
|
||||
"model": "gpt-4.1-mini",
|
||||
"candidate_status": "published",
|
||||
"package_id": 456,
|
||||
"package_status": "active",
|
||||
"gateway_sync_status": "pending|applied|failed|not_required",
|
||||
"version": 9
|
||||
}
|
||||
|
||||
3) GET /internal/supply-intelligence/gateway/package-changes?cursor=...
|
||||
响应:
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"event_id": "evt_001",
|
||||
"event_type": "supply_package_published",
|
||||
"package_id": 456,
|
||||
"platform": "openai",
|
||||
"model": "gpt-4.1-mini",
|
||||
"occurred_at": "2026-05-06T15:00:00Z",
|
||||
"version": 9
|
||||
}
|
||||
],
|
||||
"next_cursor": "..."
|
||||
}
|
||||
|
||||
4) POST /internal/supply-intelligence/gateway/package-changes/{event_id}/ack
|
||||
请求:
|
||||
{
|
||||
"consumer": "gateway",
|
||||
"result": "applied|failed",
|
||||
"detail": "optional"
|
||||
}
|
||||
响应:204
|
||||
|
||||
闭环定义
|
||||
- 发布成功 != gateway 已消费。
|
||||
- 只有 gateway ack event_id 后,gateway_sync_status 才能从 pending -> applied/failed。
|
||||
- QA 必须验证 publish -> list changes -> ack 的真实链路。
|
||||
|
||||
3.1.3 面向 NewAPI/Sub2API 的适配边界
|
||||
原则:只暴露最小必要只读/回调能力,不把本系统设计成它们的管理平台。
|
||||
|
||||
适配边界 A:状态拉取
|
||||
- GET /adapter/v1/supply-status/accounts/{account_id}
|
||||
- 字段与 routing-state 对齐,但去掉内部实现细节。
|
||||
|
||||
适配边界 B:模型拉取
|
||||
- GET /adapter/v1/models?status=published
|
||||
响应只返回已 published 且 package active 的模型。
|
||||
|
||||
适配边界 C:可选发布回调下发
|
||||
- POST /adapter/v1/package-events
|
||||
仅在对方需要 webhook 模式时启用;默认不要求。
|
||||
|
||||
适配边界约束
|
||||
- 不暴露审计明细。
|
||||
- 不暴露原始探针日志。
|
||||
- 不暴露账号凭证、测试账号信息、内部风险算法细节。
|
||||
- 仅允许配置白名单来源访问。
|
||||
|
||||
3.2 数据模型/schema
|
||||
|
||||
3.2.1 probe_execution_logs
|
||||
- id bigint pk
|
||||
- account_id bigint not null
|
||||
- platform varchar(64) not null
|
||||
- probe_result varchar(32) not null 取值: success | explicit_failure | inconclusive
|
||||
- failure_class varchar(64) null 取值: auth_invalid | quota_empty | timeout | tcp_error | dns_error | rate_limited | upstream_5xx | parse_error
|
||||
- http_status int null
|
||||
- latency_ms int null
|
||||
- risk_score int not null
|
||||
- evaluated_transition varchar(64) not null 取值: no_change | active_to_suspended | suspended_to_disabled | suspended_to_active
|
||||
- executed_at timestamptz not null
|
||||
- request_id varchar(64) not null
|
||||
- index(account_id, executed_at desc)
|
||||
|
||||
3.2.2 model_candidates
|
||||
- id bigint pk
|
||||
- platform varchar(64) not null
|
||||
- model varchar(128) not null
|
||||
- status varchar(32) not null
|
||||
- discovery_source varchar(32) not null 取值: official_api | official_doc | manual_seed
|
||||
- last_scan_at timestamptz not null
|
||||
- discovered_at timestamptz not null
|
||||
- last_test_at timestamptz null
|
||||
- failure_reason_code varchar(64) null
|
||||
- failure_summary text null
|
||||
- ignored_until timestamptz null
|
||||
- package_id bigint null
|
||||
- version int not null default 1
|
||||
- unique(platform, model)
|
||||
|
||||
candidate 最终闭环状态机
|
||||
- discovered:扫描新发现,可入测试
|
||||
- testing:测试执行中
|
||||
- test_passed:测试通过,已存在 draft package
|
||||
- test_failed:测试失败,允许人工重试或自动进入 retry_pending
|
||||
- retry_pending:等待下次重试
|
||||
- ignored:运营临时忽略,到 ignored_until 后自动回 discovered
|
||||
- published:运营已确认上架,package active
|
||||
- deprecated:供应商侧已消失,已产生运营待办,但历史保留
|
||||
- closed:不再处理的终态,仅用于模型被明确弃用/手工关闭
|
||||
|
||||
合法迁移
|
||||
- discovered -> testing
|
||||
- testing -> test_passed | test_failed | retry_pending
|
||||
- test_failed -> retry_pending | closed
|
||||
- retry_pending -> testing | closed
|
||||
- discovered | test_failed | retry_pending -> ignored
|
||||
- ignored -> discovered
|
||||
- test_passed -> published | closed
|
||||
- published -> deprecated | closed
|
||||
- deprecated -> closed
|
||||
|
||||
闭环修复点
|
||||
- 任何非终态都存在后继处理路径。
|
||||
- ignored 有自动回流。
|
||||
- published/deprecated 最终可归档到 closed。
|
||||
- 不再存在“只定义中间态、无出口”的 QA 阻塞。
|
||||
|
||||
3.2.3 gateway_package_events
|
||||
- event_id varchar(64) pk
|
||||
- event_type varchar(64) not null
|
||||
- package_id bigint not null
|
||||
- candidate_id bigint null
|
||||
- payload jsonb not null
|
||||
- consumer varchar(64) null
|
||||
- consumer_status varchar(32) not null default 'pending'
|
||||
- consumer_detail text null
|
||||
- occurred_at timestamptz not null
|
||||
- acked_at timestamptz null
|
||||
- retry_count int not null default 0
|
||||
|
||||
3.2.4 module_runtime_state
|
||||
- module_name varchar(64) pk
|
||||
- desired_state varchar(16) not null 取值: enabled | disabled
|
||||
- runtime_state varchar(16) not null 取值: starting | running | closing | closed
|
||||
- inflight_count int not null
|
||||
- updated_at timestamptz not null
|
||||
|
||||
3.3 探针判定统一规则
|
||||
这是本轮必须修的 QA 阻塞之一,统一如下:
|
||||
|
||||
明确失败 explicit_failure
|
||||
- HTTP 401/403
|
||||
- 供应商明确返回 key invalid / account suspended / quota exhausted 且可稳定识别
|
||||
|
||||
不可判定 inconclusive
|
||||
- HTTP 429
|
||||
- HTTP 5xx
|
||||
- DNS 失败
|
||||
- TCP 连接失败
|
||||
- 超时
|
||||
- 响应体为空或格式突变
|
||||
|
||||
成功 success
|
||||
- 返回 2xx 且最小校验通过
|
||||
|
||||
状态迁移规则
|
||||
- active + 1 次 explicit_failure -> suspended
|
||||
- suspended + 最近连续 3 次 explicit_failure -> disabled
|
||||
- suspended + 1 次 success -> active
|
||||
- disabled 不自动恢复,只能人工恢复到 active 或 closed
|
||||
- inconclusive 永不计入 explicit failure 连续次数
|
||||
|
||||
说明
|
||||
- 将 timeout/TCP/DNS 从“失败导致降级”统一修正为 inconclusive,消除 PRD/HLD 冲突。
|
||||
- 若未来某供应商能明确证明 timeout 即余额停用,也必须走供应商级覆盖配置,不改全局默认。
|
||||
|
||||
3.4 错误码
|
||||
- SUP_INT_PROBE_NOT_FOUND 404
|
||||
- SUP_INT_PROBE_MODULE_DISABLED 409
|
||||
- SUP_INT_CANDIDATE_NOT_FOUND 404
|
||||
- SUP_INT_CANDIDATE_STATE_INVALID 409
|
||||
- SUP_INT_PUBLISH_PACKAGE_MISSING 409
|
||||
- SUP_INT_GATEWAY_ACK_CONFLICT 409
|
||||
- SUP_INT_ADAPTER_UNSUPPORTED 400
|
||||
- SUP_INT_AUTH_FORBIDDEN 403
|
||||
- SUP_INT_CONFIG_INVALID 400
|
||||
- SUP_INT_UPSTREAM_TEMPORARY 503
|
||||
|
||||
3.5 安全/鉴权契约
|
||||
- 内部接口只允许立交桥内部服务身份访问,走现有 internal auth middleware。
|
||||
- NewAPI/Sub2API 适配接口必须使用独立 access key 或签名校验,按来源白名单限制。
|
||||
- 审计字段必须包含 object_type/object_id/action/result_code/before_state/after_state/request_id/actor。
|
||||
- 任何日志不得输出明文 API key、cookie、token、测试账号凭证。
|
||||
- 手动发布、手动恢复 disabled 账号、关闭 candidate 必须要求 operator 身份并审计。
|
||||
|
||||
4. 任务拆解:每个任务必须有具体文件路径和函数名,粒度 2-5 分钟
|
||||
|
||||
说明:以下为 Engineer 最小实现任务单,按设计拆到文件级与函数级。路径以优先集成到 supply-api 为准。
|
||||
|
||||
4.1 模块骨架
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func RegisterModule(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func MountInternalRoutes(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/module.go :: func RegisterSchedulers(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/control/shutdown.go :: func BeginModuleClose(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/control/shutdown.go :: func FinishInflightTask(...) error
|
||||
|
||||
4.2 probe
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/service.go :: func LoadProbeTargets(ctx context.Context, limit int) ([]Account, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/service.go :: func RunProbe(ctx context.Context, accountID int64, trigger string) (*ProbeOutcome, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator.go :: func ClassifyProbeResult(resp *http.Response, err error) ProbeClass
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator.go :: func CalculateRiskScore(class ProbeClass) int
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine.go :: func ApplyAccountTransition(current string, recent []ProbeClass) (next string, transition string)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine.go :: func CountRecentExplicitFailures(recent []ProbeClass) int
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/worker.go :: func HandleProbeTick(ctx context.Context) error
|
||||
|
||||
4.3 discovery
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/adapter_registry.go :: func ResolveModelAdapter(platform string) (ModelAdapter, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func ScanPlatform(ctx context.Context, platform string, trigger string) (*ScanOutcome, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func DiffModels(current []string, packages []string, candidates []string) DiffResult
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func UpsertDiscoveredCandidates(ctx context.Context, platform string, models []string) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/service.go :: func MarkDeprecatedAlerts(ctx context.Context, platform string, missing []string) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/discovery/worker.go :: func HandleDiscoveryTick(ctx context.Context) error
|
||||
|
||||
4.4 admission
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/service.go :: func EnqueueAdmission(ctx context.Context, candidateID int64) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/service.go :: func RunAdmission(ctx context.Context, candidateID int64, trigger string) (*AdmissionOutcome, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func LoadCandidateForTesting(ctx context.Context, candidateID int64) (*Candidate, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func ExecuteTestSuite(ctx context.Context, c *Candidate) (*SuiteResult, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func DecideCandidateNextState(result *SuiteResult) (string, string)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner.go :: func UpsertDraftPackage(ctx context.Context, c *Candidate, result *SuiteResult) (int64, error)
|
||||
|
||||
4.5 publish
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func PublishCandidate(ctx context.Context, candidateID int64, actor string) (*PublishOutcome, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func ValidatePublishable(ctx context.Context, candidateID int64) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go :: func AppendGatewayPackageEvent(ctx context.Context, packageID int64, candidateID int64) error
|
||||
|
||||
4.6 integration
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func GetAccountRoutingState(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func GetModelAdmissionState(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func ListGatewayPackageChanges(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go :: func AckGatewayPackageChange(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/newapi_adapter.go :: func ListPublishedModels(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/newapi_adapter.go :: func GetExternalAccountStatus(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/sub2api_adapter.go :: func ListPublishedModels(w http.ResponseWriter, r *http.Request)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/sub2api_adapter.go :: func GetExternalAccountStatus(w http.ResponseWriter, r *http.Request)
|
||||
|
||||
4.7 repository / sql
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/probe_repo.go :: func InsertProbeExecutionLog(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/probe_repo.go :: func UpdateAccountHealthAndStatusTx(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/candidate_repo.go :: func UpsertModelCandidate(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/candidate_repo.go :: func UpdateCandidateStateTx(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/package_repo.go :: func UpsertDraftPackageTx(...) (int64, error)
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go :: func InsertGatewayPackageEventTx(...) error
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go :: func AckGatewayPackageEventTx(...) error
|
||||
- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_probe_logs.sql :: migration create table
|
||||
- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_candidates.sql :: migration create table
|
||||
- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_gateway_events.sql :: migration create table
|
||||
- /home/long/project/立交桥/supply-api/sql/xxxx_supply_intelligence_module_runtime.sql :: migration create table
|
||||
|
||||
4.8 测试与校验
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/state_machine_test.go :: func TestApplyAccountTransition()
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/probe/evaluator_test.go :: func TestClassifyProbeResult()
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/admission/runner_test.go :: func TestDecideCandidateNextState()
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service_test.go :: func TestPublishCandidate_AppendsGatewayEvent()
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal_test.go :: func TestGatewayEventAckFlow()
|
||||
|
||||
5. 风险与保护:风险清单(概率/影响/缓解)、降级策略、威胁建模结果
|
||||
|
||||
5.1 风险清单
|
||||
1) 探针误判导致错误下线
|
||||
- 概率:中
|
||||
- 影响:高
|
||||
- 缓解:只允许 explicit_failure 触发惩罚状态;429/超时/网络错误一律判为 inconclusive;仅在连续 3 次明确失败后才进入 disabled;生产初期可只告警不自动 disabled。
|
||||
|
||||
2) candidate 重复创建或状态乱序
|
||||
- 概率:中
|
||||
- 影响:中
|
||||
- 缓解:unique(platform, model);version 乐观锁;状态迁移函数集中校验;测试任务拿行锁。
|
||||
|
||||
3) gateway 未真实消费已上架 package
|
||||
- 概率:高
|
||||
- 影响:高
|
||||
- 缓解:新增 gateway_package_events + ack 契约;区分 published 与 gateway applied;监控 pending backlog。
|
||||
|
||||
4) 模块关闭时仍有脏写入
|
||||
- 概率:中
|
||||
- 影响:中
|
||||
- 缓解:runtime_state=closing 时拒绝新任务;inflight 计数;安全提交点;超时取消 ctx。
|
||||
|
||||
5) 适配器变更影响扫描质量
|
||||
- 概率:中
|
||||
- 影响:中
|
||||
- 缓解:按供应商隔离;单平台失败不扩散;保留 last_successful_scan 基线;失败仅告警不删数据。
|
||||
|
||||
6) NewAPI/Sub2API 适配越权暴露内部数据
|
||||
- 概率:低
|
||||
- 影响:高
|
||||
- 缓解:适配接口单独 DTO;白名单认证;不复用内部 debug 输出。
|
||||
|
||||
5.2 降级策略
|
||||
- probe 模块关闭:gateway 继续依赖现有 account/package 状态;新鲜度下降但主链路可运行。
|
||||
- discovery 模块关闭:不再发现新模型;已上架模型不受影响。
|
||||
- admission 模块关闭:candidate 可积压,但不会误上架。
|
||||
- publish 后 gateway 未消费:保留 pending,运营可见;不回滚 package active,但不得宣称“已进路由”。
|
||||
- NewAPI/Sub2API 未配置:直接关闭适配路由,不影响内部主链路。
|
||||
|
||||
5.3 威胁建模结果
|
||||
输入边界
|
||||
- 供应商返回体属于不可信输入:必须限长、schema 校验、错误脱敏。
|
||||
- 运营手动接口属于高权限输入:必须鉴权、审计、幂等。
|
||||
- gateway ack 请求属于内部写接口:必须鉴权并校验 event_id/consumer 一致性。
|
||||
|
||||
数据流
|
||||
- supplier -> adapter -> evaluator -> db
|
||||
- db -> internal route -> gateway
|
||||
- db -> adapter route -> NewAPI/Sub2API
|
||||
|
||||
主要威胁与处置
|
||||
- 凭证泄漏:本期不纳入自动注册主路径;现有账号密钥仅走既有安全存储,不在本模块新增明文链路。
|
||||
- 重放/重复发布:publish 接口需幂等,published candidate 再次 publish 返回 409。
|
||||
- 伪造 gateway ack:只接受内部服务身份;event consumer 固定枚举。
|
||||
- 大响应体压垮解析:adapter 限制 body size,超限视为 inconclusive/scan_failed。
|
||||
- SQL 并发覆盖:关键状态表使用 version 或 select for update。
|
||||
|
||||
6. QA 交接与实施约束:编码前设计审查要点、编码后漂移检查点、必查真实调用链路、禁止偏离的边界
|
||||
|
||||
6.1 编码前设计审查要点
|
||||
- 是否明确“集成运行优先,独立运行可选且轻量”。
|
||||
- 是否删除 pricing/vector/predictions/开放平台化内容。
|
||||
- 探针默认规则是否统一为 explicit_failure 才触发状态惩罚。
|
||||
- candidate 状态机是否存在完整入口、出口、终态与回流。
|
||||
- gateway 是否存在 list change + ack 的真实闭环,而非只有查询接口。
|
||||
- 模块关闭是否存在 closing -> closed 收敛语义。
|
||||
- NewAPI/Sub2API 是否仅作为适配边界,而非反向牵引主架构。
|
||||
|
||||
6.2 编码后漂移检查点
|
||||
- 是否出现新增 Redis/Temporal/Kafka/MQ/向量库等重基础设施前置依赖。
|
||||
- 是否出现额外独立服务、额外 API gateway、复杂事件总线。
|
||||
- 是否把自动注册重新抬回本期主路径。
|
||||
- 是否把 gateway 路由刷新实现成必须成功的跨系统强耦合同步 RPC。
|
||||
- 是否新增未在本基线定义的中间状态。
|
||||
- 是否把 timeout/TCP/DNS 再次当成 explicit_failure。
|
||||
|
||||
6.3 QA 必查真实调用链路
|
||||
- probe tick -> evaluator -> state machine -> supply_accounts 写回 -> audit 写入
|
||||
- discovery tick -> candidate discovered -> admission run -> draft package
|
||||
- publish confirm -> package active -> candidate published -> gateway change event -> gateway ack
|
||||
- module disable -> closing -> reject new task -> inflight drain -> closed
|
||||
- adapter route -> NewAPI/Sub2API 只读返回,字段不泄露内部敏感信息
|
||||
|
||||
6.4 禁止偏离的边界
|
||||
- 禁止把本期做成独立平台化部署前提。
|
||||
- 禁止把比价、预测、向量检索恢复为主链路。
|
||||
- 禁止未定义契约就声称“gateway 会消费”。
|
||||
- 禁止 candidate 状态直接跳 published,绕过 test_passed + draft package。
|
||||
- 禁止 disabled 自动恢复。
|
||||
- 禁止模块关闭时直接 kill 运行中事务而无收敛策略。
|
||||
|
||||
7. Engineer 实施说明:文件级落点、最小验证项、需 PM 澄清项
|
||||
|
||||
7.1 文件级落点
|
||||
优先实施目录:
|
||||
- /home/long/project/立交桥/supply-api/internal/supplyintelligence/
|
||||
- /home/long/project/立交桥/supply-api/sql/
|
||||
- /home/long/project/立交桥/supply-api/internal/http/internal/
|
||||
|
||||
若 supply-intelligence 项目仓仅承载设计文档,则本文件作为交付基线,后续代码并入 supply-api 主仓。
|
||||
|
||||
7.2 最小验证项
|
||||
- 单测:探针分类、账号状态迁移、candidate 状态迁移、publish 幂等。
|
||||
- 集成测:publish 后产生 gateway event,gateway ack 后状态更新 applied。
|
||||
- 集成测:module closing 时手动触发探针返回 409 module disabled/closing。
|
||||
- E2E 最小链路:
|
||||
1) 一个 active 账号 401 -> suspended
|
||||
2) 一个新模型 discovered -> test_passed -> draft -> published
|
||||
3) gateway 拉取 package change 并 ack
|
||||
|
||||
7.3 需 PM 澄清项
|
||||
- 本期是否允许 production 初期仅启用 active->suspended,暂不自动 disabled。
|
||||
- candidate ignored 的默认恢复期是否固定 7 天,或允许按供应商配置。
|
||||
- gateway 首期默认采用 pull package-changes + ack 作为事件型消费闭环;若后续证明已有内部刷新入口可复用,也必须保留等价 ack 语义与可审计消费状态。
|
||||
- NewAPI/Sub2API 本期需要只读拉取,还是还需要 webhook 模式;默认只做只读拉取。
|
||||
|
||||
8. 阶段门控结论:可进入 QA 设计审查 / 需返回 PM / 需继续补设计
|
||||
|
||||
结论:可进入 QA 设计审查
|
||||
|
||||
理由
|
||||
- 五个 QA 阻塞已在本基线中逐项补洞并收敛。
|
||||
- 架构已回到立交桥一致的简洁集成模式。
|
||||
- 对立交桥 / NewAPI / Sub2API 的边界已最小化并显式分类。
|
||||
- 已删除明显超范围和重基础设施设计。
|
||||
|
||||
附带条件
|
||||
- 不代表可直接开发放行。
|
||||
- 进入开发前仍需确认 PM 澄清项中的 gateway 消费方式与 production 初期自动 disabled 策略。
|
||||
|
||||
9. 下游执行约束摘要:
|
||||
- Engineer 禁止偏离:不得新增独立平台化部署前提、不得恢复 pricing/vector/predictions 主路径、不得绕过 gateway event ack 闭环、不得新增未定义 candidate 状态。
|
||||
- QA 必查调用链路:probe->状态写回;discovery->candidate->admission->draft;publish->gateway event->ack;module disable->closing->drain->closed;NewAPI/Sub2API 只读适配边界。
|
||||
- XL 若继续推进需补的门控:确认 gateway 实际消费方式;确认生产首期自动 disabled 策略;确认代码最终并入 supply-api 主仓而非另起独立重部署。
|
||||
|
||||
自检清单
|
||||
- [x] 架构设计覆盖 PRD 所有 AC
|
||||
- [x] 接口定义完整(请求/响应/错误)
|
||||
- [x] 每个任务 < 5分钟,有明确文件路径
|
||||
- [x] 依赖关系无循环
|
||||
- [x] 考虑了扩展点(未来可能的变化)
|
||||
- [x] 风险评估完整,有关键风险的缓解方案
|
||||
- [x] 符合项目现有技术栈和编码规范
|
||||
- [x] 降级策略已设计(熔断/限流/兜底)
|
||||
- [x] 威胁建模已完成(输入边界/鉴权/数据流)
|
||||
- [x] 实施漂移检测点已定义(可与 QA checklist 对接)
|
||||
- [x] 已明确标记是否可进入 QA 设计审查
|
||||
- [x] 已提供 QA 编码前审查与编码后漂移检测所需交接物
|
||||
- [x] 已给出 Engineer / QA / XL 的下游执行约束摘要
|
||||
- [x] 已纳入立交桥简洁架构与立交桥/NewAPI/Sub2API 集成边界
|
||||
118
tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md
Normal file
118
tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md
Normal file
@@ -0,0 +1,118 @@
|
||||
# Supply-Intelligence 当前实现真源索引(2026-05)
|
||||
|
||||
> 状态:当前有效
|
||||
> 目的:为 Engineer / QA / PM 提供单一阅读入口,避免继续误读历史草案。
|
||||
> 适用范围:/home/long/project/立交桥/projects/supply-intelligence/
|
||||
|
||||
## 1. 当前结论
|
||||
|
||||
当前规划包已收敛到“可进入 Engineer 实现”状态。
|
||||
当前总门控结论:APPROVED。
|
||||
|
||||
但 APPROVED 的前提是:实现、测试、评审都必须以本文件列出的“当前真源”解释,不得回退到旧 PRD/HLD/INTERFACE/DEPLOYMENT 的正文口径。
|
||||
|
||||
## 2. 当前真源文件(按优先级)
|
||||
|
||||
### 2.1 一级真源:必须优先遵循
|
||||
1. `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
|
||||
- 作用:当前技术基线、状态机、模块边界、集成约束、最小生产闭环定义
|
||||
- 适用问题:实现边界、状态迁移、部署形态、首期能力范围、风险与验证要求
|
||||
|
||||
2. `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
- 作用:首期 package/account 消费闭环决议
|
||||
- 适用问题:published vs applied、gateway 是否默认消费方、package change + ack、真实调用链落点
|
||||
|
||||
### 2.2 二级真源:必须按一级真源解释
|
||||
3. `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
|
||||
- 作用:收敛后的测试门禁文档
|
||||
- 使用规则:
|
||||
- 只能按一级真源解释
|
||||
- 当前阶段门控结论以其中已更新的 APPROVED 段落为准
|
||||
- 若正文某处仍残留旧测试假设,以一级真源覆盖
|
||||
|
||||
4. `/home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md`
|
||||
- 作用:任务粒度参考清单
|
||||
- 使用规则:
|
||||
- 仅用于任务拆分、实现排程、UI/后端任务定位
|
||||
- 若与一级真源冲突,一律以一级真源为准
|
||||
- 不得把其中任何历史平台化/重基础设施/深自动注册项当作默认首期门槛
|
||||
|
||||
## 3. 历史参考文件(禁止作为当前实现真源)
|
||||
|
||||
以下文件只能用于理解历史上下文,不能再作为 Engineer/QA 的当前实现依据:
|
||||
|
||||
1. `/home/long/project/立交桥/projects/supply-intelligence/prd/PRD.md`
|
||||
2. `/home/long/project/立交桥/projects/supply-intelligence/tech/HLD.md`
|
||||
3. `/home/long/project/立交桥/projects/supply-intelligence/tech/INTERFACE.md`
|
||||
4. `/home/long/project/立交桥/projects/supply-intelligence/tech/DEPLOYMENT.md`
|
||||
|
||||
原因:这些文件虽已加失效声明,但正文仍保留大量旧设计,例如:
|
||||
- pricing / prediction / 向量检索 / 仪表盘扩张
|
||||
- 独立 API/worker/重基础设施默认前提
|
||||
- gateway 管理接口热更新主路径
|
||||
- 深自动注册 / 浏览器自动化主路径
|
||||
- published 与 gateway applied 语义混淆
|
||||
|
||||
## 4. Engineer 必须先建立的统一理解
|
||||
|
||||
### 4.1 首期能力边界
|
||||
首期目标不是独立平台化大系统,而是“立交桥延伸项目 + 简洁集成架构 + 最小生产闭环”。
|
||||
|
||||
这意味着:
|
||||
- 优先并入 supply-api 主仓
|
||||
- 优先复用主仓已有配置、调度、审计、内部路由能力
|
||||
- 不把 Redis / Temporal / 向量数据库 / WebSocket / MQ 作为首期硬前置
|
||||
- 不做 pricing / prediction / recommendation / SFI 仪表盘扩张
|
||||
|
||||
### 4.2 探针判定边界
|
||||
必须按 explicit_failure / inconclusive / success 三类解释,不能回退到旧的“timeout 直接惩罚性降级”口径。
|
||||
|
||||
### 4.3 自动补给边界
|
||||
首期不是深自动注册主路径。
|
||||
首期仅保留“受控自动补给最小边界”:
|
||||
- 白名单供应商
|
||||
- 阈值触发
|
||||
- 任务化补给或受控补给受理接口
|
||||
- pending_verify / pending_enable 等受控中间态
|
||||
- fail-closed
|
||||
|
||||
不得默认实现:
|
||||
- 浏览器自动化注册
|
||||
- 短信验证码编排主路径
|
||||
- 无审批直接自动激活
|
||||
|
||||
### 4.4 gateway 消费闭环边界
|
||||
首期默认采用:
|
||||
- package 发布链路:event + ack
|
||||
- account 状态链路:查询型消费
|
||||
|
||||
必须明确:
|
||||
- published != applied
|
||||
- active package != gateway 已消费成功
|
||||
- 没有真实 poll/apply/ack 入口,不得宣称 package 发布链路已完成
|
||||
|
||||
## 5. QA 审查必须卡的四条红线
|
||||
|
||||
1. 若实现重新引入 published/applied 混淆,直接打回
|
||||
2. 若把深自动注册重新膨胀成首期硬门槛,直接打回
|
||||
3. 若把旧独立平台化基础设施重新作为首期依赖,直接打回
|
||||
4. 若 gateway 只有接口定义、没有真实消费方入口与 ack 回写,直接打回
|
||||
|
||||
## 6. 推荐阅读顺序
|
||||
|
||||
Engineer / QA / PM 开始工作前,按以下顺序阅读:
|
||||
1. `tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
|
||||
2. `tech/BASELINE_TECHLEAD_V2.md`
|
||||
3. `tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
4. `tech/TEST_DESIGN.md`
|
||||
5. `specs/功能清单.md`
|
||||
|
||||
禁止跳过前 3 份文件直接依据旧 PRD/HLD/INTERFACE/DEPLOYMENT 开始实现。
|
||||
|
||||
## 7. 一句话执行规则
|
||||
|
||||
如果某个设计点在文档间出现冲突:
|
||||
- 先看 `BASELINE_TECHLEAD_V2.md`
|
||||
- 再看 `GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
- 然后用 `TEST_DESIGN.md` 和 `功能清单.md` 做验证与任务拆解
|
||||
- 不回退到旧草案正文做判断
|
||||
168
tech/DEPLOYMENT.md
Normal file
168
tech/DEPLOYMENT.md
Normal file
@@ -0,0 +1,168 @@
|
||||
# Supply-Intelligence 部署设计
|
||||
|
||||
> 状态说明(2026-05 收敛修订):本文件保留为旧版部署草案,已不再作为当前默认部署真源。
|
||||
> 当前默认部署真源应以“立交桥延伸项目 + 简洁集成架构”为准:优先并入 supply-api 主仓,独立运行仅为轻量可选形态。
|
||||
> 以下旧部署假设已废止,不得再作为首期落地前提:
|
||||
> - 独立 API Server + 多 Worker 集群默认部署
|
||||
> - Redis / 向量数据库 / WebSocket / 独立共享层作为首期前置依赖
|
||||
> - 以独立多组件容器拓扑替代主仓集成部署
|
||||
|
||||
> 版本:v1.0 | 状态:初稿
|
||||
|
||||
---
|
||||
|
||||
## 1. 部署架构
|
||||
|
||||
### 1.1 总体架构
|
||||
|
||||
```
|
||||
├── Load Balancer (Nginx / 云 CLB)
|
||||
│
|
||||
├── Supply-Intelligence API Server x 2
|
||||
│ │
|
||||
│ ├── HTTP API
|
||||
│ └── WebSocket (健康大盘实时推送)
|
||||
│
|
||||
├── Supply-Intelligence Worker x 3
|
||||
│ │
|
||||
│ ├── Probe Worker (探针任务)
|
||||
│ ├── Discovery Worker (扫描任务)
|
||||
│ ├── Admission Worker (准入测试任务)
|
||||
│ ├── Auto-Reg Worker (自动注册任务)
|
||||
│ └── Cleanup Worker (定期清理)
|
||||
│
|
||||
└── 共享层
|
||||
│
|
||||
├── PostgreSQL 15+ (与 supply-api 共存或独立)
|
||||
├── Redis (缓存 + 锁 + 扫描结果缓存)
|
||||
└── 向量数据库 (PGVector / Milvus / Qdrant)
|
||||
```
|
||||
|
||||
### 1.2 容器化部署
|
||||
|
||||
```yaml
|
||||
services:
|
||||
supply-intel-api:
|
||||
image: supply-intelligence:latest
|
||||
command: ["./supply-intel", "api"]
|
||||
replicas: 2
|
||||
ports:
|
||||
- "8081:8080"
|
||||
|
||||
supply-intel-probe:
|
||||
image: supply-intelligence:latest
|
||||
command: ["./supply-intel", "worker", "probe"]
|
||||
replicas: 1
|
||||
|
||||
supply-intel-discovery:
|
||||
image: supply-intelligence:latest
|
||||
command: ["./supply-intel", "worker", "discovery"]
|
||||
replicas: 1
|
||||
|
||||
supply-intel-admission:
|
||||
image: supply-intelligence:latest
|
||||
command: ["./supply-intel", "worker", "admission"]
|
||||
replicas: 1
|
||||
|
||||
supply-intel-autoreg:
|
||||
image: supply-intelligence:latest
|
||||
command: ["./supply-intel", "worker", "autoreg"]
|
||||
replicas: 1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. 资源需求
|
||||
|
||||
### 2.1 API Server
|
||||
|
||||
| 资源 | 需求 | 说明 |
|
||||
|------|------|------|
|
||||
| CPU | 1 核 | |
|
||||
| 内存 | 512 MB | |
|
||||
| 存储 | 无 | |
|
||||
|
||||
### 2.2 Worker
|
||||
|
||||
| Worker 类型 | CPU | 内存 | 说明 |
|
||||
|------------|-----|--------|------|
|
||||
| Probe | 1 核 | 512 MB | 同时发起多个 HTTP 请求 |
|
||||
| Discovery | 1 核 | 1 GB | 可能涉及 Playwright 爬取 |
|
||||
| Admission | 2 核 | 2 GB | 测试流水线调用 LLM API,CPU 与内存需求较高 |
|
||||
| Auto-Reg | 1 核 | 512 MB | |
|
||||
|
||||
### 2.3 数据库
|
||||
|
||||
| 资源 | 需求 | 说明 |
|
||||
|------|------|------|
|
||||
| CPU | 2 核 | |
|
||||
| 内存 | 4 GB | |
|
||||
| 存储 | 100 GB | 探针历史 + 审计日志 + 定价数据库 |
|
||||
|
||||
### 2.4 向量数据库
|
||||
|
||||
| 选型 | CPU | 内存 | 存储 | 说明 |
|
||||
|------|-----|--------|------|------|
|
||||
| PGVector | 与 PostgreSQL 共存 | 共存 | 共存 | 推荐,无需额外部署 |
|
||||
| Milvus | 2 核 | 4 GB | 50 GB | 高性能、分布式 |
|
||||
| Qdrant | 1 核 | 2 GB | 30 GB | 轻量、Cloud-native |
|
||||
|
||||
---
|
||||
|
||||
## 3. 监控与运维钩子
|
||||
|
||||
### 3.1 健康检查
|
||||
|
||||
| 端点 | 路径 | 预期响应 | 失败行为 |
|
||||
|------|------|----------|---------|
|
||||
| 存活检查 | `/actuator/health/live` | HTTP 200 | 容器重启 |
|
||||
| 就绪检查 | `/actuator/health/ready` | HTTP 200 | 从负载均衡移除 |
|
||||
| 综合检查 | `/actuator/health` | HTTP 200 + JSON | 触发告警 |
|
||||
|
||||
### 3.2 启动/关闭顺序
|
||||
|
||||
**启动顺序**:
|
||||
1. PostgreSQL 启动完成
|
||||
2. Redis 启动完成
|
||||
3. 向量数据库启动完成
|
||||
4. Worker 启动(执行 migration)
|
||||
5. API Server 启动
|
||||
|
||||
**关闭顺序**:
|
||||
1. 停止接收新 HTTP 请求
|
||||
2. 等待现有请求处理完成(超时 30 秒)
|
||||
3. 停止各 Worker 定时器
|
||||
4. 关闭数据库连接池
|
||||
5. 退出进程
|
||||
|
||||
### 3.3 配置管理
|
||||
|
||||
- 配置文件 `config.yaml` + 环境变量覆盖。
|
||||
- 供应商 API Key 仅通过环境变量传入。
|
||||
- 探针周期、扫描周期、测试用例集路径等可热更新。
|
||||
|
||||
---
|
||||
|
||||
## 4. 灾备设计
|
||||
|
||||
### 4.1 数据库灾备
|
||||
|
||||
| 策略 | 方案 | RTO | RPO |
|
||||
|------|------|-----|-----|
|
||||
| 主库故障 | 自动切换至备库 | < 5 min | < 1 min |
|
||||
| 逻辑损坏 | 从备库恢复 + 审计日志回放 | < 30 min | < 1 min |
|
||||
|
||||
### 4.2 扫描/测试任务灾备
|
||||
|
||||
| 场景 | 处理 |
|
||||
|------|------|
|
||||
| Discovery Worker 故障 | 下一周期自动恢复,扫描任务无状态,不影响生产 |
|
||||
| Admission Worker 故障 | 测试任务缓存在 Redis,恢复后继续执行 |
|
||||
| Probe Worker 故障 | 探针任务缓存在 Redis,恢复后继续执行 |
|
||||
| 向量数据库故障 | 知识库检索降级为文本匹配,不影响核心探针功能 |
|
||||
|
||||
### 4.3 多中心部署
|
||||
|
||||
- 当前阶段为单中心部署。
|
||||
- 探针任务无状态,不依赖中心化调度。
|
||||
- 未来扩展至多中心时,需要解决 PostgreSQL 分布式写入和向量数据库的同步问题。
|
||||
169
tech/GATEWAY_CONSUMER_DECISION_2026-05.md
Normal file
169
tech/GATEWAY_CONSUMER_DECISION_2026-05.md
Normal file
@@ -0,0 +1,169 @@
|
||||
# Supply-Intelligence 首期消费闭环决议(2026-05)
|
||||
|
||||
> 状态:当前有效决议
|
||||
> 作用:消除“只有接口定义,没有首期真实消费方与调用落点”的设计歧义。
|
||||
> 适用范围:/home/long/project/立交桥/projects/supply-intelligence/ 下当前收敛规划包。
|
||||
> 真源索引:本决议受 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md` 纳管;若与历史草案冲突,以真源索引定义的优先级解释。
|
||||
|
||||
## 1. 结论
|
||||
|
||||
首期默认消费闭环采用:
|
||||
- package 发布链路:gateway 作为首期默认消费方,使用 pull `package-changes` + `ack` 机制完成闭环
|
||||
- account 状态链路:立交桥 / supply-api 内部主链路直接读取 `routing-state` 或等价 snapshot,不通过 gateway event ack 闭环
|
||||
|
||||
这意味着必须明确区分两类链路:
|
||||
1. 账号可路由状态链路:查询型消费
|
||||
2. package 发布生效链路:事件型消费
|
||||
|
||||
不得混用以下错误口径:
|
||||
- `published = 已进入 gateway 路由`
|
||||
- `active package = 下游已消费成功`
|
||||
|
||||
正确口径:
|
||||
- `published` 仅表示 supply-intelligence 侧已完成运营确认与 package 激活
|
||||
- 只有 gateway 对 package event 完成 `ack(result=applied)` 后,才能宣称“已被 gateway 消费生效”
|
||||
|
||||
## 2. 首期默认路径
|
||||
|
||||
### 2.1 账号状态链路
|
||||
|
||||
生产主链路:
|
||||
1. probe 执行
|
||||
2. evaluator 分类为 success / explicit_failure / inconclusive
|
||||
3. state machine 生成状态迁移
|
||||
4. 写回 supply account 健康状态与审计
|
||||
5. 立交桥内部路由决策读取 `GET /internal/supply-intelligence/accounts/{account_id}/routing-state`
|
||||
|
||||
说明:
|
||||
- 这是查询型读取,不需要 event ack。
|
||||
- 若调用方读取失败,不回滚 supply-intelligence 已落库状态,只记录消费侧问题。
|
||||
|
||||
### 2.2 package 发布闭环
|
||||
|
||||
生产主链路:
|
||||
1. 运营确认发布 candidate
|
||||
2. package draft -> active
|
||||
3. candidate `test_passed -> published`
|
||||
4. 写入 `gateway_package_events`
|
||||
5. gateway 拉取 `GET /internal/supply-intelligence/gateway/package-changes?cursor=...`
|
||||
6. gateway 应用变更到自身路由/缓存
|
||||
7. gateway 调用 `POST /internal/supply-intelligence/gateway/package-changes/{event_id}/ack`
|
||||
8. `gateway_sync_status` 变为 `applied` 或 `failed`
|
||||
|
||||
说明:
|
||||
- 这是事件型闭环。
|
||||
- `pending` 表示 supply-intelligence 已发布,但 gateway 尚未确认消费。
|
||||
- `failed` 表示 gateway 已尝试消费但未成功,需要运营或工程介入。
|
||||
|
||||
## 3. 为什么首期不采用强耦合同步 RPC
|
||||
|
||||
首期明确不采用:
|
||||
- “发布时同步调用 gateway 管理接口,成功后才算发布成功”
|
||||
|
||||
原因:
|
||||
1. 这会把 supply-intelligence 与 gateway 强耦合在单次事务中
|
||||
2. 会把下游暂时不可用放大成上游发布不可用
|
||||
3. 不符合当前“立交桥延伸项目、简洁架构、最小生产闭环”的收敛目标
|
||||
|
||||
因此首期选择:
|
||||
- 上游发布成功与下游消费成功解耦
|
||||
- 用 event + ack 明确消费状态
|
||||
|
||||
## 4. 首期真实代码落点(实现约束)
|
||||
|
||||
以下是首期必须存在的真实调用落点;只有接口定义不算完成。
|
||||
|
||||
### 4.1 supply-intelligence / supply-api 侧
|
||||
- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/publish/service.go`
|
||||
- `PublishCandidate(...)`
|
||||
- `AppendGatewayPackageEvent(...)`
|
||||
- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/integration/http_internal.go`
|
||||
- `GetAccountRoutingState(...)`
|
||||
- `ListGatewayPackageChanges(...)`
|
||||
- `AckGatewayPackageChange(...)`
|
||||
- `/home/long/project/立交桥/supply-api/internal/supplyintelligence/repository/gateway_repo.go`
|
||||
- `InsertGatewayPackageEventTx(...)`
|
||||
- `AckGatewayPackageEventTx(...)`
|
||||
|
||||
### 4.2 gateway 侧(首期必须由消费方实现的真实入口)
|
||||
- 必须存在一个实际消费入口,完成:
|
||||
1. 周期拉取 package changes
|
||||
2. 应用变更
|
||||
3. 回写 ack
|
||||
- 若 gateway 已有内部刷新链路,可复用,但必须补齐 ack 回写
|
||||
- 若 gateway 无现成入口,则新增最小 poller;禁止为了这件事引入 MQ/Kafka/新总线
|
||||
|
||||
## 5. QA 必查真实调用链路
|
||||
|
||||
QA 编码后必须至少验证以下四条链路,每条链路均按“定义 → 装配 → 调用 → 入口”四层核对:
|
||||
|
||||
### 链路 A:账号状态查询型消费
|
||||
- 定义:`GetAccountRoutingState`
|
||||
- 装配:internal route mounted
|
||||
- 调用:立交桥 / supply-api 实际路由决策点调用该接口或等价函数
|
||||
- 入口:真实请求/真实调用路径可达
|
||||
|
||||
### 链路 B:package 事件发布
|
||||
- 定义:`AppendGatewayPackageEvent`
|
||||
- 装配:publish 流程内注入 repository
|
||||
- 调用:`PublishCandidate` 成功路径真实调用写事件
|
||||
- 入口:运营确认发布入口可真实触达该调用链
|
||||
|
||||
### 链路 C:gateway 拉取消费
|
||||
- 定义:`ListGatewayPackageChanges`
|
||||
- 装配:internal route mounted
|
||||
- 调用:gateway 真实 poller / 既有刷新链调用
|
||||
- 入口:消费方真实任务/刷新入口存在,不是只留 TODO
|
||||
|
||||
### 链路 D:gateway ack 回写
|
||||
- 定义:`AckGatewayPackageChange`
|
||||
- 装配:ack route mounted
|
||||
- 调用:gateway 应用成功/失败后真实回写
|
||||
- 入口:event 状态确实从 `pending -> applied|failed`
|
||||
|
||||
## 6. published / applied 语义约束
|
||||
|
||||
状态含义必须统一:
|
||||
- candidate `published`:上游已完成运营确认
|
||||
- package `active`:上游已允许被消费
|
||||
- gateway sync `pending`:下游尚未确认
|
||||
- gateway sync `applied`:下游已确认消费并应用
|
||||
- gateway sync `failed`:下游消费尝试失败
|
||||
|
||||
禁止:
|
||||
- UI 文案把 `published` 写成“已进路由”
|
||||
- 测试把 `package active` 当成“下游已完成同步”
|
||||
- QA 把 event 表存在当成“消费闭环成立”
|
||||
|
||||
## 7. 与 NewAPI / Sub2API 的边界
|
||||
|
||||
首期不要求 NewAPI / Sub2API 实现 event ack 闭环。
|
||||
它们的首期边界为:
|
||||
- 只读拉取账号状态
|
||||
- 只读拉取已允许暴露的模型/结果
|
||||
|
||||
即:
|
||||
- gateway 是首期必须闭环的事件型消费方
|
||||
- NewAPI / Sub2API 是首期只读适配消费方
|
||||
|
||||
## 8. 门控要求
|
||||
|
||||
在下一轮 QA 设计审查或编码后审查中,若以下任一项缺失,则不得给 APPROVED:
|
||||
1. 没有明确的首期默认消费方
|
||||
2. 没有明确区分查询型链路与事件型链路
|
||||
3. 没有明确 `published != applied`
|
||||
4. 没有真实代码落点要求
|
||||
5. 没有 ack 回写要求
|
||||
|
||||
## 9. 对旧文档的覆盖关系
|
||||
|
||||
本决议用于覆盖旧文档中以下错误或过时口径:
|
||||
- “调用 gateway 管理接口热更新即完成闭环”
|
||||
- “上架成功即下游已生效”
|
||||
- “gateway 会消费”但没有实际消费者与 ack 机制
|
||||
|
||||
如与以下文件冲突,以本决议为准:
|
||||
- /home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md
|
||||
- /home/long/project/立交桥/projects/supply-intelligence/tech/INTERFACE.md
|
||||
- /home/long/project/立交桥/projects/supply-intelligence/tech/HLD.md
|
||||
- /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md(若后续未同步更新相应段落,应以本决议补充解释)
|
||||
1013
tech/HLD.md
Normal file
1013
tech/HLD.md
Normal file
File diff suppressed because it is too large
Load Diff
251
tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md
Normal file
251
tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md
Normal file
@@ -0,0 +1,251 @@
|
||||
# Supply-Intelligence 实现任务板 V1(2026-05)
|
||||
|
||||
> 状态:当前有效
|
||||
> 目的:将当前真源收敛为可直接派工的 Engineer / QA 执行板。
|
||||
> 使用前提:必须先阅读 `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`。
|
||||
> 当前总门控:APPROVED(允许进入实现)
|
||||
|
||||
## 0. 使用规则
|
||||
|
||||
1. 本任务板不是新的真源,只是执行板。
|
||||
2. 若任务板与以下文件冲突,以以下文件为准:
|
||||
- `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
|
||||
- `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
- `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
|
||||
3. 禁止 Engineer 回退到旧 PRD/HLD/INTERFACE/DEPLOYMENT 取实现口径。
|
||||
4. 每个阶段完成后,必须由 QA 按“定义 → 装配 → 调用 → 入口”四层链路做复核。
|
||||
|
||||
## 1. 当前最短闭环路径
|
||||
|
||||
目标:先做出首个最小生产闭环,而不是并行铺开所有模块。
|
||||
|
||||
推荐顺序:
|
||||
1. Phase A:探针与账号状态闭环
|
||||
2. Phase B:发现与 candidate 闭环
|
||||
3. Phase C:准入测试与 draft 生成闭环
|
||||
4. Phase D:发布与 gateway package event + ack 闭环
|
||||
5. Phase E:受控自动补给最小边界
|
||||
6. Phase F:工作台、配置、权限与完善性补齐
|
||||
|
||||
## 2. 阶段任务板
|
||||
|
||||
### Phase A:探针与账号状态闭环
|
||||
|
||||
目标:先让 supply-intelligence 能真实地产生可消费的账号状态。
|
||||
|
||||
#### A-1 数据与领域骨架
|
||||
- Owner:Engineer
|
||||
- 交付物:账号状态、探针日志、审计写入相关 domain/model/repository 基础结构
|
||||
- 完成标准:
|
||||
- 存在 `supply_intelligence_` 前缀表迁移
|
||||
- 探针结果、状态迁移、审计写入模型可落库
|
||||
- QA 验证:检查 schema、repo、service 调用链是否闭合
|
||||
|
||||
#### A-2 探针执行与统一判定
|
||||
- Owner:Engineer
|
||||
- 交付物:probe runner + evaluator
|
||||
- 完成标准:
|
||||
- 200 => success
|
||||
- 401/403 => explicit_failure
|
||||
- 429/5xx/timeout/格式突变 => inconclusive
|
||||
- QA 验证:检查 evaluator 定义、装配、调用与调度入口
|
||||
|
||||
#### A-3 状态机与账号状态快照接口
|
||||
- Owner:Engineer
|
||||
- 交付物:状态迁移逻辑 + `routing-state` 查询接口
|
||||
- 完成标准:
|
||||
- active -> suspended
|
||||
- suspended -> disabled
|
||||
- inconclusive 不触发惩罚性迁移
|
||||
- 存在真实内部查询入口
|
||||
- QA 验证:必须验证 `GET /internal/supply-intelligence/accounts/{account_id}/routing-state` 或等价入口
|
||||
|
||||
#### A-4 Phase A QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- 账号状态链路完成“定义 → 装配 → 调用 → 入口”四层验证
|
||||
- 审计写入与状态写回可追踪
|
||||
- 未引入 Redis / Temporal / WebSocket 作为首期硬依赖
|
||||
|
||||
### Phase B:发现与 candidate 闭环
|
||||
|
||||
目标:能够从已接入供应商拉模型,并产生 candidate。
|
||||
|
||||
#### B-1 供应商适配器与模型拉取
|
||||
- Owner:Engineer
|
||||
- 交付物:SupplierAdapter、registry、GetModels 拉取链路
|
||||
- 完成标准:
|
||||
- 至少支持首批目标供应商
|
||||
- 具备健康探测与模型列表读取
|
||||
- QA 验证:检查 registry 注册、装配、实际调用点
|
||||
|
||||
#### B-2 candidate 生成与去重
|
||||
- Owner:Engineer
|
||||
- 交付物:discovery service + candidate repository
|
||||
- 完成标准:
|
||||
- 能与 `supply_packages` 去重
|
||||
- 新模型生成 discovered candidate
|
||||
- 下架只生成告警,不自动改 package
|
||||
- QA 验证:检查 candidate 创建与下架告警调用链
|
||||
|
||||
#### B-3 Phase B QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- 至少一条真实发现链路打通
|
||||
- candidate 状态初始落点正确
|
||||
- 未扩张到 pricing / prediction / 向量检索
|
||||
|
||||
### Phase C:准入测试与 draft 生成闭环
|
||||
|
||||
目标:让 discovered candidate 可变成 test_passed/test_failed,并生成 draft。
|
||||
|
||||
#### C-1 admission runner
|
||||
- Owner:Engineer
|
||||
- 交付物:标准测试执行器与结果记录
|
||||
- 完成标准:
|
||||
- discovered / retry_pending 可消费
|
||||
- 失败与超时原因可追踪
|
||||
- QA 验证:检查 admission 执行入口和结果写回
|
||||
|
||||
#### C-2 draft package 生成
|
||||
- Owner:Engineer
|
||||
- 交付物:test_passed -> draft package 生成逻辑
|
||||
- 完成标准:
|
||||
- 草稿字段完整
|
||||
- candidate 状态流转闭环
|
||||
- QA 验证:检查 candidate -> draft 的真实调用链
|
||||
|
||||
#### C-3 Phase C QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- 至少一条 candidate 完成 test_passed -> draft
|
||||
- 至少一条 candidate 完成 test_failed -> failure_reason
|
||||
|
||||
### Phase D:发布与 gateway package event + ack 闭环
|
||||
|
||||
目标:打通首个 package 发布最小生产闭环。
|
||||
|
||||
#### D-1 发布服务
|
||||
- Owner:Engineer
|
||||
- 交付物:运营确认发布逻辑
|
||||
- 完成标准:
|
||||
- draft -> active
|
||||
- candidate test_passed -> published
|
||||
- QA 验证:published 语义不得等于 applied
|
||||
|
||||
#### D-2 gateway package events
|
||||
- Owner:Engineer
|
||||
- 交付物:`gateway_package_events` 写入、拉取、ack 回写接口
|
||||
- 完成标准:
|
||||
- 存在 package-changes 列表接口
|
||||
- 存在 ack 接口
|
||||
- ack 后状态可区分 pending/applied/failed
|
||||
- QA 验证:检查 definition / assembly / call / entry 四层
|
||||
|
||||
#### D-3 gateway 消费方最小入口
|
||||
- Owner:Engineer / 对接方
|
||||
- 交付物:真实 poll/apply/ack 入口
|
||||
- 完成标准:
|
||||
- 不是只定义接口
|
||||
- 至少有一个真实消费任务/入口
|
||||
- QA 验证:没有真实入口则本阶段不通过
|
||||
|
||||
#### D-4 Phase D QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- published != applied 证据充分
|
||||
- package event + ack 闭环真实存在
|
||||
- 无“同步调用 gateway 管理接口才算发布成功”的回退实现
|
||||
|
||||
### Phase E:受控自动补给最小边界
|
||||
|
||||
目标:补齐首期最小自动补给能力,但不膨胀为深自动注册。
|
||||
|
||||
#### E-1 自动补给配置与白名单约束
|
||||
- Owner:Engineer
|
||||
- 交付物:auto-supply 配置、阈值、白名单、审批边界
|
||||
- 完成标准:
|
||||
- 非白名单供应商不自动补给
|
||||
- 配置按主仓既有方式存储
|
||||
- QA 验证:检查 guardrail 是否真实生效
|
||||
|
||||
#### E-2 自动补给任务流
|
||||
- Owner:Engineer
|
||||
- 交付物:补给任务创建 / 受理 / 待验证回写
|
||||
- 完成标准:
|
||||
- 低于阈值触发任务
|
||||
- 成功后进入 pending_verify / pending_enable
|
||||
- 不允许直接 active
|
||||
- QA 验证:检查自动启用是否被阻断
|
||||
|
||||
#### E-3 fail-closed
|
||||
- Owner:Engineer
|
||||
- 交付物:通知网关/补给受理/KMS 异常阻断逻辑
|
||||
- 完成标准:
|
||||
- 失败不伪成功
|
||||
- 明文不落日志/DB
|
||||
- QA 验证:检查失败证据和审计闭环
|
||||
|
||||
#### E-4 Phase E QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- 未引入浏览器自动化注册主路径
|
||||
- 未引入验证码编排主路径
|
||||
- 未允许无审批直接自动激活
|
||||
|
||||
### Phase F:工作台、配置、权限与完善性补齐
|
||||
|
||||
目标:补足可操作性与交付完整性,但不得改变前述主链路口径。
|
||||
|
||||
#### F-1 工作台最小读写能力
|
||||
- Owner:Engineer
|
||||
- 交付物:账号页、模型页、待处理页、确认上架、忽略、手动探针
|
||||
- QA 验证:检查关键操作真实连到主链路,不是空按钮
|
||||
|
||||
#### F-2 配置与审计
|
||||
- Owner:Engineer
|
||||
- 交付物:配置读取/修改、审计日志
|
||||
- QA 验证:检查配置生效路径与审计记录
|
||||
|
||||
#### F-3 权限与内部/外部路由边界
|
||||
- Owner:Engineer
|
||||
- 交付物:认证、角色权限、内部接口与外部接口分离
|
||||
- QA 验证:检查 `/internal/supply-intelligence/` 与外部暴露面的边界
|
||||
|
||||
#### F-4 Phase F QA Gate
|
||||
- Owner:QA
|
||||
- 放行条件:
|
||||
- 权限边界清楚
|
||||
- OpenAPI 与真实路由一致
|
||||
- 不新增超范围平台化能力
|
||||
|
||||
## 3. 明确禁止的提前扩张
|
||||
|
||||
以下事项在前述主链路未闭环前,禁止插队进入主开发路径:
|
||||
- pricing / prediction / recommendation
|
||||
- 向量数据库 / 向量检索
|
||||
- SFI 仪表盘
|
||||
- WebSocket 实时推送
|
||||
- 独立 API + worker 集群重部署
|
||||
- 浏览器自动化注册主路径
|
||||
- 验证码编排主路径
|
||||
- 以 Redis / Temporal 为首期硬前置
|
||||
|
||||
## 4. QA 统一复核问题单
|
||||
|
||||
每个阶段 QA 都必须回答:
|
||||
1. 定义是否存在?
|
||||
2. 装配是否存在?
|
||||
3. 调用点是否真实存在?
|
||||
4. 外部/内部入口是否真实挂载?
|
||||
5. 是否出现实施漂移?
|
||||
6. 是否回退到了历史草案口径?
|
||||
|
||||
## 5. 工程启动建议阅读顺序
|
||||
|
||||
1. `/home/long/project/立交桥/projects/supply-intelligence/tech/CURRENT_SOURCE_OF_TRUTH_2026-05.md`
|
||||
2. `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
|
||||
3. `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
4. `/home/long/project/立交桥/projects/supply-intelligence/tech/TEST_DESIGN.md`
|
||||
5. `/home/long/project/立交桥/projects/supply-intelligence/tech/IMPLEMENTATION_TASK_BOARD_V1_2026-05.md`
|
||||
6. `/home/long/project/立交桥/projects/supply-intelligence/specs/功能清单.md`
|
||||
275
tech/INTERFACE.md
Normal file
275
tech/INTERFACE.md
Normal file
@@ -0,0 +1,275 @@
|
||||
# Supply-Intelligence 核心接口设计
|
||||
|
||||
> 状态说明(2026-05 收敛修订):本文件保留为旧版接口草案,已不再作为当前实现真源。
|
||||
> 当前接口真源以 /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md 为准。
|
||||
> 以下旧接口定义已废止,不得继续作为实现入口:
|
||||
> - pricing comparison / recommendations / predictions 相关接口
|
||||
> - 与新 candidate 状态机不一致的旧状态枚举
|
||||
> - 未区分 published 与 gateway applied 的旧消费口径
|
||||
|
||||
> 版本:v1.0 | 状态:初稿
|
||||
|
||||
---
|
||||
|
||||
## 1. 内部模块间接口
|
||||
|
||||
### 1.1 ProbeService
|
||||
|
||||
```go
|
||||
type ProbeService interface {
|
||||
// 执行单次探针
|
||||
Probe(ctx context.Context, accountID string) (*ProbeResult, error)
|
||||
// 批量探针(按供应商或全量)
|
||||
ProbeBatch(ctx context.Context, filter ProbeFilter) (*BatchProbeResult, error)
|
||||
// 获取探针结果历史
|
||||
GetProbeHistory(ctx context.Context, accountID string, limit int) ([]ProbeResult, error)
|
||||
    // 手动触发探针(运营干预)
|
||||
TriggerManualProbe(ctx context.Context, accountID string, actorID string) (*ProbeResult, error)
|
||||
}
|
||||
|
||||
type ProbeResult struct {
|
||||
AccountID string
|
||||
Status string // active suspended disabled
|
||||
RiskScore int // 0-100
|
||||
RiskReason string
|
||||
LatencyMs int
|
||||
ResponseCode int
|
||||
CheckedAt time.Time
|
||||
NextCheckAt time.Time
|
||||
}
|
||||
|
||||
type ProbeFilter struct {
|
||||
Platform *string
|
||||
Status *string
|
||||
RiskScoreMin *int
|
||||
RiskScoreMax *int
|
||||
}
|
||||
```
|
||||
|
||||
### 1.2 DiscoveryService
|
||||
|
||||
```go
|
||||
type DiscoveryService interface {
|
||||
// 执行单次全网扫描
|
||||
Scan(ctx context.Context) (*ScanResult, error)
|
||||
// 获取最近扫描结果
|
||||
GetLastScan(ctx context.Context) (*ScanResult, error)
|
||||
// 获取候选模型列表
|
||||
ListCandidates(ctx context.Context, filter CandidateFilter) ([]ModelCandidate, error)
|
||||
// 手动触发扫描
|
||||
TriggerManualScan(ctx context.Context, actorID string) (*ScanResult, error)
|
||||
// 忽略候选模型
|
||||
IgnoreCandidate(ctx context.Context, candidateID string, reason string, actorID string) error
|
||||
}
|
||||
|
||||
type ScanResult struct {
|
||||
ScannedAt time.Time
|
||||
Platforms []string
|
||||
NewModels int
|
||||
RemovedModels int
|
||||
Errors []ScanError
|
||||
}
|
||||
|
||||
type ModelCandidate struct {
|
||||
ID string
|
||||
Platform string
|
||||
ModelID string
|
||||
Status string // discovered queued testing test_passed test_failed ignored
|
||||
DiscoveredAt time.Time
|
||||
TestedAt *time.Time
|
||||
TestResult *TestResult
|
||||
}
|
||||
```
|
||||
|
||||
### 1.3 AdmissionService
|
||||
|
||||
```go
|
||||
type AdmissionService interface {
|
||||
// 执行准入测试
|
||||
RunTest(ctx context.Context, candidateID string) (*TestResult, error)
|
||||
// 获取测试结果
|
||||
GetTestResult(ctx context.Context, candidateID string) (*TestResult, error)
|
||||
// 手动确认上架(运营干预)
|
||||
Publish(ctx context.Context, candidateID string, actorID string) error
|
||||
// 强制上架(测试失败但运营确认)
|
||||
ForcePublish(ctx context.Context, candidateID string, reason string, actorID string) error
|
||||
}
|
||||
|
||||
type TestResult struct {
|
||||
CandidateID string
|
||||
Status string // passed failed
|
||||
Dimensions []TestDimension
|
||||
FailedReason *string
|
||||
ExecutedAt time.Time
|
||||
DurationMs int
|
||||
}
|
||||
|
||||
type TestDimension struct {
|
||||
Name string
|
||||
Passed bool
|
||||
Detail string
|
||||
}
|
||||
```
|
||||
|
||||
### 1.4 AccountService
|
||||
|
||||
```go
|
||||
type AccountService interface {
|
||||
// 创建账号(手动或自动)
|
||||
CreateAccount(ctx context.Context, req CreateAccountRequest) (*SupplyAccount, error)
|
||||
// 获取账号信息
|
||||
GetAccount(ctx context.Context, accountID string) (*SupplyAccount, error)
|
||||
// 更新账号状态
|
||||
UpdateStatus(ctx context.Context, accountID string, status string, reason string) error
|
||||
// 轮换密钥
|
||||
RotateKey(ctx context.Context, accountID string, actorID string) error
|
||||
// 列表账号
|
||||
ListAccounts(ctx context.Context, filter AccountFilter) ([]SupplyAccount, error)
|
||||
}
|
||||
|
||||
type SupplyAccount struct {
|
||||
ID string
|
||||
Platform string
|
||||
ProxyID string
|
||||
Status string
|
||||
RiskScore int
|
||||
APIKeyHint string // 密钥前 4 后 4
|
||||
CreatedAt time.Time
|
||||
UpdatedAt time.Time
|
||||
}
|
||||
```
|
||||
|
||||
### 1.5 HealthBoardService
|
||||
|
||||
```go
|
||||
type HealthBoardService interface {
|
||||
// 获取供应商健康大盘
|
||||
GetBoard(ctx context.Context, scope BoardScope) (*HealthBoard, error)
|
||||
// 获取模型比价报表
|
||||
GetPricingComparison(ctx context.Context, modelID string) ([]PricingComparison, error)
|
||||
// 获取供应链覆盖率
|
||||
GetCoverage(ctx context.Context) (*CoverageReport, error)
|
||||
// 获取预测分析
|
||||
GetPredictions(ctx context.Context, minConfidence float64) ([]Prediction, error)
|
||||
}
|
||||
|
||||
type HealthBoard struct {
|
||||
Accounts []AccountHealth
|
||||
Candidates []CandidateSummary
|
||||
Coverage float64
|
||||
FreshnessIndex float64
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. 外部系统集成接口
|
||||
|
||||
### 2.1 与 Bridge Gateway 集成
|
||||
|
||||
| 方法 | 路径 | 请求 | 响应 | 说明 |
|
||||
|------|------|------|------|------|
|
||||
| 查询账号状态 | `GET /internal/supply-intelligence/accounts/{id}/health` | - | `ProbeResult` | Gateway 路由决策时查询 |
|
||||
| 查询模型定价 | `GET /internal/supply-intelligence/pricing/{model_id}` | - | `PricingInfo` | 动态定价参考 |
|
||||
| 获取推荐供应商 | `GET /internal/supply-intelligence/recommendations` | `?model={model_id}&strategy=cost` | `[]Recommendation` | 智能路由推荐 |
|
||||
|
||||
### 2.2 与 supply-api 集成
|
||||
|
||||
| 方法 | 路径 | 请求 | 响应 | 说明 |
|
||||
|------|------|------|------|------|
|
||||
| 读取账号列表 | `GET /internal/supply/accounts` | - | `[]SupplyAccount` | 探针器获取待检测账号 |
|
||||
| 更新账号状态 | `POST /internal/supply/accounts/{id}/status` | `{"status":"suspended","reason":""}` | `{"success":true}` | 探针结果写回 |
|
||||
| 读取模型列表 | `GET /internal/supply/packages` | - | `[]SupplyPackage` | 扫描比对基准 |
|
||||
| 创建模型 | `POST /internal/supply/packages` | `SupplyPackage` | `{"id":""}` | 准入测试通过后上架 |
|
||||
| 获取审计日志格式 | `GET /internal/supply/audit/schema` | - | `{"schema":{}}` | 审计事件格式一致 |
|
||||
|
||||
---
|
||||
|
||||
## 3. API 接口规范
|
||||
|
||||
### 3.1 REST API 基础
|
||||
|
||||
- **基础路径**: `/api/v1/supply-intelligence/`
|
||||
- **内部路径** (集成模式): `/internal/supply-intelligence/`
|
||||
- **内容类型**: `application/json`
|
||||
- **错误响应格式**:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": "SI_PRB_4001",
|
||||
"message": "供应商账号不存在",
|
||||
"details": {}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 3.2 核心端点
|
||||
|
||||
#### 探针管理
|
||||
|
||||
| 方法 | 路径 | 描述 |
|
||||
|------|------|------|
|
||||
| GET | `/api/v1/supply-intelligence/probes` | 列表探针结果 |
|
||||
| POST | `/api/v1/supply-intelligence/probes/{account_id}` | 手动触发探针 |
|
||||
| GET | `/api/v1/supply-intelligence/probes/{account_id}/history` | 探针历史 |
|
||||
|
||||
#### 扫描与发现
|
||||
|
||||
| 方法 | 路径 | 描述 |
|
||||
|------|------|------|
|
||||
| POST | `/api/v1/supply-intelligence/discovery/scan` | 手动触发全网扫描 |
|
||||
| GET | `/api/v1/supply-intelligence/discovery/candidates` | 列表候选模型 |
|
||||
| GET | `/api/v1/supply-intelligence/discovery/candidates/{id}` | 获取候选模型详情 |
|
||||
| POST | `/api/v1/supply-intelligence/discovery/candidates/{id}/ignore` | 忽略候选模型 |
|
||||
|
||||
#### 准入测试
|
||||
|
||||
| 方法 | 路径 | 描述 |
|
||||
|------|------|------|
|
||||
| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/test` | 手动执行准入测试 |
|
||||
| GET | `/api/v1/supply-intelligence/admission/{candidate_id}/result` | 获取测试结果 |
|
||||
| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/publish` | 确认上架 |
|
||||
| POST | `/api/v1/supply-intelligence/admission/{candidate_id}/force-publish` | 强制上架 |
|
||||
|
||||
#### 账号管理
|
||||
|
||||
| 方法 | 路径 | 描述 |
|
||||
|------|------|------|
|
||||
| GET | `/api/v1/supply-intelligence/accounts` | 列表账号 |
|
||||
| POST | `/api/v1/supply-intelligence/accounts` | 创建账号 |
|
||||
| GET | `/api/v1/supply-intelligence/accounts/{id}` | 获取账号 |
|
||||
| POST | `/api/v1/supply-intelligence/accounts/{id}/rotate-key` | 轮换密钥 |
|
||||
| POST | `/api/v1/supply-intelligence/accounts/{id}/status` | 更新状态 |
|
||||
|
||||
#### 健康大盘
|
||||
|
||||
| 方法 | 路径 | 描述 |
|
||||
|------|------|------|
|
||||
| GET | `/api/v1/supply-intelligence/health-board` | 获取健康大盘 |
|
||||
| GET | `/api/v1/supply-intelligence/pricing/{model_id}/comparison` | 模型比价 |
|
||||
| GET | `/api/v1/supply-intelligence/coverage` | 供应链覆盖率 |
|
||||
| GET | `/api/v1/supply-intelligence/predictions` | 预测分析 |
|
||||
|
||||
### 3.3 错误码定义
|
||||
|
||||
| 错误码 | HTTP 状态 | 说明 |
|
||||
|---------|-----------|------|
|
||||
| `SI_PRB_4001` | 404 | 供应商账号不存在 |
|
||||
| `SI_PRB_4002` | 429 | 探针频率过高,请等待 |
|
||||
| `SI_DIS_4001` | 404 | 候选模型不存在 |
|
||||
| `SI_DIS_4002` | 409 | 候选模型状态不允许忽略 |
|
||||
| `SI_ADM_4001` | 404 | 准入测试任务不存在 |
|
||||
| `SI_ADM_4002` | 409 | 准入测试正在执行中 |
|
||||
| `SI_ADM_4003` | 400 | 测试未通过,无法上架 |
|
||||
| `SI_ACC_4001` | 404 | 账号不存在 |
|
||||
| `SI_ACC_4002` | 409 | 账号状态不允许此操作 |
|
||||
| `SI_ACC_4003` | 403 | 无权执行此操作 |
|
||||
| `SI_BRD_4001` | 400 | 查询参数无效 |
|
||||
|
||||
### 3.4 WebSocket 接口
|
||||
|
||||
**路径**: `/ws/v1/supply-intelligence/board`
|
||||
|
||||
- 运营工作台订阅后,实时推送探针结果、候选模型变更、状态变更待办。
|
||||
- 心跳间隔 30 秒。
|
||||
355
tech/TEST_DESIGN.md
Normal file
355
tech/TEST_DESIGN.md
Normal file
@@ -0,0 +1,355 @@
|
||||
# Supply Intelligence 测试设计方案
|
||||
|
||||
> 状态说明(2026-05 收敛修订):本文件已转为“收敛后测试门禁文档”,必须按新基线解释。
|
||||
> 若与旧 PRD/HLD/INTERFACE 的测试口径冲突,以 /home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md 与最新 PM 基线为准。
|
||||
> 以下旧测试口径不得继续作为放行依据:
|
||||
> - 以独立重部署、向量数据库、WebSocket、预测/比价能力为默认测试前提
|
||||
> - 将自动注册深链路视为本期不可降期的默认主路径
|
||||
> - 将 published 等同于 gateway 已消费生效
|
||||
|
||||
> 版本:v1.0
|
||||
> 日期:2026-04-27
|
||||
> 状态:初稿
|
||||
> 覆盖:AC-01 ~ AC-12、异常/边缘流程 FP-01 ~ FP-10、场景 S1~S4
|
||||
|
||||
---
|
||||
|
||||
## 1. 测试策略
|
||||
|
||||
### 1.1 测试分层模型
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ E2E Tests (黑盒) │
|
||||
│ 场景:从探针调度到状态变更、从发现到上架全链路 │
|
||||
│ 工具:Go test + httptest + 自制 E2E runner │
|
||||
└─────────────────────────────────────────────────┘
|
||||
▲
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Integration Tests (灰盒) │
|
||||
│ 场景:Service 间协作、异步任务队列、外部 API Mock│
|
||||
│ 工具:Go test + testify + sqlmock + gock │
|
||||
│ 覆盖率门槛:service ≥ 80%, handler ≥ 80% │
|
||||
└─────────────────────────────────────────────────┘
|
||||
▲
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Unit Tests (白盒) │
|
||||
│ 场景:状态机逻辑、探针评估、风险评分计算 │
|
||||
│ 工具:Go test + testify + gomock │
|
||||
│ 覆盖率门槛:domain ≥ 70% │
|
||||
└─────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### 1.2 测试通过标准
|
||||
|
||||
| 维度 | 标准 |
|
||||
|------|------|
|
||||
| 覆盖率 | domain ≥ 70%, service/handler ≥ 80% |
|
||||
| 模块 A(探针) | AC-01 ~ AC-03 全部通过 |
|
||||
| 模块 B(发现) | AC-04 ~ AC-05 全部通过 |
|
||||
| 模块 C(准入测试) | AC-06 ~ AC-07 全部通过 |
|
||||
| 模块 D(受控自动补给) | AC-08 ~ AC-09 全部通过(按首期最小边界解释) |
|
||||
| 模块 E(工作台) | AC-10 ~ AC-12 全部通过 |
|
||||
| 异常/边缘流程 | FP-01 ~ FP-10 全部有验证测试 |
|
||||
| 误报率 | 7 天连续运行 false positive ≤ 1% |
|
||||
|
||||
### 1.3 外部依赖 Mock
|
||||
|
||||
| 依赖 | Mock 方案 | 工具 |
|
||||
|------|---------|------|
|
||||
| **供应商 API(探针目标)** | Mock server 返回 200/401/403/429/500 | gock |
|
||||
| **供应商模型列表 API** | Mock 返回 JSON 模型列表 | gock |
|
||||
| **供应商补给接口 / 人工补录入口** | Mock 返回受理成功/400/500 | gock |
|
||||
| **通知网关(飞书/邮件)** | Mock server 接收通知或确认消息 | httptest |
|
||||
| **KMS 服务** | Mock 加密/解密逻辑 | 接口层 Mock |
|
||||
| **Job Scheduler / 主仓调度器** | 使用主仓调度抽象或本地调度测试桩 | go test + test double |
|
||||
| **supply-api 数据库** | sqlmock 拦截读写 | go-sqlmock |
|
||||
|
||||
---
|
||||
|
||||
## 2. 模块 A 测试用例(供应商品质探针)
|
||||
|
||||
### AC-01 探针覆盖度
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TA-01-01 | 15 分钟内探针覆盖率 ≥99% | Functional | Given 100 条 active/suspended 账号 When 15min 后统计 Then ≥99 条被探针 |
|
||||
| TA-01-02 | suspended 账号同等探针 | Functional | Given suspended 账号 When 探针执行 Then 同样被覆盖 |
|
||||
| TA-01-03 | 暂停探针账号不被覆盖 | Edge | Given 账号设置 pause_probe=true When 探针执行 Then 该账号被跳过 |
|
||||
|
||||
### AC-02 状态变更正确性
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TA-02-01 | active → suspended(1次401) | Happy Path | Given active 账号 When 连续 1 次返回 401 Then 60s 内状态变为 suspended |
|
||||
| TA-02-02 | suspended → disabled(连续3次401) | Happy Path | Given suspended 账号 When 连续 3 次返回 401 Then 60s 内状态变为 disabled |
|
||||
| TA-02-03 | 429 单次不改变状态 | Edge | Given active 账号 When 返回 429 一次 Then 15min 内状态保持 active |
|
||||
| TA-02-04 | 指数退避重试逻辑 | Functional | Given 返回 429 When 探针执行 Then 按 1→2→4min 退避重试 |
|
||||
| TA-02-05 | 状态机不允许 active→disabled 直变 | Edge | Given active 账号 When 连续 3 次失败 Then 不会直接变为 disabled(必须先 suspended) |
|
||||
| TA-02-06 | 手动暂停账号状态不自动变更 | Edge | Given 账号 pause_probe=true When 供应商返回异常 Then 状态不变 |
|
||||
|
||||
### AC-03 误报率
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TA-03-01 | 7 天误报率 ≤1% | Long Run | Given 100 条正常账号 When 连续运行 7 天 Then 误变更次数 ≤7 |
|
||||
| TA-03-02 | 探针与手动操作并发 | Concurrency | Given 手动修改状态的同时 When 探针执行 Then 乐观锁冲突处理正确 |
|
||||
|
||||
---
|
||||
|
||||
## 3. 模块 B 测试用例(全网模型发现)
|
||||
|
||||
### AC-04 新模型发现延迟
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TB-04-01 | 新模型在 2 扫描周期内被发现 | Functional | Given 供应商新增 model_id When 扫描执行 Then 2h 内 model_candidates 出现 discovered 记录 |
|
||||
| TB-04-02 | 模型比对去重正确 | Functional | Given 已存在的 active model When 全网扫描 Then 不会重复创建 candidate |
|
||||
| TB-04-03 | 模型下架告警触发 | Functional | Given active package 对应的 model_id 从供应商列表消失 When 2 扫描周期后 Then 运营工作台出现下架告警 |
|
||||
|
||||
### AC-05 已下架模型告警
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TB-05-01 | 下架模型不自动变更 package 状态 | Edge | Given model_id 消失 When 扫描执行 Then package 状态保持 active,生成告警 |
|
||||
| TB-05-02 | 分页获取完整模型列表 | Functional | Given 供应商返回分页 When 扫描 Then 正确处理所有分页数据 |
|
||||
|
||||
---
|
||||
|
||||
## 4. 模块 C 测试用例(模型准入测试)
|
||||
|
||||
### AC-06 准入测试通过
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TC-06-01 | discovered → test_passed + 草稿生成 | Happy Path | Given discovered candidate When 测试全部通过 Then 状态 test_passed,supply_package 草稿生成 |
|
||||
| TC-06-02 | 草稿字段完整性 | Functional | Given 草稿生成 When 检查字段 Then platform/model/price/suggested 正确 |
|
||||
| TC-06-03 | 准入测试 30 分钟内完成 | Performance | Given discovered candidate When 测试执行 Then ≤30min 完成 |
|
||||
|
||||
### AC-07 准入测试失败
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TC-07-01 | discovered → test_failed | Negative | Given discovered candidate When 测试返回 500 Then 30min 内状态 test_failed,failure_reason 非空 |
|
||||
| TC-07-02 | 超时视为失败 | Edge | Given 准入测试执行中 When 某测试用例 60s 无响应 Then 整体标记为 test_failed,reason = timeout |
|
||||
| TC-07-03 | 测试账号 suspended 时任务失败 | Edge | Given 测试账号变为 suspended When 准入测试执行 Then 任务标记 test_failed,reason = test_account_unavailable |
|
||||
| TC-07-04 | ignore 候选模型 7 天内不重扫 | Edge | Given 运营将 candidate 标记为 ignore When 7 天内扫描 Then 该 candidate 不出现 |
|
||||
|
||||
---
|
||||
|
||||
## 5. 模块 D 测试用例(受控自动补给)
|
||||
|
||||
### AC-08 受控自动补给触发与落单
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TD-08-01 | 可用账号数 < 阈值时触发补给任务 | Functional | Given 白名单供应商的可用账号数 < 阈值 When 系统检测 Then 10min 内生成补给任务或补给申请 |
|
||||
| TD-08-02 | 非白名单供应商不自动补给 | Guardrail | Given 非白名单供应商账号不足 When 系统检测 Then 不自动触发补给,仅记录告警或人工待办 |
|
||||
| TD-08-03 | 补给结果进入待验证/待启用 | Happy Path | Given 补给流程受理成功 When 补给完成 Then 新账号或候选资源进入 pending_verify / pending_enable 等受控状态,而非直接 active |
|
||||
| TD-08-04 | 补给结果关联 task | Functional | Given 补给任务完成 When 检查任务记录 Then auto_supply_tasks 或等价任务状态为 completed/pending_verify |
|
||||
|
||||
### AC-09 受控自动补给 fail-closed
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TD-09-01 | 通知/补给网关不可用时 fail-closed | Resilience | Given 通知网关或补给受理接口返回 503 When 补给执行 Then 60s 内任务 failed,审计日志完整,无虚假成功 |
|
||||
| TD-09-02 | 补给接口返回 400 | Edge | Given 补给请求参数非法或资源已存在 When 补给执行 Then 任务 failed,不重复盲目重试 |
|
||||
| TD-09-03 | KMS 不可用时 fail-closed | Resilience | Given KMS 超时 When 凭证加密步骤执行 Then 60s 内任务 failed,明文凭证不出现在日志/DB |
|
||||
| TD-09-04 | 无审批/越权配置时阻断自动启用 | Guardrail | Given 缺少审批或超出受控边界 When 补给结果回写 Then 保持 pending_verify / pending_enable,不允许直接进入 active |
|
||||
|
||||
---
|
||||
|
||||
## 6. 模块 E 测试用例(运营工作台)
|
||||
|
||||
### AC-10 审计日志完整性
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TE-10-01 | 状态变更 5s 内写入审计 | Performance | Given 状态变更 When 执行完成 Then ≤5s 审计记录存在 |
|
||||
| TE-10-02 | 审计字段完整性 | Functional | Given 审计记录 When 检查 Then 包含 object_type/id/action/before_state/after_state/request_id |
|
||||
| TE-10-03 | 探针执行记录审计 | Functional | Given 探针执行 When 完成 Then probe_execution_logs 有记录 |
|
||||
|
||||
### AC-11 运营工作台干预
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TE-11-01 | 确认上架 draft → active | Happy Path | Given draft package When 点击确认 Then 3s 内变为 active |
|
||||
| TE-11-02 | 忽略模型 7 天内不出现 | Edge | Given 待处理列表中的 candidate When 运营点击忽略 Then 7 天内 candidate 不出现在待处理列表 |
|
||||
| TE-11-03 | 手动触发单账号探针 | Functional | Given 指定的单个账号 When 运营手动触发探针 Then 立即执行探针,结果可见 |
|
||||
| TE-11-04 | 并发操作冲突处理 | Concurrency | Given 同一待处理对象 When 同时点击确认和忽略 Then 返回 409,只一个生效 |
|
||||
|
||||
### AC-12 配置热更新
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TE-12-01 | 探针周期修改 60s 内生效 | Functional | Given 修改探针周期 When 下发配置 Then 60s 后新周期生效 |
|
||||
|
||||
---
|
||||
|
||||
## 7. 异常/边缘流程测试(FP-01 ~ FP-10)
|
||||
|
||||
| 用例 ID | 场景 | 验证点 | 预期行为 |
|
||||
|---------|------|-------|---------|
|
||||
| TFP-01 | 供应商探针 DNS/TCP 超时 | 状态不变 | 标记 inconclusive,指数退避,不触发状态变更 |
|
||||
| TFP-02 | 供应商返回空/格式突变 | 状态不变 | 解析失败标记 inconclusive,记录日志 |
|
||||
| TFP-03 | 探针与手动操作并发 | 乐观锁 | 更新失败,探针记录冲突日志,下次覆盖 |
|
||||
| TFP-04 | 准入测试期间测试账号 suspended | 任务标记失败 | 任务标记 test_failed,reason = test_account_unavailable |
|
||||
| TFP-05 | 补给接口返回 400 或资源冲突 | 任务失败 | 任务 failed,不重复盲目重试,审计记录完整 |
|
||||
| TFP-06 | 补给成功但验证/启用失败 | pending 不变 | 账号保持 pending_verify/pending_enable,任务标记 verify_failed,触发告警 |
|
||||
| TFP-07 | 供应商模型列表分页 500 | 整体不中断 | 已获取部分正常处理,失败页下次重试 |
|
||||
| TFP-08 | 探针期间数据库不可用 | 任务失败重试 | 探针任务失败,连续 5 次失败后暂停批次,触发系统告警 |
|
||||
| TFP-09 | 确认上架与忽略并发 | 409 冲突 | 只有一个生效,返回 409 |
|
||||
| TFP-10 | KMS 不可用时注册 | 明文不落盘 | 加密步骤阻塞/失败,明文凭证不出现 |
|
||||
|
||||
---
|
||||
|
||||
## 8. 灰度发布验证计划
|
||||
|
||||
### 8.1 各 Phase 验证内容
|
||||
|
||||
| Phase | 交付内容 | 通过标准 | 依赖项 |
|
||||
|-------|---------|---------|--------|
|
||||
| **Phase 1** | 模块 A(探针)+ 模块 E 只读视图 | AC-01~AC-03, AC-10~AC-11(只读部分) | 主仓调度能力或本地调度测试桩 |
|
||||
| **Phase 2** | 模块 B(发现)+ 模块 C(准入测试) | AC-04~AC-07 | Phase 1 + 供应商 API 清单 |
|
||||
| **Phase 3** | 模块 D(受控自动补给)+ 模块 E 完整 | AC-08~AC-12 | Phase 1+2 + KMS/通知与补给受理链路就绪 |
|
||||
|
||||
### 8.2 灰度门禁
|
||||
|
||||
每次 Phase 升级前:
|
||||
- [ ] 全部 AC 测试用例通过
|
||||
- [ ] 覆盖率达标
|
||||
- [ ] 灰度开关独立验证(每个开关可单独打开/关闭)
|
||||
- [ ] 回滚条件演练(误报率>5% / 状态变更导致错误率上升>2%)
|
||||
|
||||
---
|
||||
|
||||
## 9. 回归测试集
|
||||
|
||||
### 9.1 快速回归(每次 PR,~10 分钟)
|
||||
|
||||
```
|
||||
TA-01-01, TA-02-01, TA-02-02, TA-02-05,
|
||||
TB-04-01, TC-06-01, TC-07-01,
|
||||
TD-08-01, TD-09-01,
|
||||
TE-10-01, TE-11-01
|
||||
共 11 条
|
||||
```
|
||||
|
||||
### 9.2 完整回归(Phase 升级,~45 分钟)
|
||||
|
||||
```
|
||||
TA-01-01 ~ TA-03-02(全 11 条)
|
||||
TB-04-01 ~ TB-05-02(全 5 条)
|
||||
TC-06-01 ~ TC-07-04(全 7 条)
|
||||
TD-08-01 ~ TD-09-04(全 8 条)
|
||||
TE-10-01 ~ TE-12-01(全 8 条)
|
||||
TFP-01 ~ TFP-10(全 10 条)
|
||||
共 49 条
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. 技术栈与集成约束验证
|
||||
|
||||
### 10.1 统一技术栈与双运行模式验证
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TSI-RUN-01 | 独立运行模式启动 | Happy Path | Given 独立 `config.yaml` 与独立数据库/Redis When 启动 `cmd/supply-intelligence/main.go` Then `/actuator/health/ready` 返回 200,`/api/v1/supply-intelligence/*` 可访问 |
|
||||
| TSI-RUN-02 | 集成运行模式挂载 | Integration | Given supply-api 主进程加载 `IntegrationPlugin` When 启动 Then `/internal/supply-intelligence/*` 路由与后台任务注册成功 |
|
||||
| TSI-RUN-03 | 配置分离加载 | Functional | Given 独立模式与集成模式分别启动 When 读取配置 Then 独立模式只加载自身配置,集成模式合并主项目配置且不覆盖无关模块 |
|
||||
| TSI-RUN-04 | 数据库前缀隔离 | Structural | Given 执行迁移 When 检查 schema Then 仅创建 `supply_intelligence_` 前缀表 |
|
||||
|
||||
### 10.2 独立运行与集成运行验证
|
||||
|
||||
### 10.3 IntegrationPlugin 与模块挂载验证
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TSI-PLG-01 | IntegrationPlugin 注册 HTTP 路由 | Integration | Given 集成模式 When 插件注册 Then Probe/Discovery/Admission/AutoReg/OpsWorkBench 路由挂载成功 |
|
||||
| TSI-PLG-02 | 模块开关生效 | Functional | Given `enabled_modules` 关闭某模块 When 启动 Then 对应路由/worker 不注册,其他模块可用 |
|
||||
| TSI-PLG-03 | 集成模式共享资源 | Integration | Given supply-api 注入共享 DB/Redis/logger When 插件启动 Then 使用共享资源且不重复初始化冲突依赖 |
|
||||
|
||||
### 10.4 OpenAPI 契约验证
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TSI-OAS-01 | OpenAPI 文档可访问 | Functional | Given 服务启动 When 请求 `/openapi.json` 或 `/docs` Then 返回 200 且包含探针、发现、准入测试、运营工作台接口 |
|
||||
| TSI-OAS-02 | 路由与 OpenAPI 一致 | Contract | Given 导出的 OpenAPI 文档 When 对照 HTTP 路由 Then 请求/响应/错误码与实现一致,无缺失公开接口 |
|
||||
| TSI-OAS-03 | 集成前缀可配置 | Contract | Given 集成模式配置内部前缀 When 导出文档 Then 文档反映 `/internal/supply-intelligence/` 前缀或明确区分暴露面 |
|
||||
|
||||
### 10.5 NewAPI / Sub2API 适配层验证
|
||||
|
||||
| 用例 ID | 描述 | 类型 | 验证条件 |
|
||||
|---------|------|------|---------|
|
||||
| TSI-ADP-01 | 供应商状态同步适配 | Contract | Given NewAPI/Sub2API 拉取供应商状态 When 调用标准化接口 Then 返回字段稳定、延迟满足约束、状态映射正确 |
|
||||
| TSI-ADP-02 | 模型列表推送适配 | Contract | Given 外部系统拉取模型列表 When 调用 `/models` Then 只返回已发现且允许暴露的数据,字段与约定一致 |
|
||||
| TSI-ADP-03 | 账号状态适配边界 | Contract | Given 外部系统读取账号状态 When 通过适配层执行 Then 仅返回允许暴露的状态字段,不暴露凭证/探针日志/内部风险细节 |
|
||||
|
||||
---
|
||||
|
||||
## 11. 发布门禁与阶段结论
|
||||
|
||||
### 11.1 发布门禁检查表
|
||||
|
||||
以下门禁项全部通过前,不得认定达到生产要求:
|
||||
|
||||
- [ ] 独立运行 / 集成运行两种模式均完成启动验证,路由、worker、内部接口真实挂载
|
||||
- [ ] `IntegrationPlugin`、OpenAPI、NewAPI/Sub2API 适配层合同测试全部通过
|
||||
- [ ] 凭证保护经日志/DB/异常路径验证无明文,KMS 不可用时 fail-closed
|
||||
- [ ] 受控自动补给链路具备白名单限制、阈值触发、审批/待验证边界、重复提交阻断与审计留痕
|
||||
- [ ] 状态机迁移、审计写入、Gateway package event + ack、外部只读适配链路完成一致性验证
|
||||
- [ ] 首次生产放量场景遵循“只告警不自动变更状态”,并验证撤销与人工接管流程
|
||||
- [ ] 调度器失效、补给受理失败、外部适配越权、错误状态传播四类高风险回归通过
|
||||
- [ ] 至少一条探针、一条模型发现、一条准入测试、一条受控自动补给链路完成端到端验证
|
||||
|
||||
### 11.2 阶段门控结论
|
||||
|
||||
**当前结论:APPROVED(设计已可进入 Engineer 实现)**
|
||||
|
||||
**结论解释:**
|
||||
- 本文档首页所述“收敛后测试门禁文档”口径已生效。
|
||||
- 当前放行依据不再是旧 HLD/PRD/INTERFACE/DEPLOYMENT,而是:
|
||||
- `/home/long/project/立交桥/projects/supply-intelligence/tech/BASELINE_TECHLEAD_V2.md`
|
||||
- `/home/long/project/立交桥/projects/supply-intelligence/tech/GATEWAY_CONSUMER_DECISION_2026-05.md`
|
||||
- 因此,本节不再沿用历史性 `REQUEST_CHANGES` 作为当前总门控。
|
||||
|
||||
**当前仍需在实现阶段持续验证的高风险项:**
|
||||
- 凭证保护必须能证明 fail-closed,且日志/审计/异常路径无明文泄漏。
|
||||
- 状态同步、审计写入、package event + ack 必须形成可追踪闭环。
|
||||
- 关键链路必须能完成“定义 → 装配 → 调用 → 入口”四层验证,不能只停留在接口存在。
|
||||
- 自动补给按首期最小边界解释:允许白名单供应商、阈值触发、任务化补给、待验证/待启用;不把浏览器自动化深链路作为首期阻断门槛。
|
||||
|
||||
**实现前约束:**
|
||||
- 若实现与 `BASELINE_TECHLEAD_V2.md` 或 `GATEWAY_CONSUMER_DECISION_2026-05.md` 冲突,应以两者为准并回退旧测试假设。
|
||||
- 若下游消费方未落真实 poll/apply/ack 入口,不得宣称 package 发布链路已完成。
|
||||
- 若 NewAPI/Sub2API 适配超出“只读/受控暴露边界”,应判定为实施漂移。
|
||||
|
||||
**重新转为 REQUEST_CHANGES / BLOCKED 的条件:**
|
||||
- 实现阶段发现 published/applied 再次混淆。
|
||||
- gateway 消费闭环缺少真实消费方入口或 ack 回写。
|
||||
- 自动补给被重新扩张为首期深自动注册硬门槛。
|
||||
- 核心链路无法提供四层调用链证据。
|
||||
|
||||
---
|
||||
|
||||
## 12. 性能与安全测试
|
||||
|
||||
### 12.1 性能基准
|
||||
|
||||
| 指标 | 目标值 | 测试方法 |
|
||||
|------|-------|---------|
|
||||
| 探针执行(单账号) | <2s | 计时 1000 次取 P99 |
|
||||
| 全网扫描(10 供应商) | <5min | 从调度触发到完成计 |
|
||||
| 准入测试(5 用例) | <30min P99 | 从 discovered 到 test_passed/failed |
|
||||
| 供应商状态查询 API | <50ms P99 | 并发 100 请求 |
|
||||
| 审计日志写入 | <1s P99 | 单次变更后计时 |
|
||||
|
||||
### 12.2 安全测试
|
||||
|
||||
| 测试项 | 方法 | 验证 |
|
||||
|-------|------|------|
|
||||
| 凭证明文保护 | 检查日志/DB/内存 dump | 无明文凭证 |
|
||||
| KMS 密钥轮换 | Mock KMS 不可用 | fail-closed,不暴露明文 |
|
||||
| 供应商 API 限流绕过 | 连续探针超限 | 正确触发 rate limit |
|
||||
| 注册接口重复提交 | 并发同一邮箱注册 | 只有一次成功,其余 failed |
|
||||
98
test/CASES.md
Normal file
98
test/CASES.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# Supply-Intelligence 测试用例
|
||||
|
||||
> 版本:v1.0 | 状态:初稿
|
||||
|
||||
---
|
||||
|
||||
## AC-01 探针覆盖度
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-01.1 | 99% 覆盖率达标 | 插入 100 条测试账号 | 1. 等待 15 分钟 2. 统计探针日志 | 探针覆盖率 ≥ 99% | P0 |
|
||||
| TC-01.2 | 探针周期可配置 | 已配置探针任务 | 1. 修改探针周期为 3 分钟 2. 等待 60 秒 | 周期在 60 秒内生效 | P1 |
|
||||
|
||||
## AC-02 状态变更正确性
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-02.1 | active → suspended | 账号为 active | 1. Mock 返回 401 | 60s 内状态变为 suspended | P0 |
|
||||
| TC-02.2 | suspended → disabled | 账号为 suspended | 1. Mock 连续 3 次返回 401 | 60s 内状态变为 disabled | P0 |
|
||||
| TC-02.3 | 429 不变更 | 账号为 active | 1. Mock 返回 429 | 15 分钟内状态保持 active | P0 |
|
||||
| TC-02.4 | 状态机违规 | 账号为 active | 1. 尝试直接变更为 disabled | 被拒绝,返回错误码 | P0 |
|
||||
| TC-02.5 | 状态恢复 | 账号为 suspended | 1. Mock 返回 200 | 60s 内状态变为 active | P1 |
|
||||
|
||||
## AC-03 误报率
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-03.1 | 7 天误报率 | 全部账号正常 | 1. 运行 7 天 2. 统计状态误变更次数 | 误报率 ≤ 1% | P0 |
|
||||
|
||||
## AC-04 新模型发现延迟
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-04.1 | 2h 内发现 | 已对接供应商 | 1. T0 在 Mock 响应中新增 model_id 2. T0+2h 查询数据库 | candidate 存在,status=discovered | P0 |
|
||||
|
||||
## AC-05 已下架模型告警
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-05.1 | 不自动下架 | package 为 active | 1. 从 Mock 中移除 model_id 2. 等待 2h | package 状态保持 active | P0 |
|
||||
| TC-05.2 | 生成告警待办 | package 为 active | 1. 从 Mock 中移除 model_id 2. 等待 2h | 运营工作台出现告警 | P0 |
|
||||
|
||||
## AC-06 准入测试通过
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-06.1 | 测试通过 | candidate 为 discovered | 1. 触发准入测试 2. 等待 30min | 状态变为 test_passed,生成 package 草稿 | P0 |
|
||||
| TC-06.2 | 草稿字段完整 | 测试通过后 | 1. 查询生成的 package 草稿 | 包含 platform、model、price 字段 | P1 |
|
||||
|
||||
## AC-07 准入测试失败
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-07.1 | 接口返回 500 | candidate 为 discovered | 1. Mock 返回 500 2. 等待测试完成 | 状态变为 test_failed,failure_reason 非空 | P0 |
|
||||
| TC-07.2 | 前端展示 | candidate 为 test_failed | 1. 访问运营工作台 | 展示失败详情 | P1 |
|
||||
|
||||
## AC-08 自动注册成功
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-08.1 | 注册流程 | 已配置白名单 | 1. 触发自动注册 2. 等待 30min | 新增 active 账号 | P0 |
|
||||
| TC-08.2 | 密钥加密 | 注册完成后 | 1. 查询数据库 | API Key 已加密存储 | P1 |
|
||||
|
||||
## AC-09 自动注册 fail-closed
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-09.1 | 网关不可用 | 配置启用 | 1. Mock 邮件网关返回 503 2. 等待 60s | 任务状态为 failed,审计日志记录失败 | P0 |
|
||||
| TC-09.2 | 不返回成功 | 注册失败后 | 1. 检查对上游响应 | 不返回成功状态码 | P0 |
|
||||
|
||||
## AC-10 审计日志完整性
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-10.1 | 字段完整性 | 触发操作后 | 1. 5s 内查询审计日志 | 包含所有必要字段 | P0 |
|
||||
| TC-10.2 | 自动化操作审计 | 自动化操作后 | 1. 查询审计日志 | 存在对应记录 | P0 |
|
||||
|
||||
## AC-11 运营工作台干预
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-11.1 | 一键上架 | package 为 draft | 1. 点击确认上架 2. 等待 3s | 状态变为 active | P0 |
|
||||
| TC-11.2 | 忽略模型 | candidate 为 discovered | 1. 点击忽略 | 不在待处理列表中,7 天后恢复 | P0 |
|
||||
|
||||
## AC-12 配置热更新
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-12.1 | 探针周期热更新 | 已运行 | 1. 修改配置 2. 观察调度行为 | 60s 内生效 | P1 |
|
||||
|
||||
## 边缘场景 / 失败路径
|
||||
|
||||
| 用例编号 | 名称 | 前置条件 | 测试步骤 | 预期结果 | 优先级 |
|
||||
|---------|------|---------|---------|---------|--------|
|
||||
| TC-E1 | DNS 失败 | 探针任务已配置 | 1. 模拟 DNS 解析失败 | 状态不变更,记录日志 | P1 |
|
||||
| TC-E2 | 空响应体 | 探针任务已配置 | 1. Mock 返回空 JSON | 状态不变更,记录日志 | P1 |
|
||||
| TC-E3 | 并发乐观锁 | 探针任务已配置 | 1. 同时触发手动更新和探针 | 乐观锁冲突,探针记录失败 | P1 |
|
||||
| TC-E4 | 测试账号不可用 | 准入测试进行中 | 1. 将测试账号标记为 suspended | 测试标记为 failed,原因为 test_account_unavailable | P1 |
|
||||
75
test/STRATEGY.md
Normal file
75
test/STRATEGY.md
Normal file
@@ -0,0 +1,75 @@
|
||||
# Supply-Intelligence 测试策略
|
||||
|
||||
> 版本:v1.0 | 状态:初稿
|
||||
|
||||
---
|
||||
|
||||
## 1. 测试目标
|
||||
|
||||
| 目标 | 指标 | 验证方式 |
|
||||
|------|------|---------|
|
||||
| 功能正确性 | 所有 AC 通过率 100% | 每个 AC 至少 1 正向 + 1 负向测试用例 |
|
||||
| 状态机正确性 | 状态迁移符合状态图 | 所有状态转换路径覆盖 |
|
||||
| 安全性 | 无越权、审计日志完整 | 渗透测试 + 审计追溯 |
|
||||
| 性能 | 探针 P99 < 50ms,扫描完成 < 30min | 负载测试 |
|
||||
|
||||
## 2. 测试层级
|
||||
|
||||
```
|
||||
├── 单元测试 (Unit Test)
|
||||
│ ├── 状态机转换逻辑
|
||||
│ ├── 探针策略逻辑
|
||||
│ ├── 扫描比对算法
|
||||
│ └── 准入测试判定逻辑
|
||||
│
|
||||
├── 集成测试 (Integration Test)
|
||||
│ ├── 数据库交互(状态变更、审计日志)
|
||||
│ ├── Redis 缓存交互
|
||||
│ ├── 供应商 API Mock
|
||||
│ ├── 邮件/短信网关 Mock
|
||||
│ └── 向量数据库检索
|
||||
│
|
||||
├── E2E 测试 (End-to-End Test)
|
||||
│ ├── 探针到状态变更整条链路
|
||||
│ ├── 扫描到候选模型整条链路
|
||||
│ ├── 准入测试到上架整条链路
|
||||
│ └── 账号注册整条链路
|
||||
│
|
||||
└── 稳定性测试 (Stability Test)
|
||||
├── 7 天连续探针运行
|
||||
└── 高并发扫描/测试
|
||||
```
|
||||
|
||||
## 3. 测试工具
|
||||
|
||||
| 层级 | 工具 | 说明 |
|
||||
|------|------|------|
|
||||
| 单元测试 | Go testing + testify + mockery | 覆盖率门槛 domain ≥ 70%、service ≥ 80% |
|
||||
| 数据库测试 | testcontainers-go (PostgreSQL) | 每次测试启动独立容器 |
|
||||
| 缓存测试 | miniredis | 轻量级 Redis Mock |
|
||||
| 供应商 Mock | gock / httptest | 模拟供应商 API 响应 |
|
||||
| E2E 测试 | 自定义 Go E2E 框架 | 启动完整服务 + 数据库 |
|
||||
| 稳定性测试 | 自定义脚本 | 7 天连续运行监控 |
|
||||
|
||||
## 4. 测试环境
|
||||
|
||||
| 环境 | 用途 | 数据 |
|
||||
|------|------|------|
|
||||
| 本地开发 | 单元 + 快速集成测试 | 测试数据生成 |
|
||||
| CI | 自动化单元 + 集成测试 | 测试数据生成 |
|
||||
| 测试环境 | E2E + 性能基准 | 模拟生产数据 |
|
||||
| 生产前 | 稳定性验证 | 生产数据副本(脱敏) |
|
||||
| 生产环境 | 灰度监控 | 真实数据 |
|
||||
|
||||
## 5. 测试数据管理
|
||||
|
||||
- 供应商 API 响应使用 `test/fixtures/supplier_responses/` 下的 JSON 文件管理。
|
||||
- 测试用例集使用 `test/fixtures/test_cases/` 下的 YAML 文件管理。
|
||||
- 每个测试用例自洁,启动前加载固定数据集,结束后清理。
|
||||
|
||||
## 6. 特殊测试要求
|
||||
|
||||
- **探针测试**:必须覆盖 429、401、403、500、503、超时、空响应、DNS 失败、TCP 超时等所有常见异常场景。
|
||||
- **状态机测试**:必须覆盖所有状态转换路径,特别是 `active` → `disabled` 的违规路径必须被拒绝。
|
||||
- **审计测试**:所有自动化操作必须在 5 秒内生成审计记录,且字段完整。
|
||||
- **并发测试**:探针任务与运营人员手动操作的并发场景必须测试,验证乐观锁机制。
|
||||
Reference in New Issue
Block a user