From 256975e10c770a6ed7057a13fb54a08842a729ea Mon Sep 17 00:00:00 2001 From: phamnazage-jpg Date: Fri, 15 May 2026 22:34:22 +0800 Subject: [PATCH] feat(audit): add pricing signature guards and reporting Add snapshot, signature, and drift guard support for Vertex AI, Cloudflare Workers AI, and Perplexity API, backed by a queryable audit table and recent-window view. This commit also wires the audit query layer into daily signal materialization and report generation so structure drift becomes a first-class signal instead of a log-only artifact. --- .../013_official_import_signature_audit.sql | 31 + ...ial_import_signature_audit_recent_view.sql | 57 + scripts/cloudflare_pricing_import_runner.go | 66 ++ scripts/cloudflare_pricing_lib.go | 108 ++ scripts/cloudflare_pricing_signature_guard.go | 51 + .../cloudflare_pricing_signature_guard_lib.go | 136 +++ ...cloudflare_pricing_signature_guard_test.go | 102 ++ scripts/cloudflare_pricing_snapshot_lib.go | 24 + scripts/cloudflare_pricing_snapshot_test.go | 90 ++ scripts/generate_daily_report.go | 507 ++++++++- scripts/generate_daily_report_test.go | 364 ++++++- scripts/import_cloudflare_pricing.go | 58 ++ scripts/import_cloudflare_pricing_test.go | 81 ++ scripts/import_perplexity_pricing.go | 58 ++ scripts/import_perplexity_pricing_test.go | 80 ++ scripts/import_vertex_pricing.go | 58 ++ scripts/import_vertex_pricing_test.go | 171 +++ scripts/live_pricing_smoke_runner.go | 222 ++++ scripts/live_pricing_smoke_runner_test.go | 104 ++ scripts/materialize_daily_signals.go | 978 ++++++++++++++++++ scripts/materialize_daily_signals_test.go | 33 + .../official_import_signature_audit_lib.go | 111 ++ ...ficial_import_signature_audit_query_lib.go | 196 ++++ scripts/perplexity_pricing_import_runner.go | 66 ++ scripts/perplexity_pricing_lib.go | 150 +++ scripts/perplexity_pricing_signature_guard.go | 51 + .../perplexity_pricing_signature_guard_lib.go | 136 +++ ...perplexity_pricing_signature_guard_test.go | 102 ++ 
scripts/perplexity_pricing_snapshot_lib.go | 24 + scripts/perplexity_pricing_snapshot_test.go | 92 ++ scripts/pricing_markdown_snapshot_lib.go | 251 +++++ .../query_official_import_signature_audit.go | 43 + ...ry_official_import_signature_audit_test.go | 80 ++ scripts/rebuild_historical_report.sh | 2 +- scripts/signature_guard_common.go | 16 + scripts/testdata/cloudflare_pricing_sample.md | 10 + scripts/testdata/perplexity_pricing_sample.md | 9 + scripts/testdata/vertex_pricing_sample.html | 73 ++ scripts/verify_phase3.sh | 2 +- scripts/vertex_pricing_import_runner.go | 66 ++ scripts/vertex_pricing_lib.go | 277 +++++ scripts/vertex_pricing_signature_guard.go | 51 + scripts/vertex_pricing_signature_guard_lib.go | 159 +++ .../vertex_pricing_signature_guard_test.go | 236 +++++ scripts/vertex_pricing_snapshot_lib.go | 173 ++++ scripts/vertex_pricing_snapshot_test.go | 101 ++ 46 files changed, 5822 insertions(+), 34 deletions(-) create mode 100644 db/migrations/013_official_import_signature_audit.sql create mode 100644 db/migrations/014_official_import_signature_audit_recent_view.sql create mode 100644 scripts/cloudflare_pricing_import_runner.go create mode 100644 scripts/cloudflare_pricing_lib.go create mode 100644 scripts/cloudflare_pricing_signature_guard.go create mode 100644 scripts/cloudflare_pricing_signature_guard_lib.go create mode 100644 scripts/cloudflare_pricing_signature_guard_test.go create mode 100644 scripts/cloudflare_pricing_snapshot_lib.go create mode 100644 scripts/cloudflare_pricing_snapshot_test.go create mode 100644 scripts/import_cloudflare_pricing.go create mode 100644 scripts/import_cloudflare_pricing_test.go create mode 100644 scripts/import_perplexity_pricing.go create mode 100644 scripts/import_perplexity_pricing_test.go create mode 100644 scripts/import_vertex_pricing.go create mode 100644 scripts/import_vertex_pricing_test.go create mode 100644 scripts/live_pricing_smoke_runner.go create mode 100644 
scripts/live_pricing_smoke_runner_test.go create mode 100644 scripts/materialize_daily_signals.go create mode 100644 scripts/materialize_daily_signals_test.go create mode 100644 scripts/official_import_signature_audit_lib.go create mode 100644 scripts/official_import_signature_audit_query_lib.go create mode 100644 scripts/perplexity_pricing_import_runner.go create mode 100644 scripts/perplexity_pricing_lib.go create mode 100644 scripts/perplexity_pricing_signature_guard.go create mode 100644 scripts/perplexity_pricing_signature_guard_lib.go create mode 100644 scripts/perplexity_pricing_signature_guard_test.go create mode 100644 scripts/perplexity_pricing_snapshot_lib.go create mode 100644 scripts/perplexity_pricing_snapshot_test.go create mode 100644 scripts/pricing_markdown_snapshot_lib.go create mode 100644 scripts/query_official_import_signature_audit.go create mode 100644 scripts/query_official_import_signature_audit_test.go create mode 100644 scripts/signature_guard_common.go create mode 100644 scripts/testdata/cloudflare_pricing_sample.md create mode 100644 scripts/testdata/perplexity_pricing_sample.md create mode 100644 scripts/testdata/vertex_pricing_sample.html create mode 100644 scripts/vertex_pricing_import_runner.go create mode 100644 scripts/vertex_pricing_lib.go create mode 100644 scripts/vertex_pricing_signature_guard.go create mode 100644 scripts/vertex_pricing_signature_guard_lib.go create mode 100644 scripts/vertex_pricing_signature_guard_test.go create mode 100644 scripts/vertex_pricing_snapshot_lib.go create mode 100644 scripts/vertex_pricing_snapshot_test.go diff --git a/db/migrations/013_official_import_signature_audit.sql b/db/migrations/013_official_import_signature_audit.sql new file mode 100644 index 0000000..22d4e5d --- /dev/null +++ b/db/migrations/013_official_import_signature_audit.sql @@ -0,0 +1,31 @@ +-- 官方导入结构签名审计 + +CREATE TABLE IF NOT EXISTS official_import_signature_audit ( + id BIGSERIAL PRIMARY KEY, + source_key TEXT NOT NULL, 
+ checked_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + status TEXT NOT NULL, + drift_detected BOOLEAN NOT NULL DEFAULT FALSE, + baseline_initialized BOOLEAN NOT NULL DEFAULT FALSE, + source_url TEXT, + fixture_path TEXT, + snapshot_path TEXT, + signature_path TEXT, + baseline_path TEXT, + structure_sha256 TEXT, + previous_structure_sha256 TEXT, + byte_size INTEGER, + signature_payload JSONB, + error_message TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_official_import_signature_audit_source_checked_at + ON official_import_signature_audit(source_key, checked_at DESC); +CREATE INDEX IF NOT EXISTS idx_official_import_signature_audit_status + ON official_import_signature_audit(status); +CREATE INDEX IF NOT EXISTS idx_official_import_signature_audit_structure_sha256 + ON official_import_signature_audit(structure_sha256); + +COMMENT ON TABLE official_import_signature_audit IS '官方导入结构签名巡检审计表,记录每次 guard 抓取、签名与漂移判定结果'; +COMMENT ON COLUMN official_import_signature_audit.signature_payload IS '当前抓取页面的结构签名 JSONB 快照'; diff --git a/db/migrations/014_official_import_signature_audit_recent_view.sql b/db/migrations/014_official_import_signature_audit_recent_view.sql new file mode 100644 index 0000000..b79b437 --- /dev/null +++ b/db/migrations/014_official_import_signature_audit_recent_view.sql @@ -0,0 +1,57 @@ +-- 官方导入结构签名近期变化视图 + +CREATE OR REPLACE VIEW official_import_signature_audit_recent_view AS +WITH ordered AS ( + SELECT + a.*, + ROW_NUMBER() OVER ( + PARTITION BY a.source_key + ORDER BY a.checked_at DESC, a.id DESC + ) AS recent_rank, + LAG(a.structure_sha256) OVER ( + PARTITION BY a.source_key + ORDER BY a.checked_at, a.id + ) AS previous_observed_structure_sha256, + LAG(a.checked_at) OVER ( + PARTITION BY a.source_key + ORDER BY a.checked_at, a.id + ) AS previous_checked_at + FROM official_import_signature_audit a +) +SELECT + id, + source_key, + checked_at, + status, + drift_detected, + baseline_initialized, + 
source_url, + fixture_path, + snapshot_path, + signature_path, + baseline_path, + structure_sha256, + previous_structure_sha256, + previous_observed_structure_sha256, + byte_size, + signature_payload, + error_message, + created_at, + recent_rank, + CASE + WHEN previous_observed_structure_sha256 IS NULL THEN FALSE + WHEN previous_observed_structure_sha256 IS DISTINCT FROM structure_sha256 THEN TRUE + ELSE FALSE + END AS structure_changed, + CASE + WHEN previous_observed_structure_sha256 IS NULL THEN 'initial' + WHEN previous_observed_structure_sha256 IS DISTINCT FROM structure_sha256 THEN 'changed' + ELSE 'stable' + END AS structure_state, + CASE + WHEN previous_checked_at IS NULL THEN NULL + ELSE EXTRACT(EPOCH FROM (checked_at - previous_checked_at))::BIGINT + END AS seconds_since_previous +FROM ordered; + +COMMENT ON VIEW official_import_signature_audit_recent_view IS '官方导入结构签名近期变化视图,按 source_key 给出 recent_rank、结构是否变化与变化状态'; diff --git a/scripts/cloudflare_pricing_import_runner.go b/scripts/cloudflare_pricing_import_runner.go new file mode 100644 index 0000000..8769422 --- /dev/null +++ b/scripts/cloudflare_pricing_import_runner.go @@ -0,0 +1,66 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +type cloudflarePricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration + SnapshotOnly bool + SnapshotOut string + SignatureOut string +} + +func runCloudflarePricingImport(cfg cloudflarePricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + if cfg.SnapshotOnly || strings.TrimSpace(cfg.SnapshotOut) != "" || strings.TrimSpace(cfg.SignatureOut) != "" { + snapshotPath, signaturePath := resolveCloudflarePricingSnapshotPaths(cfg.SnapshotOut, cfg.SignatureOut, "", time.Now()) + signature, err := 
writeCloudflarePricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, time.Now()) + if err != nil { + return err + } + if cfg.SnapshotOnly { + _, err = fmt.Fprintf(out, + "source=cloudflare-pricing-snapshot snapshot_only=true byte_size=%d sha256=%s structure_sha256=%s snapshot_out=%s signature_out=%s\n", + signature.ByteSize, signature.SHA256, signature.StructureSHA256, snapshotPath, signaturePath, + ) + return err + } + } + records, err := parseCloudflarePricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=cloudflare-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "cloudflare-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=cloudflare-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/cloudflare_pricing_lib.go b/scripts/cloudflare_pricing_lib.go new file mode 100644 index 0000000..4647907 --- /dev/null +++ b/scripts/cloudflare_pricing_lib.go @@ -0,0 +1,108 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "strings" +) + +const ( + defaultCloudflarePricingFetchURL = "https://developers.cloudflare.com/workers-ai/platform/pricing/index.md" + defaultCloudflarePricingSourceURL = "https://developers.cloudflare.com/workers-ai/platform/pricing/" +) + +func parseCloudflarePricingCatalog(raw string) ([]officialPricingRecord, error) { + section, ok := extractCloudflareLLMPricingSection(raw) + if !ok { + return nil, fmt.Errorf("unexpected 
cloudflare pricing content") + } + + lines := strings.Split(section, "\n") + records := make([]officialPricingRecord, 0) + for _, line := range lines { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "| @cf/") { + continue + } + parts := strings.Split(line, "|") + if len(parts) < 4 { + continue + } + modelPath := strings.Trim(strings.TrimSpace(parts[1]), "`") + priceCell := strings.TrimSpace(parts[2]) + prices := extractCloudflarePrices(priceCell) + if len(prices) < 2 { + continue + } + providerName := providerFromModelPath(strings.TrimPrefix(modelPath, "@cf/")) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + record := officialPricingRecord{ + ModelID: normalizeExternalID("cloudflare", modelPath), + ModelName: modelPath, + ProviderName: providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Cloudflare Workers AI", + OperatorNameCn: "Cloudflare Workers AI", + OperatorCountry: "US", + OperatorWebsite: "https://developers.cloudflare.com/workers-ai/", + OperatorType: "cloud", + Region: "global", + Currency: "USD", + InputPrice: prices[0], + OutputPrice: prices[1], + SourceURL: defaultCloudflarePricingSourceURL, + ModelSourceURL: defaultCloudflarePricingSourceURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelPath), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + if len(records) == 0 { + return nil, fmt.Errorf("no cloudflare llm pricing rows found") + } + return records, nil +} + +func extractCloudflarePrices(raw string) []float64 { + fields := strings.Split(raw, "$") + prices := make([]float64, 0, 3) + for _, field := range fields[1:] { + value := strings.TrimSpace(field) + end := strings.Index(value, " per ") + if end == -1 { + continue + } + prices = append(prices, mustParseSubscriptionPrice(value[:end])) + } + 
// extractCloudflareLLMPricingSection returns the slice of the markdown
// document that starts at the first level-2 heading ("## ...") whose title
// contains both "llm" and "pricing" (case-insensitive) and stops right before
// the next level-2 heading, or at the end of the document when there is none.
// The boolean is false when no such heading exists.
func extractCloudflareLLMPricingSection(raw string) (string, bool) {
	const headingMarker = "## "
	rows := strings.Split(raw, "\n")

	// headingTitle yields the lower-cased title of a level-2 heading row and
	// reports whether the row is such a heading at all.
	headingTitle := func(row string) (string, bool) {
		text := strings.TrimSpace(row)
		if !strings.HasPrefix(text, headingMarker) {
			return "", false
		}
		return strings.ToLower(strings.TrimSpace(text[len(headingMarker):])), true
	}

	// Phase 1: locate the heading that opens the LLM pricing section.
	first := -1
	for idx, row := range rows {
		title, isHeading := headingTitle(row)
		if isHeading && strings.Contains(title, "llm") && strings.Contains(title, "pricing") {
			first = idx
			break
		}
	}
	if first < 0 {
		return "", false
	}

	// Phase 2: the section runs until the next level-2 heading of any title.
	last := len(rows)
	for idx := first + 1; idx < len(rows); idx++ {
		if _, isHeading := headingTitle(rows[idx]); isHeading {
			last = idx
			break
		}
	}
	return strings.Join(rows[first:last], "\n"), true
}
result, now, err); auditErr != nil { + fmt.Fprintf(os.Stderr, "cloudflare_pricing_signature_guard audit: %v\n", auditErr) + if err == nil { + err = auditErr + } + } + fmt.Println(formatCloudflarePricingSignatureGuardSummary(result)) + if err != nil { + fmt.Fprintf(os.Stderr, "cloudflare_pricing_signature_guard: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/cloudflare_pricing_signature_guard_lib.go b/scripts/cloudflare_pricing_signature_guard_lib.go new file mode 100644 index 0000000..d546c3b --- /dev/null +++ b/scripts/cloudflare_pricing_signature_guard_lib.go @@ -0,0 +1,136 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" +) + +type cloudflarePricingSignatureGuardConfig struct { + URL string + Fixture string + SnapshotDir string + BaselinePath string + Timeout time.Duration + AllowBootstrap bool +} + +type cloudflarePricingSignatureGuardResult struct { + SnapshotPath string + SignaturePath string + BaselinePath string + DriftDetected bool + BaselineInitialized bool + PreviousBaselineHash string + CurrentSignature markdownPricingStructureSignature +} + +func runCloudflarePricingSignatureGuard(cfg cloudflarePricingSignatureGuardConfig, now time.Time) (cloudflarePricingSignatureGuardResult, error) { + snapshotDir := cfg.SnapshotDir + if snapshotDir == "" { + snapshotDir = filepath.Join("logs", "cloudflare-pricing-snapshots") + } + if err := os.MkdirAll(snapshotDir, 0o755); err != nil { + return cloudflarePricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + + snapshotPath, signaturePath := resolveCloudflarePricingSnapshotPaths("", "", snapshotDir, now) + baselinePath := cfg.BaselinePath + if baselinePath == "" { + baselinePath = filepath.Join(snapshotDir, "baseline.signature.json") + } + + clientCfg := cloudflarePricingImportConfig{ + URL: cfg.URL, + Fixture: cfg.Fixture, + DryRun: true, + Timeout: cfg.Timeout, + SnapshotOnly: true, + SnapshotOut: snapshotPath, + SignatureOut: 
signaturePath, + } + if err := runCloudflarePricingImport(clientCfg, nil, ioDiscard{}); err != nil { + return cloudflarePricingSignatureGuardResult{}, err + } + + current, err := readMarkdownPricingStructureSignature(signaturePath) + if err != nil { + return cloudflarePricingSignatureGuardResult{}, err + } + result := cloudflarePricingSignatureGuardResult{ + SnapshotPath: snapshotPath, + SignaturePath: signaturePath, + BaselinePath: baselinePath, + CurrentSignature: current, + } + + previous, err := readMarkdownPricingStructureSignature(baselinePath) + if err != nil { + if os.IsNotExist(err) { + if !cfg.AllowBootstrap { + return result, fmt.Errorf("cloudflare pricing baseline missing: %s", baselinePath) + } + if err := copyFileCommon(signaturePath, baselinePath); err != nil { + return result, fmt.Errorf("initialize baseline: %w", err) + } + result.BaselineInitialized = true + return result, nil + } + return result, err + } + + result.PreviousBaselineHash = previous.StructureSHA256 + if previous.StructureSHA256 != current.StructureSHA256 { + result.DriftDetected = true + return result, fmt.Errorf( + "cloudflare pricing structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s", + previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath, + ) + } + return result, nil +} + +func formatCloudflarePricingSignatureGuardSummary(result cloudflarePricingSignatureGuardResult) string { + return fmt.Sprintf( + "source=cloudflare-pricing-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s", + result.DriftDetected, + result.BaselineInitialized, + result.CurrentSignature.StructureSHA256, + emptyIfBlank(result.PreviousBaselineHash), + result.SnapshotPath, + result.SignaturePath, + result.BaselinePath, + ) +} + +func buildCloudflarePricingSignatureAuditRecord(cfg cloudflarePricingSignatureGuardConfig, result 
cloudflarePricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord { + record := officialImportSignatureAuditRecord{ + SourceKey: "cloudflare_pricing_signature", + CheckedAt: checkedAt, + Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr), + DriftDetected: result.DriftDetected, + BaselineInitialized: result.BaselineInitialized, + SourceURL: strings.TrimSpace(cfg.URL), + FixturePath: strings.TrimSpace(cfg.Fixture), + SnapshotPath: strings.TrimSpace(result.SnapshotPath), + SignaturePath: strings.TrimSpace(result.SignaturePath), + BaselinePath: strings.TrimSpace(result.BaselinePath), + StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256), + PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash), + ByteSize: result.CurrentSignature.ByteSize, + ErrorMessage: errorMessageText(runErr), + } + if hasMarkdownPricingStructureSignature(result.CurrentSignature) { + signatureCopy := result.CurrentSignature + record.SignaturePayload = &signatureCopy + } + return record +} + +func persistCloudflarePricingSignatureAuditIfConfigured(cfg cloudflarePricingSignatureGuardConfig, result cloudflarePricingSignatureGuardResult, checkedAt time.Time, runErr error) error { + return persistOfficialImportSignatureAuditIfConfigured(buildCloudflarePricingSignatureAuditRecord(cfg, result, checkedAt, runErr)) +} diff --git a/scripts/cloudflare_pricing_signature_guard_test.go b/scripts/cloudflare_pricing_signature_guard_test.go new file mode 100644 index 0000000..5c8f30e --- /dev/null +++ b/scripts/cloudflare_pricing_signature_guard_test.go @@ -0,0 +1,102 @@ +//go:build llm_script + +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRunCloudflarePricingSignatureGuardInitializesBaseline(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + result, err := 
runCloudflarePricingSignatureGuard(cloudflarePricingSignatureGuardConfig{ + URL: defaultCloudflarePricingFetchURL, + Fixture: filepath.Join("testdata", "cloudflare_pricing_sample.md"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 20, 30, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("runCloudflarePricingSignatureGuard 返回错误: %v", err) + } + if !result.BaselineInitialized { + t.Fatalf("期望初始化 baseline") + } + if result.DriftDetected { + t.Fatalf("首次初始化不应判定为漂移") + } + if _, err := os.Stat(baselinePath); err != nil { + t.Fatalf("baseline 未写入: %v", err) + } +} + +func TestRunCloudflarePricingSignatureGuardDetectsDrift(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + _, err := runCloudflarePricingSignatureGuard(cloudflarePricingSignatureGuardConfig{ + URL: defaultCloudflarePricingFetchURL, + Fixture: filepath.Join("testdata", "cloudflare_pricing_sample.md"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 20, 31, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("初始化 baseline 失败: %v", err) + } + + driftFixture := "## Text model pricing\n\n| Model | Price |\n| --- | --- |\n| @cf/meta/llama-3.1-8b-instruct | $1 |\n" + driftPath := filepath.Join(tempDir, "cloudflare-drift.md") + if err := os.WriteFile(driftPath, []byte(driftFixture), 0o644); err != nil { + t.Fatalf("写入 drift fixture 失败: %v", err) + } + + result, err := runCloudflarePricingSignatureGuard(cloudflarePricingSignatureGuardConfig{ + URL: defaultCloudflarePricingFetchURL, + Fixture: driftPath, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: false, + }, time.Date(2026, 5, 15, 20, 32, 0, 0, time.FixedZone("CST", 8*3600))) + if err == nil { + t.Fatalf("期望结构漂移时报错") + } + if !result.DriftDetected { + 
t.Fatalf("期望 driftDetected=true") + } + if !strings.Contains(err.Error(), "cloudflare pricing structure drift detected") { + t.Fatalf("期望返回 drift 错误,实际: %v", err) + } +} + +func TestFormatCloudflarePricingSignatureGuardSummary(t *testing.T) { + result := cloudflarePricingSignatureGuardResult{ + SnapshotPath: "/tmp/cloudflare.md", + SignaturePath: "/tmp/cloudflare.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + DriftDetected: false, + BaselineInitialized: true, + CurrentSignature: markdownPricingStructureSignature{ + StructureSHA256: "abc123", + }, + } + summary := formatCloudflarePricingSignatureGuardSummary(result) + for _, want := range []string{ + "source=cloudflare-pricing-signature-guard", + "drift=false", + "baseline_initialized=true", + "structure_sha256=abc123", + } { + if !strings.Contains(summary, want) { + t.Fatalf("summary 缺少 %q,实际: %q", want, summary) + } + } +} diff --git a/scripts/cloudflare_pricing_snapshot_lib.go b/scripts/cloudflare_pricing_snapshot_lib.go new file mode 100644 index 0000000..f09ed49 --- /dev/null +++ b/scripts/cloudflare_pricing_snapshot_lib.go @@ -0,0 +1,24 @@ +//go:build llm_script + +package main + +import "time" + +var cloudflarePricingSignatureContainsNeedles = map[string]string{ + "llm": "llm", + "pricing": "pricing", + "cf_model_prefix": "@cf/", + "price_tokens": "price in tokens", +} + +func buildCloudflarePricingStructureSignature(raw string) markdownPricingStructureSignature { + return buildMarkdownPricingStructureSignature(raw, cloudflarePricingSignatureContainsNeedles) +} + +func writeCloudflarePricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (markdownPricingStructureSignature, error) { + return writeMarkdownPricingSnapshotArtifacts(raw, sourceURL, snapshotPath, signaturePath, now, cloudflarePricingSignatureContainsNeedles) +} + +func resolveCloudflarePricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now 
time.Time) (string, string) { + return resolveMarkdownPricingSnapshotPaths(snapshotPath, signaturePath, snapshotDir, "cloudflare-pricing", now) +} diff --git a/scripts/cloudflare_pricing_snapshot_test.go b/scripts/cloudflare_pricing_snapshot_test.go new file mode 100644 index 0000000..fbb680a --- /dev/null +++ b/scripts/cloudflare_pricing_snapshot_test.go @@ -0,0 +1,90 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestBuildCloudflarePricingStructureSignatureCapturesShape(t *testing.T) { + raw := ` +## LLM pricing + +| Model | Price in Tokens | Price in Neurons | +| --- | --- | --- | +| @cf/meta/llama-3.1-8b-instruct | $0.20 per M input tokens $1.00 per M output tokens | ignored | +` + + signature := buildCloudflarePricingStructureSignature(raw) + if signature.ByteSize == 0 { + t.Fatalf("期望 byte_size 非 0") + } + if signature.SHA256 == "" || signature.StructureSHA256 == "" { + t.Fatalf("期望生成 sha256 签名: %+v", signature) + } + if len(signature.Headings) == 0 || signature.Headings[0] != "LLM pricing" { + t.Fatalf("标题提取错误: %+v", signature.Headings) + } + if len(signature.TableHeaders) == 0 || !strings.Contains(signature.TableHeaders[0], "Price in Tokens") { + t.Fatalf("表头提取错误: %+v", signature.TableHeaders) + } + if !signature.Contains["llm"] || !signature.Contains["pricing"] || !signature.Contains["cf_model_prefix"] { + t.Fatalf("期望识别 Cloudflare 关键结构: %+v", signature.Contains) + } +} + +func TestRunCloudflarePricingImportSnapshotOnlyWritesArtifacts(t *testing.T) { + tempDir := t.TempDir() + snapshotPath := filepath.Join(tempDir, "cloudflare-live.md") + signaturePath := filepath.Join(tempDir, "cloudflare-live.signature.json") + + var out bytes.Buffer + err := runCloudflarePricingImport(cloudflarePricingImportConfig{ + URL: defaultCloudflarePricingFetchURL, + Fixture: filepath.Join("testdata", "cloudflare_pricing_sample.md"), + DryRun: true, + SnapshotOnly: true, + SnapshotOut: 
// positiveEnvIntOrDefault reads the environment variable key and returns its
// value as an int when it parses as a strictly positive integer (surrounding
// whitespace is ignored). In every other case — unset, blank, not a number,
// zero or negative — fallback is returned.
func positiveEnvIntOrDefault(key string, fallback int) int {
	text := strings.TrimSpace(os.Getenv(key))
	// An empty string fails Atoi as well, so the unset/blank case needs no
	// separate branch.
	if parsed, err := strconv.Atoi(text); err == nil && parsed > 0 {
		return parsed
	}
	return fallback
}
composeTrackedSummary(summary string, runContext ReportRunContext) string { runtimeAudit := strings.TrimSpace(runContext.RuntimeAudit) summary = strings.TrimSpace(summary) @@ -264,6 +284,9 @@ type ReportV3 struct { SceneSections []SceneSection AppendixLinks []AppendixLink ModelEvents []ModelEvent + SignatureAuditSummaries []SignatureAuditSourceSummary + SignatureAuditRows []SignatureAuditReportRow + SignatureAuditConfig SignatureAuditReportConfig } type DailySignals struct { @@ -274,6 +297,38 @@ type DailySignals struct { UnknownFree int } +type SignatureAuditSourceSummary struct { + SourceKey string + SourceLabel string + RunsInWindow int + ChangedRuns int + LatestCheckedAt string + LatestStatus string + LatestStructureState string +} + +type SignatureAuditReportRow struct { + SourceKey string + SourceLabel string + RecentRank int + CheckedAt string + StructureState string + StructureChanged bool + Status string + DriftDetected bool + BaselineInitialized bool + StructureSHA256 string + PreviousStructureSHA256 string + SnapshotPath string + SignaturePath string + ErrorMessage string +} + +type SignatureAuditReportConfig struct { + Window int + ChangedRunsThreshold int +} + type FreeSourceStat struct { Label string Description string @@ -375,22 +430,31 @@ type DataQualitySummary struct { } type SubscriptionPlanInfo struct { + ProviderName string + ProviderCN string + OperatorName string + OperatorCN string PlanName string PlanFamily string Tier string + BillingCycle string Currency string ListPrice float64 + PriceUnit string QuotaValue int64 QuotaUnit string ContextWindow int ModelCount int ModelPreview string SourceURL string + EffectiveDate string + Notes string } // ============ 数据查询(新Schema) ============ func generateReportDataV3(db *sql.DB, date string) (*ReportV3, error) { + signatureAuditCfg := resolveSignatureAuditReportConfig() // 查询模型+厂商+定价+运营商信息 rows, err := db.Query(` WITH latest_prices AS ( @@ -615,38 +679,156 @@ func generateReportDataV3(db *sql.DB, 
date string) (*ReportV3, error) { CNY: cny, USD: usd, }, + SignatureAuditConfig: signatureAuditCfg, } - if signals, err := loadDailySignals(db, date); err != nil { - logger.Warn("加载日报变化信号失败", "error", err) - } else { + if signals, events, ok, err := loadMaterializedDailySignalSnapshot(db, date); err != nil { + logger.Warn("加载物化关键信号失败", "error", err) + } else if ok { report.DailySignals = signals - } - if events, err := loadModelEvents(db, date); err != nil { - logger.Warn("加载模型级事件失败", "error", err) - } else { report.ModelEvents = events } + if report.DailySignals == (DailySignals{}) { + if signals, err := loadDailySignals(db, date); err != nil { + logger.Warn("加载日报变化信号失败", "error", err) + } else { + report.DailySignals = signals + } + } + if len(report.ModelEvents) == 0 { + if events, err := loadModelEvents(db, date); err != nil { + logger.Warn("加载模型级事件失败", "error", err) + } else { + report.ModelEvents = events + } + } + if summaries, rows, ok, err := loadSignatureAuditSection(db, signatureAuditCfg.Window); err != nil { + logger.Warn("加载结构签名稳定性摘要失败", "error", err) + } else if ok { + report.SignatureAuditSummaries = summaries + report.SignatureAuditRows = rows + } decorateReportV1(report) return report, nil } +func loadMaterializedDailySignalSnapshot(db *sql.DB, date string) (DailySignals, []ModelEvent, bool, error) { + var ( + signals DailySignals + rawTopEvents string + ) + err := db.QueryRow(` + SELECT + new_models, + price_changes, + official_free, + aggregator_free, + unknown_free, + COALESCE(top_events::text, '[]') + FROM daily_signal_snapshot + WHERE signal_date = $1::date + AND status = 'generated' + `, date).Scan( + &signals.NewModels, + &signals.PriceChanges, + &signals.OfficialFree, + &signals.AggregatorFree, + &signals.UnknownFree, + &rawTopEvents, + ) + if err == sql.ErrNoRows { + return DailySignals{}, nil, false, nil + } + if err != nil { + if strings.Contains(err.Error(), `relation "daily_signal_snapshot" does not exist`) { + return DailySignals{}, 
nil, false, nil + } + return DailySignals{}, nil, false, err + } + + var events []ModelEvent + if err := json.Unmarshal([]byte(rawTopEvents), &events); err != nil { + return DailySignals{}, nil, false, fmt.Errorf("unmarshal materialized top_events: %w", err) + } + return signals, events, true, nil +} + +func loadSignatureAuditSection(db *sql.DB, limitPerSource int) ([]SignatureAuditSourceSummary, []SignatureAuditReportRow, bool, error) { + summaries, rows, err := queryOfficialImportSignatureAuditWindow(db, limitPerSource, "", false) + if err != nil { + if strings.Contains(err.Error(), `relation "official_import_signature_audit_recent_view" does not exist`) || + strings.Contains(err.Error(), `relation "official_import_signature_audit" does not exist`) { + return nil, nil, false, nil + } + return nil, nil, false, err + } + if len(summaries) == 0 { + return nil, nil, false, nil + } + + reportSummaries := make([]SignatureAuditSourceSummary, 0, len(summaries)) + for _, summary := range summaries { + reportSummaries = append(reportSummaries, SignatureAuditSourceSummary{ + SourceKey: summary.SourceKey, + SourceLabel: signatureAuditSourceLabel(summary.SourceKey), + RunsInWindow: summary.RunsInWindow, + ChangedRuns: summary.ChangedRuns, + LatestCheckedAt: summary.LatestCheckedAt.Format("2006-01-02 15:04:05"), + LatestStatus: summary.LatestStatus, + LatestStructureState: summary.LatestStructureState, + }) + } + + reportRows := make([]SignatureAuditReportRow, 0, len(rows)) + for _, row := range rows { + reportRows = append(reportRows, SignatureAuditReportRow{ + SourceKey: row.SourceKey, + SourceLabel: signatureAuditSourceLabel(row.SourceKey), + RecentRank: row.RecentRank, + CheckedAt: row.CheckedAt.Format("2006-01-02 15:04:05"), + StructureState: row.StructureState, + StructureChanged: row.StructureChanged, + Status: row.Status, + DriftDetected: row.DriftDetected, + BaselineInitialized: row.BaselineInitialized, + StructureSHA256: row.StructureSHA256, + 
PreviousStructureSHA256: nullStringOrNone(row.PreviousObservedSHA256), + SnapshotPath: nullStringOrNone(row.SnapshotPath), + SignaturePath: nullStringOrNone(row.SignaturePath), + ErrorMessage: nullStringOrNone(row.ErrorMessage), + }) + } + return reportSummaries, reportRows, true, nil +} + func loadTencentSubscriptionPlans(db *sql.DB) ([]SubscriptionPlanInfo, error) { rows, err := db.Query(` SELECT + COALESCE(mp.name, 'unknown') AS provider_name, + COALESCE(mp.name_cn, mp.name, 'unknown') AS provider_name_cn, + COALESCE(o.name, 'unknown') AS operator_name, + COALESCE(o.name_cn, o.name, 'unknown') AS operator_name_cn, sp.plan_name, sp.plan_family, sp.tier, + COALESCE(sp.billing_cycle, ''), sp.currency, sp.list_price, + COALESCE(sp.price_unit, ''), COALESCE(sp.quota_value, 0), COALESCE(sp.quota_unit, ''), COALESCE(sp.context_window, 0), COALESCE(sp.model_scope, '[]'), - COALESCE(sp.source_url, '') + COALESCE(sp.source_url, ''), + COALESCE(TO_CHAR(sp.effective_date, 'YYYY-MM-DD'), ''), + COALESCE(sp.notes, '') FROM subscription_plan sp JOIN model_provider mp ON mp.id = sp.provider_id - WHERE mp.name = 'Tencent' - ORDER BY sp.list_price ASC, sp.plan_name ASC + LEFT JOIN operator o ON o.id = sp.operator_id + ORDER BY + COALESCE(o.name_cn, o.name, 'unknown') ASC, + sp.plan_family ASC, + sp.list_price ASC, + sp.plan_name ASC `) if err != nil { if strings.Contains(err.Error(), `relation "subscription_plan" does not exist`) { @@ -661,16 +843,24 @@ func loadTencentSubscriptionPlans(db *sql.DB) ([]SubscriptionPlanInfo, error) { var plan SubscriptionPlanInfo var modelScopeRaw string if err := rows.Scan( + &plan.ProviderName, + &plan.ProviderCN, + &plan.OperatorName, + &plan.OperatorCN, &plan.PlanName, &plan.PlanFamily, &plan.Tier, + &plan.BillingCycle, &plan.Currency, &plan.ListPrice, + &plan.PriceUnit, &plan.QuotaValue, &plan.QuotaUnit, &plan.ContextWindow, &modelScopeRaw, &plan.SourceURL, + &plan.EffectiveDate, + &plan.Notes, ); err != nil { return nil, err } @@ -753,17 
+943,78 @@ func formatPriceUSD(price float64) string { return fmt.Sprintf("$%.2f", price) } -func formatSubscriptionPrice(price float64, currency string) string { - switch currency { - case "CNY": +func formatSubscriptionPrice(price float64, currency string, priceUnit string) string { + unit := strings.ToLower(strings.TrimSpace(priceUnit)) + switch { + case currency == "CNY" && unit == "cny/pack": + return fmt.Sprintf("¥%.2f/包", price) + case currency == "CNY": return fmt.Sprintf("¥%.2f/月", price) - case "USD": + case currency == "USD" && unit == "usd/pack": + return fmt.Sprintf("$%.2f/pack", price) + case currency == "USD": return fmt.Sprintf("$%.2f/month", price) default: + if strings.TrimSpace(priceUnit) != "" { + return fmt.Sprintf("%.2f %s", price, priceUnit) + } return fmt.Sprintf("%.2f %s", price, currency) } } +func formatPlanFamily(planFamily string) string { + switch strings.ToLower(strings.TrimSpace(planFamily)) { + case "token_plan": + return "Token Plan" + case "coding_plan": + return "Coding Plan" + case "package_plan": + return "套餐包" + default: + if strings.TrimSpace(planFamily) == "" { + return "-" + } + return planFamily + } +} + +func formatBillingCycle(cycle string) string { + switch strings.ToLower(strings.TrimSpace(cycle)) { + case "monthly": + return "包月" + case "quarterly": + return "3个月" + case "": + return "-" + default: + return cycle + } +} + +func formatPlanOperator(plan SubscriptionPlanInfo) string { + if strings.TrimSpace(plan.OperatorCN) != "" && strings.TrimSpace(plan.OperatorCN) != "unknown" { + return plan.OperatorCN + } + if strings.TrimSpace(plan.OperatorName) != "" && strings.TrimSpace(plan.OperatorName) != "unknown" { + return plan.OperatorName + } + if strings.TrimSpace(plan.ProviderCN) != "" && strings.TrimSpace(plan.ProviderCN) != "unknown" { + return plan.ProviderCN + } + if strings.TrimSpace(plan.ProviderName) != "" { + return plan.ProviderName + } + return "-" +} + +func formatPlanNotes(notes string) string { + notes = 
strings.TrimSpace(notes) + if notes == "" { + return "-" + } + return notes +} + func formatSubscriptionQuota(value int64, unit string) string { if value <= 0 { return "-" @@ -1563,6 +1814,10 @@ func buildHeroSummary(r *ReportV3) (string, string) { return fmt.Sprintf("今天最值得关注的是 %s 已进入活动窗口,优先判断这次活动是否值得改变默认成本策略。", promo.ModelName), fmt.Sprintf("主来源:%s", promo.PrimarySource) } + if summary, changedCount := topChangedSignatureAuditSummary(r.SignatureAuditSummaries, effectiveSignatureAuditReportConfig(r).ChangedRunsThreshold); summary != nil { + return fmt.Sprintf("今天最值得关注的是 %s 的价格页结构开始抖动,优先复查抓取和解析结果是否仍然可信。", summary.SourceLabel), + fmt.Sprintf("最近 %d 次中出现 %d 次结构变化;当前有 %d 个平台处于变化窗口。", summary.RunsInWindow, summary.ChangedRuns, changedCount) + } switch r.PageMode { case "hot": return fmt.Sprintf( @@ -1591,12 +1846,18 @@ func firstEventByType(events []ModelEvent, eventType string) *ModelEvent { } func buildHeadlineItems(r *ReportV3) []HeadlineItem { - if items := buildHeadlineItemsFromEvents(r.ModelEvents); len(items) > 0 { + var items []HeadlineItem + if auditItem, ok := buildSignatureAuditHeadlineItem(r.SignatureAuditSummaries, effectiveSignatureAuditReportConfig(r).ChangedRunsThreshold); ok { + items = append(items, auditItem) + } + if eventItems := buildHeadlineItemsFromEvents(r.ModelEvents); len(eventItems) > 0 { + items = append(items, eventItems...) 
+ if len(items) > 4 { + return items[:4] + } return items } - var items []HeadlineItem - if r.DailySignals.NewModels > 0 { items = append(items, HeadlineItem{ Label: "新模型", @@ -1830,6 +2091,9 @@ func formatEventUpdatedAt(value, fallbackDate string) string { func buildActionItems(r *ReportV3) []ActionItem { var actions []ActionItem + if action, ok := buildSignatureAuditActionItem(r.SignatureAuditSummaries, effectiveSignatureAuditReportConfig(r).ChangedRunsThreshold); ok { + actions = append(actions, action) + } if section := findSceneSection(r.SceneSections, "低成本编码"); section != nil { actions = append(actions, ActionItem{ @@ -1861,6 +2125,81 @@ func buildActionItems(r *ReportV3) []ActionItem { return actions } +func topChangedSignatureAuditSummary(summaries []SignatureAuditSourceSummary, changedRunsThreshold int) (*SignatureAuditSourceSummary, int) { + var selected *SignatureAuditSourceSummary + changedCount := 0 + for i := range summaries { + summary := &summaries[i] + if summary.ChangedRuns < changedRunsThreshold { + continue + } + changedCount++ + if selected == nil { + selected = summary + continue + } + if summary.ChangedRuns > selected.ChangedRuns { + selected = summary + continue + } + if summary.ChangedRuns == selected.ChangedRuns && summary.SourceLabel < selected.SourceLabel { + selected = summary + } + } + return selected, changedCount +} + +func buildSignatureAuditHeadlineItem(summaries []SignatureAuditSourceSummary, changedRunsThreshold int) (HeadlineItem, bool) { + summary, changedCount := topChangedSignatureAuditSummary(summaries, changedRunsThreshold) + if summary == nil { + return HeadlineItem{}, false + } + item := HeadlineItem{ + Label: "结构波动", + Title: fmt.Sprintf("%s 结构签名开始抖动", summary.SourceLabel), + Summary: fmt.Sprintf("最近 %d 次中出现 %d 次结构变化,当前共有 %d 个平台进入变化窗口。", summary.RunsInWindow, summary.ChangedRuns, changedCount), + Audience: "适合维护官方价格 importer、需要优先确认抓取与解析可信度的团队", + Baseline: "近期结构签名窗口", + TrustLabel: "结构签名巡检", + SourceKindLabel: 
"官方价格页结构签名", + PrimarySource: "official_import_signature_audit_recent_view", + UpdatedAt: summary.LatestCheckedAt, + EvidenceDetail: fmt.Sprintf("最新状态=%s,最新结构状态=%s", summary.LatestStatus, summary.LatestStructureState), + Tone: "caution", + } + return item, true +} + +func buildSignatureAuditActionItem(summaries []SignatureAuditSourceSummary, changedRunsThreshold int) (ActionItem, bool) { + summary, changedCount := topChangedSignatureAuditSummary(summaries, changedRunsThreshold) + if summary == nil { + return ActionItem{}, false + } + return ActionItem{ + Title: fmt.Sprintf("优先复查 %s 价格 importer", summary.SourceLabel), + Audience: "适合负责官方价格采集、需要先确认页面结构是否漂移的维护者", + Evidence: fmt.Sprintf("最近 %d 次中出现 %d 次结构变化;当前共有 %d 个平台进入变化窗口。", summary.RunsInWindow, summary.ChangedRuns, changedCount), + Tags: []string{"结构稳定性", "官方价格页", summary.SourceLabel}, + }, true +} + +func effectiveSignatureAuditReportConfig(r *ReportV3) SignatureAuditReportConfig { + cfg := SignatureAuditReportConfig{ + Window: 5, + ChangedRunsThreshold: 1, + } + if r == nil { + return cfg + } + if r.SignatureAuditConfig.Window > 0 { + cfg.Window = r.SignatureAuditConfig.Window + } + if r.SignatureAuditConfig.ChangedRunsThreshold > 0 { + cfg.ChangedRunsThreshold = r.SignatureAuditConfig.ChangedRunsThreshold + } + return cfg +} + func findSceneSection(sections []SceneSection, title string) *SceneSection { for i := range sections { if sections[i].Title == title { @@ -1961,6 +2300,46 @@ func buildSceneSections(r *ReportV3) []SceneSection { return sections } +func signatureAuditSourceLabel(sourceKey string) string { + switch strings.TrimSpace(sourceKey) { + case "vertex_pricing_signature": + return "Google Cloud Vertex AI" + case "cloudflare_pricing_signature": + return "Cloudflare Workers AI" + case "perplexity_pricing_signature": + return "Perplexity API" + default: + if strings.TrimSpace(sourceKey) == "" { + return "未知平台" + } + return sourceKey + } +} + +func buildSignatureAuditSectionLead(r *ReportV3) string { + 
if len(r.SignatureAuditSummaries) == 0 { + return "" + } + cfg := effectiveSignatureAuditReportConfig(r) + changedSources := make([]string, 0) + for _, summary := range r.SignatureAuditSummaries { + if summary.ChangedRuns >= cfg.ChangedRunsThreshold { + changedSources = append(changedSources, summary.SourceLabel) + } + } + if len(changedSources) == 0 { + return fmt.Sprintf("最近窗口内未出现达到阈值的结构变化,当前阈值为 %d 次,官方价格页结构整体稳定。", cfg.ChangedRunsThreshold) + } + return fmt.Sprintf("最近窗口内有 %d 个平台达到结构变化阈值(%d 次),优先复查 %s。", len(changedSources), cfg.ChangedRunsThreshold, strings.Join(changedSources, " / ")) +} + +func signatureAuditSummaryTone(r *ReportV3, summary SignatureAuditSourceSummary) string { + if summary.ChangedRuns >= effectiveSignatureAuditReportConfig(r).ChangedRunsThreshold { + return "warning" + } + return "official" +} + func buildRecommendations(models []ModelInfo, limit int) []Recommendation { seen := make(map[string]struct{}) var result []Recommendation @@ -2174,6 +2553,28 @@ func generateMarkdownV3(r *ReportV3, path string) error { fmt.Fprintf(f, "- 可信度: %s\n\n", item.TrustLabel) } + if len(r.SignatureAuditSummaries) > 0 { + fmt.Fprintf(f, "## 结构稳定性\n\n") + if lead := buildSignatureAuditSectionLead(r); lead != "" { + fmt.Fprintf(f, "> %s\n\n", lead) + } + fmt.Fprintf(f, "| 平台 | 近期窗口 | 最新状态 | 最新结构状态 | 最近检查 |\n|------|----------|----------|--------------|----------|\n") + for _, item := range r.SignatureAuditSummaries { + fmt.Fprintf(f, "| %s | 最近 %d 次中出现 %d 次结构变化 | %s | %s | %s |\n", + item.SourceLabel, item.RunsInWindow, item.ChangedRuns, item.LatestStatus, item.LatestStructureState, item.LatestCheckedAt) + } + fmt.Fprintf(f, "\n") + if len(r.SignatureAuditRows) > 0 { + fmt.Fprintf(f, "### 近期结构记录\n\n") + fmt.Fprintf(f, "| 平台 | recent_rank | 检查时间 | 结构状态 | 状态 | 结构签名 |\n|------|-------------|----------|----------|------|----------|\n") + for _, item := range r.SignatureAuditRows { + fmt.Fprintf(f, "| %s | %d | %s | %s | %s | %s |\n", + item.SourceLabel, 
item.RecentRank, item.CheckedAt, item.StructureState, item.Status, item.StructureSHA256) + } + fmt.Fprintf(f, "\n") + } + } + if len(r.FreeBreakdown) > 0 { fmt.Fprintf(f, "### 免费来源分层\n\n") fmt.Fprintf(f, "| 类型 | 数量 | 说明 |\n|------|------|------|\n") @@ -2239,18 +2640,21 @@ func generateMarkdownV3(r *ReportV3, path string) error { } if len(r.TencentSubscriptionPlans) > 0 { - fmt.Fprintf(f, "## 💳 腾讯云套餐订阅价\n\n") - fmt.Fprintf(f, "> 以下为套餐订阅价,不参与按模型输入/输出单价排行。\n\n") - fmt.Fprintf(f, "| 套餐 | 月费 | 月额度 | 上下文上限 | 覆盖模型 |\n") - fmt.Fprintf(f, "|------|------|--------|------------|----------|\n") + fmt.Fprintf(f, "## 💳 中转平台套餐订阅价\n\n") + fmt.Fprintf(f, "> 以下为云平台 / 中转平台套餐订阅价,包含标准月套餐与首购活动套餐,不参与按模型输入/输出单价排行。\n\n") + fmt.Fprintf(f, "| 平台 | 套餐类型 | 套餐 | 周期 | 价格 | 套餐额度 | 活动说明 | 覆盖模型 |\n") + fmt.Fprintf(f, "|------|----------|------|------|------|----------|----------|----------|\n") for _, plan := range r.TencentSubscriptionPlans { fmt.Fprintf( f, - "| %s | %s | %s | %s | %d 个(%s) |\n", + "| %s | %s | %s | %s | %s | %s | %s | %d 个(%s) |\n", + formatPlanOperator(plan), + formatPlanFamily(plan.PlanFamily), plan.PlanName, - formatSubscriptionPrice(plan.ListPrice, plan.Currency), + formatBillingCycle(plan.BillingCycle), + formatSubscriptionPrice(plan.ListPrice, plan.Currency, plan.PriceUnit), formatSubscriptionQuota(plan.QuotaValue, plan.QuotaUnit), - formatContextWindowCompact(plan.ContextWindow), + formatPlanNotes(plan.Notes), plan.ModelCount, plan.ModelPreview, ) @@ -2740,6 +3144,40 @@ th { +{{if .SignatureAuditSummaries}} +
+

结构稳定性

+

{{signatureAuditSectionLead .}}

+
+ {{range .SignatureAuditSummaries}} +
+
{{.SourceLabel}}
+
最近 {{.RunsInWindow}} 次中出现 {{.ChangedRuns}} 次结构变化
+
最新状态:{{.LatestStatus}} · 最新结构状态:{{.LatestStructureState}}
+
最近检查:{{.LatestCheckedAt}}
+
+ {{end}} +
+ {{if .SignatureAuditRows}} +
+ + + {{range .SignatureAuditRows}} + + + + + + + + + {{end}} +
平台recent_rank检查时间结构状态状态结构签名
{{.SourceLabel}}{{.RecentRank}}{{.CheckedAt}}{{.StructureState}}{{.Status}}{{.StructureSHA256}}
+
+ {{end}} +
+{{end}} +

免费来源分层

免费可用不等于官方长期免费,必须先区分来源。

@@ -2875,17 +3313,20 @@ th { {{if .TencentSubscriptionPlans}}
-

💳 腾讯云套餐订阅价

-

以下为套餐订阅价,不参与按模型输入/输出单价排行。

+

💳 中转平台套餐订阅价

+

以下为云平台 / 中转平台套餐订阅价,包含标准月套餐与首购活动套餐,不参与按模型输入/输出单价排行。

- + {{range .TencentSubscriptionPlans}} + + - + + - - + + {{end}}
套餐月费月额度上下文上限覆盖模型
平台套餐类型套餐周期价格套餐额度活动说明覆盖模型
{{formatPlanOperator .}}{{formatPlanFamily .PlanFamily}} {{.PlanName}}{{formatSubscriptionPrice .ListPrice .Currency}}{{formatBillingCycle .BillingCycle}}{{formatSubscriptionPrice .ListPrice .Currency .PriceUnit}} {{formatSubscriptionQuota .QuotaValue .QuotaUnit}}{{formatContextWindowCompact .ContextWindow}}{{.ModelCount}} 个{{if .ModelPreview}}({{.ModelPreview}}){{end}}{{formatPlanNotes .Notes}}{{.ModelCount}} 个{{if .ModelPreview}}({{.ModelPreview}}){{end}}{{if gt .ContextWindow 0}} · {{formatContextWindowCompact .ContextWindow}}{{end}}
@@ -2910,6 +3351,12 @@ th { "formatSubscriptionPrice": formatSubscriptionPrice, "formatSubscriptionQuota": formatSubscriptionQuota, "formatContextWindowCompact": formatContextWindowCompact, + "formatPlanFamily": formatPlanFamily, + "formatBillingCycle": formatBillingCycle, + "formatPlanOperator": formatPlanOperator, + "formatPlanNotes": formatPlanNotes, + "signatureAuditSectionLead": buildSignatureAuditSectionLead, + "signatureAuditSummaryTone": signatureAuditSummaryTone, } t := template.Must(template.New("report").Funcs(funcMap).Parse(tmpl)) diff --git a/scripts/generate_daily_report_test.go b/scripts/generate_daily_report_test.go index 9398e7b..0b872bb 100644 --- a/scripts/generate_daily_report_test.go +++ b/scripts/generate_daily_report_test.go @@ -260,6 +260,32 @@ func TestResolveReportRunContextMarksHistoricalRebuildAsNonOfficial(t *testing.T } } +func TestResolveSignatureAuditReportConfigDefaults(t *testing.T) { + t.Setenv("REPORT_SIGNATURE_AUDIT_WINDOW", "") + t.Setenv("REPORT_SIGNATURE_AUDIT_CHANGED_THRESHOLD", "") + + cfg := resolveSignatureAuditReportConfig() + if cfg.Window != 5 { + t.Fatalf("window = %d, want 5", cfg.Window) + } + if cfg.ChangedRunsThreshold != 1 { + t.Fatalf("changed threshold = %d, want 1", cfg.ChangedRunsThreshold) + } +} + +func TestResolveSignatureAuditReportConfigReadsEnvOverride(t *testing.T) { + t.Setenv("REPORT_SIGNATURE_AUDIT_WINDOW", "9") + t.Setenv("REPORT_SIGNATURE_AUDIT_CHANGED_THRESHOLD", "3") + + cfg := resolveSignatureAuditReportConfig() + if cfg.Window != 9 { + t.Fatalf("window = %d, want 9", cfg.Window) + } + if cfg.ChangedRunsThreshold != 3 { + t.Fatalf("changed threshold = %d, want 3", cfg.ChangedRunsThreshold) + } +} + func TestComposeTrackedSummaryPrependsRuntimeAudit(t *testing.T) { summary := composeTrackedSummary( "models=42 free=3 intl=5 domestic=10", @@ -498,7 +524,7 @@ func TestGenerateMarkdownV3IncludesTencentSubscriptionSection(t *testing.T) { "主来源: OpenRouter / region_pricing", "更新时间: 2026-05-13 09:30", 
"判定依据: models.created_at = 今日,且已存在最新价格快照", - "## 💳 腾讯云套餐订阅价", + "## 💳 中转平台套餐订阅价", "通用 Token Plan Lite", "Hy Token Plan Max", "¥39.00/月", @@ -618,7 +644,185 @@ func TestGenerateHTMLV3IncludesTencentSubscriptionSection(t *testing.T) { "官方免费", "聚合免费", "待确认", - "💳 腾讯云套餐订阅价", + "💳 中转平台套餐订阅价", + } { + if !strings.Contains(content, want) { + t.Fatalf("html missing %q\n%s", want, content) + } + } +} + +func TestGenerateHTMLV3IncludesResellerSubscriptionComparison(t *testing.T) { + path := filepath.Join(t.TempDir(), "daily_report.html") + report := sampleReportForV1() + report.TencentSubscriptionPlans = []SubscriptionPlanInfo{ + { + ProviderName: "Tencent", + OperatorName: "Tencent Cloud", + PlanName: "通用 Token Plan 首月活动版", + PlanFamily: "token_plan", + Tier: "首月活动版", + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: 19, + PriceUnit: "CNY/month", + QuotaValue: 35000000, + QuotaUnit: "tokens/month", + ContextWindow: 131072, + ModelCount: 2, + ModelPreview: "glm-5, hunyuan-t1", + Notes: "首购用户首月优惠,次月恢复标准价。", + EffectiveDate: "2026-05-14", + }, + { + ProviderName: "Alibaba", + OperatorName: "Alibaba Cloud Bailian", + PlanName: "百炼 Coding Plan 首购版", + PlanFamily: "coding_plan", + Tier: "首购版", + BillingCycle: "monthly", + Currency: "CNY", + ListPrice: 29, + PriceUnit: "CNY/month", + QuotaValue: 50000000, + QuotaUnit: "tokens/month", + ContextWindow: 262144, + ModelCount: 3, + ModelPreview: "qwen-coder-plus, qwen3-coder, deepseek-r1", + Notes: "首月活动价,适合低成本试用编码模型。", + EffectiveDate: "2026-05-14", + }, + } + decorateReportV1(report) + + if err := generateHTMLV3(report, path); err != nil { + t.Fatalf("generateHTMLV3 returned error: %v", err) + } + + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read html output: %v", err) + } + + content := string(body) + for _, want := range []string{ + "💳 中转平台套餐订阅价", + "Tencent Cloud", + "Alibaba Cloud Bailian", + "通用 Token Plan 首月活动版", + "百炼 Coding Plan 首购版", + "首购用户首月优惠,次月恢复标准价。", + "首月活动价,适合低成本试用编码模型。", + "Token Plan", + 
"Coding Plan", + } { + if !strings.Contains(content, want) { + t.Fatalf("html missing %q\n%s", want, content) + } + } +} + +func TestGenerateMarkdownV3IncludesSignatureStabilitySection(t *testing.T) { + path := filepath.Join(t.TempDir(), "daily_report.md") + report := sampleReportForV1() + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "cloudflare_pricing_signature", + SourceLabel: "Cloudflare Workers AI", + RunsInWindow: 5, + ChangedRuns: 2, + LatestStatus: "passed", + LatestStructureState: "stable", + LatestCheckedAt: "2026-05-15 20:01:46", + }, + { + SourceKey: "vertex_pricing_signature", + SourceLabel: "Google Cloud Vertex AI", + RunsInWindow: 5, + ChangedRuns: 0, + LatestStatus: "passed", + LatestStructureState: "stable", + LatestCheckedAt: "2026-05-15 19:47:11", + }, + } + report.SignatureAuditRows = []SignatureAuditReportRow{ + { + SourceKey: "cloudflare_pricing_signature", + SourceLabel: "Cloudflare Workers AI", + RecentRank: 2, + CheckedAt: "2026-05-14 20:01:46", + StructureState: "changed", + StructureChanged: true, + Status: "drift_detected", + StructureSHA256: "def456", + }, + } + decorateReportV1(report) + + if err := generateMarkdownV3(report, path); err != nil { + t.Fatalf("generateMarkdownV3 returned error: %v", err) + } + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read markdown output: %v", err) + } + content := string(body) + for _, want := range []string{ + "## 结构稳定性", + "Cloudflare Workers AI", + "Google Cloud Vertex AI", + "最近 5 次中出现 2 次结构变化", + "changed", + "def456", + } { + if !strings.Contains(content, want) { + t.Fatalf("markdown missing %q\n%s", want, content) + } + } +} + +func TestGenerateHTMLV3IncludesSignatureStabilitySection(t *testing.T) { + path := filepath.Join(t.TempDir(), "daily_report.html") + report := sampleReportForV1() + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "perplexity_pricing_signature", + SourceLabel: "Perplexity API", + 
RunsInWindow: 5, + ChangedRuns: 1, + LatestStatus: "baseline_initialized", + LatestStructureState: "initial", + LatestCheckedAt: "2026-05-15 20:01:46", + }, + } + report.SignatureAuditRows = []SignatureAuditReportRow{ + { + SourceKey: "perplexity_pricing_signature", + SourceLabel: "Perplexity API", + RecentRank: 1, + CheckedAt: "2026-05-15 20:01:46", + StructureState: "initial", + StructureChanged: false, + Status: "baseline_initialized", + StructureSHA256: "abc123", + }, + } + decorateReportV1(report) + + if err := generateHTMLV3(report, path); err != nil { + t.Fatalf("generateHTMLV3 returned error: %v", err) + } + body, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read html output: %v", err) + } + content := string(body) + for _, want := range []string{ + "结构稳定性", + "Perplexity API", + "最近 5 次中出现 1 次结构变化", + "baseline_initialized", + "abc123", } { if !strings.Contains(content, want) { t.Fatalf("html missing %q\n%s", want, content) @@ -746,6 +950,162 @@ func TestBuildHeadlineItemsDeduplicatesSameModel(t *testing.T) { } } +func TestBuildHeadlineItemsElevatesSignatureDrift(t *testing.T) { + report := sampleReportForV1() + report.SignatureAuditConfig = SignatureAuditReportConfig{Window: 5, ChangedRunsThreshold: 2} + report.ModelEvents = []ModelEvent{ + { + EventType: "new_model", + ModelName: "DeepSeek-V4-Flash", + ProviderName: "DeepSeek", + TrustLabel: "聚合来源", + Baseline: "首次出现", + Summary: "新模型进入情报池。", + SourceKindLabel: "模型快照", + PrimarySource: "OpenRouter / region_pricing", + UpdatedAt: "2026-05-13 09:30", + EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照", + Priority: 90, + }, + } + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "cloudflare_pricing_signature", + SourceLabel: "Cloudflare Workers AI", + RunsInWindow: 5, + ChangedRuns: 2, + LatestCheckedAt: "2026-05-15 20:01:46", + LatestStatus: "drift_detected", + LatestStructureState: "changed", + }, + } + + items := buildHeadlineItems(report) + if len(items) == 0 { 
+ t.Fatalf("expected headline items") + } + if items[0].Label != "结构波动" { + t.Fatalf("expected signature drift headline first, got %+v", items[0]) + } + if !strings.Contains(items[0].Title, "Cloudflare Workers AI") { + t.Fatalf("expected drift headline title to mention source, got %+v", items[0]) + } +} + +func TestBuildActionItemsElevatesSignatureDrift(t *testing.T) { + report := sampleReportForV1() + report.SignatureAuditConfig = SignatureAuditReportConfig{Window: 5, ChangedRunsThreshold: 1} + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "perplexity_pricing_signature", + SourceLabel: "Perplexity API", + RunsInWindow: 5, + ChangedRuns: 1, + LatestCheckedAt: "2026-05-15 20:01:46", + LatestStatus: "drift_detected", + LatestStructureState: "changed", + }, + } + decorateReportV1(report) + + if len(report.ActionItems) == 0 { + t.Fatalf("expected action items") + } + if !strings.Contains(report.ActionItems[0].Title, "Perplexity API") { + t.Fatalf("expected first action item to elevate signature drift, got %+v", report.ActionItems[0]) + } + if !strings.Contains(report.ActionItems[0].Evidence, "最近 5 次中出现 1 次结构变化") { + t.Fatalf("expected signature drift evidence, got %+v", report.ActionItems[0]) + } +} + +func TestBuildHeadlineItemsDoesNotElevateSignatureDriftBelowThreshold(t *testing.T) { + report := sampleReportForV1() + report.SignatureAuditConfig = SignatureAuditReportConfig{Window: 5, ChangedRunsThreshold: 3} + report.ModelEvents = []ModelEvent{ + { + EventType: "new_model", + ModelName: "DeepSeek-V4-Flash", + ProviderName: "DeepSeek", + TrustLabel: "聚合来源", + Baseline: "首次出现", + Summary: "新模型进入情报池。", + SourceKindLabel: "模型快照", + PrimarySource: "OpenRouter / region_pricing", + UpdatedAt: "2026-05-13 09:30", + EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照", + Priority: 90, + }, + } + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "cloudflare_pricing_signature", + SourceLabel: "Cloudflare Workers 
AI", + RunsInWindow: 5, + ChangedRuns: 2, + LatestCheckedAt: "2026-05-15 20:01:46", + LatestStatus: "drift_detected", + LatestStructureState: "changed", + }, + } + + items := buildHeadlineItems(report) + if len(items) == 0 { + t.Fatalf("expected headline items") + } + if items[0].Label == "结构波动" { + t.Fatalf("signature drift should stay below threshold, got %+v", items[0]) + } +} + +func TestDecorateReportV1ElevatesSignatureDriftIntoHeroSummary(t *testing.T) { + report := sampleReportForV1() + report.ModelEvents = nil + report.SignatureAuditConfig = SignatureAuditReportConfig{Window: 5, ChangedRunsThreshold: 2} + report.SignatureAuditSummaries = []SignatureAuditSourceSummary{ + { + SourceKey: "vertex_pricing_signature", + SourceLabel: "Google Cloud Vertex AI", + RunsInWindow: 5, + ChangedRuns: 3, + LatestCheckedAt: "2026-05-15 20:01:46", + LatestStatus: "drift_detected", + LatestStructureState: "changed", + }, + } + + decorateReportV1(report) + + if !strings.Contains(report.HeroSummary, "Google Cloud Vertex AI") { + t.Fatalf("expected hero summary to mention signature drift source, got %q", report.HeroSummary) + } + if !strings.Contains(report.HeroEvidence, "最近 5 次中出现 3 次结构变化") { + t.Fatalf("expected hero evidence to mention drift count, got %q", report.HeroEvidence) + } +} + +func TestSignatureAuditSummaryToneRespectsConfiguredThreshold(t *testing.T) { + report := &ReportV3{ + SignatureAuditConfig: SignatureAuditReportConfig{Window: 5, ChangedRunsThreshold: 3}, + } + + if tone := signatureAuditSummaryTone(report, SignatureAuditSourceSummary{ + SourceLabel: "Cloudflare Workers AI", + ChangedRuns: 2, + RunsInWindow: 5, + }); tone != "official" { + t.Fatalf("tone below threshold = %q, want official", tone) + } + + if tone := signatureAuditSummaryTone(report, SignatureAuditSourceSummary{ + SourceLabel: "Cloudflare Workers AI", + ChangedRuns: 3, + RunsInWindow: 5, + }); tone != "warning" { + t.Fatalf("tone at threshold = %q, want warning", tone) + } +} + func 
TestHeadlineItemFromModelEventIncludesEvidenceFields(t *testing.T) { item := headlineItemFromModelEvent(ModelEvent{ EventType: "new_model", diff --git a/scripts/import_cloudflare_pricing.go b/scripts/import_cloudflare_pricing.go new file mode 100644 index 0000000..3a05285 --- /dev/null +++ b/scripts/import_cloudflare_pricing.go @@ -0,0 +1,58 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + var snapshotOnly bool + var snapshotOut string + var signatureOut string + + flag.StringVar(&url, "url", defaultCloudflarePricingFetchURL, "Cloudflare Workers AI 官方价格 markdown") + flag.StringVar(&fixture, "fixture", "", "Cloudflare Workers AI 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.BoolVar(&snapshotOnly, "snapshot-only", false, "仅抓取并落盘 Cloudflare 价格页快照与结构签名") + flag.StringVar(&snapshotOut, "snapshot-out", "", "Cloudflare 原始 markdown 快照输出路径") + flag.StringVar(&signatureOut, "signature-out", "", "Cloudflare 结构签名 JSON 输出路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := cloudflarePricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + SnapshotOnly: snapshotOnly, + SnapshotOut: snapshotOut, + SignatureOut: signatureOut, + } + + var db *sql.DB + var err error + if !cfg.DryRun && !cfg.SnapshotOnly { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runCloudflarePricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_cloudflare_pricing: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/import_cloudflare_pricing_test.go b/scripts/import_cloudflare_pricing_test.go new file mode 100644 index 0000000..8922c92 --- /dev/null +++ 
b/scripts/import_cloudflare_pricing_test.go @@ -0,0 +1,81 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseCloudflarePricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "cloudflare_pricing_sample.md")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseCloudflarePricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseCloudflarePricingCatalog 返回错误: %v", err) + } + if len(records) != 4 { + t.Fatalf("期望 4 条 Cloudflare 价格记录,实际 %d", len(records)) + } + if records[0].ModelID != "cloudflare-cf-meta-llama-3-2-1b-instruct" { + t.Fatalf("首条 modelID 错误: %q", records[0].ModelID) + } + if records[1].OutputPrice != 2.253 { + t.Fatalf("第二条输出价错误: %v", records[1].OutputPrice) + } + if records[3].ProviderName != "Moonshot AI" { + t.Fatalf("Kimi provider 归一化错误: %q", records[3].ProviderName) + } +} + +func TestRunCloudflarePricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runCloudflarePricingImport(cloudflarePricingImportConfig{ + URL: defaultCloudflarePricingFetchURL, + Fixture: filepath.Join("testdata", "cloudflare_pricing_sample.md"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runCloudflarePricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=cloudflare-pricing-import", + "models=4", + "operator=Cloudflare Workers AI", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} + +func TestParseCloudflarePricingCatalogAcceptsFlexibleSectionBoundary(t *testing.T) { + raw := ` +## LLM pricing + +| Model | Price in Tokens | Price in Neurons | +| --- | --- | --- | +| @cf/meta/llama-3.1-8b-instruct | $0.200 per M input tokens $1.000 per M output tokens | ignored | + +## Image generation pricing +` + + records, err := parseCloudflarePricingCatalog(raw) + if err != nil { 
+ t.Fatalf("parseCloudflarePricingCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条 Cloudflare 价格记录,实际 %d", len(records)) + } + if records[0].ModelName != "@cf/meta/llama-3.1-8b-instruct" { + t.Fatalf("模型名错误: %q", records[0].ModelName) + } +} diff --git a/scripts/import_perplexity_pricing.go b/scripts/import_perplexity_pricing.go new file mode 100644 index 0000000..e5c01dd --- /dev/null +++ b/scripts/import_perplexity_pricing.go @@ -0,0 +1,58 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + var snapshotOnly bool + var snapshotOut string + var signatureOut string + + flag.StringVar(&url, "url", defaultPerplexityPricingFetchURL, "Perplexity Agent API 官方模型价格 markdown") + flag.StringVar(&fixture, "fixture", "", "Perplexity 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.BoolVar(&snapshotOnly, "snapshot-only", false, "仅抓取并落盘 Perplexity 价格页快照与结构签名") + flag.StringVar(&snapshotOut, "snapshot-out", "", "Perplexity 原始 markdown 快照输出路径") + flag.StringVar(&signatureOut, "signature-out", "", "Perplexity 结构签名 JSON 输出路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := perplexityPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + SnapshotOnly: snapshotOnly, + SnapshotOut: snapshotOut, + SignatureOut: signatureOut, + } + + var db *sql.DB + var err error + if !cfg.DryRun && !cfg.SnapshotOnly { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runPerplexityPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_perplexity_pricing: %v\n", err) + os.Exit(1) + } +} diff --git 
a/scripts/import_perplexity_pricing_test.go b/scripts/import_perplexity_pricing_test.go new file mode 100644 index 0000000..d5ad075 --- /dev/null +++ b/scripts/import_perplexity_pricing_test.go @@ -0,0 +1,80 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParsePerplexityPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "perplexity_pricing_sample.md")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parsePerplexityPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parsePerplexityPricingCatalog 返回错误: %v", err) + } + if len(records) != 5 { + t.Fatalf("期望 5 条 Perplexity 价格记录,实际 %d", len(records)) + } + if records[0].ModelID != "perplexity-perplexity-sonar" { + t.Fatalf("首条 modelID 错误: %q", records[0].ModelID) + } + if records[1].ProviderName != "Anthropic" { + t.Fatalf("Anthropic provider 归一化错误: %q", records[1].ProviderName) + } + if records[3].ModelSourceURL != "https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview" { + t.Fatalf("Gemini 模型文档链接错误: %q", records[3].ModelSourceURL) + } +} + +func TestRunPerplexityPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runPerplexityPricingImport(perplexityPricingImportConfig{ + URL: defaultPerplexityPricingFetchURL, + Fixture: filepath.Join("testdata", "perplexity_pricing_sample.md"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runPerplexityPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + "source=perplexity-pricing-import", + "models=5", + "operator=Perplexity API", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} + +func TestParsePerplexityPricingCatalogAcceptsExtraColumnsAndDocAtTail(t *testing.T) { + raw := "" + + "\n# Models\n" + + "\n| Model | Family | Input Price | Output Price | Notes | 
Provider Documentation |\n" + + "| --- | --- | --- | --- | --- | --- |\n" + + "| `openai/gpt-5.5` | GPT-5 | \\$1.25 / 1M tokens | \\$10.00 / 1M tokens | flagship | [GPT-5.5](https://platform.openai.com/docs/models/gpt-5.5) |\n" + + records, err := parsePerplexityPricingCatalog(raw) + if err != nil { + t.Fatalf("parsePerplexityPricingCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条 Perplexity 价格记录,实际 %d", len(records)) + } + if records[0].ModelSourceURL != "https://platform.openai.com/docs/models/gpt-5.5" { + t.Fatalf("文档链接错误: %q", records[0].ModelSourceURL) + } + if records[0].InputPrice != 1.25 || records[0].OutputPrice != 10 { + t.Fatalf("价格解析错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } +} diff --git a/scripts/import_vertex_pricing.go b/scripts/import_vertex_pricing.go new file mode 100644 index 0000000..9073537 --- /dev/null +++ b/scripts/import_vertex_pricing.go @@ -0,0 +1,58 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var dryRun bool + var timeoutSeconds int + var snapshotOnly bool + var snapshotOut string + var signatureOut string + + flag.StringVar(&url, "url", defaultVertexPricingURL, "Vertex AI 官方价格页") + flag.StringVar(&fixture, "fixture", "", "Vertex AI 价格样例文件") + flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") + flag.BoolVar(&snapshotOnly, "snapshot-only", false, "仅抓取并落盘 Vertex 价格页快照与结构签名") + flag.StringVar(&snapshotOut, "snapshot-out", "", "Vertex 原始 HTML 快照输出路径") + flag.StringVar(&signatureOut, "signature-out", "", "Vertex 结构签名 JSON 输出路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.Parse() + + cfg := vertexPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: dryRun, + Timeout: time.Duration(timeoutSeconds) * time.Second, + SnapshotOnly: snapshotOnly, + SnapshotOut: snapshotOut, + SignatureOut: signatureOut, + } + + var 
db *sql.DB + var err error + if !cfg.DryRun && !cfg.SnapshotOnly { + db, err = subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + } + + if err := runVertexPricingImport(cfg, db, os.Stdout); err != nil { + fmt.Fprintf(os.Stderr, "import_vertex_pricing: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/import_vertex_pricing_test.go b/scripts/import_vertex_pricing_test.go new file mode 100644 index 0000000..2c21ea0 --- /dev/null +++ b/scripts/import_vertex_pricing_test.go @@ -0,0 +1,171 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestParseVertexPricingCatalogBuildsRecords(t *testing.T) { + raw, err := os.ReadFile(filepath.Join("testdata", "vertex_pricing_sample.html")) + if err != nil { + t.Fatalf("读取 fixture 失败: %v", err) + } + + records, err := parseVertexPricingCatalog(string(raw)) + if err != nil { + t.Fatalf("parseVertexPricingCatalog 返回错误: %v", err) + } + if len(records) != 4 { + t.Fatalf("期望 4 条 Vertex 价格记录,实际 %d", len(records)) + } + if records[0].ModelName != "Gemini 3.1 Pro Preview" { + t.Fatalf("首条模型名错误: %q", records[0].ModelName) + } + if records[1].InputPrice != 0.5 || records[1].OutputPrice != 3 { + t.Fatalf("Gemini 3.1 Flash Image 定价错误: %v / %v", records[1].InputPrice, records[1].OutputPrice) + } + if records[2].InputPrice != 0.25 || records[2].OutputPrice != 1.5 { + t.Fatalf("Gemini 3.1 Flash-Lite 定价错误: %v / %v", records[2].InputPrice, records[2].OutputPrice) + } +} + +func TestRunVertexPricingImportDryRunPrintsSummary(t *testing.T) { + var out bytes.Buffer + err := runVertexPricingImport(vertexPricingImportConfig{ + URL: defaultVertexPricingURL, + Fixture: filepath.Join("testdata", "vertex_pricing_sample.html"), + DryRun: true, + }, nil, &out) + if err != nil { + t.Fatalf("runVertexPricingImport 返回错误: %v", err) + } + output := out.String() + for _, want := range []string{ + 
"source=vertex-pricing-import", + "models=4", + "operator=Google Cloud Vertex AI", + "dry_run=true", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} + +func TestParseVertexPricingCatalogAcceptsGenericStandardTableMarkup(t *testing.T) { + raw := ` +

Gemini 2.5

+
+

Standard

+ + + + + + + + + + + + + + + + + + + +
ModelTypePrice
Gemini 2.5 Flash
Input (text, image, video)$0.30
Text output$2.50
+
+` + + records, err := parseVertexPricingCatalog(raw) + if err != nil { + t.Fatalf("parseVertexPricingCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条 Vertex 价格记录,实际 %d", len(records)) + } + if records[0].ModelName != "Gemini 2.5 Flash" { + t.Fatalf("模型名错误: %q", records[0].ModelName) + } + if records[0].InputPrice != 0.3 || records[0].OutputPrice != 2.5 { + t.Fatalf("价格解析错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } +} + +func TestParseVertexPricingCatalogFallsBackToStandardTextBlocks(t *testing.T) { + raw := ` +
### Standard
+
Model Type Price (/1M tokens) <= 200K input tokens Price (/1M tokens) > 200K input tokens
+
Gemini 2.5
+
Flash
+
Input (text, image, video) $0.54 $0.54 $0.05 $0.05
+
Audio Input $1.80 $1.80 $0.18 $0.18
+
Text output (response and reasoning) $4.50 $4.50 N/A N/A
+
### Flex/Batch
+` + + records, err := parseVertexPricingCatalog(raw) + if err != nil { + t.Fatalf("parseVertexPricingCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条 Vertex 价格记录,实际 %d", len(records)) + } + if records[0].ModelName != "Gemini 2.5 Flash" { + t.Fatalf("模型名错误: %q", records[0].ModelName) + } + if records[0].InputPrice != 0.54 || records[0].OutputPrice != 4.5 { + t.Fatalf("价格解析错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } +} + +func TestParseVertexPricingCatalogSupportsChineseStandardTable(t *testing.T) { + raw := ` +

Gemini 3

+
+

标准

+ + + + + + + + + + + + + + + + + + + +
模型类型价格
Gemini 3 Pro 预览版
输入(文本、图片、视频、音频)$2
文本输出(回答和推理)$12
+
+` + + records, err := parseVertexPricingCatalog(raw) + if err != nil { + t.Fatalf("parseVertexPricingCatalog 返回错误: %v", err) + } + if len(records) != 1 { + t.Fatalf("期望 1 条记录,实际 %d", len(records)) + } + if records[0].ModelName != "Gemini 3 Pro 预览版" { + t.Fatalf("模型名错误: %q", records[0].ModelName) + } + if records[0].InputPrice != 2 || records[0].OutputPrice != 12 { + t.Fatalf("价格解析错误: %v / %v", records[0].InputPrice, records[0].OutputPrice) + } +} diff --git a/scripts/live_pricing_smoke_runner.go b/scripts/live_pricing_smoke_runner.go new file mode 100644 index 0000000..ad70559 --- /dev/null +++ b/scripts/live_pricing_smoke_runner.go @@ -0,0 +1,222 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "flag" + "fmt" + "io" + "os" + "strconv" + "strings" + "time" +) + +type pricingSmokeCheck struct { + Name string + URL string + Run func() (string, error) +} + +type pricingSmokeSummary struct { + Source string + ModelCount int + Operator string +} + +type pricingSmokeResult struct { + Name string + URL string + Source string + Operator string + ModelCount int + DurationMS int64 + Success bool + Error string +} + +func main() { + loadSubscriptionImportEnv() + + var timeoutSeconds int + var vertexURL string + var cloudflareURL string + var perplexityURL string + var vertexFixture string + var cloudflareFixture string + var perplexityFixture string + + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.StringVar(&vertexURL, "vertex-url", defaultVertexPricingURL, "Vertex AI 官方价格页") + flag.StringVar(&cloudflareURL, "cloudflare-url", defaultCloudflarePricingFetchURL, "Cloudflare Workers AI 官方价格页") + flag.StringVar(&perplexityURL, "perplexity-url", defaultPerplexityPricingFetchURL, "Perplexity API 官方模型页") + flag.StringVar(&vertexFixture, "vertex-fixture", "", "Vertex AI fixture 文件") + flag.StringVar(&cloudflareFixture, "cloudflare-fixture", "", "Cloudflare fixture 文件") + flag.StringVar(&perplexityFixture, "perplexity-fixture", "", "Perplexity fixture 
文件") + flag.Parse() + + timeout := time.Duration(timeoutSeconds) * time.Second + checks := []pricingSmokeCheck{ + buildVertexSmokeCheck(vertexURL, vertexFixture, timeout), + buildCloudflareSmokeCheck(cloudflareURL, cloudflareFixture, timeout), + buildPerplexitySmokeCheck(perplexityURL, perplexityFixture, timeout), + } + results := runPricingSmokeChecks(checks, time.Now) + renderPricingSmokeTextReport(os.Stdout, results, time.Now()) + if hasFailedPricingSmoke(results) { + os.Exit(1) + } +} + +func buildVertexSmokeCheck(url, fixture string, timeout time.Duration) pricingSmokeCheck { + return pricingSmokeCheck{ + Name: "Vertex", + URL: url, + Run: func() (string, error) { + var out bytes.Buffer + err := runVertexPricingImport(vertexPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: true, + Timeout: timeout, + }, nil, &out) + return strings.TrimSpace(out.String()), err + }, + } +} + +func buildCloudflareSmokeCheck(url, fixture string, timeout time.Duration) pricingSmokeCheck { + return pricingSmokeCheck{ + Name: "Cloudflare", + URL: url, + Run: func() (string, error) { + var out bytes.Buffer + err := runCloudflarePricingImport(cloudflarePricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: true, + Timeout: timeout, + }, nil, &out) + return strings.TrimSpace(out.String()), err + }, + } +} + +func buildPerplexitySmokeCheck(url, fixture string, timeout time.Duration) pricingSmokeCheck { + return pricingSmokeCheck{ + Name: "Perplexity", + URL: url, + Run: func() (string, error) { + var out bytes.Buffer + err := runPerplexityPricingImport(perplexityPricingImportConfig{ + URL: url, + Fixture: fixture, + DryRun: true, + Timeout: timeout, + }, nil, &out) + return strings.TrimSpace(out.String()), err + }, + } +} + +func runPricingSmokeChecks(checks []pricingSmokeCheck, now func() time.Time) []pricingSmokeResult { + results := make([]pricingSmokeResult, 0, len(checks)) + for _, check := range checks { + start := now() + result := pricingSmokeResult{ + Name: 
check.Name, + URL: check.URL, + } + summaryLine, err := check.Run() + result.DurationMS = now().Sub(start).Milliseconds() + if err != nil { + result.Error = err.Error() + results = append(results, result) + continue + } + summary, ok := parsePricingSmokeSummaryLine(summaryLine) + if !ok { + result.Error = fmt.Sprintf("invalid dry-run summary: %q", summaryLine) + results = append(results, result) + continue + } + result.Success = true + result.Source = summary.Source + result.Operator = summary.Operator + result.ModelCount = summary.ModelCount + results = append(results, result) + } + return results +} + +func parsePricingSmokeSummaryLine(line string) (pricingSmokeSummary, bool) { + fields := strings.Fields(strings.TrimSpace(line)) + if len(fields) == 0 { + return pricingSmokeSummary{}, false + } + + source := "" + modelCount := -1 + operatorParts := make([]string, 0) + capturingOperator := false + for _, field := range fields { + switch { + case strings.HasPrefix(field, "source="): + source = strings.TrimPrefix(field, "source=") + capturingOperator = false + case strings.HasPrefix(field, "models="): + value := strings.TrimPrefix(field, "models=") + parsed, err := strconv.Atoi(value) + if err != nil { + return pricingSmokeSummary{}, false + } + modelCount = parsed + capturingOperator = false + case strings.HasPrefix(field, "operator="): + capturingOperator = true + operatorParts = append(operatorParts, strings.TrimPrefix(field, "operator=")) + case strings.HasPrefix(field, "dry_run="), strings.HasPrefix(field, "table_rows="): + capturingOperator = false + default: + if capturingOperator { + operatorParts = append(operatorParts, field) + } + } + } + if source == "" || modelCount < 0 || len(operatorParts) == 0 { + return pricingSmokeSummary{}, false + } + return pricingSmokeSummary{ + Source: source, + ModelCount: modelCount, + Operator: strings.Join(operatorParts, " "), + }, true +} + +func renderPricingSmokeTextReport(out io.Writer, results []pricingSmokeResult, now 
time.Time) { + passed := 0 + failed := 0 + _, _ = fmt.Fprintf(out, "=== Live Pricing Smoke Report (%s) ===\n", now.Format("2006-01-02 15:04")) + for _, result := range results { + if result.Success { + passed++ + _, _ = fmt.Fprintf(out, "PASS %s source=%s models=%d operator=%s duration_ms=%d url=%s\n", + result.Name, result.Source, result.ModelCount, result.Operator, result.DurationMS, result.URL) + continue + } + failed++ + _, _ = fmt.Fprintf(out, "FAIL %s duration_ms=%d error=%s url=%s\n", + result.Name, result.DurationMS, result.Error, result.URL) + } + _, _ = fmt.Fprintf(out, "Summary: %d passed, %d failed\n", passed, failed) +} + +func hasFailedPricingSmoke(results []pricingSmokeResult) bool { + for _, result := range results { + if !result.Success { + return true + } + } + return false +} diff --git a/scripts/live_pricing_smoke_runner_test.go b/scripts/live_pricing_smoke_runner_test.go new file mode 100644 index 0000000..0e209b8 --- /dev/null +++ b/scripts/live_pricing_smoke_runner_test.go @@ -0,0 +1,104 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "errors" + "strings" + "testing" + "time" +) + +func TestRunPricingSmokeChecksCollectsSummariesAndFailures(t *testing.T) { + checks := []pricingSmokeCheck{ + { + Name: "Vertex", + URL: "https://vertex.example/pricing", + Run: func() (string, error) { + return "source=vertex-pricing-import models=4 operator=Google Cloud Vertex AI dry_run=true", nil + }, + }, + { + Name: "Cloudflare", + URL: "https://cloudflare.example/pricing", + Run: func() (string, error) { + return "", errors.New("fetch failed") + }, + }, + } + + results := runPricingSmokeChecks(checks, func() time.Time { + return time.UnixMilli(1710000000000) + }) + if len(results) != 2 { + t.Fatalf("期望 2 条结果,实际 %d", len(results)) + } + if !results[0].Success { + t.Fatalf("期望 Vertex 成功,结果: %+v", results[0]) + } + if results[0].ModelCount != 4 { + t.Fatalf("期望 Vertex 模型数为 4,实际 %d", results[0].ModelCount) + } + if results[0].Operator != 
"Google Cloud Vertex AI" { + t.Fatalf("期望 Vertex operator 解析成功,实际 %q", results[0].Operator) + } + if results[1].Success { + t.Fatalf("期望 Cloudflare 失败,结果: %+v", results[1]) + } + if !strings.Contains(results[1].Error, "fetch failed") { + t.Fatalf("期望 Cloudflare 错误被透传,实际 %q", results[1].Error) + } +} + +func TestRenderPricingSmokeTextReportPrintsSummary(t *testing.T) { + results := []pricingSmokeResult{ + { + Name: "Vertex", + URL: "https://vertex.example/pricing", + Source: "vertex-pricing-import", + Operator: "Google Cloud Vertex AI", + ModelCount: 4, + Success: true, + DurationMS: 123, + }, + { + Name: "Perplexity", + URL: "https://perplexity.example/models", + Success: false, + DurationMS: 456, + Error: "unexpected perplexity pricing content", + }, + } + + var out bytes.Buffer + renderPricingSmokeTextReport(&out, results, time.Date(2026, 5, 15, 16, 4, 0, 0, time.FixedZone("CST", 8*3600))) + text := out.String() + for _, want := range []string{ + "=== Live Pricing Smoke Report (2026-05-15 16:04) ===", + "PASS Vertex source=vertex-pricing-import models=4 operator=Google Cloud Vertex AI duration_ms=123", + "FAIL Perplexity duration_ms=456 error=unexpected perplexity pricing content", + "Summary: 1 passed, 1 failed", + } { + if !strings.Contains(text, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, text) + } + } +} + +func TestParsePricingSmokeSummaryLine(t *testing.T) { + line := "source=cloudflare-pricing-import models=4 operator=Cloudflare Workers AI dry_run=true" + summary, ok := parsePricingSmokeSummaryLine(line) + if !ok { + t.Fatalf("期望成功解析 summary line") + } + if summary.Source != "cloudflare-pricing-import" { + t.Fatalf("source 错误: %q", summary.Source) + } + if summary.ModelCount != 4 { + t.Fatalf("models 错误: %d", summary.ModelCount) + } + if summary.Operator != "Cloudflare Workers AI" { + t.Fatalf("operator 错误: %q", summary.Operator) + } +} diff --git a/scripts/materialize_daily_signals.go b/scripts/materialize_daily_signals.go new file mode 100644 index 
// signalModelEvent is one entry of the daily signal event list. The list is
// JSON-encoded into the top_events column by upsertDailySignalSnapshot, so the
// json tags below are part of the stored format.
type signalModelEvent struct {
	// EventType is the event category. The loaders in this file emit
	// "new_model", "official_release", "promo_campaign", "price_increase"
	// and "price_cut".
	EventType    string `json:"event_type"`
	ModelName    string `json:"model_name"`
	ProviderName string `json:"provider_name"`
	OperatorName string `json:"operator_name"`
	// Audience, TrustLabel, SourceKindLabel, EvidenceDetail, Baseline and
	// Summary are human-readable strings produced by the loaders.
	Audience        string `json:"audience"`
	TrustLabel      string `json:"trust_label"`
	SourceKindLabel string `json:"source_kind_label"`
	PrimarySource   string `json:"primary_source"`
	// UpdatedAt is a preformatted timestamp string ("2006-01-02 15:04" layout
	// in the database-backed loaders).
	UpdatedAt      string `json:"updated_at"`
	EvidenceDetail string `json:"evidence_detail"`
	Baseline       string `json:"baseline"`
	Summary        string `json:"summary"`
	Currency       string `json:"currency"`
	// The Old*/New* prices are filled for price events; new-model events set
	// only the New* pair, other event types leave them at zero.
	OldInputPrice  float64 `json:"old_input_price"`
	NewInputPrice  float64 `json:"new_input_price"`
	OldOutputPrice float64 `json:"old_output_price"`
	NewOutputPrice float64 `json:"new_output_price"`
	// PriceChangePct is the signed percentage computed by
	// signalSignedPriceChangePct; it is set only for price events.
	PriceChangePct float64 `json:"price_change_pct"`
	// Priority orders events in loadSignalModelEvents: higher values sort first.
	Priority int `json:"priority"`
}
// loadSignalEnv applies KEY=VALUE pairs from ".env.local" and then ".env" in
// the working directory to the process environment.
//
// Unreadable files are skipped. Blank lines, lines starting with '#', lines
// without '=' and lines with an empty key are ignored. Surrounding quote
// characters are stripped from values. A variable that is already set is never
// overwritten, so the real environment wins over both files and ".env.local"
// wins over ".env".
func loadSignalEnv() {
	apply := func(raw string) {
		entry := strings.TrimSpace(raw)
		if entry == "" || strings.HasPrefix(entry, "#") {
			return
		}
		name, val, found := strings.Cut(entry, "=")
		if !found {
			return
		}
		name = strings.TrimSpace(name)
		val = strings.Trim(strings.TrimSpace(val), `"'`)
		if name == "" {
			return
		}
		if _, already := os.LookupEnv(name); !already {
			// Best effort: a failing Setenv is not fatal for the script.
			_ = os.Setenv(name, val)
		}
	}

	for _, file := range []string{".env.local", ".env"} {
		content, err := os.ReadFile(file)
		if err != nil {
			continue
		}
		for _, raw := range strings.Split(string(content), "\n") {
			apply(raw)
		}
	}
}
// signalDateValue returns the default signal date: the trimmed REPORT_DATE
// environment variable when it is non-blank, otherwise the current local date
// formatted as YYYY-MM-DD.
func signalDateValue() string {
	override := strings.TrimSpace(os.Getenv("REPORT_DATE"))
	if override == "" {
		return time.Now().Format("2006-01-02")
	}
	return override
}
!= nil { + return fmt.Errorf("marshal top_events: %w", err) + } + + _, err = db.Exec( + `INSERT INTO daily_signal_snapshot ( + signal_date, status, new_models, price_changes, + official_free, aggregator_free, unknown_free, + event_count, page_mode, event_type_counts, top_events, source_audit + ) VALUES ( + $1::date, $2, $3, $4, + $5, $6, $7, + $8, $9, $10::jsonb, $11::jsonb, $12 + ) + ON CONFLICT (signal_date) + DO UPDATE SET + status = EXCLUDED.status, + new_models = EXCLUDED.new_models, + price_changes = EXCLUDED.price_changes, + official_free = EXCLUDED.official_free, + aggregator_free = EXCLUDED.aggregator_free, + unknown_free = EXCLUDED.unknown_free, + event_count = EXCLUDED.event_count, + page_mode = EXCLUDED.page_mode, + event_type_counts = EXCLUDED.event_type_counts, + top_events = EXCLUDED.top_events, + source_audit = EXCLUDED.source_audit, + generated_at = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP`, + snapshot.SignalDate, snapshot.Status, snapshot.Signals.NewModels, snapshot.Signals.PriceChanges, + snapshot.Signals.OfficialFree, snapshot.Signals.AggregatorFree, snapshot.Signals.UnknownFree, + snapshot.EventCount, snapshot.PageMode, string(eventTypeCounts), string(topEvents), signalNullIfBlank(snapshot.SourceAudit), + ) + if err != nil { + return fmt.Errorf("upsert daily_signal_snapshot: %w", err) + } + return nil +} + +func loadSignalDailySignals(db *sql.DB, date string) (signalDailySignals, error) { + signals := signalDailySignals{} + + if err := db.QueryRow(` + SELECT COUNT(*) + FROM models + WHERE deleted_at IS NULL + AND DATE(created_at) = $1::date + `, date).Scan(&signals.NewModels); err != nil { + return signals, err + } + + if err := db.QueryRow(` + SELECT COUNT(*) + FROM pricing_history + WHERE DATE(changed_at) = $1::date + `, date).Scan(&signals.PriceChanges); err != nil { + return signals, err + } + return signals, nil +} + +func loadSignalFreeBreakdown(db *sql.DB) (signalDailySignals, error) { + rows, err := db.Query(` + WITH 
latest_prices AS ( + SELECT + rp.model_id, + COALESCE(o.name, 'Unknown') AS operator_name, + COALESCE(o.type, 'reseller') AS operator_type, + rp.currency, + rp.input_price_per_mtok, + rp.output_price_per_mtok, + rp.is_free, + ROW_NUMBER() OVER ( + PARTITION BY rp.model_id + ORDER BY rp.effective_date DESC NULLS LAST, rp.id DESC + ) AS rn + FROM region_pricing rp + LEFT JOIN operator o ON rp.operator_id = o.id + ) + SELECT + COALESCE(NULLIF(m.name, ''), m.external_id) AS model_name, + COALESCE(mp.name, split_part(m.external_id, '/', 1)) AS provider_name, + COALESCE(mp.country, 'unknown') AS provider_country, + COALESCE(m.context_length, 0) AS context_length, + COALESCE(lp.input_price_per_mtok, 0) AS input_price, + COALESCE(lp.output_price_per_mtok, 0) AS output_price, + COALESCE(lp.currency, 'USD') AS currency, + COALESCE(lp.operator_name, 'OpenRouter') AS operator_name, + COALESCE(lp.operator_type, 'reseller') AS operator_type + FROM models m + LEFT JOIN model_provider mp ON m.provider_id = mp.id + LEFT JOIN latest_prices lp ON lp.model_id = m.id AND lp.rn = 1 + WHERE m.deleted_at IS NULL + AND COALESCE(lp.is_free, false) = true + `) + if err != nil { + return signalDailySignals{}, err + } + defer rows.Close() + + signals := signalDailySignals{} + for rows.Next() { + var model signalModelInfo + if err := rows.Scan( + &model.Name, + &model.ProviderName, + &model.ProviderCountry, + &model.ContextLength, + &model.InputPrice, + &model.OutputPrice, + &model.Currency, + &model.OperatorName, + &model.OperatorType, + ); err != nil { + return signalDailySignals{}, err + } + switch classifySignalFreeSource(model) { + case "官方免费": + signals.OfficialFree++ + case "聚合免费": + signals.AggregatorFree++ + default: + signals.UnknownFree++ + } + } + return signals, rows.Err() +} + +func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error) { + var events []signalModelEvent + + newModelEvents, err := loadSignalNewModelEvents(db, date) + if err != nil { + return 
nil, err + } + events = append(events, newModelEvents...) + + releaseEvents, err := loadSignalOfficialReleaseEvents(db, date) + if err != nil { + return nil, err + } + events = append(events, releaseEvents...) + + promoEvents, err := loadSignalPromoCampaignEvents(date) + if err != nil { + return nil, err + } + events = append(events, promoEvents...) + + priceEvents, err := loadSignalPriceChangeEvents(db, date) + if err != nil { + return nil, err + } + events = append(events, priceEvents...) + + sort.Slice(events, func(i, j int) bool { + if events[i].Priority != events[j].Priority { + return events[i].Priority > events[j].Priority + } + return events[i].ModelName < events[j].ModelName + }) + + return dedupeSignalEvents(events), nil +} + +func loadSignalPromoCampaignEvents(date string) ([]signalModelEvent, error) { + path, err := resolveSignalPromoCampaignDataPath() + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + body, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + var definitions []signalPromoCampaignDefinition + if err := json.Unmarshal(body, &definitions); err != nil { + return nil, err + } + + events := make([]signalModelEvent, 0) + for _, definition := range definitions { + if definition.Date != date { + continue + } + events = append(events, signalModelEvent{ + EventType: "promo_campaign", + ModelName: definition.ModelName, + ProviderName: definition.ProviderName, + OperatorName: definition.OperatorName, + Audience: signalFirstNonEmpty(definition.Audience, "适合计划利用活动窗口压低成本的团队"), + TrustLabel: signalFirstNonEmpty(definition.TrustLabel, "官方来源 / 一级证据"), + SourceKindLabel: signalFirstNonEmpty(definition.SourceKindLabel, "官方活动页"), + PrimarySource: definition.PrimarySource, + UpdatedAt: signalFormatEventUpdatedAt("", definition.Date), + EvidenceDetail: definition.EvidenceDetail, + Baseline: signalFirstNonEmpty(definition.Baseline, "活动窗口开启"), + Summary: definition.Summary, + Priority: 
signalMaxInt(definition.Priority, 115), + }) + } + return events, nil +} + +func resolveSignalPromoCampaignDataPath() (string, error) { + candidates := []string{ + filepathJoin("scripts", "testdata", "report_promo_campaigns.json"), + filepathJoin("testdata", "report_promo_campaigns.json"), + } + for _, candidate := range candidates { + if _, err := os.Stat(candidate); err == nil { + return candidate, nil + } + } + return "", os.ErrNotExist +} + +func loadSignalOfficialReleaseEvents(db *sql.DB, date string) ([]signalModelEvent, error) { + rows, err := db.Query(` + WITH latest_prices AS ( + SELECT + rp.model_id, + COALESCE(o.name, 'Unknown') AS operator_name, + COALESCE(o.type, 'reseller') AS operator_type, + rp.currency, + ROW_NUMBER() OVER ( + PARTITION BY rp.model_id + ORDER BY rp.effective_date DESC NULLS LAST, rp.id DESC + ) AS rn + FROM region_pricing rp + LEFT JOIN operator o ON rp.operator_id = o.id + ) + SELECT + COALESCE(NULLIF(m.name, ''), m.external_id) AS model_name, + COALESCE(mp.name, split_part(m.external_id, '/', 1)) AS provider_name, + COALESCE(lp.operator_name, 'Unknown') AS operator_name, + COALESCE(lp.operator_type, 'reseller') AS operator_type, + COALESCE(m.source_url, '') AS source_url, + COALESCE(m.date_confidence, 'unknown') AS date_confidence, + COALESCE(m.date_source_kind, 'unknown') AS date_source_kind, + COALESCE(mp.country, 'unknown') AS provider_country, + COALESCE(m.release_date, m.created_at::date) AS release_date, + COALESCE(lp.currency, 'USD') AS currency + FROM models m + LEFT JOIN model_provider mp ON m.provider_id = mp.id + LEFT JOIN latest_prices lp ON lp.model_id = m.id AND lp.rn = 1 + WHERE m.deleted_at IS NULL + AND m.release_date = $1::date + AND COALESCE(m.source_url, '') <> '' + AND COALESCE(lp.operator_type, 'reseller') IN ('official', 'cloud') + ORDER BY m.release_date DESC, m.id DESC + LIMIT 8 + `, date) + if err != nil { + return nil, err + } + defer rows.Close() + + var events []signalModelEvent + for rows.Next() { + 
var ( + modelName string + providerName string + operatorName string + operatorType string + sourceURL string + dateConfidence string + dateSourceKind string + providerCountry string + releaseDate time.Time + currency string + ) + if err := rows.Scan( + &modelName, + &providerName, + &operatorName, + &operatorType, + &sourceURL, + &dateConfidence, + &dateSourceKind, + &providerCountry, + &releaseDate, + ¤cy, + ); err != nil { + return nil, err + } + + model := signalModelInfo{ + Name: modelName, + ProviderName: providerName, + ProviderCountry: providerCountry, + Currency: currency, + OperatorName: operatorName, + OperatorType: operatorType, + } + + events = append(events, signalModelEvent{ + EventType: "official_release", + ModelName: modelName, + ProviderName: providerName, + OperatorName: operatorName, + Audience: "适合需要复查默认选型与路线图判断的团队", + TrustLabel: buildSignalReleaseTrustLabel(model, dateConfidence), + SourceKindLabel: buildSignalReleaseSourceKindLabel(dateSourceKind, dateConfidence), + PrimarySource: sourceURL, + UpdatedAt: releaseDate.Format("2006-01-02 15:04"), + EvidenceDetail: buildSignalReleaseEvidenceDetail(dateSourceKind, dateConfidence), + Baseline: "官方首次发布", + Summary: fmt.Sprintf("%s 官方发布新模型,值得优先复查默认选型。", providerName), + Currency: currency, + Priority: 120, + }) + } + return events, rows.Err() +} + +func loadSignalNewModelEvents(db *sql.DB, date string) ([]signalModelEvent, error) { + rows, err := db.Query(` + WITH latest_prices AS ( + SELECT + rp.model_id, + COALESCE(o.name, 'Unknown') AS operator_name, + COALESCE(o.type, 'reseller') AS operator_type, + rp.currency, + rp.input_price_per_mtok, + rp.output_price_per_mtok, + rp.is_free, + ROW_NUMBER() OVER ( + PARTITION BY rp.model_id + ORDER BY rp.effective_date DESC NULLS LAST, rp.id DESC + ) AS rn + FROM region_pricing rp + LEFT JOIN operator o ON rp.operator_id = o.id + ) + SELECT + COALESCE(NULLIF(m.name, ''), m.external_id) AS model_name, + COALESCE(mp.name, split_part(m.external_id, '/', 1)) AS 
provider_name, + COALESCE(lp.operator_name, 'OpenRouter') AS operator_name, + COALESCE(lp.operator_type, 'reseller') AS operator_type, + COALESCE(lp.currency, 'USD') AS currency, + COALESCE(lp.input_price_per_mtok, 0) AS input_price, + COALESCE(lp.output_price_per_mtok, 0) AS output_price, + COALESCE(lp.is_free, false) AS is_free, + COALESCE(m.context_length, 0) AS context_length, + COALESCE(mp.country, 'unknown') AS provider_country, + m.created_at + FROM models m + LEFT JOIN model_provider mp ON m.provider_id = mp.id + LEFT JOIN latest_prices lp ON lp.model_id = m.id AND lp.rn = 1 + WHERE m.deleted_at IS NULL + AND DATE(m.created_at) = $1::date + ORDER BY m.created_at DESC, m.id DESC + LIMIT 8 + `, date) + if err != nil { + return nil, err + } + defer rows.Close() + + var events []signalModelEvent + for rows.Next() { + var model signalModelInfo + var createdAt time.Time + if err := rows.Scan( + &model.Name, + &model.ProviderName, + &model.OperatorName, + &model.OperatorType, + &model.Currency, + &model.InputPrice, + &model.OutputPrice, + &model.IsFree, + &model.ContextLength, + &model.ProviderCountry, + &createdAt, + ); err != nil { + return nil, err + } + + summary := "新模型进入情报池,值得重新评估当前默认选择。" + if model.IsFree { + summary = fmt.Sprintf("新模型首日可免费试用,需注意其免费来源属于%s。", classifySignalFreeSource(model)) + } else if model.ContextLength >= 1024*256 { + summary = fmt.Sprintf("新模型带来 %s 长上下文,值得复查 Agent 和代码场景。", signalFormatContextWindowCompact(model.ContextLength)) + } + + events = append(events, signalModelEvent{ + EventType: "new_model", + ModelName: model.Name, + ProviderName: model.ProviderName, + OperatorName: model.OperatorName, + Audience: "适合想尽快验证新模型价值的选型读者", + TrustLabel: buildSignalTrustLabel(model), + SourceKindLabel: "模型快照", + PrimarySource: buildSignalPrimarySource("region_pricing", model.OperatorName), + UpdatedAt: createdAt.Format("2006-01-02 15:04"), + EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照", + Baseline: "首次出现", + Summary: summary, + Currency: 
model.Currency, + NewInputPrice: model.InputPrice, + NewOutputPrice: model.OutputPrice, + Priority: 85 + signalMinInt(model.ContextLength/(1024*128), 10), + }) + } + return events, rows.Err() +} + +func loadSignalPriceChangeEvents(db *sql.DB, date string) ([]signalModelEvent, error) { + rows, err := db.Query(` + WITH latest_prices AS ( + SELECT + rp.model_id, + COALESCE(o.name, 'Unknown') AS operator_name, + COALESCE(o.type, 'reseller') AS operator_type, + ROW_NUMBER() OVER ( + PARTITION BY rp.model_id + ORDER BY rp.effective_date DESC NULLS LAST, rp.id DESC + ) AS rn + FROM region_pricing rp + LEFT JOIN operator o ON rp.operator_id = o.id + ) + SELECT + COALESCE(NULLIF(m.name, ''), m.external_id) AS model_name, + COALESCE(mp.name, split_part(m.external_id, '/', 1)) AS provider_name, + COALESCE(lp.operator_name, 'OpenRouter') AS operator_name, + COALESCE(lp.operator_type, 'reseller') AS operator_type, + ph.currency, + COALESCE(ph.old_input_price, 0), + COALESCE(ph.new_input_price, 0), + COALESCE(ph.old_output_price, 0), + COALESCE(ph.new_output_price, 0), + COALESCE(mp.country, 'unknown') AS provider_country, + ph.changed_at + FROM pricing_history ph + JOIN models m ON ph.model_id = m.id + LEFT JOIN model_provider mp ON m.provider_id = mp.id + LEFT JOIN latest_prices lp ON lp.model_id = m.id AND lp.rn = 1 + WHERE DATE(ph.changed_at) = $1::date + ORDER BY ph.changed_at DESC, ph.id DESC + LIMIT 16 + `, date) + if err != nil { + return nil, err + } + defer rows.Close() + + var events []signalModelEvent + for rows.Next() { + var ( + model signalModelInfo + oldInputPrice float64 + newInputPrice float64 + oldOutputPrice float64 + newOutputPrice float64 + changedAt time.Time + ) + if err := rows.Scan( + &model.Name, + &model.ProviderName, + &model.OperatorName, + &model.OperatorType, + &model.Currency, + &oldInputPrice, + &newInputPrice, + &oldOutputPrice, + &newOutputPrice, + &model.ProviderCountry, + &changedAt, + ); err != nil { + return nil, err + } + + changePct := 
signalSignedPriceChangePct(oldInputPrice, newInputPrice, oldOutputPrice, newOutputPrice) + if changePct == 0 { + continue + } + + eventType := "price_increase" + summary := "价格上调已足以影响默认成本,需要确认备用模型。" + if changePct < 0 { + eventType = "price_cut" + summary = "价格下降已足以影响默认选型,值得重新评估同类模型。" + } + + events = append(events, signalModelEvent{ + EventType: eventType, + ModelName: model.Name, + ProviderName: model.ProviderName, + OperatorName: model.OperatorName, + Audience: buildSignalPriceEventAudience(changePct), + TrustLabel: buildSignalTrustLabel(model), + SourceKindLabel: "价格快照", + PrimarySource: "pricing_history", + UpdatedAt: changedAt.Format("2006-01-02 15:04"), + EvidenceDetail: buildSignalPriceEvidenceDetail(changePct, oldInputPrice, newInputPrice, model.Currency), + Baseline: fmt.Sprintf("较昨日 %+.0f%%", changePct), + Summary: summary, + Currency: model.Currency, + OldInputPrice: oldInputPrice, + NewInputPrice: newInputPrice, + OldOutputPrice: oldOutputPrice, + NewOutputPrice: newOutputPrice, + PriceChangePct: changePct, + Priority: 70 + signalMinInt(int(signalAbs(changePct)), 25), + }) + } + return events, rows.Err() +} + +func summarizeSignalEventTypes(events []signalModelEvent) map[string]int { + counts := make(map[string]int) + for _, event := range events { + counts[event.EventType]++ + } + return counts +} + +func dedupeSignalEvents(events []signalModelEvent) []signalModelEvent { + seen := make(map[string]struct{}) + result := make([]signalModelEvent, 0, len(events)) + for _, event := range events { + key := event.EventType + "|" + event.ModelName + if _, exists := seen[key]; exists { + continue + } + seen[key] = struct{}{} + result = append(result, event) + } + return result +} + +func classifySignalFreeSource(model signalModelInfo) string { + switch model.OperatorType { + case "official", "cloud": + return "官方免费" + case "reseller": + if isSignalVerifiedAggregator(model.OperatorName) { + return "聚合免费" + } + } + return "待确认" +} + +func 
isSignalVerifiedAggregator(name string) bool { + switch strings.ToLower(strings.TrimSpace(name)) { + case "openrouter", "siliconflow", "fireworks", "groq": + return true + default: + return false + } +} + +func buildSignalPageMode(signals signalDailySignals, events []signalModelEvent) string { + if hasSignalEventType(events, "official_release") || hasSignalEventType(events, "promo_campaign") { + return "hot" + } + if signals.NewModels == 0 && signals.PriceChanges == 0 { + return "calm" + } + if signals.NewModels+signals.PriceChanges >= 3 { + return "hot" + } + return "standard" +} + +func hasSignalEventType(events []signalModelEvent, eventType string) bool { + for _, event := range events { + if event.EventType == eventType { + return true + } + } + return false +} + +func buildSignalTrustLabel(model signalModelInfo) string { + switch model.OperatorType { + case "official", "cloud": + return "官方来源" + case "reseller": + if isSignalVerifiedAggregator(model.OperatorName) { + return "聚合来源" + } + } + return "待验证来源" +} + +func buildSignalPrimarySource(sourceKind, operatorName string) string { + switch sourceKind { + case "region_pricing": + if operatorName == "" { + return "region_pricing" + } + return operatorName + " / region_pricing" + default: + return sourceKind + } +} + +func buildSignalPriceEvidenceDetail(changePct, oldPrice, newPrice float64, currency string) string { + direction := "上涨" + if changePct < 0 { + direction = "下降" + } + return fmt.Sprintf( + "pricing_history 记录到输入价格由 %s 调整为 %s,较昨日%s %.0f%%", + signalFormatPrice(oldPrice, currency), + signalFormatPrice(newPrice, currency), + direction, + signalAbs(changePct), + ) +} + +func buildSignalReleaseSourceKindLabel(dateSourceKind, dateConfidence string) string { + switch { + case dateSourceKind == "secondary_authoritative_report" || dateConfidence == "secondary_authoritative": + return "二级权威佐证发布" + case dateSourceKind == "official_announcement" && dateConfidence == "official_primary": + return "一级官方发布" + case 
dateSourceKind == "official_product_page": + return "官方产品页" + case dateSourceKind == "catalog_backfill": + return "目录回填" + default: + return "一级官方发布" + } +} + +func buildSignalReleaseEvidenceDetail(dateSourceKind, dateConfidence string) string { + switch { + case dateSourceKind == "secondary_authoritative_report" || dateConfidence == "secondary_authoritative": + return "models.release_date = 今日,发布日期采用次级权威报道佐证,模型来源页保留官方文档" + case dateSourceKind == "official_announcement" && dateConfidence == "official_primary": + return "models.release_date = 今日,且 source_url 指向官方发布页" + case dateSourceKind == "official_product_page": + return "models.release_date = 今日,来源页为官方产品页,发布日期置信度待确认" + case dateSourceKind == "catalog_backfill": + return "models.release_date = 今日,发布日期来自目录级元数据回填" + default: + return "models.release_date = 今日,且已记录发布日期证据元数据" + } +} + +func buildSignalReleaseTrustLabel(model signalModelInfo, dateConfidence string) string { + base := buildSignalTrustLabel(model) + switch dateConfidence { + case "official_primary": + return base + " / 一级证据" + case "secondary_authoritative": + return base + " / 二级佐证" + default: + return base + } +} + +func buildSignalPriceEventAudience(changePct float64) string { + if changePct < 0 { + return "适合以成本为先、准备趁降价重排默认选型的团队" + } + return "适合需要提前准备替代模型和预算回退方案的团队" +} + +func signalFormatEventUpdatedAt(value, fallbackDate string) string { + if strings.TrimSpace(value) != "" { + return value + } + if fallbackDate != "" { + return fallbackDate + " 00:00" + } + return "-" +} + +func signalFormatPrice(price float64, currency string) string { + if price <= 0 { + return "免费" + } + if currency == "CNY" { + if price < 1 { + return fmt.Sprintf("¥%.2f", price) + } + return fmt.Sprintf("¥%.1f", price) + } + cny := price * signalUSDToCNY + if cny < 1 { + return fmt.Sprintf("¥%.2f", cny) + } + return fmt.Sprintf("¥%.1f", cny) +} + +func signalFormatContextWindowCompact(value int) string { + if value <= 0 { + return "-" + } + if value%(1024*1024) == 0 { + 
// signalSignedChange returns the signed percentage change from oldValue to
// newValue. With a zero baseline there is nothing to divide by, so the result
// is 0 when newValue is also zero and a flat 100 otherwise.
func signalSignedChange(oldValue, newValue float64) float64 {
	switch {
	case oldValue != 0:
		return ((newValue - oldValue) / oldValue) * 100
	case newValue == 0:
		return 0
	default:
		return 100
	}
}
+ } +} + +func TestBuildSignalPageMode(t *testing.T) { + if got := buildSignalPageMode(signalDailySignals{}, nil); got != "calm" { + t.Fatalf("平静日 page_mode 错误: %q", got) + } + if got := buildSignalPageMode(signalDailySignals{NewModels: 2, PriceChanges: 1}, nil); got != "hot" { + t.Fatalf("高变化日 page_mode 错误: %q", got) + } + if got := buildSignalPageMode(signalDailySignals{}, []signalModelEvent{{EventType: "official_release"}}); got != "hot" { + t.Fatalf("官方发布日 page_mode 错误: %q", got) + } +} diff --git a/scripts/official_import_signature_audit_lib.go b/scripts/official_import_signature_audit_lib.go new file mode 100644 index 0000000..5c4b790 --- /dev/null +++ b/scripts/official_import_signature_audit_lib.go @@ -0,0 +1,111 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "fmt" + "os" + "strings" + "time" +) + +type officialImportSignatureAuditRecord struct { + SourceKey string + CheckedAt time.Time + Status string + DriftDetected bool + BaselineInitialized bool + SourceURL string + FixturePath string + SnapshotPath string + SignaturePath string + BaselinePath string + StructureSHA256 string + PreviousStructureSHA256 string + ByteSize int + SignaturePayload any + ErrorMessage string +} + +func persistOfficialImportSignatureAuditIfConfigured(record officialImportSignatureAuditRecord) error { + if strings.TrimSpace(os.Getenv("DATABASE_URL")) == "" { + return nil + } + db, err := subscriptionImportDB() + if err != nil { + return fmt.Errorf("open db for official import signature audit: %w", err) + } + defer db.Close() + if err := insertOfficialImportSignatureAudit(db, record); err != nil { + return fmt.Errorf("insert official import signature audit: %w", err) + } + return nil +} + +func insertOfficialImportSignatureAudit(db *sql.DB, record officialImportSignatureAuditRecord) error { + if db == nil { + return fmt.Errorf("official import signature audit db is nil") + } + + var signaturePayload any + if record.SignaturePayload != nil 
// officialImportSignatureAuditStatus maps the outcome of a signature check to
// its status string. Precedence, highest first: drift ("drift_detected"),
// baseline initialisation ("baseline_initialized"), a run error ("failed"),
// and finally "passed".
func officialImportSignatureAuditStatus(driftDetected bool, baselineInitialized bool, runErr error) string {
	if driftDetected {
		return "drift_detected"
	}
	if baselineInitialized {
		return "baseline_initialized"
	}
	if runErr != nil {
		return "failed"
	}
	return "passed"
}

// errorMessageText returns the whitespace-trimmed message of err, or the empty
// string when err is nil.
func errorMessageText(err error) string {
	if err != nil {
		return strings.TrimSpace(err.Error())
	}
	return ""
}
// buildOfficialImportSignatureAuditViewQuery assembles the SELECT statement
// and its positional arguments for reading
// official_import_signature_audit_recent_view.
//
// The window filter (recent_rank <= $1) is always present. A non-blank
// sourceKey adds an equality filter bound to the next placeholder, using the
// trimmed key as the argument. changesOnly adds a literal
// structure_changed = TRUE filter that needs no argument.
func buildOfficialImportSignatureAuditViewQuery(limitPerSource int, sourceKey string, changesOnly bool) (string, []any) {
	conditions := []string{"recent_rank <= $1"}
	params := []any{limitPerSource}

	if key := strings.TrimSpace(sourceKey); key != "" {
		placeholder := len(params) + 1
		conditions = append(conditions, fmt.Sprintf("source_key = $%d", placeholder))
		params = append(params, key)
	}
	if changesOnly {
		conditions = append(conditions, "structure_changed = TRUE")
	}

	whereClause := strings.Join(conditions, " AND ")
	statement := fmt.Sprintf(
		`SELECT
			source_key,
			recent_rank,
			checked_at,
			status,
			structure_state,
			structure_changed,
			drift_detected,
			baseline_initialized,
			structure_sha256,
			previous_observed_structure_sha256,
			snapshot_path,
			signature_path,
			error_message
		FROM official_import_signature_audit_recent_view
		WHERE %s
		ORDER BY source_key, checked_at DESC, recent_rank ASC`,
		whereClause,
	)
	return statement, params
}
// nullStringOrNone renders a nullable column for the text report. It returns
// the stored string unchanged when the value is valid and contains something
// other than whitespace, and the placeholder "none" otherwise.
func nullStringOrNone(value sql.NullString) string {
	if value.Valid && strings.TrimSpace(value.String) != "" {
		return value.String
	}
	return "none"
}
SignatureOut string +} + +func runPerplexityPricingImport(cfg perplexityPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + if cfg.SnapshotOnly || strings.TrimSpace(cfg.SnapshotOut) != "" || strings.TrimSpace(cfg.SignatureOut) != "" { + snapshotPath, signaturePath := resolvePerplexityPricingSnapshotPaths(cfg.SnapshotOut, cfg.SignatureOut, "", time.Now()) + signature, err := writePerplexityPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, time.Now()) + if err != nil { + return err + } + if cfg.SnapshotOnly { + _, err = fmt.Fprintf(out, + "source=perplexity-pricing-snapshot snapshot_only=true byte_size=%d sha256=%s structure_sha256=%s snapshot_out=%s signature_out=%s\n", + signature.ByteSize, signature.SHA256, signature.StructureSHA256, snapshotPath, signaturePath, + ) + return err + } + } + records, err := parsePerplexityPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=perplexity-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "perplexity-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=perplexity-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/perplexity_pricing_lib.go b/scripts/perplexity_pricing_lib.go new file mode 100644 index 0000000..91e9612 --- /dev/null +++ 
b/scripts/perplexity_pricing_lib.go @@ -0,0 +1,150 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "regexp" + "strings" +) + +const ( + defaultPerplexityPricingFetchURL = "https://docs.perplexity.ai/docs/agent-api/models.md" + defaultPerplexityPricingSourceURL = "https://docs.perplexity.ai/docs/agent-api/models" +) + +var markdownLinkPattern = regexp.MustCompile(`\[(.*?)\]\((https://[^)]+)\)`) + +func parsePerplexityPricingCatalog(raw string) ([]officialPricingRecord, error) { + lines := strings.Split(raw, "\n") + records := make([]officialPricingRecord, 0) + header := []string(nil) + modelIndex := -1 + inputIndex := -1 + outputIndex := -1 + docIndex := -1 + for _, line := range lines { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "|") { + continue + } + + parts := splitMarkdownTableRow(line) + if len(parts) == 0 { + continue + } + if header == nil { + header = parts + modelIndex, inputIndex, outputIndex, docIndex = detectPerplexityTableColumns(parts) + continue + } + if isMarkdownTableSeparator(parts) { + continue + } + if modelIndex < 0 || inputIndex < 0 || outputIndex < 0 || modelIndex >= len(parts) || inputIndex >= len(parts) || outputIndex >= len(parts) { + continue + } + + modelPath := strings.Trim(parts[modelIndex], "`") + inputCell := parts[inputIndex] + outputCell := parts[outputIndex] + inputPrice, ok := firstDollarPrice(inputCell) + if !ok { + continue + } + outputPrice, ok := firstDollarPrice(outputCell) + if !ok { + continue + } + sourceURL := defaultPerplexityPricingSourceURL + if docIndex >= 0 && docIndex < len(parts) { + if matches := markdownLinkPattern.FindStringSubmatch(parts[docIndex]); len(matches) == 3 { + sourceURL = matches[2] + } + } + providerName := providerFromModelPath(modelPath) + providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) + record := officialPricingRecord{ + ModelID: normalizeExternalID("perplexity", modelPath), + ModelName: modelPath, + ProviderName: 
providerName, + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Perplexity API", + OperatorNameCn: "Perplexity API", + OperatorCountry: "US", + OperatorWebsite: "https://docs.perplexity.ai", + OperatorType: "relay", + Region: "global", + Currency: "USD", + InputPrice: inputPrice, + OutputPrice: outputPrice, + SourceURL: defaultPerplexityPricingSourceURL, + ModelSourceURL: sourceURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelPath), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + if len(records) == 0 { + return nil, fmt.Errorf("unexpected perplexity pricing content") + } + return records, nil +} + +func splitMarkdownTableRow(line string) []string { + trimmed := strings.TrimSpace(line) + trimmed = strings.TrimPrefix(trimmed, "|") + trimmed = strings.TrimSuffix(trimmed, "|") + if trimmed == "" { + return nil + } + parts := strings.Split(trimmed, "|") + result := make([]string, 0, len(parts)) + for _, part := range parts { + result = append(result, strings.TrimSpace(part)) + } + return result +} + +func detectPerplexityTableColumns(header []string) (int, int, int, int) { + modelIndex := -1 + inputIndex := -1 + outputIndex := -1 + docIndex := -1 + for i, col := range header { + lower := strings.ToLower(strings.TrimSpace(col)) + switch { + case strings.Contains(lower, "model") && modelIndex == -1: + modelIndex = i + case strings.Contains(lower, "input") && strings.Contains(lower, "price") && inputIndex == -1: + inputIndex = i + case strings.Contains(lower, "output") && strings.Contains(lower, "price") && outputIndex == -1: + outputIndex = i + case (strings.Contains(lower, "documentation") || strings.Contains(lower, "docs")) && docIndex == -1: + docIndex = i + } + } + return modelIndex, inputIndex, outputIndex, docIndex +} + +func isMarkdownTableSeparator(parts []string) bool { + 
// main is the command-line entry point of the Perplexity pricing signature
// guard. It parses the flags, runs the guard, hands the outcome to the audit
// persistence helper, prints the one-line summary to stdout and exits with
// status 1 when either the guard or the audit step reported an error.
func main() {
	loadSubscriptionImportEnv()

	var url string
	var fixture string
	var snapshotDir string
	var baselinePath string
	var timeoutSeconds int
	var allowBootstrap bool

	flag.StringVar(&url, "url", defaultPerplexityPricingFetchURL, "Perplexity Agent API 官方模型价格 markdown")
	flag.StringVar(&fixture, "fixture", "", "Perplexity 价格样例文件")
	flag.StringVar(&snapshotDir, "snapshot-dir", "", "Perplexity snapshot 输出目录")
	flag.StringVar(&baselinePath, "baseline-path", "", "Perplexity 结构基线签名路径")
	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
	flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化")
	flag.Parse()

	// One timestamp is shared by the guard run and the audit record.
	now := time.Now()
	cfg := perplexityPricingSignatureGuardConfig{
		URL:            url,
		Fixture:        fixture,
		SnapshotDir:    snapshotDir,
		BaselinePath:   baselinePath,
		Timeout:        time.Duration(timeoutSeconds) * time.Second,
		AllowBootstrap: allowBootstrap,
	}
	result, err := runPerplexityPricingSignatureGuard(cfg, now)
	// The audit helper is called even when the guard failed, and receives the
	// guard error. An audit failure is reported on stderr and only becomes the
	// exit error when the guard itself succeeded.
	if auditErr := persistPerplexityPricingSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil {
		fmt.Fprintf(os.Stderr, "perplexity_pricing_signature_guard audit: %v\n", auditErr)
		if err == nil {
			err = auditErr
		}
	}
	// The summary line is printed regardless of the outcome.
	fmt.Println(formatPerplexityPricingSignatureGuardSummary(result))
	if err != nil {
		fmt.Fprintf(os.Stderr, "perplexity_pricing_signature_guard: %v\n", err)
		os.Exit(1)
	}
}
a/scripts/perplexity_pricing_signature_guard_lib.go b/scripts/perplexity_pricing_signature_guard_lib.go new file mode 100644 index 0000000..05a8245 --- /dev/null +++ b/scripts/perplexity_pricing_signature_guard_lib.go @@ -0,0 +1,136 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "time" +) + +type perplexityPricingSignatureGuardConfig struct { + URL string + Fixture string + SnapshotDir string + BaselinePath string + Timeout time.Duration + AllowBootstrap bool +} + +type perplexityPricingSignatureGuardResult struct { + SnapshotPath string + SignaturePath string + BaselinePath string + DriftDetected bool + BaselineInitialized bool + PreviousBaselineHash string + CurrentSignature markdownPricingStructureSignature +} + +func runPerplexityPricingSignatureGuard(cfg perplexityPricingSignatureGuardConfig, now time.Time) (perplexityPricingSignatureGuardResult, error) { + snapshotDir := cfg.SnapshotDir + if snapshotDir == "" { + snapshotDir = filepath.Join("logs", "perplexity-pricing-snapshots") + } + if err := os.MkdirAll(snapshotDir, 0o755); err != nil { + return perplexityPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + + snapshotPath, signaturePath := resolvePerplexityPricingSnapshotPaths("", "", snapshotDir, now) + baselinePath := cfg.BaselinePath + if baselinePath == "" { + baselinePath = filepath.Join(snapshotDir, "baseline.signature.json") + } + + clientCfg := perplexityPricingImportConfig{ + URL: cfg.URL, + Fixture: cfg.Fixture, + DryRun: true, + Timeout: cfg.Timeout, + SnapshotOnly: true, + SnapshotOut: snapshotPath, + SignatureOut: signaturePath, + } + if err := runPerplexityPricingImport(clientCfg, nil, ioDiscard{}); err != nil { + return perplexityPricingSignatureGuardResult{}, err + } + + current, err := readMarkdownPricingStructureSignature(signaturePath) + if err != nil { + return perplexityPricingSignatureGuardResult{}, err + } + result := 
perplexityPricingSignatureGuardResult{ + SnapshotPath: snapshotPath, + SignaturePath: signaturePath, + BaselinePath: baselinePath, + CurrentSignature: current, + } + + previous, err := readMarkdownPricingStructureSignature(baselinePath) + if err != nil { + if os.IsNotExist(err) { + if !cfg.AllowBootstrap { + return result, fmt.Errorf("perplexity pricing baseline missing: %s", baselinePath) + } + if err := copyFileCommon(signaturePath, baselinePath); err != nil { + return result, fmt.Errorf("initialize baseline: %w", err) + } + result.BaselineInitialized = true + return result, nil + } + return result, err + } + + result.PreviousBaselineHash = previous.StructureSHA256 + if previous.StructureSHA256 != current.StructureSHA256 { + result.DriftDetected = true + return result, fmt.Errorf( + "perplexity pricing structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s", + previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath, + ) + } + return result, nil +} + +func formatPerplexityPricingSignatureGuardSummary(result perplexityPricingSignatureGuardResult) string { + return fmt.Sprintf( + "source=perplexity-pricing-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s", + result.DriftDetected, + result.BaselineInitialized, + result.CurrentSignature.StructureSHA256, + emptyIfBlank(result.PreviousBaselineHash), + result.SnapshotPath, + result.SignaturePath, + result.BaselinePath, + ) +} + +func buildPerplexityPricingSignatureAuditRecord(cfg perplexityPricingSignatureGuardConfig, result perplexityPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord { + record := officialImportSignatureAuditRecord{ + SourceKey: "perplexity_pricing_signature", + CheckedAt: checkedAt, + Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, 
runErr), + DriftDetected: result.DriftDetected, + BaselineInitialized: result.BaselineInitialized, + SourceURL: strings.TrimSpace(cfg.URL), + FixturePath: strings.TrimSpace(cfg.Fixture), + SnapshotPath: strings.TrimSpace(result.SnapshotPath), + SignaturePath: strings.TrimSpace(result.SignaturePath), + BaselinePath: strings.TrimSpace(result.BaselinePath), + StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256), + PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash), + ByteSize: result.CurrentSignature.ByteSize, + ErrorMessage: errorMessageText(runErr), + } + if hasMarkdownPricingStructureSignature(result.CurrentSignature) { + signatureCopy := result.CurrentSignature + record.SignaturePayload = &signatureCopy + } + return record +} + +func persistPerplexityPricingSignatureAuditIfConfigured(cfg perplexityPricingSignatureGuardConfig, result perplexityPricingSignatureGuardResult, checkedAt time.Time, runErr error) error { + return persistOfficialImportSignatureAuditIfConfigured(buildPerplexityPricingSignatureAuditRecord(cfg, result, checkedAt, runErr)) +} diff --git a/scripts/perplexity_pricing_signature_guard_test.go b/scripts/perplexity_pricing_signature_guard_test.go new file mode 100644 index 0000000..2f23f1a --- /dev/null +++ b/scripts/perplexity_pricing_signature_guard_test.go @@ -0,0 +1,102 @@ +//go:build llm_script + +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRunPerplexityPricingSignatureGuardInitializesBaseline(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + result, err := runPerplexityPricingSignatureGuard(perplexityPricingSignatureGuardConfig{ + URL: defaultPerplexityPricingFetchURL, + Fixture: filepath.Join("testdata", "perplexity_pricing_sample.md"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 20, 40, 0, 0, 
time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("runPerplexityPricingSignatureGuard 返回错误: %v", err) + } + if !result.BaselineInitialized { + t.Fatalf("期望初始化 baseline") + } + if result.DriftDetected { + t.Fatalf("首次初始化不应判定为漂移") + } + if _, err := os.Stat(baselinePath); err != nil { + t.Fatalf("baseline 未写入: %v", err) + } +} + +func TestRunPerplexityPricingSignatureGuardDetectsDrift(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + _, err := runPerplexityPricingSignatureGuard(perplexityPricingSignatureGuardConfig{ + URL: defaultPerplexityPricingFetchURL, + Fixture: filepath.Join("testdata", "perplexity_pricing_sample.md"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 20, 41, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("初始化 baseline 失败: %v", err) + } + + driftFixture := "# Models\n\n| Name | Pricing |\n| --- | --- |\n| sonar | $1 |\n" + driftPath := filepath.Join(tempDir, "perplexity-drift.md") + if err := os.WriteFile(driftPath, []byte(driftFixture), 0o644); err != nil { + t.Fatalf("写入 drift fixture 失败: %v", err) + } + + result, err := runPerplexityPricingSignatureGuard(perplexityPricingSignatureGuardConfig{ + URL: defaultPerplexityPricingFetchURL, + Fixture: driftPath, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: false, + }, time.Date(2026, 5, 15, 20, 42, 0, 0, time.FixedZone("CST", 8*3600))) + if err == nil { + t.Fatalf("期望结构漂移时报错") + } + if !result.DriftDetected { + t.Fatalf("期望 driftDetected=true") + } + if !strings.Contains(err.Error(), "perplexity pricing structure drift detected") { + t.Fatalf("期望返回 drift 错误,实际: %v", err) + } +} + +func TestFormatPerplexityPricingSignatureGuardSummary(t *testing.T) { + result := perplexityPricingSignatureGuardResult{ + SnapshotPath: "/tmp/perplexity.md", + SignaturePath: 
"/tmp/perplexity.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + DriftDetected: false, + BaselineInitialized: true, + CurrentSignature: markdownPricingStructureSignature{ + StructureSHA256: "abc123", + }, + } + summary := formatPerplexityPricingSignatureGuardSummary(result) + for _, want := range []string{ + "source=perplexity-pricing-signature-guard", + "drift=false", + "baseline_initialized=true", + "structure_sha256=abc123", + } { + if !strings.Contains(summary, want) { + t.Fatalf("summary 缺少 %q,实际: %q", want, summary) + } + } +} diff --git a/scripts/perplexity_pricing_snapshot_lib.go b/scripts/perplexity_pricing_snapshot_lib.go new file mode 100644 index 0000000..d810cef --- /dev/null +++ b/scripts/perplexity_pricing_snapshot_lib.go @@ -0,0 +1,24 @@ +//go:build llm_script + +package main + +import "time" + +var perplexityPricingSignatureContainsNeedles = map[string]string{ + "model_column": "| model |", + "input_price_column": "input price", + "output_price_column": "output price", + "documentation_column": "documentation", +} + +func buildPerplexityPricingStructureSignature(raw string) markdownPricingStructureSignature { + return buildMarkdownPricingStructureSignature(raw, perplexityPricingSignatureContainsNeedles) +} + +func writePerplexityPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (markdownPricingStructureSignature, error) { + return writeMarkdownPricingSnapshotArtifacts(raw, sourceURL, snapshotPath, signaturePath, now, perplexityPricingSignatureContainsNeedles) +} + +func resolvePerplexityPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now time.Time) (string, string) { + return resolveMarkdownPricingSnapshotPaths(snapshotPath, signaturePath, snapshotDir, "perplexity-pricing", now) +} diff --git a/scripts/perplexity_pricing_snapshot_test.go b/scripts/perplexity_pricing_snapshot_test.go new file mode 100644 index 0000000..0326284 --- 
/dev/null +++ b/scripts/perplexity_pricing_snapshot_test.go @@ -0,0 +1,92 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestBuildPerplexityPricingStructureSignatureCapturesShape(t *testing.T) { + raw := ` +# Models + +| Model | Input Price | Output Price | Documentation | +| --- | --- | --- | --- | +| sonar | $1.00 / 1M tokens | $5.00 / 1M tokens | [Docs](https://example.com) | +` + + signature := buildPerplexityPricingStructureSignature(raw) + if signature.ByteSize == 0 { + t.Fatalf("期望 byte_size 非 0") + } + if signature.SHA256 == "" || signature.StructureSHA256 == "" { + t.Fatalf("期望生成 sha256 签名: %+v", signature) + } + if len(signature.Headings) == 0 || signature.Headings[0] != "Models" { + t.Fatalf("标题提取错误: %+v", signature.Headings) + } + if len(signature.TableHeaders) == 0 || !strings.Contains(signature.TableHeaders[0], "Input Price") { + t.Fatalf("表头提取错误: %+v", signature.TableHeaders) + } + for _, key := range []string{"model_column", "input_price_column", "output_price_column", "documentation_column"} { + if !signature.Contains[key] { + t.Fatalf("期望识别 %s: %+v", key, signature.Contains) + } + } +} + +func TestRunPerplexityPricingImportSnapshotOnlyWritesArtifacts(t *testing.T) { + tempDir := t.TempDir() + snapshotPath := filepath.Join(tempDir, "perplexity-live.md") + signaturePath := filepath.Join(tempDir, "perplexity-live.signature.json") + + var out bytes.Buffer + err := runPerplexityPricingImport(perplexityPricingImportConfig{ + URL: defaultPerplexityPricingFetchURL, + Fixture: filepath.Join("testdata", "perplexity_pricing_sample.md"), + DryRun: true, + SnapshotOnly: true, + SnapshotOut: snapshotPath, + SignatureOut: signaturePath, + }, nil, &out) + if err != nil { + t.Fatalf("runPerplexityPricingImport 返回错误: %v", err) + } + + snapshotBytes, err := os.ReadFile(snapshotPath) + if err != nil { + t.Fatalf("读取 snapshot 失败: %v", err) + } + if 
!strings.Contains(string(snapshotBytes), "Input Price") { + t.Fatalf("snapshot 内容错误") + } + + signatureBytes, err := os.ReadFile(signaturePath) + if err != nil { + t.Fatalf("读取 signature 失败: %v", err) + } + var signature markdownPricingStructureSignature + if err := json.Unmarshal(signatureBytes, &signature); err != nil { + t.Fatalf("signature JSON 解析失败: %v", err) + } + if !signature.Contains["documentation_column"] { + t.Fatalf("期望 signature 含 documentation_column: %+v", signature.Contains) + } + + output := out.String() + for _, want := range []string{ + "source=perplexity-pricing-snapshot", + "snapshot_only=true", + "signature_out=" + signaturePath, + "snapshot_out=" + snapshotPath, + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/pricing_markdown_snapshot_lib.go b/scripts/pricing_markdown_snapshot_lib.go new file mode 100644 index 0000000..f3685f5 --- /dev/null +++ b/scripts/pricing_markdown_snapshot_lib.go @@ -0,0 +1,251 @@ +//go:build llm_script + +package main + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" +) + +type markdownPricingStructureSignature struct { + ByteSize int `json:"byte_size"` + SHA256 string `json:"sha256"` + StructureSHA256 string `json:"structure_sha256"` + NormalizedLineCount int `json:"normalized_line_count"` + Headings []string `json:"headings"` + TableHeaders []string `json:"table_headers"` + Contains map[string]bool `json:"contains"` + GeneratedAt string `json:"generated_at,omitempty"` + SourceURL string `json:"source_url,omitempty"` + SnapshotPath string `json:"snapshot_path,omitempty"` +} + +func buildMarkdownPricingStructureSignature(raw string, containsNeedles map[string]string) markdownPricingStructureSignature { + lines := markdownPricingLines(raw) + headings := extractMarkdownPricingHeadings(lines) + tableHeaders := extractMarkdownPricingTableHeaders(lines) + contains := 
make(map[string]bool, len(containsNeedles)) + for key, needle := range containsNeedles { + contains[key] = strings.Contains(strings.ToLower(raw), strings.ToLower(needle)) + } + + signature := markdownPricingStructureSignature{ + ByteSize: len([]byte(raw)), + SHA256: markdownPricingSHA256Hex(raw), + NormalizedLineCount: len(lines), + Headings: headings, + TableHeaders: tableHeaders, + Contains: contains, + } + signature.StructureSHA256 = markdownPricingSHA256Hex(markdownPricingStructureDigestPayload(signature)) + return signature +} + +func writeMarkdownPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time, containsNeedles map[string]string) (markdownPricingStructureSignature, error) { + if strings.TrimSpace(snapshotPath) == "" { + return markdownPricingStructureSignature{}, fmt.Errorf("snapshot path is required") + } + if strings.TrimSpace(signaturePath) == "" { + return markdownPricingStructureSignature{}, fmt.Errorf("signature path is required") + } + if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil { + return markdownPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil { + return markdownPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err) + } + if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil { + return markdownPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err) + } + + signature := buildMarkdownPricingStructureSignature(raw, containsNeedles) + signature.GeneratedAt = now.Format(time.RFC3339) + signature.SourceURL = sourceURL + signature.SnapshotPath = snapshotPath + payload, err := json.MarshalIndent(signature, "", " ") + if err != nil { + return markdownPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err) + } + if err := os.WriteFile(signaturePath, payload, 0o644); err != nil { + return 
// resolveMarkdownPricingSnapshotPaths fills in whichever of the snapshot and
// signature paths the caller left blank and returns both.
//
// A blank snapshotPath becomes <snapshotDir>/<baseName>-<YYYYMMDD-HHMMSS>.md,
// where a blank snapshotDir defaults to logs/<baseName>-snapshots and the
// timestamp is taken from now. A blank signaturePath becomes the snapshot
// path with its extension replaced by ".signature.json". Non-blank inputs
// are returned untouched.
func resolveMarkdownPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	if isBlank(snapshotPath) {
		dir := snapshotDir
		if isBlank(dir) {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		fileStem := fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405"))
		snapshotPath = filepath.Join(dir, fileStem) + ".md"
	}
	if isBlank(signaturePath) {
		// For a generated snapshot path this strips the ".md" just appended,
		// so the signature sits next to the snapshot under the same stem.
		stem := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = stem + ".signature.json"
	}
	return snapshotPath, signaturePath
}
// extractMarkdownPricingHeadings collects the text of heading lines (lines
// beginning with "#") in document order. Leading "#" characters and
// surrounding whitespace are stripped. Empty headings and repeats of an
// already collected heading are skipped, and collection stops once twelve
// headings have been gathered. The result is always a non-nil slice.
func extractMarkdownPricingHeadings(lines []string) []string {
	const maxHeadings = 12

	collected := make([]string, 0, maxHeadings)
	known := make(map[string]bool)
	for idx := 0; idx < len(lines) && len(collected) < maxHeadings; idx++ {
		current := lines[idx]
		if !strings.HasPrefix(current, "#") {
			continue
		}
		title := strings.TrimSpace(strings.TrimLeft(current, "#"))
		if title == "" || known[title] {
			continue
		}
		known[title] = true
		collected = append(collected, title)
	}
	return collected
}
splitMarkdownSnapshotTableRow(line string) []string { + trimmed := strings.TrimSpace(line) + trimmed = strings.TrimPrefix(trimmed, "|") + trimmed = strings.TrimSuffix(trimmed, "|") + if trimmed == "" { + return nil + } + parts := strings.Split(trimmed, "|") + result := make([]string, 0, len(parts)) + for _, part := range parts { + result = append(result, strings.TrimSpace(part)) + } + return result +} + +func isMarkdownSnapshotTableSeparator(parts []string) bool { + if len(parts) == 0 { + return false + } + for _, part := range parts { + trimmed := strings.TrimSpace(part) + if trimmed == "" { + return false + } + for _, ch := range trimmed { + if ch != '-' && ch != ':' { + return false + } + } + } + return true +} + +func copyFileCommon(src string, dst string) error { + data, err := os.ReadFile(src) + if err != nil { + return err + } + if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil { + return err + } + return os.WriteFile(dst, data, 0o644) +} diff --git a/scripts/query_official_import_signature_audit.go b/scripts/query_official_import_signature_audit.go new file mode 100644 index 0000000..b361e7e --- /dev/null +++ b/scripts/query_official_import_signature_audit.go @@ -0,0 +1,43 @@ +//go:build llm_script + +package main + +import ( + "flag" + "fmt" + "os" + + _ "github.com/lib/pq" +) + +func main() { + loadSubscriptionImportEnv() + + var limitPerSource int + var sourceKey string + var changesOnly bool + + flag.IntVar(&limitPerSource, "limit-per-source", 5, "每个 source_key 展示最近多少次记录") + flag.StringVar(&sourceKey, "source-key", "", "只看单个 source_key") + flag.BoolVar(&changesOnly, "changes-only", false, "仅展示结构发生变化的记录") + flag.Parse() + + if limitPerSource <= 0 { + fmt.Fprintln(os.Stderr, "limit-per-source 必须大于 0") + os.Exit(2) + } + + db, err := subscriptionImportDB() + if err != nil { + fmt.Fprintf(os.Stderr, "open db: %v\n", err) + os.Exit(1) + } + defer db.Close() + + summaries, rows, err := queryOfficialImportSignatureAuditWindow(db, limitPerSource, 
sourceKey, changesOnly) + if err != nil { + fmt.Fprintf(os.Stderr, "query_official_import_signature_audit: %v\n", err) + os.Exit(1) + } + renderOfficialImportSignatureAuditReport(os.Stdout, limitPerSource, sourceKey, changesOnly, summaries, rows) +} diff --git a/scripts/query_official_import_signature_audit_test.go b/scripts/query_official_import_signature_audit_test.go new file mode 100644 index 0000000..78e2b71 --- /dev/null +++ b/scripts/query_official_import_signature_audit_test.go @@ -0,0 +1,80 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "strings" + "testing" + "time" +) + +func TestBuildOfficialImportSignatureAuditViewQueryIncludesWindowAndFilters(t *testing.T) { + query, args := buildOfficialImportSignatureAuditViewQuery(5, "vertex_pricing_signature", true) + for _, want := range []string{ + "FROM official_import_signature_audit_recent_view", + "recent_rank <= $1", + "source_key = $2", + "structure_changed = TRUE", + } { + if !strings.Contains(query, want) { + t.Fatalf("query 缺少 %q,实际: %s", want, query) + } + } + if len(args) != 2 { + t.Fatalf("参数个数错误: %d", len(args)) + } + if args[0] != 5 || args[1] != "vertex_pricing_signature" { + t.Fatalf("参数错误: %#v", args) + } +} + +func TestRenderOfficialImportSignatureAuditReportPrintsSummaryAndRows(t *testing.T) { + summaries := []officialImportSignatureAuditSourceSummary{ + { + SourceKey: "cloudflare_pricing_signature", + RunsInWindow: 5, + ChangedRuns: 2, + LatestCheckedAt: time.Date(2026, 5, 15, 20, 0, 0, 0, time.UTC), + LatestStatus: "passed", + LatestStructureState: "stable", + }, + } + rows := []officialImportSignatureAuditViewRow{ + { + SourceKey: "cloudflare_pricing_signature", + RecentRank: 1, + CheckedAt: time.Date(2026, 5, 15, 20, 0, 0, 0, time.UTC), + Status: "passed", + StructureState: "stable", + StructureChanged: false, + StructureSHA256: "abc123", + }, + { + SourceKey: "cloudflare_pricing_signature", + RecentRank: 2, + CheckedAt: time.Date(2026, 5, 14, 20, 0, 0, 0, time.UTC), + 
Status: "drift_detected", + StructureState: "changed", + StructureChanged: true, + StructureSHA256: "def456", + }, + } + + var out bytes.Buffer + renderOfficialImportSignatureAuditReport(&out, 5, "", false, summaries, rows) + output := out.String() + for _, want := range []string{ + "Official Import Signature Audit Report", + "window_per_source=5", + "cloudflare_pricing_signature", + "changed_runs=2", + "recent_rank=2", + "state=changed", + "sha=def456", + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +} diff --git a/scripts/rebuild_historical_report.sh b/scripts/rebuild_historical_report.sh index c03da29..8e0c279 100755 --- a/scripts/rebuild_historical_report.sh +++ b/scripts/rebuild_historical_report.sh @@ -25,4 +25,4 @@ REPORT_DATE="$REPORT_DATE" \ REPORT_RUN_KIND="historical_rebuild" \ REPORT_TRIGGER_SOURCE="rebuild_script" \ REPORT_IS_OFFICIAL_DAILY="false" \ -go run -tags llm_script ./scripts/generate_daily_report.go "$@" +go run -tags llm_script ./scripts/generate_daily_report.go ./scripts/official_import_signature_audit_query_lib.go "$@" diff --git a/scripts/signature_guard_common.go b/scripts/signature_guard_common.go new file mode 100644 index 0000000..07b3c7d --- /dev/null +++ b/scripts/signature_guard_common.go @@ -0,0 +1,16 @@ +//go:build llm_script + +package main + +type ioDiscard struct{} + +func (ioDiscard) Write(p []byte) (int, error) { + return len(p), nil +} + +func emptyIfBlank(value string) string { + if value == "" { + return "none" + } + return value +} diff --git a/scripts/testdata/cloudflare_pricing_sample.md b/scripts/testdata/cloudflare_pricing_sample.md new file mode 100644 index 0000000..27ca611 --- /dev/null +++ b/scripts/testdata/cloudflare_pricing_sample.md @@ -0,0 +1,10 @@ +## LLM model pricing + +| Model | Price in Tokens | Price in Neurons | +| -------------------------------------------- | --------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------- | +| @cf/meta/llama-3.2-1b-instruct | $0.027 per M input tokens $0.201 per M output tokens | 2457 neurons per M input tokens 18252 neurons per M output tokens | +| @cf/meta/llama-3.3-70b-instruct-fp8-fast | $0.293 per M input tokens $2.253 per M output tokens | 26668 neurons per M input tokens 204805 neurons per M output tokens | +| @cf/qwen/qwen2.5-coder-32b-instruct | $0.660 per M input tokens $1.000 per M output tokens | 60000 neurons per M input tokens 90909 neurons per M output tokens | +| @cf/moonshotai/kimi-k2.5 | $0.600 per M input tokens $0.100 per M cached input tokens $3.000 per M output tokens | 54545 neurons per M input tokens 9091 neurons per M cached input tokens 272727 neurons per M output tokens | + +## Embeddings model pricing diff --git a/scripts/testdata/perplexity_pricing_sample.md b/scripts/testdata/perplexity_pricing_sample.md new file mode 100644 index 0000000..e9bc408 --- /dev/null +++ b/scripts/testdata/perplexity_pricing_sample.md @@ -0,0 +1,9 @@ +# Models + +| Model | Input Price | Output Price | Cache Read Price | Provider Documentation | +| -------------------------------------- | -------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | -------------------- | ----------------------------------------------------------------------------------------------------------- | +| `perplexity/sonar` | \$0.25 / 1M tokens | \$2.50 / 1M tokens | \$0.0625 / 1M tokens | [Sonar](https://docs.perplexity.ai/docs/sonar/models/sonar) | +| `anthropic/claude-sonnet-4-6` | \$3 / 1M tokens | \$15 / 1M tokens | \$0.30 / 1M tokens | [Claude Sonnet 4.6](https://www.anthropic.com/news/claude-sonnet-4-6) | +| `openai/gpt-5.4` | \$2.50 / 1M tokens | \$15.00 / 1M tokens | \$0.25 / 1M tokens | [GPT-5.4](https://platform.openai.com/docs/models/gpt-5.4) | +| 
`google/gemini-3.1-pro-preview` | \$2.00 / 1M tokens (≤200k context)
\$4.00 / 1M tokens (>200k context) | \$12.00 / 1M tokens (≤200k context)
\$18.00 / 1M tokens (>200k context) | 90% discount | [Gemini 3.1 Pro](https://ai.google.dev/gemini-api/docs/models#gemini-3.1-pro-preview) | +| `xai/grok-4.3` | \$1.25 / 1M tokens | \$2.50 / 1M tokens | \$0.20 / 1M tokens | [Grok 4.3](https://docs.x.ai/developers/models) | diff --git a/scripts/testdata/vertex_pricing_sample.html b/scripts/testdata/vertex_pricing_sample.html new file mode 100644 index 0000000..60c32cc --- /dev/null +++ b/scripts/testdata/vertex_pricing_sample.html @@ -0,0 +1,73 @@ +

Gemini 3

+
+
+

Standard

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelTypePrice (/1M tokens) <= 200K input tokens
Gemini 3.1 Pro Preview
Input (text, image, video, audio)$2
Text output (response and reasoning)$12
Gemini 3.1 Flash Image Preview
Input (text, image)$0.50
Text output (response and reasoning)$3
Image Output***$60
Gemini 3.1 Flash-Lite
Input (text, image, video)$0.25 (Global)

$0.275 (Non-global)*
Input (audio)$0.5 (Global)

$0.55 (Non-global)*
Text output (response and reasoning)$1.5 (Global)

$1.65 (Non-global)*
Gemini 3 Flash Preview
Input (text, image, video)$0.5
Input (audio)$1
Text output (response and reasoning)$3
+
+
diff --git a/scripts/verify_phase3.sh b/scripts/verify_phase3.sh index 022dc12..08f53ca 100755 --- a/scripts/verify_phase3.sh +++ b/scripts/verify_phase3.sh @@ -16,7 +16,7 @@ echo "=== Phase 3 验收检查 ===" check_executable "scripts/run_daily.sh" "日报流水线脚本可执行" check_executable "scripts/feishu_alert.sh" "飞书告警脚本可执行" -check_shell "日报生成器可独立构建" "go build -o /dev/null ./scripts/generate_daily_report.go" +check_shell "日报生成器可独立构建" "go build -o /dev/null ./scripts/generate_daily_report.go ./scripts/official_import_signature_audit_query_lib.go" check_shell "日报脚本包含降级逻辑" "grep -q 'fallback_report' scripts/run_daily.sh" check_shell "日报脚本包含飞书告警逻辑" "grep -q 'send_alert' scripts/run_daily.sh" check_shell "正式调度链启用严格真实采集" "grep -q -- '-strict-real' scripts/run_daily.sh && grep -q -- '-strict-real' scripts/run_real_pipeline.sh" diff --git a/scripts/vertex_pricing_import_runner.go b/scripts/vertex_pricing_import_runner.go new file mode 100644 index 0000000..c3ed16c --- /dev/null +++ b/scripts/vertex_pricing_import_runner.go @@ -0,0 +1,66 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "fmt" + "io" + "net/http" + "strings" + "time" +) + +type vertexPricingImportConfig struct { + URL string + Fixture string + DryRun bool + Timeout time.Duration + SnapshotOnly bool + SnapshotOut string + SignatureOut string +} + +func runVertexPricingImport(cfg vertexPricingImportConfig, db *sql.DB, out io.Writer) error { + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return err + } + if cfg.SnapshotOnly || strings.TrimSpace(cfg.SnapshotOut) != "" || strings.TrimSpace(cfg.SignatureOut) != "" { + snapshotPath, signaturePath := resolveVertexPricingSnapshotPaths(cfg.SnapshotOut, cfg.SignatureOut, time.Now()) + signature, err := writeVertexPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, time.Now()) + if err != nil { + return err + } + if cfg.SnapshotOnly { + _, err = fmt.Fprintf(out, + 
"source=vertex-pricing-snapshot snapshot_only=true byte_size=%d sha256=%s structure_sha256=%s snapshot_out=%s signature_out=%s\n", + signature.ByteSize, signature.SHA256, signature.StructureSHA256, snapshotPath, signaturePath, + ) + return err + } + } + records, err := parseVertexPricingCatalog(raw) + if err != nil { + return err + } + records = dedupeOfficialPricingRecords(records) + if cfg.DryRun { + _, err = fmt.Fprintf(out, "source=vertex-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) + return err + } + if db == nil { + return fmt.Errorf("db is required when dry-run=false") + } + if err := upsertOfficialPricingRecords(db, records, "vertex-pricing-import"); err != nil { + return err + } + + var tableRows int + if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { + return fmt.Errorf("count region_pricing: %w", err) + } + _, err = fmt.Fprintf(out, "source=vertex-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) + return err +} diff --git a/scripts/vertex_pricing_lib.go b/scripts/vertex_pricing_lib.go new file mode 100644 index 0000000..687014e --- /dev/null +++ b/scripts/vertex_pricing_lib.go @@ -0,0 +1,277 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "html" + "regexp" + "strings" +) + +const defaultVertexPricingURL = "https://cloud.google.com/gemini-enterprise-agent-platform/generative-ai/pricing" + +var ( + vertexRowPattern = regexp.MustCompile(`(?s)(.*?)`) + vertexCellPattern = regexp.MustCompile(`(?s)]*>(.*?)`) + vertexHeadingPattern = regexp.MustCompile(`(?is)]*>(.*?)`) + vertexTablePattern = regexp.MustCompile(`(?is)]*>(.*?)`) + vertexStandardHeadingPattern = regexp.MustCompile(`(?is)]*>\s*(standard|标准)\s*`) +) + +func parseVertexPricingCatalog(raw string) ([]officialPricingRecord, error) { + familyBlocks := splitVertexFamilyBlocks(raw) + records := make([]officialPricingRecord, 0) + if 
len(familyBlocks) > 0 { + for _, block := range familyBlocks { + tableHTML := extractVertexStandardTable(block) + if strings.TrimSpace(tableHTML) == "" { + continue + } + records = append(records, parseVertexStandardTable(tableHTML)...) + } + } + if len(records) > 0 { + return records, nil + } + records = parseVertexStandardTextBlocks(raw) + if len(records) > 0 { + return records, nil + } + if len(familyBlocks) == 0 { + return nil, fmt.Errorf("unexpected vertex pricing content") + } + return nil, fmt.Errorf("no vertex standard pricing rows found") +} + +func parseVertexStandardTable(table string) []officialPricingRecord { + rows := vertexRowPattern.FindAllStringSubmatch(table, -1) + records := make([]officialPricingRecord, 0) + currentModel := "" + currentInput := 0.0 + + for _, row := range rows { + cells := vertexCellPattern.FindAllStringSubmatch(row[1], -1) + if len(cells) == 0 { + continue + } + + values := make([]string, 0, len(cells)) + for _, cell := range cells { + values = append(values, cleanHTMLText(cell[1])) + } + if len(values) == 1 && !strings.Contains(values[0], "Model") { + currentModel = values[0] + currentInput = 0 + continue + } + if len(values) < 2 { + continue + } + + rowType := values[0] + priceCell := values[1] + if len(values) > 2 && strings.Contains(strings.ToLower(values[0]), "gemini") { + currentModel = values[0] + rowType = values[1] + priceCell = values[2] + } + if strings.TrimSpace(currentModel) == "" || strings.EqualFold(currentModel, "Model") { + continue + } + + switch { + case strings.HasPrefix(rowType, "Input (text"), strings.HasPrefix(rowType, "输入(文本"): + price, ok := firstDollarPrice(priceCell) + if ok { + currentInput = price + } + case strings.HasPrefix(rowType, "Text output"), strings.HasPrefix(rowType, "文本输出"): + outputPrice, ok := firstDollarPrice(priceCell) + if !ok || currentInput == 0 { + continue + } + providerNameCn, providerCountry, providerWebsite := providerMetadata("Google") + record := officialPricingRecord{ + 
ModelID: normalizeExternalID("vertex", currentModel), + ModelName: currentModel, + ProviderName: "Google", + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Google Cloud Vertex AI", + OperatorNameCn: "Google Cloud Vertex AI", + OperatorCountry: "US", + OperatorWebsite: "https://cloud.google.com/vertex-ai", + OperatorType: "cloud", + Region: "global", + Currency: "USD", + InputPrice: currentInput, + OutputPrice: outputPrice, + SourceURL: defaultVertexPricingURL, + ModelSourceURL: defaultVertexPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(currentModel), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + } + } + return records +} + +func splitVertexFamilyBlocks(raw string) []string { + indices := make([]int, 0) + matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1) + for _, match := range matches { + label := cleanHTMLText(raw[match[2]:match[3]]) + if !strings.Contains(strings.ToLower(label), "gemini") { + continue + } + indices = append(indices, match[0]) + } + blocks := make([]string, 0, len(indices)) + for i, start := range indices { + end := len(raw) + if i+1 < len(indices) { + end = indices[i+1] + } + blocks = append(blocks, raw[start:end]) + } + return blocks +} + +func extractVertexStandardTable(raw string) string { + heading := vertexStandardHeadingPattern.FindStringIndex(raw) + if heading == nil { + return "" + } + segment := raw[heading[1]:] + table := vertexTablePattern.FindStringSubmatch(segment) + if len(table) != 2 { + return "" + } + return table[1] +} + +func parseVertexStandardTextBlocks(raw string) []officialPricingRecord { + lines := htmlLines(raw) + records := make([]officialPricingRecord, 0) + currentModelParts := make([]string, 0) + currentInput := 0.0 + inStandard := false + + for _, line := range lines { + lower := strings.ToLower(line) + 
sectionTitle := normalizeVertexSectionTitle(lower) + switch { + case sectionTitle != "": + inStandard = sectionTitle == "standard" || sectionTitle == "标准" + currentModelParts = currentModelParts[:0] + currentInput = 0 + continue + case !inStandard: + continue + case strings.Contains(lower, "model type price"): + continue + case strings.Contains(line, "$"): + modelName := strings.TrimSpace(strings.Join(currentModelParts, " ")) + if modelName == "" { + continue + } + switch { + case strings.HasPrefix(lower, "input (text"), strings.HasPrefix(lower, "1m input text tokens"): + if price, ok := firstDollarPrice(line); ok { + currentInput = price + } + case strings.HasPrefix(lower, "text output"), strings.HasPrefix(lower, "1m output text tokens"): + outputPrice, ok := firstDollarPrice(line) + if !ok || currentInput == 0 { + continue + } + providerNameCn, providerCountry, providerWebsite := providerMetadata("Google") + record := officialPricingRecord{ + ModelID: normalizeExternalID("vertex", modelName), + ModelName: modelName, + ProviderName: "Google", + ProviderNameCn: providerNameCn, + ProviderCountry: providerCountry, + ProviderWebsite: providerWebsite, + OperatorName: "Google Cloud Vertex AI", + OperatorNameCn: "Google Cloud Vertex AI", + OperatorCountry: "US", + OperatorWebsite: "https://cloud.google.com/vertex-ai", + OperatorType: "cloud", + Region: "global", + Currency: "USD", + InputPrice: currentInput, + OutputPrice: outputPrice, + SourceURL: defaultVertexPricingURL, + ModelSourceURL: defaultVertexPricingURL, + DateConfidence: "unknown", + DateSourceKind: "official_pricing", + Modality: detectModality(modelName), + } + record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 + records = append(records, record) + currentModelParts = currentModelParts[:0] + currentInput = 0 + } + default: + currentModelParts = append(currentModelParts, line) + } + } + + return dedupeOfficialPricingRecords(records) +} + +func normalizeVertexSectionTitle(line string) string { 
+ title := strings.TrimSpace(strings.TrimLeft(line, "#")) + title = strings.TrimSpace(title) + switch title { + case "standard", "标准", "priority", "优先级", "flex/batch", "灵活/批处理", "batch api", "live api": + return title + default: + return "" + } +} + +func htmlLines(raw string) []string { + replacer := strings.NewReplacer( + "
", "\n", + "
", "\n", + "
", "\n", + "

", "\n", + "", "\n", + "
", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + "", "\n", + ) + withBreaks := replacer.Replace(raw) + tagPattern := regexp.MustCompile(`(?is)<[^>]+>`) + spacePattern := regexp.MustCompile(`[ \t]+`) + cleaned := html.UnescapeString(withBreaks) + cleaned = strings.ReplaceAll(cleaned, "\r\n", "\n") + cleaned = strings.ReplaceAll(cleaned, "\r", "\n") + cleaned = strings.ReplaceAll(cleaned, "\u00a0", " ") + cleaned = tagPattern.ReplaceAllString(cleaned, "") + rawLines := strings.Split(cleaned, "\n") + lines := make([]string, 0, len(rawLines)) + for _, line := range rawLines { + line = strings.TrimSpace(spacePattern.ReplaceAllString(line, " ")) + if line == "" { + continue + } + lines = append(lines, line) + } + return lines +} diff --git a/scripts/vertex_pricing_signature_guard.go b/scripts/vertex_pricing_signature_guard.go new file mode 100644 index 0000000..daeae7a --- /dev/null +++ b/scripts/vertex_pricing_signature_guard.go @@ -0,0 +1,51 @@ +//go:build llm_script + +package main + +import ( + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var snapshotDir string + var baselinePath string + var timeoutSeconds int + var allowBootstrap bool + + flag.StringVar(&url, "url", defaultVertexPricingURL, "Vertex AI 官方价格页") + flag.StringVar(&fixture, "fixture", "", "Vertex AI 价格样例文件") + flag.StringVar(&snapshotDir, "snapshot-dir", "", "Vertex snapshot 输出目录") + flag.StringVar(&baselinePath, "baseline-path", "", "Vertex 结构基线签名路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化") + flag.Parse() + + now := time.Now() + cfg := vertexPricingSignatureGuardConfig{ + URL: url, + Fixture: fixture, + SnapshotDir: snapshotDir, + BaselinePath: baselinePath, + Timeout: time.Duration(timeoutSeconds) * time.Second, + AllowBootstrap: allowBootstrap, + } + result, 
err := runVertexPricingSignatureGuard(cfg, now) + if auditErr := persistVertexPricingSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil { + fmt.Fprintf(os.Stderr, "vertex_pricing_signature_guard audit: %v\n", auditErr) + if err == nil { + err = auditErr + } + } + fmt.Println(formatVertexPricingSignatureGuardSummary(result)) + if err != nil { + fmt.Fprintf(os.Stderr, "vertex_pricing_signature_guard: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/vertex_pricing_signature_guard_lib.go b/scripts/vertex_pricing_signature_guard_lib.go new file mode 100644 index 0000000..9cb0d63 --- /dev/null +++ b/scripts/vertex_pricing_signature_guard_lib.go @@ -0,0 +1,159 @@ +//go:build llm_script + +package main + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "time" +) + +type vertexPricingSignatureGuardConfig struct { + URL string + Fixture string + SnapshotDir string + BaselinePath string + Timeout time.Duration + AllowBootstrap bool +} + +type vertexPricingSignatureGuardResult struct { + SnapshotPath string + SignaturePath string + BaselinePath string + DriftDetected bool + BaselineInitialized bool + PreviousBaselineHash string + CurrentSignature vertexPricingStructureSignature +} + +func runVertexPricingSignatureGuard(cfg vertexPricingSignatureGuardConfig, now time.Time) (vertexPricingSignatureGuardResult, error) { + snapshotDir := cfg.SnapshotDir + if snapshotDir == "" { + snapshotDir = filepath.Join("logs", "vertex-pricing-snapshots") + } + if err := os.MkdirAll(snapshotDir, 0o755); err != nil { + return vertexPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + + baseName := fmt.Sprintf("vertex-pricing-%s", now.Format("20060102-150405")) + snapshotPath := filepath.Join(snapshotDir, baseName+".html") + signaturePath := filepath.Join(snapshotDir, baseName+".signature.json") + baselinePath := cfg.BaselinePath + if baselinePath == "" { + baselinePath = filepath.Join(snapshotDir, "baseline.signature.json") 
+ } + + clientCfg := vertexPricingImportConfig{ + URL: cfg.URL, + Fixture: cfg.Fixture, + DryRun: true, + Timeout: cfg.Timeout, + SnapshotOnly: true, + SnapshotOut: snapshotPath, + SignatureOut: signaturePath, + } + if err := runVertexPricingImport(clientCfg, nil, ioDiscard{}); err != nil { + return vertexPricingSignatureGuardResult{}, err + } + + current, err := readVertexPricingStructureSignature(signaturePath) + if err != nil { + return vertexPricingSignatureGuardResult{}, err + } + result := vertexPricingSignatureGuardResult{ + SnapshotPath: snapshotPath, + SignaturePath: signaturePath, + BaselinePath: baselinePath, + CurrentSignature: current, + } + + previous, err := readVertexPricingStructureSignature(baselinePath) + if err != nil { + if os.IsNotExist(err) { + if !cfg.AllowBootstrap { + return result, fmt.Errorf("vertex pricing baseline missing: %s", baselinePath) + } + if err := copyFileCommon(signaturePath, baselinePath); err != nil { + return result, fmt.Errorf("initialize baseline: %w", err) + } + result.BaselineInitialized = true + return result, nil + } + return result, err + } + + result.PreviousBaselineHash = previous.StructureSHA256 + if previous.StructureSHA256 != current.StructureSHA256 { + result.DriftDetected = true + return result, fmt.Errorf( + "vertex pricing structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s", + previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath, + ) + } + return result, nil +} + +func formatVertexPricingSignatureGuardSummary(result vertexPricingSignatureGuardResult) string { + return fmt.Sprintf( + "source=vertex-pricing-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s", + result.DriftDetected, + result.BaselineInitialized, + result.CurrentSignature.StructureSHA256, + emptyIfBlank(result.PreviousBaselineHash), + result.SnapshotPath, + 
result.SignaturePath, + result.BaselinePath, + ) +} + +func buildVertexPricingSignatureAuditRecord(cfg vertexPricingSignatureGuardConfig, result vertexPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord { + record := officialImportSignatureAuditRecord{ + SourceKey: "vertex_pricing_signature", + CheckedAt: checkedAt, + Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr), + DriftDetected: result.DriftDetected, + BaselineInitialized: result.BaselineInitialized, + SourceURL: strings.TrimSpace(cfg.URL), + FixturePath: strings.TrimSpace(cfg.Fixture), + SnapshotPath: strings.TrimSpace(result.SnapshotPath), + SignaturePath: strings.TrimSpace(result.SignaturePath), + BaselinePath: strings.TrimSpace(result.BaselinePath), + StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256), + PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash), + ByteSize: result.CurrentSignature.ByteSize, + ErrorMessage: errorMessageText(runErr), + } + if hasVertexPricingStructureSignature(result.CurrentSignature) { + signatureCopy := result.CurrentSignature + record.SignaturePayload = &signatureCopy + } + return record +} + +func persistVertexPricingSignatureAuditIfConfigured(cfg vertexPricingSignatureGuardConfig, result vertexPricingSignatureGuardResult, checkedAt time.Time, runErr error) error { + return persistOfficialImportSignatureAuditIfConfigured(buildVertexPricingSignatureAuditRecord(cfg, result, checkedAt, runErr)) +} + +func readVertexPricingStructureSignature(path string) (vertexPricingStructureSignature, error) { + data, err := os.ReadFile(path) + if err != nil { + return vertexPricingStructureSignature{}, err + } + var signature vertexPricingStructureSignature + if err := json.Unmarshal(data, &signature); err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err) + } + return signature, nil +} + +func 
hasVertexPricingStructureSignature(signature vertexPricingStructureSignature) bool { + return signature.ByteSize > 0 || + strings.TrimSpace(signature.StructureSHA256) != "" || + strings.TrimSpace(signature.SHA256) != "" || + len(signature.TagCounts) > 0 || + len(signature.Headings) > 0 +} diff --git a/scripts/vertex_pricing_signature_guard_test.go b/scripts/vertex_pricing_signature_guard_test.go new file mode 100644 index 0000000..1c7c072 --- /dev/null +++ b/scripts/vertex_pricing_signature_guard_test.go @@ -0,0 +1,236 @@ +//go:build llm_script + +package main + +import ( + "context" + "database/sql" + "database/sql/driver" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "testing" + "time" +) + +func TestRunVertexPricingSignatureGuardInitializesBaseline(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + result, err := runVertexPricingSignatureGuard(vertexPricingSignatureGuardConfig{ + URL: defaultVertexPricingURL, + Fixture: filepath.Join("testdata", "vertex_pricing_sample.html"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 19, 40, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("runVertexPricingSignatureGuard 返回错误: %v", err) + } + if !result.BaselineInitialized { + t.Fatalf("期望初始化 baseline") + } + if result.DriftDetected { + t.Fatalf("首次初始化不应判定为漂移") + } + if _, err := os.Stat(baselinePath); err != nil { + t.Fatalf("baseline 未写入: %v", err) + } + if _, err := os.Stat(result.SnapshotPath); err != nil { + t.Fatalf("snapshot 未写入: %v", err) + } +} + +func TestRunVertexPricingSignatureGuardDetectsDrift(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + + initialResult, err := runVertexPricingSignatureGuard(vertexPricingSignatureGuardConfig{ + URL: defaultVertexPricingURL, + Fixture: filepath.Join("testdata", "vertex_pricing_sample.html"), + 
SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 15, 19, 41, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("初始化 baseline 失败: %v", err) + } + + driftFixture := `

Google 模型

标准

新结构
` + driftPath := filepath.Join(tempDir, "vertex-drift.html") + if err := os.WriteFile(driftPath, []byte(driftFixture), 0o644); err != nil { + t.Fatalf("写入 drift fixture 失败: %v", err) + } + + result, err := runVertexPricingSignatureGuard(vertexPricingSignatureGuardConfig{ + URL: defaultVertexPricingURL, + Fixture: driftPath, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: false, + }, time.Date(2026, 5, 15, 19, 42, 0, 0, time.FixedZone("CST", 8*3600))) + if err == nil { + t.Fatalf("期望结构漂移时报错") + } + if !result.DriftDetected { + t.Fatalf("期望 driftDetected=true") + } + if result.CurrentSignature.StructureSHA256 == initialResult.CurrentSignature.StructureSHA256 { + t.Fatalf("期望结构签名发生变化") + } + if !strings.Contains(err.Error(), "vertex pricing structure drift detected") { + t.Fatalf("期望返回 drift 错误,实际: %v", err) + } +} + +func TestFormatVertexPricingSignatureGuardSummary(t *testing.T) { + result := vertexPricingSignatureGuardResult{ + SnapshotPath: "/tmp/vertex.html", + SignaturePath: "/tmp/vertex.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + DriftDetected: false, + BaselineInitialized: true, + CurrentSignature: vertexPricingStructureSignature{StructureSHA256: "abc123", ByteSize: 99}, + PreviousBaselineHash: "", + } + summary := formatVertexPricingSignatureGuardSummary(result) + for _, want := range []string{ + "source=vertex-pricing-signature-guard", + "drift=false", + "baseline_initialized=true", + "structure_sha256=abc123", + "snapshot_out=/tmp/vertex.html", + } { + if !strings.Contains(summary, want) { + t.Fatalf("summary 缺少 %q,实际: %q", want, summary) + } + } +} + +func TestInsertOfficialImportSignatureAuditPersistsStructuredRecord(t *testing.T) { + db, calls := openVertexSignatureAuditRecordingDB(t) + checkedAt := time.Date(2026, 5, 15, 20, 15, 0, 0, time.FixedZone("CST", 8*3600)) + record := officialImportSignatureAuditRecord{ + SourceKey: "vertex_pricing_signature", + CheckedAt: checkedAt, + 
Status: "drift_detected", + DriftDetected: true, + BaselineInitialized: false, + SourceURL: defaultVertexPricingURL, + SnapshotPath: "/tmp/vertex.html", + SignaturePath: "/tmp/vertex.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + StructureSHA256: "current-sha", + PreviousStructureSHA256: "baseline-sha", + ByteSize: 813810, + SignaturePayload: &vertexPricingStructureSignature{ + ByteSize: 813810, + StructureSHA256: "current-sha", + Headings: []string{"Gemini 2.5 Pro", "标准"}, + TagCounts: map[string]int{"table": 1, "h2": 2}, + ContainsGemini: true, + ContainsTable: true, + }, + ErrorMessage: "vertex pricing structure drift detected", + } + + if err := insertOfficialImportSignatureAudit(db, record); err != nil { + t.Fatalf("insertOfficialImportSignatureAudit 返回错误: %v", err) + } + if len(calls.calls) != 1 { + t.Fatalf("期望 1 次写库,实际 %d", len(calls.calls)) + } + call := calls.calls[0] + if !strings.Contains(call.query, "INSERT INTO official_import_signature_audit") { + t.Fatalf("期望写入 official_import_signature_audit,实际 SQL: %s", call.query) + } + if got := call.args[0]; got != "vertex_pricing_signature" { + t.Fatalf("source_key 不匹配,实际 %#v", got) + } + if got := call.args[2]; got != "drift_detected" { + t.Fatalf("status 不匹配,实际 %#v", got) + } + if got := call.args[3]; got != true { + t.Fatalf("drift_detected 不匹配,实际 %#v", got) + } + if got := call.args[10]; got != "current-sha" { + t.Fatalf("structure_sha256 不匹配,实际 %#v", got) + } + if got := call.args[11]; got != "baseline-sha" { + t.Fatalf("previous_structure_sha256 不匹配,实际 %#v", got) + } + if got := call.args[13]; !strings.Contains(fmt.Sprint(got), `"structure_sha256":"current-sha"`) { + t.Fatalf("signature_payload 未写入结构化 JSON,实际 %#v", got) + } + if got := call.args[14]; got != "vertex pricing structure drift detected" { + t.Fatalf("error_message 不匹配,实际 %#v", got) + } +} + +type vertexSignatureAuditExecCall struct { + query string + args []any +} + +type vertexSignatureAuditExecRecorder struct { + mu 
sync.Mutex + calls []vertexSignatureAuditExecCall +} + +type vertexSignatureAuditDriver struct { + recorder *vertexSignatureAuditExecRecorder +} + +type vertexSignatureAuditConn struct { + recorder *vertexSignatureAuditExecRecorder +} + +func openVertexSignatureAuditRecordingDB(t *testing.T) (*sql.DB, *vertexSignatureAuditExecRecorder) { + t.Helper() + name := fmt.Sprintf("vertex-signature-audit-%d", time.Now().UnixNano()) + recorder := &vertexSignatureAuditExecRecorder{} + sql.Register(name, vertexSignatureAuditDriver{recorder: recorder}) + db, err := sql.Open(name, "") + if err != nil { + t.Fatalf("open recording db: %v", err) + } + t.Cleanup(func() { + _ = db.Close() + }) + return db, recorder +} + +func (d vertexSignatureAuditDriver) Open(string) (driver.Conn, error) { + return vertexSignatureAuditConn{recorder: d.recorder}, nil +} + +func (c vertexSignatureAuditConn) Prepare(string) (driver.Stmt, error) { + return nil, fmt.Errorf("not implemented") +} + +func (c vertexSignatureAuditConn) Close() error { + return nil +} + +func (c vertexSignatureAuditConn) Begin() (driver.Tx, error) { + return nil, fmt.Errorf("not implemented") +} + +func (c vertexSignatureAuditConn) ExecContext(_ context.Context, query string, args []driver.NamedValue) (driver.Result, error) { + values := make([]any, 0, len(args)) + for _, arg := range args { + values = append(values, arg.Value) + } + c.recorder.mu.Lock() + c.recorder.calls = append(c.recorder.calls, vertexSignatureAuditExecCall{ + query: query, + args: values, + }) + c.recorder.mu.Unlock() + return driver.RowsAffected(1), nil +} diff --git a/scripts/vertex_pricing_snapshot_lib.go b/scripts/vertex_pricing_snapshot_lib.go new file mode 100644 index 0000000..cfc7900 --- /dev/null +++ b/scripts/vertex_pricing_snapshot_lib.go @@ -0,0 +1,173 @@ +//go:build llm_script + +package main + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" +) + +type 
vertexPricingStructureSignature struct { + ByteSize int `json:"byte_size"` + SHA256 string `json:"sha256"` + StructureSHA256 string `json:"structure_sha256"` + NormalizedLineCount int `json:"normalized_line_count"` + TagCounts map[string]int `json:"tag_counts"` + Headings []string `json:"headings"` + ContainsGemini bool `json:"contains_gemini"` + ContainsStandard bool `json:"contains_standard"` + ContainsPriceText bool `json:"contains_price_text"` + ContainsTable bool `json:"contains_table"` + GeneratedAt string `json:"generated_at,omitempty"` + SourceURL string `json:"source_url,omitempty"` + SnapshotPath string `json:"snapshot_path,omitempty"` +} + +var vertexSignatureTagPattern = regexp.MustCompile(`(?is)<(html|body|section|div|table|tr|td|th|h1|h2|h3|h4|h5|h6|script|article)\b`) + +func buildVertexPricingStructureSignature(raw string) vertexPricingStructureSignature { + lines := htmlLines(raw) + tagCounts := make(map[string]int) + matches := vertexSignatureTagPattern.FindAllStringSubmatch(raw, -1) + for _, match := range matches { + tagCounts[strings.ToLower(match[1])]++ + } + + headings := extractVertexSignatureHeadings(raw) + signature := vertexPricingStructureSignature{ + ByteSize: len([]byte(raw)), + SHA256: sha256Hex(raw), + NormalizedLineCount: len(lines), + TagCounts: tagCounts, + Headings: headings, + ContainsGemini: strings.Contains(strings.ToLower(raw), "gemini"), + ContainsStandard: containsLine(lines, "standard"), + ContainsPriceText: strings.Contains(strings.ToLower(raw), "price"), + ContainsTable: tagCounts["table"] > 0, + } + signature.StructureSHA256 = sha256Hex(vertexStructureDigestPayload(signature)) + return signature +} + +func writeVertexPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (vertexPricingStructureSignature, error) { + if strings.TrimSpace(snapshotPath) == "" { + return vertexPricingStructureSignature{}, fmt.Errorf("snapshot path is required") + } + if 
strings.TrimSpace(signaturePath) == "" { + return vertexPricingStructureSignature{}, fmt.Errorf("signature path is required") + } + if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err) + } + if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err) + } + + signature := buildVertexPricingStructureSignature(raw) + signature.GeneratedAt = now.Format(time.RFC3339) + signature.SourceURL = sourceURL + signature.SnapshotPath = snapshotPath + payload, err := json.MarshalIndent(signature, "", " ") + if err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err) + } + if err := os.WriteFile(signaturePath, payload, 0o644); err != nil { + return vertexPricingStructureSignature{}, fmt.Errorf("write signature: %w", err) + } + return signature, nil +} + +func resolveVertexPricingSnapshotPaths(snapshotPath string, signaturePath string, now time.Time) (string, string) { + if strings.TrimSpace(snapshotPath) == "" { + base := filepath.Join("logs", "vertex-pricing-snapshots", fmt.Sprintf("vertex-pricing-%s", now.Format("20060102-150405"))) + snapshotPath = base + ".html" + if strings.TrimSpace(signaturePath) == "" { + signaturePath = base + ".signature.json" + } + } + if strings.TrimSpace(signaturePath) == "" { + signaturePath = strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath)) + ".signature.json" + } + return snapshotPath, signaturePath +} + +func extractVertexSignatureHeadings(raw string) []string { + matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1) + headings := make([]string, 0, len(matches)) + seen := make(map[string]struct{}) + for _, match := range 
matches { + heading := cleanHTMLText(raw[match[2]:match[3]]) + if heading == "" { + continue + } + if _, exists := seen[heading]; exists { + continue + } + seen[heading] = struct{}{} + headings = append(headings, heading) + if len(headings) >= 12 { + break + } + } + return headings +} + +func vertexStructureDigestPayload(signature vertexPricingStructureSignature) string { + type tagCount struct { + Name string `json:"name"` + Count int `json:"count"` + } + tagNames := make([]string, 0, len(signature.TagCounts)) + for name := range signature.TagCounts { + tagNames = append(tagNames, name) + } + sort.Strings(tagNames) + tagCounts := make([]tagCount, 0, len(tagNames)) + for _, name := range tagNames { + tagCounts = append(tagCounts, tagCount{Name: name, Count: signature.TagCounts[name]}) + } + payload := struct { + NormalizedLineCount int `json:"normalized_line_count"` + TagCounts []tagCount `json:"tag_counts"` + Headings []string `json:"headings"` + ContainsGemini bool `json:"contains_gemini"` + ContainsStandard bool `json:"contains_standard"` + ContainsPriceText bool `json:"contains_price_text"` + ContainsTable bool `json:"contains_table"` + }{ + NormalizedLineCount: signature.NormalizedLineCount, + TagCounts: tagCounts, + Headings: signature.Headings, + ContainsGemini: signature.ContainsGemini, + ContainsStandard: signature.ContainsStandard, + ContainsPriceText: signature.ContainsPriceText, + ContainsTable: signature.ContainsTable, + } + bytes, _ := json.Marshal(payload) + return string(bytes) +} + +func sha256Hex(raw string) string { + sum := sha256.Sum256([]byte(raw)) + return hex.EncodeToString(sum[:]) +} + +func containsLine(lines []string, target string) bool { + for _, line := range lines { + if strings.EqualFold(strings.TrimSpace(line), target) { + return true + } + } + return false +} diff --git a/scripts/vertex_pricing_snapshot_test.go b/scripts/vertex_pricing_snapshot_test.go new file mode 100644 index 0000000..edc729f --- /dev/null +++ 
b/scripts/vertex_pricing_snapshot_test.go @@ -0,0 +1,101 @@ +//go:build llm_script + +package main + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestBuildVertexPricingStructureSignatureCapturesShape(t *testing.T) { + raw := ` + + +

Gemini 2.5

+
+

Standard

+ + + + +
ModelTypePrice
Gemini 2.5 FlashInput (text, image, video)$0.30
Gemini 2.5 FlashText output$2.50
+
+ + +` + + signature := buildVertexPricingStructureSignature(raw) + if signature.ByteSize == 0 { + t.Fatalf("期望 byte_size 非 0") + } + if signature.SHA256 == "" || signature.StructureSHA256 == "" { + t.Fatalf("期望生成 sha256 签名: %+v", signature) + } + if signature.TagCounts["table"] != 1 { + t.Fatalf("期望 table 数为 1,实际 %+v", signature.TagCounts) + } + if !signature.ContainsStandard { + t.Fatalf("期望识别 Standard 区块") + } + if !signature.ContainsGemini { + t.Fatalf("期望识别 Gemini 关键词") + } + if len(signature.Headings) == 0 || signature.Headings[0] != "Gemini 2.5" { + t.Fatalf("标题提取错误: %+v", signature.Headings) + } +} + +func TestRunVertexPricingImportSnapshotOnlyWritesArtifacts(t *testing.T) { + tempDir := t.TempDir() + snapshotPath := filepath.Join(tempDir, "vertex-live.html") + signaturePath := filepath.Join(tempDir, "vertex-live.signature.json") + + var out bytes.Buffer + err := runVertexPricingImport(vertexPricingImportConfig{ + URL: defaultVertexPricingURL, + Fixture: filepath.Join("testdata", "vertex_pricing_sample.html"), + DryRun: true, + SnapshotOnly: true, + SnapshotOut: snapshotPath, + SignatureOut: signaturePath, + }, nil, &out) + if err != nil { + t.Fatalf("runVertexPricingImport 返回错误: %v", err) + } + + snapshotBytes, err := os.ReadFile(snapshotPath) + if err != nil { + t.Fatalf("读取 snapshot 失败: %v", err) + } + if !strings.Contains(string(snapshotBytes), "Gemini 3.1 Pro Preview") { + t.Fatalf("snapshot 内容错误") + } + + signatureBytes, err := os.ReadFile(signaturePath) + if err != nil { + t.Fatalf("读取 signature 失败: %v", err) + } + var signature vertexPricingStructureSignature + if err := json.Unmarshal(signatureBytes, &signature); err != nil { + t.Fatalf("signature JSON 解析失败: %v", err) + } + if signature.TagCounts["table"] == 0 { + t.Fatalf("期望 signature 含 table 计数: %+v", signature.TagCounts) + } + + output := out.String() + for _, want := range []string{ + "source=vertex-pricing-snapshot", + "snapshot_only=true", + "signature_out=" + signaturePath, + 
"snapshot_out=" + snapshotPath, + } { + if !strings.Contains(output, want) { + t.Fatalf("输出缺少 %q,实际: %q", want, output) + } + } +}