diff --git a/docs/plans/2026-05-27-intraday-price-watch-plan.md b/docs/plans/2026-05-27-intraday-price-watch-plan.md index 78711b6..bf557a7 100644 --- a/docs/plans/2026-05-27-intraday-price-watch-plan.md +++ b/docs/plans/2026-05-27-intraday-price-watch-plan.md @@ -55,6 +55,6 @@ ## 下一步建议 -1. 为 `run_intraday_discovery_watch.sh` 补充生产级 provider adapter 和调度说明 -2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间”提示 +1. 已补充 `run_intraday_discovery_watch.sh` 与 DeepSeek 官方新闻页结构签名 guard,可继续扩展到 DeepSeek pricing 页面 +2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间 / 最近一次官方页 drift 检查时间”提示 3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板 diff --git a/scripts/deepseek_news_signature_guard.go b/scripts/deepseek_news_signature_guard.go new file mode 100644 index 0000000..1910ddb --- /dev/null +++ b/scripts/deepseek_news_signature_guard.go @@ -0,0 +1,51 @@ +//go:build llm_script + +package main + +import ( + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var snapshotDir string + var baselinePath string + var timeoutSeconds int + var allowBootstrap bool + + flag.StringVar(&url, "url", defaultDeepSeekNewsFetchURL, "DeepSeek 官方新闻页") + flag.StringVar(&fixture, "fixture", "", "DeepSeek 新闻页样例文件") + flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek news snapshot 输出目录") + flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek news 结构基线签名路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化") + flag.Parse() + + now := time.Now() + cfg := deepseekNewsSignatureGuardConfig{ + URL: url, + Fixture: fixture, + SnapshotDir: snapshotDir, + BaselinePath: baselinePath, + Timeout: time.Duration(timeoutSeconds) * time.Second, + AllowBootstrap: allowBootstrap, + } + result, err := runDeepSeekNewsSignatureGuard(cfg, now) + if auditErr := persistDeepSeekNewsSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil { + fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard audit: %v\n", auditErr) + if err == nil { + err = auditErr + } + } + fmt.Println(formatDeepSeekNewsSignatureGuardSummary(result)) + if err != nil { + fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/deepseek_news_signature_guard_lib.go b/scripts/deepseek_news_signature_guard_lib.go new file mode 100644 index 0000000..ee23a87 --- /dev/null +++ b/scripts/deepseek_news_signature_guard_lib.go @@ -0,0 +1,127 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +type deepseekNewsSignatureGuardConfig struct { + URL string + Fixture string + SnapshotDir string + BaselinePath string + Timeout time.Duration + AllowBootstrap bool +} + +type deepseekNewsSignatureGuardResult struct { + SnapshotPath string + SignaturePath string + BaselinePath string + DriftDetected bool + BaselineInitialized bool + PreviousBaselineHash string + CurrentSignature deepseekNewsStructureSignature +} + +const defaultDeepSeekNewsFetchURL = "https://api-docs.deepseek.com/news/news250120" + +func runDeepSeekNewsSignatureGuard(cfg deepseekNewsSignatureGuardConfig, now time.Time) (deepseekNewsSignatureGuardResult, error) { + snapshotDir := cfg.SnapshotDir + if snapshotDir == "" { + snapshotDir = filepath.Join("logs", "deepseek-news-snapshots") + } + if err := os.MkdirAll(snapshotDir, 0o755); err != nil { + return deepseekNewsSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + snapshotPath, signaturePath := resolveDeepSeekNewsSnapshotPaths("", "", snapshotDir, now) + baselinePath := cfg.BaselinePath + if baselinePath == "" { + baselinePath = filepath.Join(snapshotDir, "baseline.signature.json") + } + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return deepseekNewsSignatureGuardResult{}, err + } + current, err := writeDeepSeekNewsSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now) + if err != nil { + return deepseekNewsSignatureGuardResult{}, err + } + result := deepseekNewsSignatureGuardResult{ + SnapshotPath: snapshotPath, + SignaturePath: signaturePath, + BaselinePath: baselinePath, + CurrentSignature: current, + } + previous, err := readDeepSeekNewsStructureSignature(baselinePath) + if err != nil { + if os.IsNotExist(err) { + if !cfg.AllowBootstrap { + return result, fmt.Errorf("deepseek news baseline missing: %s", baselinePath) + } + if err := copyFileCommon(signaturePath, baselinePath); err != nil { + return result, fmt.Errorf("initialize baseline: %w", err) + } + result.BaselineInitialized = true + return result, nil + } + return result, err + } + result.PreviousBaselineHash = previous.StructureSHA256 + if previous.StructureSHA256 != current.StructureSHA256 { + result.DriftDetected = true + return result, fmt.Errorf( + "deepseek news structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s", + previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath, + ) + } + return result, nil +} + +func formatDeepSeekNewsSignatureGuardSummary(result deepseekNewsSignatureGuardResult) string { + return fmt.Sprintf( + "source=deepseek-news-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s", + result.DriftDetected, + result.BaselineInitialized, + result.CurrentSignature.StructureSHA256, + emptyIfBlank(result.PreviousBaselineHash), + result.SnapshotPath, + result.SignaturePath, + result.BaselinePath, + ) +} + +func buildDeepSeekNewsSignatureAuditRecord(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord { + record := officialImportSignatureAuditRecord{ + SourceKey: "deepseek_news_signature", + CheckedAt: checkedAt, + Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr), + DriftDetected: result.DriftDetected, + BaselineInitialized: result.BaselineInitialized, + SourceURL: strings.TrimSpace(cfg.URL), + FixturePath: strings.TrimSpace(cfg.Fixture), + SnapshotPath: strings.TrimSpace(result.SnapshotPath), + SignaturePath: strings.TrimSpace(result.SignaturePath), + BaselinePath: strings.TrimSpace(result.BaselinePath), + StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256), + PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash), + ByteSize: result.CurrentSignature.ByteSize, + ErrorMessage: errorMessageText(runErr), + } + if hasDeepSeekNewsStructureSignature(result.CurrentSignature) { + signatureCopy := result.CurrentSignature + record.SignaturePayload = &signatureCopy + } + return record +} + +func persistDeepSeekNewsSignatureAuditIfConfigured(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) error { + return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekNewsSignatureAuditRecord(cfg, result, checkedAt, runErr)) +} diff --git a/scripts/deepseek_news_signature_guard_test.go b/scripts/deepseek_news_signature_guard_test.go new file mode 100644 index 0000000..6227422 --- /dev/null +++ b/scripts/deepseek_news_signature_guard_test.go @@ -0,0 +1,88 @@ +//go:build llm_script + +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRunDeepSeekNewsSignatureGuardInitializesBaseline(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{ + URL: defaultDeepSeekNewsFetchURL, + Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 27, 21, 0, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("runDeepSeekNewsSignatureGuard 返回错误: %v", err) + } + if !result.BaselineInitialized { + t.Fatal("期望初始化 baseline") + } + if _, err := os.Stat(baselinePath); err != nil { + t.Fatalf("baseline 未写入: %v", err) + } +} + +func TestRunDeepSeekNewsSignatureGuardDetectsDrift(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + _, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{ + URL: defaultDeepSeekNewsFetchURL, + Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"), + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + }, time.Date(2026, 5, 27, 21, 1, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("初始化 baseline 失败: %v", err) + } + driftFixture := filepath.Join(tempDir, "drift.html") + if err := os.WriteFile(driftFixture, []byte("DeepSeek-V4 Release

DeepSeek V4 Release

"), 0o644); err != nil { + t.Fatalf("写入 drift fixture 失败: %v", err) + } + result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{ + URL: defaultDeepSeekNewsFetchURL, + Fixture: driftFixture, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: false, + }, time.Date(2026, 5, 27, 21, 2, 0, 0, time.FixedZone("CST", 8*3600))) + if err == nil { + t.Fatal("期望结构漂移时报错") + } + if !result.DriftDetected { + t.Fatal("期望 driftDetected=true") + } + if !strings.Contains(err.Error(), "deepseek news structure drift detected") { + t.Fatalf("期望返回 drift 错误,实际: %v", err) + } +} + +func TestFormatDeepSeekNewsSignatureGuardSummary(t *testing.T) { + result := deepseekNewsSignatureGuardResult{ + SnapshotPath: "/tmp/deepseek-news.html", + SignaturePath: "/tmp/deepseek-news.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + BaselineInitialized: true, + CurrentSignature: deepseekNewsStructureSignature{ + StructureSHA256: "abc123", + }, + } + summary := formatDeepSeekNewsSignatureGuardSummary(result) + for _, want := range []string{"source=deepseek-news-signature-guard", "baseline_initialized=true", "structure_sha256=abc123"} { + if !strings.Contains(summary, want) { + t.Fatalf("summary 缺少 %q,实际: %q", want, summary) + } + } +} diff --git a/scripts/deepseek_news_snapshot_lib.go b/scripts/deepseek_news_snapshot_lib.go new file mode 100644 index 0000000..b258879 --- /dev/null +++ b/scripts/deepseek_news_snapshot_lib.go @@ -0,0 +1,196 @@ +//go:build llm_script + +package main + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" +) + +type deepseekNewsStructureSignature struct { + ByteSize int `json:"byte_size"` + SHA256 string `json:"sha256"` + StructureSHA256 string `json:"structure_sha256"` + Title string `json:"title"` + MetaDescription string `json:"meta_description"` + Headings []string `json:"headings"` + Contains map[string]bool `json:"contains"` + GeneratedAt string `json:"generated_at,omitempty"` + SourceURL string `json:"source_url,omitempty"` + SnapshotPath string `json:"snapshot_path,omitempty"` +} + +var deepseekNewsContainsNeedles = map[string]string{ + "deepseek": "deepseek", + "release": "release", + "news": "news", + "api_docs": "api docs", +} + +var htmlTagRe = regexp.MustCompile(`(?s)<[^>]+>`) +var titleRe = regexp.MustCompile(`(?is)]*>(.*?)`) +var metaDescRe = regexp.MustCompile(`(?is)]+name=["']description["'][^>]+content=["']([^"']+)["']`) +var h1Re = regexp.MustCompile(`(?is)]*>(.*?)`) + +func buildDeepSeekNewsStructureSignature(raw string) deepseekNewsStructureSignature { + title := firstHTMLMatch(titleRe, raw) + meta := firstHTMLMatch(metaDescRe, raw) + h1Matches := h1Re.FindAllStringSubmatch(raw, -1) + headings := make([]string, 0, len(h1Matches)) + seen := make(map[string]struct{}) + for _, match := range h1Matches { + if len(match) < 2 { + continue + } + clean := cleanHTMLText(match[1]) + if clean == "" { + continue + } + if _, exists := seen[clean]; exists { + continue + } + seen[clean] = struct{}{} + headings = append(headings, clean) + } + contains := make(map[string]bool, len(deepseekNewsContainsNeedles)) + lower := strings.ToLower(raw) + for key, needle := range deepseekNewsContainsNeedles { + contains[key] = strings.Contains(lower, strings.ToLower(needle)) + } + signature := deepseekNewsStructureSignature{ + ByteSize: len([]byte(raw)), + SHA256: deepseekNewsSHA256Hex(raw), + Title: title, + MetaDescription: meta, + Headings: headings, + Contains: contains, + } + signature.StructureSHA256 = deepseekNewsSHA256Hex(deepseekNewsStructureDigestPayload(signature)) + return signature +} + +func writeDeepSeekNewsSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekNewsStructureSignature, error) { + if strings.TrimSpace(snapshotPath) == "" { + return deepseekNewsStructureSignature{}, fmt.Errorf("snapshot path is required") + } + if strings.TrimSpace(signaturePath) == "" { + return deepseekNewsStructureSignature{}, fmt.Errorf("signature path is required") + } + if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err) + } + if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("write snapshot: %w", err) + } + signature := buildDeepSeekNewsStructureSignature(raw) + signature.GeneratedAt = now.Format(time.RFC3339) + signature.SourceURL = sourceURL + signature.SnapshotPath = snapshotPath + payload, err := json.MarshalIndent(signature, "", " ") + if err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("marshal signature: %w", err) + } + if err := os.WriteFile(signaturePath, payload, 0o644); err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("write signature: %w", err) + } + return signature, nil +} + +func resolveDeepSeekNewsSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now time.Time) (string, string) { + if strings.TrimSpace(snapshotDir) == "" { + snapshotDir = filepath.Join("logs", "deepseek-news-snapshots") + } + if strings.TrimSpace(snapshotPath) == "" { + base := filepath.Join(snapshotDir, fmt.Sprintf("deepseek-news-%s", now.Format("20060102-150405"))) + snapshotPath = base + ".html" + if strings.TrimSpace(signaturePath) == "" { + signaturePath = base + ".signature.json" + } + } + if strings.TrimSpace(signaturePath) == "" { + signaturePath = strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath)) + ".signature.json" + } + return snapshotPath, signaturePath +} + +func readDeepSeekNewsStructureSignature(path string) (deepseekNewsStructureSignature, error) { + data, err := os.ReadFile(path) + if err != nil { + return deepseekNewsStructureSignature{}, err + } + var signature deepseekNewsStructureSignature + if err := json.Unmarshal(data, &signature); err != nil { + return deepseekNewsStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err) + } + return signature, nil +} + +func hasDeepSeekNewsStructureSignature(signature deepseekNewsStructureSignature) bool { + return signature.ByteSize > 0 || + strings.TrimSpace(signature.StructureSHA256) != "" || + strings.TrimSpace(signature.SHA256) != "" || + strings.TrimSpace(signature.Title) != "" || + len(signature.Headings) > 0 || + len(signature.Contains) > 0 +} + +func deepseekNewsStructureDigestPayload(signature deepseekNewsStructureSignature) string { + type containsEntry struct { + Name string `json:"name"` + Value bool `json:"value"` + } + keys := make([]string, 0, len(signature.Contains)) + for key := range signature.Contains { + keys = append(keys, key) + } + sort.Strings(keys) + entries := make([]containsEntry, 0, len(keys)) + for _, key := range keys { + entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]}) + } + payload := struct { + Title string `json:"title"` + MetaDescription string `json:"meta_description"` + Headings []string `json:"headings"` + Contains []containsEntry `json:"contains"` + }{ + Title: signature.Title, + MetaDescription: signature.MetaDescription, + Headings: signature.Headings, + Contains: entries, + } + bytes, _ := json.Marshal(payload) + return string(bytes) +} + +func deepseekNewsSHA256Hex(raw string) string { + sum := sha256.Sum256([]byte(raw)) + return hex.EncodeToString(sum[:]) +} + +func firstHTMLMatch(re *regexp.Regexp, raw string) string { + match := re.FindStringSubmatch(raw) + if len(match) < 2 { + return "" + } + return cleanHTMLText(match[1]) +} + +func cleanHTMLText(raw string) string { + text := htmlTagRe.ReplaceAllString(raw, " ") + text = strings.ReplaceAll(text, "&", "&") + text = strings.ReplaceAll(text, " ", " ") + text = strings.Join(strings.Fields(text), " ") + return strings.TrimSpace(text) +} diff --git a/scripts/deepseek_pricing_signature_guard.go b/scripts/deepseek_pricing_signature_guard.go new file mode 100644 index 0000000..d9f075b --- /dev/null +++ b/scripts/deepseek_pricing_signature_guard.go @@ -0,0 +1,57 @@ +//go:build llm_script + +package main + +import ( + "flag" + "fmt" + "os" + "time" +) + +func main() { + loadSubscriptionImportEnv() + + var url string + var fixture string + var snapshotDir string + var baselinePath string + var timeoutSeconds int + var allowBootstrap bool + var sourceKey string + var snapshotBase string + + flag.StringVar(&sourceKey, "source-key", "deepseek_pricing_signature", "审计 source_key") + flag.StringVar(&snapshotBase, "snapshot-base", "deepseek-pricing", "snapshot 文件名前缀") + flag.StringVar(&url, "url", defaultDeepSeekPricingFetchURL, "DeepSeek 官方价格页") + flag.StringVar(&fixture, "fixture", "", "DeepSeek 价格页样例文件") + flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek pricing snapshot 输出目录") + flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek pricing 结构基线签名路径") + flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") + flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化") + flag.Parse() + + now := time.Now() + cfg := deepseekPricingSignatureGuardConfig{ + SourceKey: sourceKey, + URL: url, + Fixture: fixture, + SnapshotDir: snapshotDir, + BaselinePath: baselinePath, + Timeout: time.Duration(timeoutSeconds) * time.Second, + AllowBootstrap: allowBootstrap, + SnapshotBase: snapshotBase, + } + result, err := runDeepSeekPricingSignatureGuard(cfg, now) + if auditErr := persistDeepSeekPricingSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil { + fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard audit: %v\n", auditErr) + if err == nil { + err = auditErr + } + } + fmt.Println(formatDeepSeekPricingSignatureGuardSummary(sourceKey, result)) + if err != nil { + fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard: %v\n", err) + os.Exit(1) + } +} diff --git a/scripts/deepseek_pricing_signature_guard_lib.go b/scripts/deepseek_pricing_signature_guard_lib.go new file mode 100644 index 0000000..ea8685a --- /dev/null +++ b/scripts/deepseek_pricing_signature_guard_lib.go @@ -0,0 +1,132 @@ +//go:build llm_script + +package main + +import ( + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + "time" +) + +type deepseekPricingSignatureGuardConfig struct { + SourceKey string + URL string + Fixture string + SnapshotDir string + BaselinePath string + Timeout time.Duration + AllowBootstrap bool + SnapshotBase string + SourceKindLabel string +} + +type deepseekPricingSignatureGuardResult struct { + SnapshotPath string + SignaturePath string + BaselinePath string + DriftDetected bool + BaselineInitialized bool + PreviousBaselineHash string + CurrentSignature deepseekPricingStructureSignature +} + +const defaultDeepSeekPricingFetchURL = "https://platform.deepseek.com/pricing" +const defaultDeepSeekAPIPricingFetchURL = "https://platform.deepseek.com/docs/api-pricing" + +func runDeepSeekPricingSignatureGuard(cfg deepseekPricingSignatureGuardConfig, now time.Time) (deepseekPricingSignatureGuardResult, error) { + snapshotDir := cfg.SnapshotDir + if snapshotDir == "" { + snapshotDir = filepath.Join("logs", cfg.SnapshotBase+"-snapshots") + } + if err := os.MkdirAll(snapshotDir, 0o755); err != nil { + return deepseekPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + snapshotPath, signaturePath := resolveDeepSeekPricingSnapshotPaths("", "", snapshotDir, cfg.SnapshotBase, now) + baselinePath := cfg.BaselinePath + if baselinePath == "" { + baselinePath = filepath.Join(snapshotDir, "baseline.signature.json") + } + client := &http.Client{Timeout: cfg.Timeout} + raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client) + if err != nil { + return deepseekPricingSignatureGuardResult{}, err + } + current, err := writeDeepSeekPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now) + if err != nil { + return deepseekPricingSignatureGuardResult{}, err + } + result := deepseekPricingSignatureGuardResult{ + SnapshotPath: snapshotPath, + SignaturePath: signaturePath, + BaselinePath: baselinePath, + CurrentSignature: current, + } + previous, err := readDeepSeekPricingStructureSignature(baselinePath) + if err != nil { + if os.IsNotExist(err) { + if !cfg.AllowBootstrap { + return result, fmt.Errorf("%s baseline missing: %s", cfg.SourceKey, baselinePath) + } + if err := copyFileCommon(signaturePath, baselinePath); err != nil { + return result, fmt.Errorf("initialize baseline: %w", err) + } + result.BaselineInitialized = true + return result, nil + } + return result, err + } + result.PreviousBaselineHash = previous.StructureSHA256 + if previous.StructureSHA256 != current.StructureSHA256 { + result.DriftDetected = true + return result, fmt.Errorf( + "%s structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s", + cfg.SourceKey, previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath, + ) + } + return result, nil +} + +func formatDeepSeekPricingSignatureGuardSummary(sourceKey string, result deepseekPricingSignatureGuardResult) string { + return fmt.Sprintf( + "source=%s drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s", + sourceKey, + result.DriftDetected, + result.BaselineInitialized, + result.CurrentSignature.StructureSHA256, + emptyIfBlank(result.PreviousBaselineHash), + result.SnapshotPath, + result.SignaturePath, + result.BaselinePath, + ) +} + +func buildDeepSeekPricingSignatureAuditRecord(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord { + record := officialImportSignatureAuditRecord{ + SourceKey: cfg.SourceKey, + CheckedAt: checkedAt, + Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr), + DriftDetected: result.DriftDetected, + BaselineInitialized: result.BaselineInitialized, + SourceURL: strings.TrimSpace(cfg.URL), + FixturePath: strings.TrimSpace(cfg.Fixture), + SnapshotPath: strings.TrimSpace(result.SnapshotPath), + SignaturePath: strings.TrimSpace(result.SignaturePath), + BaselinePath: strings.TrimSpace(result.BaselinePath), + StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256), + PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash), + ByteSize: result.CurrentSignature.ByteSize, + ErrorMessage: errorMessageText(runErr), + } + if hasDeepSeekPricingStructureSignature(result.CurrentSignature) { + signatureCopy := result.CurrentSignature + record.SignaturePayload = &signatureCopy + } + return record +} + +func persistDeepSeekPricingSignatureAuditIfConfigured(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) error { + return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekPricingSignatureAuditRecord(cfg, result, checkedAt, runErr)) +} diff --git a/scripts/deepseek_pricing_signature_guard_test.go b/scripts/deepseek_pricing_signature_guard_test.go new file mode 100644 index 0000000..f40a4a9 --- /dev/null +++ b/scripts/deepseek_pricing_signature_guard_test.go @@ -0,0 +1,96 @@ +//go:build llm_script + +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestRunDeepSeekPricingSignatureGuardInitializesBaseline(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + fixture := filepath.Join(tempDir, "pricing.html") + if err := os.WriteFile(fixture, []byte(`DeepSeekpricing`), 0o644); err != nil { + t.Fatalf("写入 fixture 失败: %v", err) + } + result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{ + SourceKey: "deepseek_pricing_signature", + URL: defaultDeepSeekPricingFetchURL, + Fixture: fixture, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + SnapshotBase: "deepseek-pricing", + }, time.Date(2026, 5, 27, 22, 0, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("runDeepSeekPricingSignatureGuard 返回错误: %v", err) + } + if !result.BaselineInitialized { + t.Fatal("期望初始化 baseline") + } +} + +func TestRunDeepSeekPricingSignatureGuardDetectsDrift(t *testing.T) { + tempDir := t.TempDir() + baselinePath := filepath.Join(tempDir, "baseline.signature.json") + fixture := filepath.Join(tempDir, "pricing.html") + if err := os.WriteFile(fixture, []byte(`DeepSeekpricing`), 0o644); err != nil { + t.Fatalf("写入 fixture 失败: %v", err) + } + _, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{ + SourceKey: "deepseek_pricing_signature", + URL: defaultDeepSeekPricingFetchURL, + Fixture: fixture, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: true, + SnapshotBase: "deepseek-pricing", + }, time.Date(2026, 5, 27, 22, 1, 0, 0, time.FixedZone("CST", 8*3600))) + if err != nil { + t.Fatalf("初始化 baseline 失败: %v", err) + } + driftFixture := filepath.Join(tempDir, "pricing-drift.html") + if err := os.WriteFile(driftFixture, []byte(`DeepSeek Pricingpricing update`), 0o644); err != nil { + t.Fatalf("写入 drift fixture 失败: %v", err) + } + result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{ + SourceKey: "deepseek_pricing_signature", + URL: defaultDeepSeekPricingFetchURL, + Fixture: driftFixture, + SnapshotDir: tempDir, + BaselinePath: baselinePath, + Timeout: time.Second, + AllowBootstrap: false, + SnapshotBase: "deepseek-pricing", + }, time.Date(2026, 5, 27, 22, 2, 0, 0, time.FixedZone("CST", 8*3600))) + if err == nil { + t.Fatal("期望结构漂移时报错") + } + if !result.DriftDetected { + t.Fatal("期望 driftDetected=true") + } +} + +func TestFormatDeepSeekPricingSignatureGuardSummary(t *testing.T) { + result := deepseekPricingSignatureGuardResult{ + SnapshotPath: "/tmp/deepseek-pricing.html", + SignaturePath: "/tmp/deepseek-pricing.signature.json", + BaselinePath: "/tmp/baseline.signature.json", + BaselineInitialized: true, + CurrentSignature: deepseekPricingStructureSignature{ + StructureSHA256: "abc123", + }, + } + summary := formatDeepSeekPricingSignatureGuardSummary("deepseek_pricing_signature", result) + for _, want := range []string{"source=deepseek_pricing_signature", "baseline_initialized=true", "structure_sha256=abc123"} { + if !strings.Contains(summary, want) { + t.Fatalf("summary 缺少 %q,实际: %q", want, summary) + } + } +} diff --git a/scripts/deepseek_pricing_snapshot_lib.go b/scripts/deepseek_pricing_snapshot_lib.go new file mode 100644 index 0000000..161d63c --- /dev/null +++ b/scripts/deepseek_pricing_snapshot_lib.go @@ -0,0 +1,183 @@ +//go:build llm_script + +package main + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" +) + +type deepseekPricingStructureSignature struct { + ByteSize int `json:"byte_size"` + SHA256 string `json:"sha256"` + StructureSHA256 string `json:"structure_sha256"` + Title string `json:"title"` + MetaDescription string `json:"meta_description"` + CommitID string `json:"commit_id"` + CanonicalURL string `json:"canonical_url"` + Contains map[string]bool `json:"contains"` + GeneratedAt string `json:"generated_at,omitempty"` + SourceURL string `json:"source_url,omitempty"` + SnapshotPath string `json:"snapshot_path,omitempty"` +} + +var deepseekPricingContainsNeedles = map[string]string{ + "deepseek": "deepseek", + "platform": "platform", + "pricing": "pricing", + "api_docs": "api", + "developer": "developer resources", +} + +var deepseekPricingTitleRe = regexp.MustCompile(`(?is)]*>(.*?)`) +var deepseekPricingMetaDescRe = regexp.MustCompile(`(?is)]+name=["']description["'][^>]+content=["']([^"']+)["']`) +var deepseekPricingCommitRe = regexp.MustCompile(`(?is)]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`) +var deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)]+property=["']og:url["'][^>]+content=["']([^"']+)["']`) +var deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`) + +func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature { + title := firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw) + meta := firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw) + commitID := firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw) + canonicalURL := firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw) + contains := make(map[string]bool, len(deepseekPricingContainsNeedles)) + lower := strings.ToLower(raw) + for key, needle := range deepseekPricingContainsNeedles { + contains[key] = strings.Contains(lower, strings.ToLower(needle)) + } + signature := deepseekPricingStructureSignature{ + ByteSize: len([]byte(raw)), + SHA256: deepseekPricingSHA256Hex(raw), + Title: title, + MetaDescription: meta, + CommitID: commitID, + CanonicalURL: canonicalURL, + Contains: contains, + } + signature.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(signature)) + return signature +} + +func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) { + if strings.TrimSpace(snapshotPath) == "" { + return deepseekPricingStructureSignature{}, fmt.Errorf("snapshot path is required") + } + if strings.TrimSpace(signaturePath) == "" { + return deepseekPricingStructureSignature{}, fmt.Errorf("signature path is required") + } + if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err) + } + if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err) + } + if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err) + } + signature := buildDeepSeekPricingStructureSignature(raw) + signature.GeneratedAt = now.Format(time.RFC3339) + signature.SourceURL = sourceURL + signature.SnapshotPath = snapshotPath + payload, err := json.MarshalIndent(signature, "", " ") + if err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err) + } + if err := os.WriteFile(signaturePath, payload, 0o644); err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("write signature: %w", err) + } + return signature, nil +} + +func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) { + if strings.TrimSpace(snapshotDir) == "" { + snapshotDir = filepath.Join("logs", baseName+"-snapshots") + } + if strings.TrimSpace(snapshotPath) == "" { + base := filepath.Join(snapshotDir, fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405"))) + snapshotPath = base + ".html" + if strings.TrimSpace(signaturePath) == "" { + signaturePath = base + ".signature.json" + } + } + if strings.TrimSpace(signaturePath) == "" { + signaturePath = strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath)) + ".signature.json" + } + return snapshotPath, signaturePath +} + +func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) { + data, err := os.ReadFile(path) + if err != nil { + return deepseekPricingStructureSignature{}, err + } + var signature deepseekPricingStructureSignature + if err := json.Unmarshal(data, &signature); err != nil { + return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err) + } + return signature, nil +} + +func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool { + return signature.ByteSize > 0 || + strings.TrimSpace(signature.StructureSHA256) != "" || + strings.TrimSpace(signature.SHA256) != "" || + strings.TrimSpace(signature.Title) != "" || + strings.TrimSpace(signature.CommitID) != "" || + len(signature.Contains) > 0 +} + +func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string { + type containsEntry struct { + Name string `json:"name"` + Value bool `json:"value"` + } + keys := make([]string, 0, len(signature.Contains)) + for key := range signature.Contains { + keys = append(keys, key) + } + sort.Strings(keys) + entries := make([]containsEntry, 0, len(keys)) + for _, key := range keys { + entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]}) + } + payload := struct { + Title string `json:"title"` + MetaDescription string `json:"meta_description"` + CommitID string `json:"commit_id"` + CanonicalURL string `json:"canonical_url"` + Contains []containsEntry `json:"contains"` + }{ + Title: signature.Title, + MetaDescription: signature.MetaDescription, + CommitID: signature.CommitID, + CanonicalURL: signature.CanonicalURL, + Contains: entries, + } + bytes, _ := json.Marshal(payload) + return string(bytes) +} + +func deepseekPricingSHA256Hex(raw string) string { + sum := sha256.Sum256([]byte(raw)) + return hex.EncodeToString(sum[:]) +} + +func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string { + match := re.FindStringSubmatch(raw) + if len(match) < 2 { + return "" + } + text := deepseekPricingHTMLTagRe.ReplaceAllString(match[1], " ") + text = strings.ReplaceAll(text, "&", "&") + text = strings.ReplaceAll(text, " ", " ") + text = strings.Join(strings.Fields(text), " ") + return strings.TrimSpace(text) +} diff --git a/scripts/discover_intraday_news_candidates.go b/scripts/discover_intraday_news_candidates.go index d89e653..fba2a34 100644 --- a/scripts/discover_intraday_news_candidates.go +++ b/scripts/discover_intraday_news_candidates.go @@ -154,19 +154,14 @@ func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) err } func buildIntradayQueries(date string, providerLimit int) []string { - providers := []string{ - "OpenAI", "Anthropic", "Google Gemini", "xAI", "DeepSeek", - "DashScope", "Qwen", "智谱", "百度文心", "腾讯混元", "火山方舟", "MiniMax", + queries := []string{ + "site:platform.deepseek.com DeepSeek pricing", + "site:api-docs.deepseek.com DeepSeek release news", + "site:docs.anthropic.com Claude Sonnet 4 announcement", + "site:openrouter.ai OpenRouter models", } - keywords := []string{"pricing release announcement", "模型 降价 发布 活动"} - if providerLimit > 0 && providerLimit < len(providers) { - providers = providers[:providerLimit] - } - queries := make([]string, 0, len(providers)*len(keywords)) - for _, provider := range providers { - for _, keyword := range keywords { - queries = append(queries, strings.TrimSpace(date+" "+provider+" "+keyword)) - } + if providerLimit > 0 && providerLimit < len(queries) { + return queries[:providerLimit] } return queries } @@ -217,22 +212,36 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m Status: "candidate", VerificationConfidence: "candidate", } + matchedSearch := false + filteredURLs := make([]string, 0, len(candidate.CandidateURLs)) for _, url := range candidate.CandidateURLs { - if searchRecord, ok := searchIndex[url]; ok { - candidate.DiscoverySource = "web_search+llm" - candidate.DiscoveryQuery = searchRecord.Title - candidate.DiscoveryEvidence["search_record"] = searchRecord - if candidate.ProviderName == "" { - candidate.ProviderName = strings.TrimSpace(searchRecord.Provider) - } - if candidate.Title == "" { - candidate.Title = strings.TrimSpace(searchRecord.Title) - } - if candidate.Summary == "" { - candidate.Summary = strings.TrimSpace(searchRecord.Summary) - } + searchRecord, ok := searchIndex[url] + if !ok { + continue + } + if !searchRecordMatchesDate(searchRecord, date) { + continue + } + matchedSearch = true + filteredURLs = append(filteredURLs, url) + candidate.DiscoverySource = "web_search+llm" + candidate.DiscoveryQuery = searchRecord.Title + candidate.DiscoveryEvidence["search_record"] = searchRecord + if candidate.ProviderName == "" { + candidate.ProviderName = strings.TrimSpace(searchRecord.Provider) + } + if candidate.Title == "" { + candidate.Title = strings.TrimSpace(searchRecord.Title) + } + if candidate.Summary == "" { + candidate.Summary = strings.TrimSpace(searchRecord.Summary) } } + if !matchedSearch { + candidate.CandidateURLs = nil + return candidate + } + candidate.CandidateURLs = dedupeStrings(filteredURLs) return candidate } @@ -294,6 +303,36 @@ func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string { }, "|") } +func searchRecordMatchesDate(record intradaySearchRecord, date string) bool { + published := strings.TrimSpace(record.PublishedAt) + if published == "" { + return false + } + if ts, ok := parseSearchPublishedAt(published); ok { + return ts == date + } + return strings.Contains(published, date) +} + +func parseSearchPublishedAt(value string) (string, bool) { + for _, layout := range []string{time.RFC3339, "2006-01-02", "Mon, 02 Jan 2006 15:04:05 MST", "Mon, 2 Jan 2006 15:04:05 MST"} { + if ts, err := time.Parse(layout, value); err == nil { + return ts.Format("2006-01-02"), true + } + } + localized := strings.NewReplacer( + "周一", "Mon", "周二", "Tue", "周三", "Wed", "周四", "Thu", "周五", "Fri", "周六", "Sat", "周日", "Sun", + "1月", "Jan", "2月", "Feb", "3月", "Mar", "4月", "Apr", "5月", "May", "6月", "Jun", + "7月", "Jul", "8月", "Aug", "9月", "Sep", "10月", "Oct", "11月", "Nov", "12月", "Dec", + ).Replace(value) + for _, layout := range []string{"Mon, 2 Jan 2006 15:04:05 MST", "Mon, 02 Jan 2006 15:04:05 MST"} { + if ts, err := time.Parse(layout, localized); err == nil { + return ts.Format("2006-01-02"), true + } + } + return "", false +} + func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary { eventTypeCounts := make(map[string]int) diff --git a/scripts/discover_intraday_news_candidates_test.go b/scripts/discover_intraday_news_candidates_test.go index 5c82d35..ad27c87 100644 --- a/scripts/discover_intraday_news_candidates_test.go +++ b/scripts/discover_intraday_news_candidates_test.go @@ -46,10 +46,11 @@ func TestLoadIntradayLLMRecordsFromFixture(t *testing.T) { func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) { searchRecords := []intradaySearchRecord{{ - Title: "OpenAI announces GPT-5.6 preview pricing update", - Summary: "Search summary", - URL: "https://openai.example.com/news/gpt-5-6-pricing", - Provider: "OpenAI", + Title: "OpenAI announces GPT-5.6 preview pricing update", + Summary: "Search summary", + URL: "https://openai.example.com/news/gpt-5-6-pricing", + Provider: "OpenAI", + PublishedAt: "2026-05-25", }} llmRecords := []intradayLLMRecord{ { @@ -80,6 +81,29 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) { } } +func TestNormalizeIntradayCandidatesDropsOutdatedSearchMatches(t *testing.T) { + searchRecords := []intradaySearchRecord{{ + Title: "Old DeepSeek pricing article", + Summary: "Yesterday record", + URL: "https://deepseek.example.com/pricing", + Provider: "DeepSeek", + PublishedAt: "2026-05-24", + }} + llmRecords := []intradayLLMRecord{{ + EventType: "price_cut", + ProviderName: "DeepSeek", + ModelName: "DeepSeek-V4-Flash", + ProviderCountry: "CN", + Title: "DeepSeek V4 Flash price cut", + Summary: "Should be dropped because search evidence is stale", + CandidateURLs: []string{"https://deepseek.example.com/pricing"}, + }} + candidates := normalizeIntradayCandidates("2026-05-25", searchRecords, llmRecords) + if len(candidates) != 0 { + t.Fatalf("旧闻搜索结果不应进入候选池, got=%d", len(candidates)) + } +} + func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) { llmRecords := []intradayLLMRecord{{ EventType: "promo_campaign", @@ -94,6 +118,13 @@ func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) { } } +func TestSearchRecordMatchesLocalizedBingDate(t *testing.T) { + record := intradaySearchRecord{PublishedAt: "周一, 25 5月 2026 14:08:00 GMT"} + if !searchRecordMatchesDate(record, "2026-05-25") { + t.Fatal("应识别本地化 Bing pubDate 为当天") + } +} + func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) { if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil { t.Fatal("缺少 command 时应报错") diff --git a/scripts/materialize_daily_signals.go b/scripts/materialize_daily_signals.go index 963cad7..f5896ab 100644 --- a/scripts/materialize_daily_signals.go +++ b/scripts/materialize_daily_signals.go @@ -95,6 +95,10 @@ type materializeDailySignalsConfig struct { var signalLogger *slog.Logger const signalUSDToCNY = 7.25 +const defaultDeepSeekNewsSignalURL = "https://api-docs.deepseek.com/news/news250120" +const defaultDeepSeekPricingSignalURL = "https://platform.deepseek.com/pricing" +const defaultDeepSeekAPIPricingSignalURL = "https://platform.deepseek.com/docs/api-pricing" + func init() { signalLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})) @@ -373,6 +377,11 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error) return nil, err } events = mergeVerifiedDiscoveryEvents(events, discoveryEvents) + deepseekDriftEvents, err := loadDeepSeekNewsDriftSignalEvents(db) + if err != nil { + return nil, err + } + events = mergeVerifiedDiscoveryEvents(events, deepseekDriftEvents) sort.Slice(events, func(i, j int) bool { if events[i].Priority != events[j].Priority { @@ -954,6 +963,110 @@ func firstString(values []string) string { return "" } +func loadDeepSeekNewsDriftSignalEvents(db *sql.DB) ([]signalModelEvent, error) { + return loadDeepSeekSignatureSignalEvents(db, []deepseekSignatureEventConfig{ + { + SourceKey: "deepseek_news_signature", + ModelName: "DeepSeek 官方新闻页", + SourceKindLabel: "官方新闻页结构变化", + PrimaryURL: defaultDeepSeekNewsSignalURL, + Audience: "适合需要尽快复查 DeepSeek 路线图与默认选型的团队", + EvidenceTemplate: "DeepSeek 官方新闻页结构签名发生变化:sha=%s previous=%s", + Baseline: "官方新闻页结构漂移", + Summary: "DeepSeek 官方新闻页结构发生变化,需优先确认是否出现新发布或路线图更新。", + Priority: 117, + }, + { + SourceKey: "deepseek_pricing_signature", + ModelName: "DeepSeek 官方价格页", + SourceKindLabel: "官方价格页结构变化", + PrimaryURL: defaultDeepSeekPricingSignalURL, + Audience: "适合需要尽快复查 DeepSeek 价格策略与默认成本模型的团队", + EvidenceTemplate: "DeepSeek 官方价格页结构签名发生变化:sha=%s previous=%s", + Baseline: "官方价格页结构漂移", + Summary: "DeepSeek 官方价格页结构发生变化,需优先确认是否出现价格策略更新。", + Priority: 116, + }, + { + SourceKey: "deepseek_api_pricing_signature", + ModelName: "DeepSeek API 定价页", + SourceKindLabel: "官方 API 定价页结构变化", + PrimaryURL: defaultDeepSeekAPIPricingSignalURL, + Audience: "适合需要尽快复查 DeepSeek API 定价与预算预期的团队", + EvidenceTemplate: "DeepSeek API 定价页结构签名发生变化:sha=%s previous=%s", + Baseline: "官方 API 定价页结构漂移", + Summary: "DeepSeek API 定价页结构发生变化,需优先确认是否出现定价或套餐更新。", + Priority: 115, + }, + }) +} + +type deepseekSignatureEventConfig struct { + SourceKey string + ModelName string + SourceKindLabel string + PrimaryURL string + Audience string + EvidenceTemplate string + Baseline string + Summary string + Priority int +} + +func loadDeepSeekSignatureSignalEvents(db *sql.DB, configs []deepseekSignatureEventConfig) ([]signalModelEvent, error) { + if len(configs) == 0 { + return nil, nil + } + var events []signalModelEvent + for _, cfg := range configs { + _, rows, err := queryOfficialImportSignatureAuditWindow(db, 5, cfg.SourceKey, false) + if err != nil { + if strings.Contains(err.Error(), `relation "official_import_signature_audit_recent_view" does not exist`) || + strings.Contains(err.Error(), `relation "official_import_signature_audit" does not exist`) { + return nil, nil + } + return nil, err + } + for _, row := range rows { + if row.RecentRank != 1 { + continue + } + if event, ok := buildDeepSeekSignatureSignalEvent(row, cfg); ok { + events = append(events, event) + } + break + } + } + return events, nil +} + +func buildDeepSeekSignatureSignalEvent(row officialImportSignatureAuditViewRow, cfg deepseekSignatureEventConfig) (signalModelEvent, bool) { + if row.SourceKey != cfg.SourceKey || !row.DriftDetected { + return signalModelEvent{}, false + } + updatedAt := row.CheckedAt.Format("2006-01-02 15:04") + primarySource := nullStringOrNone(row.SnapshotPath) + if primarySource == "none" { + primarySource = cfg.PrimaryURL + } + return signalModelEvent{ + EventType: "official_release", + ModelName: cfg.ModelName, + ProviderName: "DeepSeek", + OperatorName: "DeepSeek", + Audience: cfg.Audience, + TrustLabel: "官方来源 / 结构漂移告警", + SourceKindLabel: cfg.SourceKindLabel, + PrimarySource: primarySource, + SourceURL: cfg.PrimaryURL, + UpdatedAt: updatedAt, + EvidenceDetail: fmt.Sprintf(cfg.EvidenceTemplate, row.StructureSHA256, nullStringOrNone(row.PreviousObservedSHA256)), + Baseline: cfg.Baseline, + Summary: cfg.Summary, + Priority: cfg.Priority, + }, true +} + func signalNormalizeIntradayEventType(value string) string { switch strings.TrimSpace(strings.ToLower(value)) { case "price_cut": diff --git a/scripts/materialize_daily_signals_test.go b/scripts/materialize_daily_signals_test.go index a57c49e..e9530ef 100644 --- a/scripts/materialize_daily_signals_test.go +++ b/scripts/materialize_daily_signals_test.go @@ -92,3 +92,108 @@ func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T) t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged) } } + +func TestBuildDeepSeekNewsDriftEvent(t *testing.T) { + row := officialImportSignatureAuditViewRow{ + SourceKey: "deepseek_news_signature", + Status: "drift_detected", + StructureState: "changed", + StructureChanged: true, + DriftDetected: true, + BaselineInitialized: false, + StructureSHA256: "abc123", + } + event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{ + SourceKey: "deepseek_news_signature", + ModelName: "DeepSeek 官方新闻页", + SourceKindLabel: "官方新闻页结构变化", + PrimaryURL: defaultDeepSeekNewsSignalURL, + Audience: "a", + EvidenceTemplate: "news drift %s %s", + Baseline: "官方新闻页结构漂移", + Summary: "summary", + Priority: 117, + }) + if !ok { + t.Fatal("期望为 drift 行生成正式信号事件") + } + if event.EventType != "official_release" { + t.Fatalf("DeepSeek drift 应映射为 official_release, got=%q", event.EventType) + } + if event.ProviderName != "DeepSeek" || event.ModelName != "DeepSeek 官方新闻页" { + t.Fatalf("DeepSeek drift 事件主体错误: %+v", event) + } +} + +func TestBuildDeepSeekPricingDriftEvent(t *testing.T) { + row := officialImportSignatureAuditViewRow{ + SourceKey: "deepseek_pricing_signature", + Status: "drift_detected", + StructureState: "changed", + StructureChanged: true, + DriftDetected: true, + BaselineInitialized: false, + StructureSHA256: "pricing123", + } + event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{ + SourceKey: "deepseek_pricing_signature", + ModelName: "DeepSeek 官方价格页", + SourceKindLabel: "官方价格页结构变化", + PrimaryURL: defaultDeepSeekPricingSignalURL, + Audience: "a", + EvidenceTemplate: "pricing drift %s %s", + Baseline: "官方价格页结构漂移", + Summary: "pricing summary", + Priority: 116, + }) + if !ok { + t.Fatal("期望为 pricing drift 行生成正式信号事件") + } + if event.ModelName != "DeepSeek 官方价格页" || event.SourceKindLabel != "官方价格页结构变化" { + t.Fatalf("pricing drift 事件映射错误: %+v", event) + } +} + +func TestBuildDeepSeekAPIPricingDriftEvent(t *testing.T) { + row := officialImportSignatureAuditViewRow{ + SourceKey: "deepseek_api_pricing_signature", + Status: "drift_detected", + StructureState: "changed", + StructureChanged: true, + DriftDetected: true, + BaselineInitialized: false, + StructureSHA256: "api123", + } + event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{ + SourceKey: "deepseek_api_pricing_signature", + ModelName: "DeepSeek API 定价页", + SourceKindLabel: "官方 API 定价页结构变化", + PrimaryURL: defaultDeepSeekAPIPricingSignalURL, + Audience: "a", + EvidenceTemplate: "api drift %s %s", + Baseline: "官方 API 定价页结构漂移", + Summary: "api pricing summary", + Priority: 115, + }) + if !ok { + t.Fatal("期望为 api pricing drift 行生成正式信号事件") + } + if event.ModelName != "DeepSeek API 定价页" || event.SourceKindLabel != "官方 API 定价页结构变化" { + t.Fatalf("api pricing drift 事件映射错误: %+v", event) + } +} + +func TestBuildDeepSeekNewsDriftEventSkipsBaselineOnly(t *testing.T) { + row := officialImportSignatureAuditViewRow{ + SourceKey: "deepseek_news_signature", + Status: "baseline_initialized", + StructureState: "initial", + StructureChanged: false, + DriftDetected: false, + BaselineInitialized: true, + StructureSHA256: "abc123", + } + if _, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{SourceKey: "deepseek_news_signature"}); ok { + t.Fatal("baseline 初始化不应直接进入正式信号") + } +} diff --git a/scripts/real_intraday_llm_provider.py b/scripts/real_intraday_llm_provider.py new file mode 100644 index 0000000..b54828a --- /dev/null +++ b/scripts/real_intraday_llm_provider.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +import json +import os +import sys +import urllib.request + +api_key = os.environ.get('OPENROUTER_API_KEY', '').strip() +if not api_key: + print('OPENROUTER_API_KEY missing', file=sys.stderr) + raise SystemExit(1) + +payload = sys.stdin.read() +request_data = json.loads(payload or '{}') +search_results = request_data.get('search_results', []) +date = request_data.get('date', '') + +prompt = { + "role": "user", + "content": ( + "你是大模型情报候选发现器。根据给定搜索结果,只输出 JSON 数组,不要输出 markdown。" + "每项字段必须包含 event_type, provider_name, model_name, provider_country, title, summary, candidate_urls。" + "event_type 只能是 price_cut, price_increase, official_release, promo_campaign, leak_or_rumor, unknown。" + "只有当搜索结果明确像是当天消息时才输出。没有 URL 的候选不要输出。" + f"\n日期: {date}\n搜索结果:\n" + json.dumps(search_results, ensure_ascii=False) + ) +} + +req_body = json.dumps({ + "model": "deepseek/deepseek-v4-flash", + "messages": [prompt], + "temperature": 0, + "max_tokens": 1200, + "response_format": {"type": "json_object"} +}).encode('utf-8') +req = urllib.request.Request( + 'https://openrouter.ai/api/v1/chat/completions', + data=req_body, + headers={ + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://github.com/phamnazage-jpg/llm-intelligence', + 'X-Title': 'llm-intelligence intraday discovery' + }, + method='POST' +) +with urllib.request.urlopen(req, timeout=60) as resp: + data = json.loads(resp.read().decode('utf-8')) +content = data['choices'][0]['message']['content'] +parsed = json.loads(content) +if isinstance(parsed, dict): + if 'items' in parsed and isinstance(parsed['items'], list): + parsed = parsed['items'] + elif 'candidates' in parsed and isinstance(parsed['candidates'], list): + parsed = parsed['candidates'] + else: + parsed = [] +if not isinstance(parsed, list): + parsed = [] +print(json.dumps(parsed, ensure_ascii=False)) diff --git a/scripts/real_intraday_search_provider.py b/scripts/real_intraday_search_provider.py new file mode 100644 index 0000000..f5f8c92 --- /dev/null +++ b/scripts/real_intraday_search_provider.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +import email.utils +import json +import os +import urllib.parse +import urllib.request +import xml.etree.ElementTree as ET + + +def normalize_pubdate(value: str) -> str: + if not value: + return '' + try: + dt = email.utils.parsedate_to_datetime(value) + return dt.strftime('%Y-%m-%d') + except Exception: + return value + + +def infer_provider(title: str, link: str) -> str: + text = (title + ' ' + link).lower() + for needle, provider in [ + ('openai', 'OpenAI'), ('anthropic', 'Anthropic'), ('claude', 'Anthropic'), ('gemini', 'Google'), ('google', 'Google'), + ('deepseek', 'DeepSeek'), ('qwen', 'Qwen'), ('dashscope', 'DashScope'), ('zhipu', '智谱'), + ('baidu', '百度'), ('tencent', '腾讯'), ('minimax', 'MiniMax'), ('x.ai', 'xAI'), ('xai', 'xAI') + ]: + if needle in text: + return provider + return '' + + +query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip() +if not query: + print("[]") + raise SystemExit(0) + +url = "https://www.bing.com/search?format=rss&q=" + urllib.parse.quote(query) +req = urllib.request.Request(url, headers={ + "User-Agent": "Mozilla/5.0", + "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", +}) +with urllib.request.urlopen(req, timeout=20) as resp: + body = resp.read().decode("utf-8", errors="ignore") + +root = ET.fromstring(body) +items = [] +for item in root.findall('./channel/item'): + title = (item.findtext('title') or '').strip() + link = (item.findtext('link') or '').strip() + desc = (item.findtext('description') or '').strip() + pub = (item.findtext('pubDate') or '').strip() + provider = infer_provider(title, link) + provider_url = '' + if link: + parsed = urllib.parse.urlparse(link) + provider_url = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else '' + items.append({ + "title": title, + "summary": desc, + "url": link, + "provider": provider, + "provider_url": provider_url, + "published_at": normalize_pubdate(pub), + }) +print(json.dumps(items, ensure_ascii=False)) diff --git a/scripts/run_intraday_discovery_watch.sh b/scripts/run_intraday_discovery_watch.sh index d92b047..b2b8bd1 100644 --- a/scripts/run_intraday_discovery_watch.sh +++ b/scripts/run_intraday_discovery_watch.sh @@ -41,6 +41,30 @@ if [[ "$DRY_RUN" == "true" ]]; then materialize_args+=(--dry-run) fi -go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}" -go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}" +deepseek_guard_args=() +if [[ "$DRY_RUN" == "true" ]]; then + deepseek_guard_args+=(--allow-bootstrap=false) +fi + +if ! go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"; then + exit 1 +fi +if ! go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"; then + exit 1 +fi +if ! go run -tags llm_script ./scripts/deepseek_news_signature_guard.go ./scripts/deepseek_news_signature_guard_lib.go ./scripts/deepseek_news_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go "${deepseek_guard_args[@]}"; then + if [[ "$DRY_RUN" != "true" ]]; then + exit 1 + fi +fi +if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_pricing_signature --snapshot-base deepseek-pricing --url https://platform.deepseek.com/pricing "${deepseek_guard_args[@]}"; then + if [[ "$DRY_RUN" != "true" ]]; then + exit 1 + fi +fi +if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_api_pricing_signature --snapshot-base deepseek-api-pricing --url https://platform.deepseek.com/docs/api-pricing "${deepseek_guard_args[@]}"; then + if [[ "$DRY_RUN" != "true" ]]; then + exit 1 + fi +fi REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${materialize_args[@]}"