feat(intraday): monitor DeepSeek official page drift
This commit is contained in:
@@ -55,6 +55,6 @@
|
|||||||
|
|
||||||
## 下一步建议
|
## 下一步建议
|
||||||
|
|
||||||
1. 为 `run_intraday_discovery_watch.sh` 补充生产级 provider adapter 和调度说明
|
1. 已补充 `run_intraday_discovery_watch.sh` 与 DeepSeek 官方新闻页结构签名 guard,可继续扩展到 DeepSeek pricing 页面
|
||||||
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间”提示
|
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间 / 最近一次官方页 drift 检查时间”提示
|
||||||
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板
|
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板
|
||||||
|
|||||||
51
scripts/deepseek_news_signature_guard.go
Normal file
51
scripts/deepseek_news_signature_guard.go
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadSubscriptionImportEnv()
|
||||||
|
|
||||||
|
var url string
|
||||||
|
var fixture string
|
||||||
|
var snapshotDir string
|
||||||
|
var baselinePath string
|
||||||
|
var timeoutSeconds int
|
||||||
|
var allowBootstrap bool
|
||||||
|
|
||||||
|
flag.StringVar(&url, "url", defaultDeepSeekNewsFetchURL, "DeepSeek 官方新闻页")
|
||||||
|
flag.StringVar(&fixture, "fixture", "", "DeepSeek 新闻页样例文件")
|
||||||
|
flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek news snapshot 输出目录")
|
||||||
|
flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek news 结构基线签名路径")
|
||||||
|
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||||
|
flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
cfg := deepseekNewsSignatureGuardConfig{
|
||||||
|
URL: url,
|
||||||
|
Fixture: fixture,
|
||||||
|
SnapshotDir: snapshotDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||||
|
AllowBootstrap: allowBootstrap,
|
||||||
|
}
|
||||||
|
result, err := runDeepSeekNewsSignatureGuard(cfg, now)
|
||||||
|
if auditErr := persistDeepSeekNewsSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard audit: %v\n", auditErr)
|
||||||
|
if err == nil {
|
||||||
|
err = auditErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println(formatDeepSeekNewsSignatureGuardSummary(result))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
127
scripts/deepseek_news_signature_guard_lib.go
Normal file
127
scripts/deepseek_news_signature_guard_lib.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deepseekNewsSignatureGuardConfig holds the inputs of one news signature
// guard run.
type deepseekNewsSignatureGuardConfig struct {
	// URL is handed to the page fetcher and recorded as the source URL of the
	// generated signature and audit record.
	URL string
	// Fixture is handed to the page fetcher together with URL; the CLI
	// describes it as a sample page file.
	Fixture string
	// SnapshotDir is the directory that receives the snapshot and signature
	// files. When empty, logs/deepseek-news-snapshots is used.
	SnapshotDir string
	// BaselinePath is the baseline signature file to compare against. When
	// empty, baseline.signature.json inside the snapshot directory is used.
	BaselinePath string
	// Timeout is the timeout of the HTTP client used for fetching.
	Timeout time.Duration
	// AllowBootstrap permits creating the baseline from the current signature
	// when the baseline file does not exist.
	AllowBootstrap bool
}
|
||||||
|
|
||||||
|
type deepseekNewsSignatureGuardResult struct {
|
||||||
|
SnapshotPath string
|
||||||
|
SignaturePath string
|
||||||
|
BaselinePath string
|
||||||
|
DriftDetected bool
|
||||||
|
BaselineInitialized bool
|
||||||
|
PreviousBaselineHash string
|
||||||
|
CurrentSignature deepseekNewsStructureSignature
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultDeepSeekNewsFetchURL is the page URL the news signature guard uses
// as the default value of its -url flag.
const defaultDeepSeekNewsFetchURL = "https://api-docs.deepseek.com/news/news250120"
|
||||||
|
|
||||||
|
func runDeepSeekNewsSignatureGuard(cfg deepseekNewsSignatureGuardConfig, now time.Time) (deepseekNewsSignatureGuardResult, error) {
|
||||||
|
snapshotDir := cfg.SnapshotDir
|
||||||
|
if snapshotDir == "" {
|
||||||
|
snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
|
||||||
|
return deepseekNewsSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||||
|
}
|
||||||
|
snapshotPath, signaturePath := resolveDeepSeekNewsSnapshotPaths("", "", snapshotDir, now)
|
||||||
|
baselinePath := cfg.BaselinePath
|
||||||
|
if baselinePath == "" {
|
||||||
|
baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
|
||||||
|
}
|
||||||
|
client := &http.Client{Timeout: cfg.Timeout}
|
||||||
|
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekNewsSignatureGuardResult{}, err
|
||||||
|
}
|
||||||
|
current, err := writeDeepSeekNewsSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekNewsSignatureGuardResult{}, err
|
||||||
|
}
|
||||||
|
result := deepseekNewsSignatureGuardResult{
|
||||||
|
SnapshotPath: snapshotPath,
|
||||||
|
SignaturePath: signaturePath,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
CurrentSignature: current,
|
||||||
|
}
|
||||||
|
previous, err := readDeepSeekNewsStructureSignature(baselinePath)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
if !cfg.AllowBootstrap {
|
||||||
|
return result, fmt.Errorf("deepseek news baseline missing: %s", baselinePath)
|
||||||
|
}
|
||||||
|
if err := copyFileCommon(signaturePath, baselinePath); err != nil {
|
||||||
|
return result, fmt.Errorf("initialize baseline: %w", err)
|
||||||
|
}
|
||||||
|
result.BaselineInitialized = true
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
result.PreviousBaselineHash = previous.StructureSHA256
|
||||||
|
if previous.StructureSHA256 != current.StructureSHA256 {
|
||||||
|
result.DriftDetected = true
|
||||||
|
return result, fmt.Errorf(
|
||||||
|
"deepseek news structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
|
||||||
|
previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatDeepSeekNewsSignatureGuardSummary(result deepseekNewsSignatureGuardResult) string {
|
||||||
|
return fmt.Sprintf(
|
||||||
|
"source=deepseek-news-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s",
|
||||||
|
result.DriftDetected,
|
||||||
|
result.BaselineInitialized,
|
||||||
|
result.CurrentSignature.StructureSHA256,
|
||||||
|
emptyIfBlank(result.PreviousBaselineHash),
|
||||||
|
result.SnapshotPath,
|
||||||
|
result.SignaturePath,
|
||||||
|
result.BaselinePath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildDeepSeekNewsSignatureAuditRecord(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
|
||||||
|
record := officialImportSignatureAuditRecord{
|
||||||
|
SourceKey: "deepseek_news_signature",
|
||||||
|
CheckedAt: checkedAt,
|
||||||
|
Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr),
|
||||||
|
DriftDetected: result.DriftDetected,
|
||||||
|
BaselineInitialized: result.BaselineInitialized,
|
||||||
|
SourceURL: strings.TrimSpace(cfg.URL),
|
||||||
|
FixturePath: strings.TrimSpace(cfg.Fixture),
|
||||||
|
SnapshotPath: strings.TrimSpace(result.SnapshotPath),
|
||||||
|
SignaturePath: strings.TrimSpace(result.SignaturePath),
|
||||||
|
BaselinePath: strings.TrimSpace(result.BaselinePath),
|
||||||
|
StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256),
|
||||||
|
PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash),
|
||||||
|
ByteSize: result.CurrentSignature.ByteSize,
|
||||||
|
ErrorMessage: errorMessageText(runErr),
|
||||||
|
}
|
||||||
|
if hasDeepSeekNewsStructureSignature(result.CurrentSignature) {
|
||||||
|
signatureCopy := result.CurrentSignature
|
||||||
|
record.SignaturePayload = &signatureCopy
|
||||||
|
}
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
func persistDeepSeekNewsSignatureAuditIfConfigured(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) error {
|
||||||
|
return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekNewsSignatureAuditRecord(cfg, result, checkedAt, runErr))
|
||||||
|
}
|
||||||
88
scripts/deepseek_news_signature_guard_test.go
Normal file
88
scripts/deepseek_news_signature_guard_test.go
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRunDeepSeekNewsSignatureGuardInitializesBaseline(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||||
|
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||||
|
URL: defaultDeepSeekNewsFetchURL,
|
||||||
|
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: true,
|
||||||
|
}, time.Date(2026, 5, 27, 21, 0, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runDeepSeekNewsSignatureGuard 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
if !result.BaselineInitialized {
|
||||||
|
t.Fatal("期望初始化 baseline")
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(baselinePath); err != nil {
|
||||||
|
t.Fatalf("baseline 未写入: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunDeepSeekNewsSignatureGuardDetectsDrift(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||||
|
_, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||||
|
URL: defaultDeepSeekNewsFetchURL,
|
||||||
|
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: true,
|
||||||
|
}, time.Date(2026, 5, 27, 21, 1, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("初始化 baseline 失败: %v", err)
|
||||||
|
}
|
||||||
|
driftFixture := filepath.Join(tempDir, "drift.html")
|
||||||
|
if err := os.WriteFile(driftFixture, []byte("<html><head><title>DeepSeek-V4 Release</title><meta name=\"description\" content=\"DeepSeek V4 pricing release\"></head><body><h1>DeepSeek V4 Release</h1></body></html>"), 0o644); err != nil {
|
||||||
|
t.Fatalf("写入 drift fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||||
|
URL: defaultDeepSeekNewsFetchURL,
|
||||||
|
Fixture: driftFixture,
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: false,
|
||||||
|
}, time.Date(2026, 5, 27, 21, 2, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("期望结构漂移时报错")
|
||||||
|
}
|
||||||
|
if !result.DriftDetected {
|
||||||
|
t.Fatal("期望 driftDetected=true")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "deepseek news structure drift detected") {
|
||||||
|
t.Fatalf("期望返回 drift 错误,实际: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatDeepSeekNewsSignatureGuardSummary(t *testing.T) {
|
||||||
|
result := deepseekNewsSignatureGuardResult{
|
||||||
|
SnapshotPath: "/tmp/deepseek-news.html",
|
||||||
|
SignaturePath: "/tmp/deepseek-news.signature.json",
|
||||||
|
BaselinePath: "/tmp/baseline.signature.json",
|
||||||
|
BaselineInitialized: true,
|
||||||
|
CurrentSignature: deepseekNewsStructureSignature{
|
||||||
|
StructureSHA256: "abc123",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
summary := formatDeepSeekNewsSignatureGuardSummary(result)
|
||||||
|
for _, want := range []string{"source=deepseek-news-signature-guard", "baseline_initialized=true", "structure_sha256=abc123"} {
|
||||||
|
if !strings.Contains(summary, want) {
|
||||||
|
t.Fatalf("summary 缺少 %q,实际: %q", want, summary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
196
scripts/deepseek_news_snapshot_lib.go
Normal file
196
scripts/deepseek_news_snapshot_lib.go
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deepseekNewsStructureSignature is the JSON-serialisable fingerprint of one
// fetched news page.
type deepseekNewsStructureSignature struct {
	// ByteSize is the length of the raw page in bytes.
	ByteSize int `json:"byte_size"`
	// SHA256 is the hex SHA-256 of the raw page.
	SHA256 string `json:"sha256"`
	// StructureSHA256 is the hex SHA-256 of the digest payload built from
	// Title, MetaDescription, Headings and Contains.
	StructureSHA256 string `json:"structure_sha256"`
	// Title is the cleaned text of the first <title> element.
	Title string `json:"title"`
	// MetaDescription is the cleaned content of the description meta tag.
	MetaDescription string `json:"meta_description"`
	// Headings lists the distinct, non-empty <h1> texts in document order.
	Headings []string `json:"headings"`
	// Contains records, per needle key, whether the lower-cased page contains
	// that needle.
	Contains map[string]bool `json:"contains"`
	// GeneratedAt is the RFC 3339 timestamp set when the artifacts are written.
	GeneratedAt string `json:"generated_at,omitempty"`
	// SourceURL is the URL the page was attributed to.
	SourceURL string `json:"source_url,omitempty"`
	// SnapshotPath is the file the raw page was written to.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
||||||
|
|
||||||
|
// deepseekNewsContainsNeedles maps a signature key to the substring whose
// presence in the lower-cased page is recorded under that key.
var deepseekNewsContainsNeedles = map[string]string{
	"api_docs": "api docs",
	"deepseek": "deepseek",
	"news":     "news",
	"release":  "release",
}
|
||||||
|
|
||||||
|
// Regular expressions used to pull structural text out of a raw HTML page.
// All of them let "." span newlines; the element patterns are additionally
// case-insensitive.
var (
	// htmlTagRe matches any single HTML tag.
	htmlTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
	// titleRe captures the inner content of a <title> element.
	titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
	// metaDescRe captures the content attribute of a description meta tag
	// whose name attribute precedes its content attribute.
	metaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
	// h1Re captures the inner content of an <h1> element.
	h1Re = regexp.MustCompile(`(?is)<h1[^>]*>(.*?)</h1>`)
)
|
||||||
|
|
||||||
|
func buildDeepSeekNewsStructureSignature(raw string) deepseekNewsStructureSignature {
|
||||||
|
title := firstHTMLMatch(titleRe, raw)
|
||||||
|
meta := firstHTMLMatch(metaDescRe, raw)
|
||||||
|
h1Matches := h1Re.FindAllStringSubmatch(raw, -1)
|
||||||
|
headings := make([]string, 0, len(h1Matches))
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
for _, match := range h1Matches {
|
||||||
|
if len(match) < 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
clean := cleanHTMLText(match[1])
|
||||||
|
if clean == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, exists := seen[clean]; exists {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[clean] = struct{}{}
|
||||||
|
headings = append(headings, clean)
|
||||||
|
}
|
||||||
|
contains := make(map[string]bool, len(deepseekNewsContainsNeedles))
|
||||||
|
lower := strings.ToLower(raw)
|
||||||
|
for key, needle := range deepseekNewsContainsNeedles {
|
||||||
|
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
||||||
|
}
|
||||||
|
signature := deepseekNewsStructureSignature{
|
||||||
|
ByteSize: len([]byte(raw)),
|
||||||
|
SHA256: deepseekNewsSHA256Hex(raw),
|
||||||
|
Title: title,
|
||||||
|
MetaDescription: meta,
|
||||||
|
Headings: headings,
|
||||||
|
Contains: contains,
|
||||||
|
}
|
||||||
|
signature.StructureSHA256 = deepseekNewsSHA256Hex(deepseekNewsStructureDigestPayload(signature))
|
||||||
|
return signature
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeDeepSeekNewsSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekNewsStructureSignature, error) {
|
||||||
|
if strings.TrimSpace(snapshotPath) == "" {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("snapshot path is required")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(signaturePath) == "" {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("signature path is required")
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
||||||
|
}
|
||||||
|
signature := buildDeepSeekNewsStructureSignature(raw)
|
||||||
|
signature.GeneratedAt = now.Format(time.RFC3339)
|
||||||
|
signature.SourceURL = sourceURL
|
||||||
|
signature.SnapshotPath = snapshotPath
|
||||||
|
payload, err := json.MarshalIndent(signature, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
||||||
|
}
|
||||||
|
return signature, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveDeepSeekNewsSnapshotPaths fills in whichever of the snapshot and
// signature paths is blank and returns both.
//
// A blank snapshotDir defaults to logs/deepseek-news-snapshots. A blank
// snapshot path becomes <dir>/deepseek-news-<yyyymmdd-hhmmss>.html using now.
// A blank signature path becomes the snapshot path with its extension
// replaced by ".signature.json". Non-blank inputs are returned unchanged.
func resolveDeepSeekNewsSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now time.Time) (string, string) {
	blank := func(s string) bool { return strings.TrimSpace(s) == "" }

	if blank(snapshotDir) {
		snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
	}
	if blank(snapshotPath) {
		stem := "deepseek-news-" + now.Format("20060102-150405")
		snapshotPath = filepath.Join(snapshotDir, stem) + ".html"
	}
	if blank(signaturePath) {
		// For a generated snapshot path this strips the ".html" just added,
		// so both files share the same timestamped stem.
		withoutExt := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = withoutExt + ".signature.json"
	}
	return snapshotPath, signaturePath
}
|
||||||
|
|
||||||
|
func readDeepSeekNewsStructureSignature(path string) (deepseekNewsStructureSignature, error) {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, err
|
||||||
|
}
|
||||||
|
var signature deepseekNewsStructureSignature
|
||||||
|
if err := json.Unmarshal(data, &signature); err != nil {
|
||||||
|
return deepseekNewsStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return signature, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasDeepSeekNewsStructureSignature(signature deepseekNewsStructureSignature) bool {
|
||||||
|
return signature.ByteSize > 0 ||
|
||||||
|
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
||||||
|
strings.TrimSpace(signature.SHA256) != "" ||
|
||||||
|
strings.TrimSpace(signature.Title) != "" ||
|
||||||
|
len(signature.Headings) > 0 ||
|
||||||
|
len(signature.Contains) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func deepseekNewsStructureDigestPayload(signature deepseekNewsStructureSignature) string {
|
||||||
|
type containsEntry struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Value bool `json:"value"`
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(signature.Contains))
|
||||||
|
for key := range signature.Contains {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
entries := make([]containsEntry, 0, len(keys))
|
||||||
|
for _, key := range keys {
|
||||||
|
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
||||||
|
}
|
||||||
|
payload := struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
MetaDescription string `json:"meta_description"`
|
||||||
|
Headings []string `json:"headings"`
|
||||||
|
Contains []containsEntry `json:"contains"`
|
||||||
|
}{
|
||||||
|
Title: signature.Title,
|
||||||
|
MetaDescription: signature.MetaDescription,
|
||||||
|
Headings: signature.Headings,
|
||||||
|
Contains: entries,
|
||||||
|
}
|
||||||
|
bytes, _ := json.Marshal(payload)
|
||||||
|
return string(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deepseekNewsSHA256Hex returns the lower-case hexadecimal SHA-256 digest of
// raw.
func deepseekNewsSHA256Hex(raw string) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	hasher.Write([]byte(raw))
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||||
|
|
||||||
|
func firstHTMLMatch(re *regexp.Regexp, raw string) string {
|
||||||
|
match := re.FindStringSubmatch(raw)
|
||||||
|
if len(match) < 2 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return cleanHTMLText(match[1])
|
||||||
|
}
|
||||||
|
|
||||||
|
func cleanHTMLText(raw string) string {
|
||||||
|
text := htmlTagRe.ReplaceAllString(raw, " ")
|
||||||
|
text = strings.ReplaceAll(text, "&", "&")
|
||||||
|
text = strings.ReplaceAll(text, " ", " ")
|
||||||
|
text = strings.Join(strings.Fields(text), " ")
|
||||||
|
return strings.TrimSpace(text)
|
||||||
|
}
|
||||||
57
scripts/deepseek_pricing_signature_guard.go
Normal file
57
scripts/deepseek_pricing_signature_guard.go
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
loadSubscriptionImportEnv()
|
||||||
|
|
||||||
|
var url string
|
||||||
|
var fixture string
|
||||||
|
var snapshotDir string
|
||||||
|
var baselinePath string
|
||||||
|
var timeoutSeconds int
|
||||||
|
var allowBootstrap bool
|
||||||
|
var sourceKey string
|
||||||
|
var snapshotBase string
|
||||||
|
|
||||||
|
flag.StringVar(&sourceKey, "source-key", "deepseek_pricing_signature", "审计 source_key")
|
||||||
|
flag.StringVar(&snapshotBase, "snapshot-base", "deepseek-pricing", "snapshot 文件名前缀")
|
||||||
|
flag.StringVar(&url, "url", defaultDeepSeekPricingFetchURL, "DeepSeek 官方价格页")
|
||||||
|
flag.StringVar(&fixture, "fixture", "", "DeepSeek 价格页样例文件")
|
||||||
|
flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek pricing snapshot 输出目录")
|
||||||
|
flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek pricing 结构基线签名路径")
|
||||||
|
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||||
|
flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
cfg := deepseekPricingSignatureGuardConfig{
|
||||||
|
SourceKey: sourceKey,
|
||||||
|
URL: url,
|
||||||
|
Fixture: fixture,
|
||||||
|
SnapshotDir: snapshotDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||||
|
AllowBootstrap: allowBootstrap,
|
||||||
|
SnapshotBase: snapshotBase,
|
||||||
|
}
|
||||||
|
result, err := runDeepSeekPricingSignatureGuard(cfg, now)
|
||||||
|
if auditErr := persistDeepSeekPricingSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard audit: %v\n", auditErr)
|
||||||
|
if err == nil {
|
||||||
|
err = auditErr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Println(formatDeepSeekPricingSignatureGuardSummary(sourceKey, result))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard: %v\n", err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
132
scripts/deepseek_pricing_signature_guard_lib.go
Normal file
132
scripts/deepseek_pricing_signature_guard_lib.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deepseekPricingSignatureGuardConfig holds the inputs of one pricing
// signature guard run.
type deepseekPricingSignatureGuardConfig struct {
	// SourceKey identifies the source in error messages and in the audit
	// record.
	SourceKey string
	// URL is handed to the page fetcher and recorded as the source URL of the
	// generated signature and audit record.
	URL string
	// Fixture is handed to the page fetcher together with URL; the CLI
	// describes it as a sample page file.
	Fixture string
	// SnapshotDir is the directory that receives the snapshot and signature
	// files. When empty, logs/<SnapshotBase>-snapshots is used.
	SnapshotDir string
	// BaselinePath is the baseline signature file to compare against. When
	// empty, baseline.signature.json inside the snapshot directory is used.
	BaselinePath string
	// Timeout is the timeout of the HTTP client used for fetching.
	Timeout time.Duration
	// AllowBootstrap permits creating the baseline from the current signature
	// when the baseline file does not exist.
	AllowBootstrap bool
	// SnapshotBase is the snapshot file-name prefix; it also names the default
	// snapshot directory.
	SnapshotBase string
	// SourceKindLabel is not read by the guard code next to this type.
	// NOTE(review): confirm whether another file uses it.
	SourceKindLabel string
}
|
||||||
|
|
||||||
|
type deepseekPricingSignatureGuardResult struct {
|
||||||
|
SnapshotPath string
|
||||||
|
SignaturePath string
|
||||||
|
BaselinePath string
|
||||||
|
DriftDetected bool
|
||||||
|
BaselineInitialized bool
|
||||||
|
PreviousBaselineHash string
|
||||||
|
CurrentSignature deepseekPricingStructureSignature
|
||||||
|
}
|
||||||
|
|
||||||
|
// defaultDeepSeekPricingFetchURL is the page URL the pricing signature guard
// uses as the default value of its -url flag.
const defaultDeepSeekPricingFetchURL = "https://platform.deepseek.com/pricing"
|
||||||
|
// defaultDeepSeekAPIPricingFetchURL is the URL of the API pricing docs page.
// NOTE(review): nothing in the guard code next to it references this
// constant; confirm which caller uses it.
const defaultDeepSeekAPIPricingFetchURL = "https://platform.deepseek.com/docs/api-pricing"
|
||||||
|
|
||||||
|
func runDeepSeekPricingSignatureGuard(cfg deepseekPricingSignatureGuardConfig, now time.Time) (deepseekPricingSignatureGuardResult, error) {
|
||||||
|
snapshotDir := cfg.SnapshotDir
|
||||||
|
if snapshotDir == "" {
|
||||||
|
snapshotDir = filepath.Join("logs", cfg.SnapshotBase+"-snapshots")
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
|
||||||
|
return deepseekPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||||
|
}
|
||||||
|
snapshotPath, signaturePath := resolveDeepSeekPricingSnapshotPaths("", "", snapshotDir, cfg.SnapshotBase, now)
|
||||||
|
baselinePath := cfg.BaselinePath
|
||||||
|
if baselinePath == "" {
|
||||||
|
baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
|
||||||
|
}
|
||||||
|
client := &http.Client{Timeout: cfg.Timeout}
|
||||||
|
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekPricingSignatureGuardResult{}, err
|
||||||
|
}
|
||||||
|
current, err := writeDeepSeekPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekPricingSignatureGuardResult{}, err
|
||||||
|
}
|
||||||
|
result := deepseekPricingSignatureGuardResult{
|
||||||
|
SnapshotPath: snapshotPath,
|
||||||
|
SignaturePath: signaturePath,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
CurrentSignature: current,
|
||||||
|
}
|
||||||
|
previous, err := readDeepSeekPricingStructureSignature(baselinePath)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
if !cfg.AllowBootstrap {
|
||||||
|
return result, fmt.Errorf("%s baseline missing: %s", cfg.SourceKey, baselinePath)
|
||||||
|
}
|
||||||
|
if err := copyFileCommon(signaturePath, baselinePath); err != nil {
|
||||||
|
return result, fmt.Errorf("initialize baseline: %w", err)
|
||||||
|
}
|
||||||
|
result.BaselineInitialized = true
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
result.PreviousBaselineHash = previous.StructureSHA256
|
||||||
|
if previous.StructureSHA256 != current.StructureSHA256 {
|
||||||
|
result.DriftDetected = true
|
||||||
|
return result, fmt.Errorf(
|
||||||
|
"%s structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
|
||||||
|
cfg.SourceKey, previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatDeepSeekPricingSignatureGuardSummary(sourceKey string, result deepseekPricingSignatureGuardResult) string {
|
||||||
|
return fmt.Sprintf(
|
||||||
|
"source=%s drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s",
|
||||||
|
sourceKey,
|
||||||
|
result.DriftDetected,
|
||||||
|
result.BaselineInitialized,
|
||||||
|
result.CurrentSignature.StructureSHA256,
|
||||||
|
emptyIfBlank(result.PreviousBaselineHash),
|
||||||
|
result.SnapshotPath,
|
||||||
|
result.SignaturePath,
|
||||||
|
result.BaselinePath,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildDeepSeekPricingSignatureAuditRecord(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
|
||||||
|
record := officialImportSignatureAuditRecord{
|
||||||
|
SourceKey: cfg.SourceKey,
|
||||||
|
CheckedAt: checkedAt,
|
||||||
|
Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr),
|
||||||
|
DriftDetected: result.DriftDetected,
|
||||||
|
BaselineInitialized: result.BaselineInitialized,
|
||||||
|
SourceURL: strings.TrimSpace(cfg.URL),
|
||||||
|
FixturePath: strings.TrimSpace(cfg.Fixture),
|
||||||
|
SnapshotPath: strings.TrimSpace(result.SnapshotPath),
|
||||||
|
SignaturePath: strings.TrimSpace(result.SignaturePath),
|
||||||
|
BaselinePath: strings.TrimSpace(result.BaselinePath),
|
||||||
|
StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256),
|
||||||
|
PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash),
|
||||||
|
ByteSize: result.CurrentSignature.ByteSize,
|
||||||
|
ErrorMessage: errorMessageText(runErr),
|
||||||
|
}
|
||||||
|
if hasDeepSeekPricingStructureSignature(result.CurrentSignature) {
|
||||||
|
signatureCopy := result.CurrentSignature
|
||||||
|
record.SignaturePayload = &signatureCopy
|
||||||
|
}
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
func persistDeepSeekPricingSignatureAuditIfConfigured(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) error {
|
||||||
|
return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekPricingSignatureAuditRecord(cfg, result, checkedAt, runErr))
|
||||||
|
}
|
||||||
96
scripts/deepseek_pricing_signature_guard_test.go
Normal file
96
scripts/deepseek_pricing_signature_guard_test.go
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRunDeepSeekPricingSignatureGuardInitializesBaseline(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||||
|
fixture := filepath.Join(tempDir, "pricing.html")
|
||||||
|
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
|
||||||
|
t.Fatalf("写入 fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
URL: defaultDeepSeekPricingFetchURL,
|
||||||
|
Fixture: fixture,
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: true,
|
||||||
|
SnapshotBase: "deepseek-pricing",
|
||||||
|
}, time.Date(2026, 5, 27, 22, 0, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("runDeepSeekPricingSignatureGuard 返回错误: %v", err)
|
||||||
|
}
|
||||||
|
if !result.BaselineInitialized {
|
||||||
|
t.Fatal("期望初始化 baseline")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunDeepSeekPricingSignatureGuardDetectsDrift(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||||
|
fixture := filepath.Join(tempDir, "pricing.html")
|
||||||
|
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
|
||||||
|
t.Fatalf("写入 fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
_, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
URL: defaultDeepSeekPricingFetchURL,
|
||||||
|
Fixture: fixture,
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: true,
|
||||||
|
SnapshotBase: "deepseek-pricing",
|
||||||
|
}, time.Date(2026, 5, 27, 22, 1, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("初始化 baseline 失败: %v", err)
|
||||||
|
}
|
||||||
|
driftFixture := filepath.Join(tempDir, "pricing-drift.html")
|
||||||
|
if err := os.WriteFile(driftFixture, []byte(`<html><head><title>DeepSeek Pricing</title><meta name="description" content="Updated DeepSeek pricing"><meta name="commit-id" content="def456"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing update</body></html>`), 0o644); err != nil {
|
||||||
|
t.Fatalf("写入 drift fixture 失败: %v", err)
|
||||||
|
}
|
||||||
|
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
URL: defaultDeepSeekPricingFetchURL,
|
||||||
|
Fixture: driftFixture,
|
||||||
|
SnapshotDir: tempDir,
|
||||||
|
BaselinePath: baselinePath,
|
||||||
|
Timeout: time.Second,
|
||||||
|
AllowBootstrap: false,
|
||||||
|
SnapshotBase: "deepseek-pricing",
|
||||||
|
}, time.Date(2026, 5, 27, 22, 2, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("期望结构漂移时报错")
|
||||||
|
}
|
||||||
|
if !result.DriftDetected {
|
||||||
|
t.Fatal("期望 driftDetected=true")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormatDeepSeekPricingSignatureGuardSummary(t *testing.T) {
|
||||||
|
result := deepseekPricingSignatureGuardResult{
|
||||||
|
SnapshotPath: "/tmp/deepseek-pricing.html",
|
||||||
|
SignaturePath: "/tmp/deepseek-pricing.signature.json",
|
||||||
|
BaselinePath: "/tmp/baseline.signature.json",
|
||||||
|
BaselineInitialized: true,
|
||||||
|
CurrentSignature: deepseekPricingStructureSignature{
|
||||||
|
StructureSHA256: "abc123",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
summary := formatDeepSeekPricingSignatureGuardSummary("deepseek_pricing_signature", result)
|
||||||
|
for _, want := range []string{"source=deepseek_pricing_signature", "baseline_initialized=true", "structure_sha256=abc123"} {
|
||||||
|
if !strings.Contains(summary, want) {
|
||||||
|
t.Fatalf("summary 缺少 %q,实际: %q", want, summary)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
//go:build llm_script
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deepseekPricingStructureSignature is the JSON-serialisable fingerprint of a
// fetched DeepSeek pricing page. Field order and tags determine the layout of
// the signature file, so they must stay as they are.
type deepseekPricingStructureSignature struct {
	// Whole-page measurements.
	ByteSize int    `json:"byte_size"` // length of the raw page in bytes
	SHA256   string `json:"sha256"`    // hex SHA-256 of the raw page

	// Hex SHA-256 of the digest payload built from the fields below
	// (title, description, commit id, canonical URL and contains flags).
	StructureSHA256 string `json:"structure_sha256"`

	// Values extracted from the page's <title> and <meta> tags.
	Title           string `json:"title"`
	MetaDescription string `json:"meta_description"`
	CommitID        string `json:"commit_id"`
	CanonicalURL    string `json:"canonical_url"`

	// Contains maps a needle name to whether the page mentions that needle.
	Contains map[string]bool `json:"contains"`

	// Bookkeeping filled in when the artifacts are written to disk;
	// omitted from JSON when empty.
	GeneratedAt  string `json:"generated_at,omitempty"`
	SourceURL    string `json:"source_url,omitempty"`
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
||||||
|
|
||||||
|
// Package-level lookup data for fingerprinting the DeepSeek pricing page.
var (
	// deepseekPricingContainsNeedles maps a signature flag name to the
	// substring whose presence in the page is recorded under that name.
	deepseekPricingContainsNeedles = map[string]string{
		"api_docs":  "api",
		"deepseek":  "deepseek",
		"developer": "developer resources",
		"platform":  "platform",
		"pricing":   "pricing",
	}

	// Extractors for <title> and for <meta> tags whose name/property
	// attribute appears before the content attribute. All are
	// case-insensitive and let "." match newlines.
	deepseekPricingTitleRe     = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
	deepseekPricingMetaDescRe  = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
	deepseekPricingCommitRe    = regexp.MustCompile(`(?is)<meta[^>]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`)
	deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']`)

	// deepseekPricingHTMLTagRe matches any single HTML tag.
	deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
)
|
||||||
|
|
||||||
|
func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature {
|
||||||
|
title := firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw)
|
||||||
|
meta := firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw)
|
||||||
|
commitID := firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw)
|
||||||
|
canonicalURL := firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw)
|
||||||
|
contains := make(map[string]bool, len(deepseekPricingContainsNeedles))
|
||||||
|
lower := strings.ToLower(raw)
|
||||||
|
for key, needle := range deepseekPricingContainsNeedles {
|
||||||
|
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
||||||
|
}
|
||||||
|
signature := deepseekPricingStructureSignature{
|
||||||
|
ByteSize: len([]byte(raw)),
|
||||||
|
SHA256: deepseekPricingSHA256Hex(raw),
|
||||||
|
Title: title,
|
||||||
|
MetaDescription: meta,
|
||||||
|
CommitID: commitID,
|
||||||
|
CanonicalURL: canonicalURL,
|
||||||
|
Contains: contains,
|
||||||
|
}
|
||||||
|
signature.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(signature))
|
||||||
|
return signature
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) {
|
||||||
|
if strings.TrimSpace(snapshotPath) == "" {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(signaturePath) == "" {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
||||||
|
}
|
||||||
|
signature := buildDeepSeekPricingStructureSignature(raw)
|
||||||
|
signature.GeneratedAt = now.Format(time.RFC3339)
|
||||||
|
signature.SourceURL = sourceURL
|
||||||
|
signature.SnapshotPath = snapshotPath
|
||||||
|
payload, err := json.MarshalIndent(signature, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
||||||
|
}
|
||||||
|
return signature, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolveDeepSeekPricingSnapshotPaths fills in whichever of the snapshot and
// signature paths the caller left blank (empty or whitespace only) and returns
// both.
//
//   - A blank snapshotPath becomes
//     <snapshotDir>/<baseName>-<YYYYMMDD-HHMMSS>.html, where a blank
//     snapshotDir defaults to logs/<baseName>-snapshots.
//   - A blank signaturePath becomes the snapshot path with its extension
//     replaced by ".signature.json".
//
// Paths supplied by the caller are returned unchanged.
func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	if strings.TrimSpace(snapshotPath) == "" {
		dir := snapshotDir
		if strings.TrimSpace(dir) == "" {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stem := filepath.Join(dir, fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405")))
		snapshotPath = stem + ".html"
	}

	if strings.TrimSpace(signaturePath) == "" {
		// For a generated snapshot path this strips the ".html" just added,
		// so the signature lands next to the snapshot with the same stem.
		withoutExt := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = withoutExt + ".signature.json"
	}

	return snapshotPath, signaturePath
}
|
||||||
|
|
||||||
|
func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, err
|
||||||
|
}
|
||||||
|
var signature deepseekPricingStructureSignature
|
||||||
|
if err := json.Unmarshal(data, &signature); err != nil {
|
||||||
|
return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
||||||
|
}
|
||||||
|
return signature, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool {
|
||||||
|
return signature.ByteSize > 0 ||
|
||||||
|
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
||||||
|
strings.TrimSpace(signature.SHA256) != "" ||
|
||||||
|
strings.TrimSpace(signature.Title) != "" ||
|
||||||
|
strings.TrimSpace(signature.CommitID) != "" ||
|
||||||
|
len(signature.Contains) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string {
|
||||||
|
type containsEntry struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Value bool `json:"value"`
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(signature.Contains))
|
||||||
|
for key := range signature.Contains {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
entries := make([]containsEntry, 0, len(keys))
|
||||||
|
for _, key := range keys {
|
||||||
|
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
||||||
|
}
|
||||||
|
payload := struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
MetaDescription string `json:"meta_description"`
|
||||||
|
CommitID string `json:"commit_id"`
|
||||||
|
CanonicalURL string `json:"canonical_url"`
|
||||||
|
Contains []containsEntry `json:"contains"`
|
||||||
|
}{
|
||||||
|
Title: signature.Title,
|
||||||
|
MetaDescription: signature.MetaDescription,
|
||||||
|
CommitID: signature.CommitID,
|
||||||
|
CanonicalURL: signature.CanonicalURL,
|
||||||
|
Contains: entries,
|
||||||
|
}
|
||||||
|
bytes, _ := json.Marshal(payload)
|
||||||
|
return string(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deepseekPricingSHA256Hex returns the lowercase hexadecimal SHA-256 digest of
// raw's bytes.
func deepseekPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash documents that Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||||
|
|
||||||
|
func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string {
|
||||||
|
match := re.FindStringSubmatch(raw)
|
||||||
|
if len(match) < 2 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
text := deepseekPricingHTMLTagRe.ReplaceAllString(match[1], " ")
|
||||||
|
text = strings.ReplaceAll(text, "&", "&")
|
||||||
|
text = strings.ReplaceAll(text, " ", " ")
|
||||||
|
text = strings.Join(strings.Fields(text), " ")
|
||||||
|
return strings.TrimSpace(text)
|
||||||
|
}
|
||||||
@@ -154,19 +154,14 @@ func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) err
|
|||||||
}
|
}
|
||||||
|
|
||||||
// buildIntradayQueries returns the fixed list of site-scoped search queries
// used for intraday discovery. When providerLimit is positive and smaller than
// the list, only the first providerLimit queries are returned; any other value
// returns the whole list.
//
// The date parameter is kept for signature compatibility; the current query
// list does not depend on it.
func buildIntradayQueries(date string, providerLimit int) []string {
	all := []string{
		"site:platform.deepseek.com DeepSeek pricing",
		"site:api-docs.deepseek.com DeepSeek release news",
		"site:docs.anthropic.com Claude Sonnet 4 announcement",
		"site:openrouter.ai OpenRouter models",
	}
	if providerLimit <= 0 || providerLimit >= len(all) {
		return all
	}
	return all[:providerLimit]
}
|
||||||
@@ -217,22 +212,36 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m
|
|||||||
Status: "candidate",
|
Status: "candidate",
|
||||||
VerificationConfidence: "candidate",
|
VerificationConfidence: "candidate",
|
||||||
}
|
}
|
||||||
|
matchedSearch := false
|
||||||
|
filteredURLs := make([]string, 0, len(candidate.CandidateURLs))
|
||||||
for _, url := range candidate.CandidateURLs {
|
for _, url := range candidate.CandidateURLs {
|
||||||
if searchRecord, ok := searchIndex[url]; ok {
|
searchRecord, ok := searchIndex[url]
|
||||||
candidate.DiscoverySource = "web_search+llm"
|
if !ok {
|
||||||
candidate.DiscoveryQuery = searchRecord.Title
|
continue
|
||||||
candidate.DiscoveryEvidence["search_record"] = searchRecord
|
}
|
||||||
if candidate.ProviderName == "" {
|
if !searchRecordMatchesDate(searchRecord, date) {
|
||||||
candidate.ProviderName = strings.TrimSpace(searchRecord.Provider)
|
continue
|
||||||
}
|
}
|
||||||
if candidate.Title == "" {
|
matchedSearch = true
|
||||||
candidate.Title = strings.TrimSpace(searchRecord.Title)
|
filteredURLs = append(filteredURLs, url)
|
||||||
}
|
candidate.DiscoverySource = "web_search+llm"
|
||||||
if candidate.Summary == "" {
|
candidate.DiscoveryQuery = searchRecord.Title
|
||||||
candidate.Summary = strings.TrimSpace(searchRecord.Summary)
|
candidate.DiscoveryEvidence["search_record"] = searchRecord
|
||||||
}
|
if candidate.ProviderName == "" {
|
||||||
|
candidate.ProviderName = strings.TrimSpace(searchRecord.Provider)
|
||||||
|
}
|
||||||
|
if candidate.Title == "" {
|
||||||
|
candidate.Title = strings.TrimSpace(searchRecord.Title)
|
||||||
|
}
|
||||||
|
if candidate.Summary == "" {
|
||||||
|
candidate.Summary = strings.TrimSpace(searchRecord.Summary)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if !matchedSearch {
|
||||||
|
candidate.CandidateURLs = nil
|
||||||
|
return candidate
|
||||||
|
}
|
||||||
|
candidate.CandidateURLs = dedupeStrings(filteredURLs)
|
||||||
return candidate
|
return candidate
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -294,6 +303,36 @@ func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string {
|
|||||||
}, "|")
|
}, "|")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func searchRecordMatchesDate(record intradaySearchRecord, date string) bool {
|
||||||
|
published := strings.TrimSpace(record.PublishedAt)
|
||||||
|
if published == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if ts, ok := parseSearchPublishedAt(published); ok {
|
||||||
|
return ts == date
|
||||||
|
}
|
||||||
|
return strings.Contains(published, date)
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseSearchPublishedAt converts a search result's publish timestamp into a
// YYYY-MM-DD day string, expressed in the timestamp's own zone.
//
// It accepts RFC 3339, a bare YYYY-MM-DD date, and RFC 1123-style strings with
// a zone abbreviation. If none of those parse, Chinese weekday and month
// tokens (e.g. "周一", "5月") are translated to their English abbreviations
// and the RFC 1123-style layouts are tried again. The boolean is false when
// nothing parses.
func parseSearchPublishedAt(value string) (string, bool) {
	const dayLayout = "2006-01-02"

	// firstDay returns the day for the first layout that parses text.
	firstDay := func(text string, layouts ...string) (string, bool) {
		for _, layout := range layouts {
			parsed, err := time.Parse(layout, text)
			if err != nil {
				continue
			}
			return parsed.Format(dayLayout), true
		}
		return "", false
	}

	if day, ok := firstDay(value,
		time.RFC3339,
		dayLayout,
		"Mon, 02 Jan 2006 15:04:05 MST",
		"Mon, 2 Jan 2006 15:04:05 MST",
	); ok {
		return day, true
	}

	// Localised feeds spell weekday and month in Chinese; map them to the
	// English abbreviations time.Parse understands.
	english := strings.NewReplacer(
		"周一", "Mon", "周二", "Tue", "周三", "Wed", "周四", "Thu", "周五", "Fri", "周六", "Sat", "周日", "Sun",
		"1月", "Jan", "2月", "Feb", "3月", "Mar", "4月", "Apr", "5月", "May", "6月", "Jun",
		"7月", "Jul", "8月", "Aug", "9月", "Sep", "10月", "Oct", "11月", "Nov", "12月", "Dec",
	).Replace(value)

	return firstDay(english,
		"Mon, 2 Jan 2006 15:04:05 MST",
		"Mon, 02 Jan 2006 15:04:05 MST",
	)
}
|
||||||
|
|
||||||
|
|
||||||
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
|
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
|
||||||
eventTypeCounts := make(map[string]int)
|
eventTypeCounts := make(map[string]int)
|
||||||
|
|||||||
@@ -46,10 +46,11 @@ func TestLoadIntradayLLMRecordsFromFixture(t *testing.T) {
|
|||||||
|
|
||||||
func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
||||||
searchRecords := []intradaySearchRecord{{
|
searchRecords := []intradaySearchRecord{{
|
||||||
Title: "OpenAI announces GPT-5.6 preview pricing update",
|
Title: "OpenAI announces GPT-5.6 preview pricing update",
|
||||||
Summary: "Search summary",
|
Summary: "Search summary",
|
||||||
URL: "https://openai.example.com/news/gpt-5-6-pricing",
|
URL: "https://openai.example.com/news/gpt-5-6-pricing",
|
||||||
Provider: "OpenAI",
|
Provider: "OpenAI",
|
||||||
|
PublishedAt: "2026-05-25",
|
||||||
}}
|
}}
|
||||||
llmRecords := []intradayLLMRecord{
|
llmRecords := []intradayLLMRecord{
|
||||||
{
|
{
|
||||||
@@ -80,6 +81,29 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeIntradayCandidatesDropsOutdatedSearchMatches(t *testing.T) {
|
||||||
|
searchRecords := []intradaySearchRecord{{
|
||||||
|
Title: "Old DeepSeek pricing article",
|
||||||
|
Summary: "Yesterday record",
|
||||||
|
URL: "https://deepseek.example.com/pricing",
|
||||||
|
Provider: "DeepSeek",
|
||||||
|
PublishedAt: "2026-05-24",
|
||||||
|
}}
|
||||||
|
llmRecords := []intradayLLMRecord{{
|
||||||
|
EventType: "price_cut",
|
||||||
|
ProviderName: "DeepSeek",
|
||||||
|
ModelName: "DeepSeek-V4-Flash",
|
||||||
|
ProviderCountry: "CN",
|
||||||
|
Title: "DeepSeek V4 Flash price cut",
|
||||||
|
Summary: "Should be dropped because search evidence is stale",
|
||||||
|
CandidateURLs: []string{"https://deepseek.example.com/pricing"},
|
||||||
|
}}
|
||||||
|
candidates := normalizeIntradayCandidates("2026-05-25", searchRecords, llmRecords)
|
||||||
|
if len(candidates) != 0 {
|
||||||
|
t.Fatalf("旧闻搜索结果不应进入候选池, got=%d", len(candidates))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
||||||
llmRecords := []intradayLLMRecord{{
|
llmRecords := []intradayLLMRecord{{
|
||||||
EventType: "promo_campaign",
|
EventType: "promo_campaign",
|
||||||
@@ -94,6 +118,13 @@ func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSearchRecordMatchesLocalizedBingDate(t *testing.T) {
|
||||||
|
record := intradaySearchRecord{PublishedAt: "周一, 25 5月 2026 14:08:00 GMT"}
|
||||||
|
if !searchRecordMatchesDate(record, "2026-05-25") {
|
||||||
|
t.Fatal("应识别本地化 Bing pubDate 为当天")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
|
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
|
||||||
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil {
|
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil {
|
||||||
t.Fatal("缺少 command 时应报错")
|
t.Fatal("缺少 command 时应报错")
|
||||||
|
|||||||
@@ -95,6 +95,10 @@ type materializeDailySignalsConfig struct {
|
|||||||
var signalLogger *slog.Logger
|
var signalLogger *slog.Logger
|
||||||
|
|
||||||
const signalUSDToCNY = 7.25
|
const signalUSDToCNY = 7.25
|
||||||
|
// Official DeepSeek pages referenced by the structure-drift signal events.
const (
	defaultDeepSeekNewsSignalURL       = "https://api-docs.deepseek.com/news/news250120"
	defaultDeepSeekPricingSignalURL    = "https://platform.deepseek.com/pricing"
	defaultDeepSeekAPIPricingSignalURL = "https://platform.deepseek.com/docs/api-pricing"
)
|
||||||
|
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
signalLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
signalLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||||
@@ -373,6 +377,11 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error)
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
|
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
|
||||||
|
deepseekDriftEvents, err := loadDeepSeekNewsDriftSignalEvents(db)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
events = mergeVerifiedDiscoveryEvents(events, deepseekDriftEvents)
|
||||||
|
|
||||||
sort.Slice(events, func(i, j int) bool {
|
sort.Slice(events, func(i, j int) bool {
|
||||||
if events[i].Priority != events[j].Priority {
|
if events[i].Priority != events[j].Priority {
|
||||||
@@ -954,6 +963,110 @@ func firstString(values []string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func loadDeepSeekNewsDriftSignalEvents(db *sql.DB) ([]signalModelEvent, error) {
|
||||||
|
return loadDeepSeekSignatureSignalEvents(db, []deepseekSignatureEventConfig{
|
||||||
|
{
|
||||||
|
SourceKey: "deepseek_news_signature",
|
||||||
|
ModelName: "DeepSeek 官方新闻页",
|
||||||
|
SourceKindLabel: "官方新闻页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekNewsSignalURL,
|
||||||
|
Audience: "适合需要尽快复查 DeepSeek 路线图与默认选型的团队",
|
||||||
|
EvidenceTemplate: "DeepSeek 官方新闻页结构签名发生变化:sha=%s previous=%s",
|
||||||
|
Baseline: "官方新闻页结构漂移",
|
||||||
|
Summary: "DeepSeek 官方新闻页结构发生变化,需优先确认是否出现新发布或路线图更新。",
|
||||||
|
Priority: 117,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
ModelName: "DeepSeek 官方价格页",
|
||||||
|
SourceKindLabel: "官方价格页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekPricingSignalURL,
|
||||||
|
Audience: "适合需要尽快复查 DeepSeek 价格策略与默认成本模型的团队",
|
||||||
|
EvidenceTemplate: "DeepSeek 官方价格页结构签名发生变化:sha=%s previous=%s",
|
||||||
|
Baseline: "官方价格页结构漂移",
|
||||||
|
Summary: "DeepSeek 官方价格页结构发生变化,需优先确认是否出现价格策略更新。",
|
||||||
|
Priority: 116,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
SourceKey: "deepseek_api_pricing_signature",
|
||||||
|
ModelName: "DeepSeek API 定价页",
|
||||||
|
SourceKindLabel: "官方 API 定价页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekAPIPricingSignalURL,
|
||||||
|
Audience: "适合需要尽快复查 DeepSeek API 定价与预算预期的团队",
|
||||||
|
EvidenceTemplate: "DeepSeek API 定价页结构签名发生变化:sha=%s previous=%s",
|
||||||
|
Baseline: "官方 API 定价页结构漂移",
|
||||||
|
Summary: "DeepSeek API 定价页结构发生变化,需优先确认是否出现定价或套餐更新。",
|
||||||
|
Priority: 115,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// deepseekSignatureEventConfig describes how audit rows of one signature
// source are turned into a signal event.
type deepseekSignatureEventConfig struct {
	SourceKey        string // audit source key the rows must carry
	ModelName        string // copied to the event's ModelName
	SourceKindLabel  string // copied to the event's SourceKindLabel
	PrimaryURL       string // event SourceURL; also PrimarySource when the row has no snapshot path
	Audience         string // copied to the event's Audience
	EvidenceTemplate string // fmt template taking (current structure sha, previous observed sha)
	Baseline         string // copied to the event's Baseline
	Summary          string // copied to the event's Summary
	Priority         int    // copied to the event's Priority
}
|
||||||
|
|
||||||
|
func loadDeepSeekSignatureSignalEvents(db *sql.DB, configs []deepseekSignatureEventConfig) ([]signalModelEvent, error) {
|
||||||
|
if len(configs) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
var events []signalModelEvent
|
||||||
|
for _, cfg := range configs {
|
||||||
|
_, rows, err := queryOfficialImportSignatureAuditWindow(db, 5, cfg.SourceKey, false)
|
||||||
|
if err != nil {
|
||||||
|
if strings.Contains(err.Error(), `relation "official_import_signature_audit_recent_view" does not exist`) ||
|
||||||
|
strings.Contains(err.Error(), `relation "official_import_signature_audit" does not exist`) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, row := range rows {
|
||||||
|
if row.RecentRank != 1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if event, ok := buildDeepSeekSignatureSignalEvent(row, cfg); ok {
|
||||||
|
events = append(events, event)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return events, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildDeepSeekSignatureSignalEvent(row officialImportSignatureAuditViewRow, cfg deepseekSignatureEventConfig) (signalModelEvent, bool) {
|
||||||
|
if row.SourceKey != cfg.SourceKey || !row.DriftDetected {
|
||||||
|
return signalModelEvent{}, false
|
||||||
|
}
|
||||||
|
updatedAt := row.CheckedAt.Format("2006-01-02 15:04")
|
||||||
|
primarySource := nullStringOrNone(row.SnapshotPath)
|
||||||
|
if primarySource == "none" {
|
||||||
|
primarySource = cfg.PrimaryURL
|
||||||
|
}
|
||||||
|
return signalModelEvent{
|
||||||
|
EventType: "official_release",
|
||||||
|
ModelName: cfg.ModelName,
|
||||||
|
ProviderName: "DeepSeek",
|
||||||
|
OperatorName: "DeepSeek",
|
||||||
|
Audience: cfg.Audience,
|
||||||
|
TrustLabel: "官方来源 / 结构漂移告警",
|
||||||
|
SourceKindLabel: cfg.SourceKindLabel,
|
||||||
|
PrimarySource: primarySource,
|
||||||
|
SourceURL: cfg.PrimaryURL,
|
||||||
|
UpdatedAt: updatedAt,
|
||||||
|
EvidenceDetail: fmt.Sprintf(cfg.EvidenceTemplate, row.StructureSHA256, nullStringOrNone(row.PreviousObservedSHA256)),
|
||||||
|
Baseline: cfg.Baseline,
|
||||||
|
Summary: cfg.Summary,
|
||||||
|
Priority: cfg.Priority,
|
||||||
|
}, true
|
||||||
|
}
|
||||||
|
|
||||||
func signalNormalizeIntradayEventType(value string) string {
|
func signalNormalizeIntradayEventType(value string) string {
|
||||||
switch strings.TrimSpace(strings.ToLower(value)) {
|
switch strings.TrimSpace(strings.ToLower(value)) {
|
||||||
case "price_cut":
|
case "price_cut":
|
||||||
|
|||||||
@@ -92,3 +92,108 @@ func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T)
|
|||||||
t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
|
t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildDeepSeekNewsDriftEvent(t *testing.T) {
|
||||||
|
row := officialImportSignatureAuditViewRow{
|
||||||
|
SourceKey: "deepseek_news_signature",
|
||||||
|
Status: "drift_detected",
|
||||||
|
StructureState: "changed",
|
||||||
|
StructureChanged: true,
|
||||||
|
DriftDetected: true,
|
||||||
|
BaselineInitialized: false,
|
||||||
|
StructureSHA256: "abc123",
|
||||||
|
}
|
||||||
|
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||||
|
SourceKey: "deepseek_news_signature",
|
||||||
|
ModelName: "DeepSeek 官方新闻页",
|
||||||
|
SourceKindLabel: "官方新闻页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekNewsSignalURL,
|
||||||
|
Audience: "a",
|
||||||
|
EvidenceTemplate: "news drift %s %s",
|
||||||
|
Baseline: "官方新闻页结构漂移",
|
||||||
|
Summary: "summary",
|
||||||
|
Priority: 117,
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("期望为 drift 行生成正式信号事件")
|
||||||
|
}
|
||||||
|
if event.EventType != "official_release" {
|
||||||
|
t.Fatalf("DeepSeek drift 应映射为 official_release, got=%q", event.EventType)
|
||||||
|
}
|
||||||
|
if event.ProviderName != "DeepSeek" || event.ModelName != "DeepSeek 官方新闻页" {
|
||||||
|
t.Fatalf("DeepSeek drift 事件主体错误: %+v", event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildDeepSeekPricingDriftEvent(t *testing.T) {
|
||||||
|
row := officialImportSignatureAuditViewRow{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
Status: "drift_detected",
|
||||||
|
StructureState: "changed",
|
||||||
|
StructureChanged: true,
|
||||||
|
DriftDetected: true,
|
||||||
|
BaselineInitialized: false,
|
||||||
|
StructureSHA256: "pricing123",
|
||||||
|
}
|
||||||
|
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||||
|
SourceKey: "deepseek_pricing_signature",
|
||||||
|
ModelName: "DeepSeek 官方价格页",
|
||||||
|
SourceKindLabel: "官方价格页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekPricingSignalURL,
|
||||||
|
Audience: "a",
|
||||||
|
EvidenceTemplate: "pricing drift %s %s",
|
||||||
|
Baseline: "官方价格页结构漂移",
|
||||||
|
Summary: "pricing summary",
|
||||||
|
Priority: 116,
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("期望为 pricing drift 行生成正式信号事件")
|
||||||
|
}
|
||||||
|
if event.ModelName != "DeepSeek 官方价格页" || event.SourceKindLabel != "官方价格页结构变化" {
|
||||||
|
t.Fatalf("pricing drift 事件映射错误: %+v", event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildDeepSeekAPIPricingDriftEvent(t *testing.T) {
|
||||||
|
row := officialImportSignatureAuditViewRow{
|
||||||
|
SourceKey: "deepseek_api_pricing_signature",
|
||||||
|
Status: "drift_detected",
|
||||||
|
StructureState: "changed",
|
||||||
|
StructureChanged: true,
|
||||||
|
DriftDetected: true,
|
||||||
|
BaselineInitialized: false,
|
||||||
|
StructureSHA256: "api123",
|
||||||
|
}
|
||||||
|
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||||
|
SourceKey: "deepseek_api_pricing_signature",
|
||||||
|
ModelName: "DeepSeek API 定价页",
|
||||||
|
SourceKindLabel: "官方 API 定价页结构变化",
|
||||||
|
PrimaryURL: defaultDeepSeekAPIPricingSignalURL,
|
||||||
|
Audience: "a",
|
||||||
|
EvidenceTemplate: "api drift %s %s",
|
||||||
|
Baseline: "官方 API 定价页结构漂移",
|
||||||
|
Summary: "api pricing summary",
|
||||||
|
Priority: 115,
|
||||||
|
})
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("期望为 api pricing drift 行生成正式信号事件")
|
||||||
|
}
|
||||||
|
if event.ModelName != "DeepSeek API 定价页" || event.SourceKindLabel != "官方 API 定价页结构变化" {
|
||||||
|
t.Fatalf("api pricing drift 事件映射错误: %+v", event)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildDeepSeekNewsDriftEventSkipsBaselineOnly(t *testing.T) {
|
||||||
|
row := officialImportSignatureAuditViewRow{
|
||||||
|
SourceKey: "deepseek_news_signature",
|
||||||
|
Status: "baseline_initialized",
|
||||||
|
StructureState: "initial",
|
||||||
|
StructureChanged: false,
|
||||||
|
DriftDetected: false,
|
||||||
|
BaselineInitialized: true,
|
||||||
|
StructureSHA256: "abc123",
|
||||||
|
}
|
||||||
|
if _, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{SourceKey: "deepseek_news_signature"}); ok {
|
||||||
|
t.Fatal("baseline 初始化不应直接进入正式信号")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
59
scripts/real_intraday_llm_provider.py
Normal file
59
scripts/real_intraday_llm_provider.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python3
"""Intraday discovery LLM provider.

Reads a JSON request (``search_results`` and ``date``) from stdin, asks an
OpenRouter chat-completions model to turn the search results into candidate
events, and prints the candidates as a JSON array on stdout.

Exit status is 0 on success and 1 on any failure (missing API key, invalid
request payload, network/HTTP error, or an unusable model response); failures
are reported as a single line on stderr.
"""
import json
import os
import sys
import urllib.request


def load_request(payload):
    """Decode the stdin payload into a request dict.

    Empty or whitespace-only input, and JSON that is not an object, yield an
    empty dict. Malformed JSON raises ``ValueError`` (``json.JSONDecodeError``).
    """
    text = (payload or '').strip()
    if not text:
        return {}
    decoded = json.loads(text)
    return decoded if isinstance(decoded, dict) else {}


def strip_code_fence(text):
    """Remove a surrounding markdown code fence (``` or ```json) if present."""
    stripped = text.strip()
    if not stripped.startswith('```'):
        return stripped
    # Drop the opening fence line (it may carry a language tag) and, when
    # present, the closing fence line.
    lines = stripped.splitlines()[1:]
    if lines and lines[-1].strip().startswith('```'):
        lines = lines[:-1]
    return '\n'.join(lines).strip()


def extract_candidates(content):
    """Turn the model's message content into a list of candidates.

    Accepted shapes, in order of precedence:
      * a top-level JSON array;
      * an object with a list under ``items`` or ``candidates``;
      * an object that itself looks like one candidate (has ``event_type``);
      * an object with any other field holding a list of objects.
    Anything else yields an empty list. Raises ``ValueError`` when the content
    is not a string or is not valid JSON.
    """
    if not isinstance(content, str):
        raise ValueError('model returned no text content')
    parsed = json.loads(strip_code_fence(content))
    if isinstance(parsed, list):
        return parsed
    if not isinstance(parsed, dict):
        return []
    for key in ('items', 'candidates'):
        if isinstance(parsed.get(key), list):
            return parsed[key]
    # The request sets response_format=json_object while the prompt asks for
    # an array, so the model may wrap the array under a key of its own
    # choosing or return a single candidate object; accept both instead of
    # silently discarding them.
    if 'event_type' in parsed:
        return [parsed]
    for value in parsed.values():
        if isinstance(value, list) and all(isinstance(entry, dict) for entry in value):
            return value
    return []


def build_request(api_key, search_results, date):
    """Build the OpenRouter chat-completions POST request."""
    prompt = {
        "role": "user",
        "content": (
            "你是大模型情报候选发现器。根据给定搜索结果,只输出 JSON 数组,不要输出 markdown。"
            "每项字段必须包含 event_type, provider_name, model_name, provider_country, title, summary, candidate_urls。"
            "event_type 只能是 price_cut, price_increase, official_release, promo_campaign, leak_or_rumor, unknown。"
            "只有当搜索结果明确像是当天消息时才输出。没有 URL 的候选不要输出。"
            f"\n日期: {date}\n搜索结果:\n" + json.dumps(search_results, ensure_ascii=False)
        )
    }
    req_body = json.dumps({
        "model": "deepseek/deepseek-v4-flash",
        "messages": [prompt],
        "temperature": 0,
        "max_tokens": 1200,
        "response_format": {"type": "json_object"}
    }).encode('utf-8')
    return urllib.request.Request(
        'https://openrouter.ai/api/v1/chat/completions',
        data=req_body,
        headers={
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
            'HTTP-Referer': 'https://github.com/phamnazage-jpg/llm-intelligence',
            'X-Title': 'llm-intelligence intraday discovery'
        },
        method='POST'
    )


def main():
    """Run the provider once; return the process exit status."""
    api_key = os.environ.get('OPENROUTER_API_KEY', '').strip()
    if not api_key:
        print('OPENROUTER_API_KEY missing', file=sys.stderr)
        return 1

    try:
        request_data = load_request(sys.stdin.read())
    except ValueError as exc:
        print(f'invalid request payload: {exc}', file=sys.stderr)
        return 1

    search_results = request_data.get('search_results', [])
    date = request_data.get('date', '')
    req = build_request(api_key, search_results, date)

    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = json.loads(resp.read().decode('utf-8'))
        content = data['choices'][0]['message']['content']
        candidates = extract_candidates(content)
    except OSError as exc:
        # urllib's URLError/HTTPError and socket timeouts are all OSError.
        print(f'OpenRouter request failed: {exc}', file=sys.stderr)
        return 1
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        # Error bodies without "choices", null content, or non-JSON output.
        print(f'unusable OpenRouter response: {exc!r}', file=sys.stderr)
        return 1

    print(json.dumps(candidates, ensure_ascii=False))
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
|
||||||
65
scripts/real_intraday_search_provider.py
Normal file
65
scripts/real_intraday_search_provider.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import email.utils
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_pubdate(value: str) -> str:
    """Convert an RFC 2822 style date string into ``YYYY-MM-DD``.

    A falsy value yields an empty string. If the value cannot be parsed it is
    returned unchanged.
    """
    if value:
        try:
            parsed_date = email.utils.parsedate_to_datetime(value)
            return parsed_date.strftime('%Y-%m-%d')
        except Exception:
            # Best effort: hand the raw text back when parsing fails.
            return value
    return ''
|
||||||
|
|
||||||
|
|
||||||
|
def infer_provider(title: str, link: str) -> str:
    """Guess the provider label from keywords in the title and link.

    The title and link are joined and lower-cased, then checked against the
    keyword table in order; the label of the first keyword found as a
    substring is returned. Returns an empty string when nothing matches.
    """
    text = (title + ' ' + link).lower()
    needles = (
        ('openai', 'OpenAI'), ('anthropic', 'Anthropic'), ('claude', 'Anthropic'),
        ('gemini', 'Google'), ('google', 'Google'),
        ('deepseek', 'DeepSeek'), ('qwen', 'Qwen'), ('dashscope', 'DashScope'), ('zhipu', '智谱'),
        ('baidu', '百度'), ('tencent', '腾讯'), ('minimax', 'MiniMax'), ('x.ai', 'xAI'), ('xai', 'xAI'),
        # Native-script names for the providers labelled in Chinese, so that
        # Chinese headlines resolve too. Kept last so every input that matched
        # an earlier keyword still returns the same label.
        ('智谱', '智谱'), ('百度', '百度'), ('腾讯', '腾讯'),
    )
    for needle, provider in needles:
        if needle in text:
            return provider
    return ''
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: run the query from INTRADAY_DISCOVERY_QUERY against Bing's RSS
# search endpoint and print the results as a JSON array on stdout.
search_query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip()
if search_query == "":
    # No query configured: emit an empty result list and exit successfully.
    print("[]")
    raise SystemExit(0)

feed_request = urllib.request.Request(
    "https://www.bing.com/search?format=rss&q=" + urllib.parse.quote(search_query),
    headers={
        "User-Agent": "Mozilla/5.0",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    },
)
with urllib.request.urlopen(feed_request, timeout=20) as feed_response:
    feed_xml = feed_response.read().decode("utf-8", errors="ignore")

feed_root = ET.fromstring(feed_xml)
results = []
for entry in feed_root.findall('./channel/item'):
    # Missing child elements are treated as empty strings.
    headline, target, snippet, published = (
        (entry.findtext(tag) or '').strip()
        for tag in ('title', 'link', 'description', 'pubDate')
    )
    provider_label = infer_provider(headline, target)

    # provider_url is the scheme://host origin of the result link, when both
    # parts are present.
    origin = ''
    if target:
        link_parts = urllib.parse.urlparse(target)
        if link_parts.scheme and link_parts.netloc:
            origin = f"{link_parts.scheme}://{link_parts.netloc}"

    results.append({
        "title": headline,
        "summary": snippet,
        "url": target,
        "provider": provider_label,
        "provider_url": origin,
        "published_at": normalize_pubdate(published),
    })

print(json.dumps(results, ensure_ascii=False))
|
||||||
@@ -41,6 +41,30 @@ if [[ "$DRY_RUN" == "true" ]]; then
|
|||||||
materialize_args+=(--dry-run)
|
materialize_args+=(--dry-run)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"
|
deepseek_guard_args=()
|
||||||
go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"
|
if [[ "$DRY_RUN" == "true" ]]; then
|
||||||
|
deepseek_guard_args+=(--allow-bootstrap=false)
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ! go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if ! go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if ! go run -tags llm_script ./scripts/deepseek_news_signature_guard.go ./scripts/deepseek_news_signature_guard_lib.go ./scripts/deepseek_news_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go "${deepseek_guard_args[@]}"; then
|
||||||
|
if [[ "$DRY_RUN" != "true" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_pricing_signature --snapshot-base deepseek-pricing --url https://platform.deepseek.com/pricing "${deepseek_guard_args[@]}"; then
|
||||||
|
if [[ "$DRY_RUN" != "true" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_api_pricing_signature --snapshot-base deepseek-api-pricing --url https://platform.deepseek.com/docs/api-pricing "${deepseek_guard_args[@]}"; then
|
||||||
|
if [[ "$DRY_RUN" != "true" ]]; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${materialize_args[@]}"
|
REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${materialize_args[@]}"
|
||||||
|
|||||||
Reference in New Issue
Block a user