feat(intraday): monitor DeepSeek official page drift
This commit is contained in:
@@ -55,6 +55,6 @@
|
||||
|
||||
## 下一步建议
|
||||
|
||||
1. 为 `run_intraday_discovery_watch.sh` 补充生产级 provider adapter 和调度说明
|
||||
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间”提示
|
||||
1. 已补充 `run_intraday_discovery_watch.sh` 与 DeepSeek 官方新闻页结构签名 guard,可继续扩展到 DeepSeek pricing 页面
|
||||
2. 给前端查询页增加“最近一次价格追踪时间 / 最近一次 discovery 验证时间 / 最近一次官方页 drift 检查时间”提示
|
||||
3. 如果日内事件仍不够敏感,再考虑引入独立 `intraday_signal_snapshot` 或候选情报面板
|
||||
|
||||
51
scripts/deepseek_news_signature_guard.go
Normal file
51
scripts/deepseek_news_signature_guard.go
Normal file
@@ -0,0 +1,51 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var snapshotDir string
|
||||
var baselinePath string
|
||||
var timeoutSeconds int
|
||||
var allowBootstrap bool
|
||||
|
||||
flag.StringVar(&url, "url", defaultDeepSeekNewsFetchURL, "DeepSeek 官方新闻页")
|
||||
flag.StringVar(&fixture, "fixture", "", "DeepSeek 新闻页样例文件")
|
||||
flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek news snapshot 输出目录")
|
||||
flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek news 结构基线签名路径")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||
flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化")
|
||||
flag.Parse()
|
||||
|
||||
now := time.Now()
|
||||
cfg := deepseekNewsSignatureGuardConfig{
|
||||
URL: url,
|
||||
Fixture: fixture,
|
||||
SnapshotDir: snapshotDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
AllowBootstrap: allowBootstrap,
|
||||
}
|
||||
result, err := runDeepSeekNewsSignatureGuard(cfg, now)
|
||||
if auditErr := persistDeepSeekNewsSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil {
|
||||
fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard audit: %v\n", auditErr)
|
||||
if err == nil {
|
||||
err = auditErr
|
||||
}
|
||||
}
|
||||
fmt.Println(formatDeepSeekNewsSignatureGuardSummary(result))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "deepseek_news_signature_guard: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
127
scripts/deepseek_news_signature_guard_lib.go
Normal file
127
scripts/deepseek_news_signature_guard_lib.go
Normal file
@@ -0,0 +1,127 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// deepseekNewsSignatureGuardConfig carries the inputs of one news signature
// guard run.
type deepseekNewsSignatureGuardConfig struct {
	// URL is the page address handed to the fetch helper and stored in the
	// signature as its source URL.
	URL string
	// Fixture is handed to the fetch helper next to URL.
	// NOTE(review): presumably a local sample file used instead of a live
	// request — confirm against fetchSubscriptionPage.
	Fixture string
	// SnapshotDir is the directory for snapshot and signature files; when
	// empty the run uses logs/deepseek-news-snapshots.
	SnapshotDir string
	// BaselinePath is the baseline signature file; when empty the run uses
	// baseline.signature.json inside the snapshot directory.
	BaselinePath string
	// Timeout is applied to the HTTP client used for fetching.
	Timeout time.Duration
	// AllowBootstrap lets a run create the baseline from the current
	// signature when the baseline file does not exist.
	AllowBootstrap bool
}
|
||||
|
||||
type deepseekNewsSignatureGuardResult struct {
|
||||
SnapshotPath string
|
||||
SignaturePath string
|
||||
BaselinePath string
|
||||
DriftDetected bool
|
||||
BaselineInitialized bool
|
||||
PreviousBaselineHash string
|
||||
CurrentSignature deepseekNewsStructureSignature
|
||||
}
|
||||
|
||||
// defaultDeepSeekNewsFetchURL is the page the news guard fetches when no
// other URL is supplied.
const defaultDeepSeekNewsFetchURL = "https://api-docs.deepseek.com/news/news250120"
|
||||
|
||||
func runDeepSeekNewsSignatureGuard(cfg deepseekNewsSignatureGuardConfig, now time.Time) (deepseekNewsSignatureGuardResult, error) {
|
||||
snapshotDir := cfg.SnapshotDir
|
||||
if snapshotDir == "" {
|
||||
snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
|
||||
}
|
||||
if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
|
||||
return deepseekNewsSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
snapshotPath, signaturePath := resolveDeepSeekNewsSnapshotPaths("", "", snapshotDir, now)
|
||||
baselinePath := cfg.BaselinePath
|
||||
if baselinePath == "" {
|
||||
baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
|
||||
}
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||
if err != nil {
|
||||
return deepseekNewsSignatureGuardResult{}, err
|
||||
}
|
||||
current, err := writeDeepSeekNewsSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
|
||||
if err != nil {
|
||||
return deepseekNewsSignatureGuardResult{}, err
|
||||
}
|
||||
result := deepseekNewsSignatureGuardResult{
|
||||
SnapshotPath: snapshotPath,
|
||||
SignaturePath: signaturePath,
|
||||
BaselinePath: baselinePath,
|
||||
CurrentSignature: current,
|
||||
}
|
||||
previous, err := readDeepSeekNewsStructureSignature(baselinePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if !cfg.AllowBootstrap {
|
||||
return result, fmt.Errorf("deepseek news baseline missing: %s", baselinePath)
|
||||
}
|
||||
if err := copyFileCommon(signaturePath, baselinePath); err != nil {
|
||||
return result, fmt.Errorf("initialize baseline: %w", err)
|
||||
}
|
||||
result.BaselineInitialized = true
|
||||
return result, nil
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
result.PreviousBaselineHash = previous.StructureSHA256
|
||||
if previous.StructureSHA256 != current.StructureSHA256 {
|
||||
result.DriftDetected = true
|
||||
return result, fmt.Errorf(
|
||||
"deepseek news structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
|
||||
previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
|
||||
)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func formatDeepSeekNewsSignatureGuardSummary(result deepseekNewsSignatureGuardResult) string {
|
||||
return fmt.Sprintf(
|
||||
"source=deepseek-news-signature-guard drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s",
|
||||
result.DriftDetected,
|
||||
result.BaselineInitialized,
|
||||
result.CurrentSignature.StructureSHA256,
|
||||
emptyIfBlank(result.PreviousBaselineHash),
|
||||
result.SnapshotPath,
|
||||
result.SignaturePath,
|
||||
result.BaselinePath,
|
||||
)
|
||||
}
|
||||
|
||||
func buildDeepSeekNewsSignatureAuditRecord(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
|
||||
record := officialImportSignatureAuditRecord{
|
||||
SourceKey: "deepseek_news_signature",
|
||||
CheckedAt: checkedAt,
|
||||
Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr),
|
||||
DriftDetected: result.DriftDetected,
|
||||
BaselineInitialized: result.BaselineInitialized,
|
||||
SourceURL: strings.TrimSpace(cfg.URL),
|
||||
FixturePath: strings.TrimSpace(cfg.Fixture),
|
||||
SnapshotPath: strings.TrimSpace(result.SnapshotPath),
|
||||
SignaturePath: strings.TrimSpace(result.SignaturePath),
|
||||
BaselinePath: strings.TrimSpace(result.BaselinePath),
|
||||
StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256),
|
||||
PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash),
|
||||
ByteSize: result.CurrentSignature.ByteSize,
|
||||
ErrorMessage: errorMessageText(runErr),
|
||||
}
|
||||
if hasDeepSeekNewsStructureSignature(result.CurrentSignature) {
|
||||
signatureCopy := result.CurrentSignature
|
||||
record.SignaturePayload = &signatureCopy
|
||||
}
|
||||
return record
|
||||
}
|
||||
|
||||
func persistDeepSeekNewsSignatureAuditIfConfigured(cfg deepseekNewsSignatureGuardConfig, result deepseekNewsSignatureGuardResult, checkedAt time.Time, runErr error) error {
|
||||
return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekNewsSignatureAuditRecord(cfg, result, checkedAt, runErr))
|
||||
}
|
||||
88
scripts/deepseek_news_signature_guard_test.go
Normal file
88
scripts/deepseek_news_signature_guard_test.go
Normal file
@@ -0,0 +1,88 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRunDeepSeekNewsSignatureGuardInitializesBaseline(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||
URL: defaultDeepSeekNewsFetchURL,
|
||||
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: true,
|
||||
}, time.Date(2026, 5, 27, 21, 0, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err != nil {
|
||||
t.Fatalf("runDeepSeekNewsSignatureGuard 返回错误: %v", err)
|
||||
}
|
||||
if !result.BaselineInitialized {
|
||||
t.Fatal("期望初始化 baseline")
|
||||
}
|
||||
if _, err := os.Stat(baselinePath); err != nil {
|
||||
t.Fatalf("baseline 未写入: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDeepSeekNewsSignatureGuardDetectsDrift(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||
_, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||
URL: defaultDeepSeekNewsFetchURL,
|
||||
Fixture: filepath.Join("testdata", "intraday_verification_official_release.html"),
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: true,
|
||||
}, time.Date(2026, 5, 27, 21, 1, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err != nil {
|
||||
t.Fatalf("初始化 baseline 失败: %v", err)
|
||||
}
|
||||
driftFixture := filepath.Join(tempDir, "drift.html")
|
||||
if err := os.WriteFile(driftFixture, []byte("<html><head><title>DeepSeek-V4 Release</title><meta name=\"description\" content=\"DeepSeek V4 pricing release\"></head><body><h1>DeepSeek V4 Release</h1></body></html>"), 0o644); err != nil {
|
||||
t.Fatalf("写入 drift fixture 失败: %v", err)
|
||||
}
|
||||
result, err := runDeepSeekNewsSignatureGuard(deepseekNewsSignatureGuardConfig{
|
||||
URL: defaultDeepSeekNewsFetchURL,
|
||||
Fixture: driftFixture,
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: false,
|
||||
}, time.Date(2026, 5, 27, 21, 2, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err == nil {
|
||||
t.Fatal("期望结构漂移时报错")
|
||||
}
|
||||
if !result.DriftDetected {
|
||||
t.Fatal("期望 driftDetected=true")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "deepseek news structure drift detected") {
|
||||
t.Fatalf("期望返回 drift 错误,实际: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatDeepSeekNewsSignatureGuardSummary(t *testing.T) {
|
||||
result := deepseekNewsSignatureGuardResult{
|
||||
SnapshotPath: "/tmp/deepseek-news.html",
|
||||
SignaturePath: "/tmp/deepseek-news.signature.json",
|
||||
BaselinePath: "/tmp/baseline.signature.json",
|
||||
BaselineInitialized: true,
|
||||
CurrentSignature: deepseekNewsStructureSignature{
|
||||
StructureSHA256: "abc123",
|
||||
},
|
||||
}
|
||||
summary := formatDeepSeekNewsSignatureGuardSummary(result)
|
||||
for _, want := range []string{"source=deepseek-news-signature-guard", "baseline_initialized=true", "structure_sha256=abc123"} {
|
||||
if !strings.Contains(summary, want) {
|
||||
t.Fatalf("summary 缺少 %q,实际: %q", want, summary)
|
||||
}
|
||||
}
|
||||
}
|
||||
196
scripts/deepseek_news_snapshot_lib.go
Normal file
196
scripts/deepseek_news_snapshot_lib.go
Normal file
@@ -0,0 +1,196 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// deepseekNewsStructureSignature is the JSON-serialised fingerprint of one
// fetched news page.
type deepseekNewsStructureSignature struct {
	// ByteSize is the length of the raw page in bytes.
	ByteSize int `json:"byte_size"`
	// SHA256 is the hex SHA-256 of the raw page.
	SHA256 string `json:"sha256"`
	// StructureSHA256 is the hex SHA-256 of the structure digest payload
	// (title, meta description, headings, contains flags).
	StructureSHA256 string `json:"structure_sha256"`
	// Title is the cleaned text of the first <title> element.
	Title string `json:"title"`
	// MetaDescription is the cleaned content of the description meta tag.
	MetaDescription string `json:"meta_description"`
	// Headings lists the distinct, non-empty <h1> texts in document order.
	Headings []string `json:"headings"`
	// Contains records, per needle name, whether the lower-cased page
	// contains that needle.
	Contains map[string]bool `json:"contains"`
	// GeneratedAt is the RFC 3339 time the signature file was written.
	GeneratedAt string `json:"generated_at,omitempty"`
	// SourceURL is the URL recorded when the signature file was written.
	SourceURL string `json:"source_url,omitempty"`
	// SnapshotPath is the snapshot file this signature was computed from.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
||||
|
||||
// deepseekNewsContainsNeedles maps a flag name in the signature's Contains
// map to the substring searched for in the lower-cased page.
var deepseekNewsContainsNeedles = map[string]string{
	"api_docs": "api docs",
	"deepseek": "deepseek",
	"news":     "news",
	"release":  "release",
}
|
||||
|
||||
// Regular expressions used to fingerprint the news page. All are compiled
// once at package initialisation.
var (
	// htmlTagRe matches any single HTML tag, including tags spanning lines.
	htmlTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
	// titleRe captures the inner text of a <title> element.
	titleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
	// metaDescRe captures the content attribute of a description meta tag
	// whose name attribute precedes its content attribute.
	metaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
	// h1Re captures the inner markup of an <h1> element.
	h1Re = regexp.MustCompile(`(?is)<h1[^>]*>(.*?)</h1>`)
)
|
||||
|
||||
func buildDeepSeekNewsStructureSignature(raw string) deepseekNewsStructureSignature {
|
||||
title := firstHTMLMatch(titleRe, raw)
|
||||
meta := firstHTMLMatch(metaDescRe, raw)
|
||||
h1Matches := h1Re.FindAllStringSubmatch(raw, -1)
|
||||
headings := make([]string, 0, len(h1Matches))
|
||||
seen := make(map[string]struct{})
|
||||
for _, match := range h1Matches {
|
||||
if len(match) < 2 {
|
||||
continue
|
||||
}
|
||||
clean := cleanHTMLText(match[1])
|
||||
if clean == "" {
|
||||
continue
|
||||
}
|
||||
if _, exists := seen[clean]; exists {
|
||||
continue
|
||||
}
|
||||
seen[clean] = struct{}{}
|
||||
headings = append(headings, clean)
|
||||
}
|
||||
contains := make(map[string]bool, len(deepseekNewsContainsNeedles))
|
||||
lower := strings.ToLower(raw)
|
||||
for key, needle := range deepseekNewsContainsNeedles {
|
||||
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
||||
}
|
||||
signature := deepseekNewsStructureSignature{
|
||||
ByteSize: len([]byte(raw)),
|
||||
SHA256: deepseekNewsSHA256Hex(raw),
|
||||
Title: title,
|
||||
MetaDescription: meta,
|
||||
Headings: headings,
|
||||
Contains: contains,
|
||||
}
|
||||
signature.StructureSHA256 = deepseekNewsSHA256Hex(deepseekNewsStructureDigestPayload(signature))
|
||||
return signature
|
||||
}
|
||||
|
||||
func writeDeepSeekNewsSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekNewsStructureSignature, error) {
|
||||
if strings.TrimSpace(snapshotPath) == "" {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("snapshot path is required")
|
||||
}
|
||||
if strings.TrimSpace(signaturePath) == "" {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("signature path is required")
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
||||
}
|
||||
signature := buildDeepSeekNewsStructureSignature(raw)
|
||||
signature.GeneratedAt = now.Format(time.RFC3339)
|
||||
signature.SourceURL = sourceURL
|
||||
signature.SnapshotPath = snapshotPath
|
||||
payload, err := json.MarshalIndent(signature, "", " ")
|
||||
if err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
// resolveDeepSeekNewsSnapshotPaths fills in missing snapshot and signature
// paths and returns them in that order.
//
//   - A blank snapshotDir becomes logs/deepseek-news-snapshots.
//   - A blank snapshotPath becomes
//     <snapshotDir>/deepseek-news-<yyyymmdd-hhmmss>.html, timestamped from now.
//   - A blank signaturePath becomes the snapshot path with its extension
//     replaced by ".signature.json".
//
// Non-blank arguments are returned unchanged.
func resolveDeepSeekNewsSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, now time.Time) (string, string) {
	isBlank := func(s string) bool { return strings.TrimSpace(s) == "" }

	if isBlank(snapshotDir) {
		snapshotDir = filepath.Join("logs", "deepseek-news-snapshots")
	}
	if isBlank(snapshotPath) {
		stem := fmt.Sprintf("deepseek-news-%s", now.Format("20060102-150405"))
		snapshotPath = filepath.Join(snapshotDir, stem) + ".html"
	}
	if isBlank(signaturePath) {
		// For a generated snapshot path this strips the ".html" just added,
		// so both files share the same timestamped stem.
		signaturePath = strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath)) + ".signature.json"
	}
	return snapshotPath, signaturePath
}
|
||||
|
||||
func readDeepSeekNewsStructureSignature(path string) (deepseekNewsStructureSignature, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return deepseekNewsStructureSignature{}, err
|
||||
}
|
||||
var signature deepseekNewsStructureSignature
|
||||
if err := json.Unmarshal(data, &signature); err != nil {
|
||||
return deepseekNewsStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
func hasDeepSeekNewsStructureSignature(signature deepseekNewsStructureSignature) bool {
|
||||
return signature.ByteSize > 0 ||
|
||||
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
||||
strings.TrimSpace(signature.SHA256) != "" ||
|
||||
strings.TrimSpace(signature.Title) != "" ||
|
||||
len(signature.Headings) > 0 ||
|
||||
len(signature.Contains) > 0
|
||||
}
|
||||
|
||||
func deepseekNewsStructureDigestPayload(signature deepseekNewsStructureSignature) string {
|
||||
type containsEntry struct {
|
||||
Name string `json:"name"`
|
||||
Value bool `json:"value"`
|
||||
}
|
||||
keys := make([]string, 0, len(signature.Contains))
|
||||
for key := range signature.Contains {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
entries := make([]containsEntry, 0, len(keys))
|
||||
for _, key := range keys {
|
||||
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
||||
}
|
||||
payload := struct {
|
||||
Title string `json:"title"`
|
||||
MetaDescription string `json:"meta_description"`
|
||||
Headings []string `json:"headings"`
|
||||
Contains []containsEntry `json:"contains"`
|
||||
}{
|
||||
Title: signature.Title,
|
||||
MetaDescription: signature.MetaDescription,
|
||||
Headings: signature.Headings,
|
||||
Contains: entries,
|
||||
}
|
||||
bytes, _ := json.Marshal(payload)
|
||||
return string(bytes)
|
||||
}
|
||||
|
||||
// deepseekNewsSHA256Hex returns the SHA-256 digest of raw as a lower-case
// hexadecimal string.
func deepseekNewsSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash writes never return an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
func firstHTMLMatch(re *regexp.Regexp, raw string) string {
|
||||
match := re.FindStringSubmatch(raw)
|
||||
if len(match) < 2 {
|
||||
return ""
|
||||
}
|
||||
return cleanHTMLText(match[1])
|
||||
}
|
||||
|
||||
func cleanHTMLText(raw string) string {
|
||||
text := htmlTagRe.ReplaceAllString(raw, " ")
|
||||
text = strings.ReplaceAll(text, "&", "&")
|
||||
text = strings.ReplaceAll(text, " ", " ")
|
||||
text = strings.Join(strings.Fields(text), " ")
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
57
scripts/deepseek_pricing_signature_guard.go
Normal file
57
scripts/deepseek_pricing_signature_guard.go
Normal file
@@ -0,0 +1,57 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
func main() {
|
||||
loadSubscriptionImportEnv()
|
||||
|
||||
var url string
|
||||
var fixture string
|
||||
var snapshotDir string
|
||||
var baselinePath string
|
||||
var timeoutSeconds int
|
||||
var allowBootstrap bool
|
||||
var sourceKey string
|
||||
var snapshotBase string
|
||||
|
||||
flag.StringVar(&sourceKey, "source-key", "deepseek_pricing_signature", "审计 source_key")
|
||||
flag.StringVar(&snapshotBase, "snapshot-base", "deepseek-pricing", "snapshot 文件名前缀")
|
||||
flag.StringVar(&url, "url", defaultDeepSeekPricingFetchURL, "DeepSeek 官方价格页")
|
||||
flag.StringVar(&fixture, "fixture", "", "DeepSeek 价格页样例文件")
|
||||
flag.StringVar(&snapshotDir, "snapshot-dir", "", "DeepSeek pricing snapshot 输出目录")
|
||||
flag.StringVar(&baselinePath, "baseline-path", "", "DeepSeek pricing 结构基线签名路径")
|
||||
flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
|
||||
flag.BoolVar(&allowBootstrap, "allow-bootstrap", true, "当 baseline 缺失时自动初始化")
|
||||
flag.Parse()
|
||||
|
||||
now := time.Now()
|
||||
cfg := deepseekPricingSignatureGuardConfig{
|
||||
SourceKey: sourceKey,
|
||||
URL: url,
|
||||
Fixture: fixture,
|
||||
SnapshotDir: snapshotDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Duration(timeoutSeconds) * time.Second,
|
||||
AllowBootstrap: allowBootstrap,
|
||||
SnapshotBase: snapshotBase,
|
||||
}
|
||||
result, err := runDeepSeekPricingSignatureGuard(cfg, now)
|
||||
if auditErr := persistDeepSeekPricingSignatureAuditIfConfigured(cfg, result, now, err); auditErr != nil {
|
||||
fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard audit: %v\n", auditErr)
|
||||
if err == nil {
|
||||
err = auditErr
|
||||
}
|
||||
}
|
||||
fmt.Println(formatDeepSeekPricingSignatureGuardSummary(sourceKey, result))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "deepseek_pricing_signature_guard: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
132
scripts/deepseek_pricing_signature_guard_lib.go
Normal file
132
scripts/deepseek_pricing_signature_guard_lib.go
Normal file
@@ -0,0 +1,132 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// deepseekPricingSignatureGuardConfig carries the inputs of one pricing
// signature guard run.
type deepseekPricingSignatureGuardConfig struct {
	// SourceKey identifies the guarded source in audit records and in the
	// error messages produced by the run.
	SourceKey string
	// URL is the page address handed to the fetch helper and stored in the
	// signature as its source URL.
	URL string
	// Fixture is handed to the fetch helper next to URL.
	// NOTE(review): presumably a local sample file used instead of a live
	// request — confirm against fetchSubscriptionPage.
	Fixture string
	// SnapshotDir is the directory for snapshot and signature files; when
	// empty the run derives logs/<SnapshotBase>-snapshots.
	SnapshotDir string
	// BaselinePath is the baseline signature file; when empty the run uses
	// baseline.signature.json inside the snapshot directory.
	BaselinePath string
	// Timeout is applied to the HTTP client used for fetching.
	Timeout time.Duration
	// AllowBootstrap lets a run create the baseline from the current
	// signature when the baseline file does not exist.
	AllowBootstrap bool
	// SnapshotBase is the name prefix passed to the snapshot path resolver
	// and used for the default snapshot directory.
	SnapshotBase string
	// SourceKindLabel is not read by the guard run or the audit record
	// builder. NOTE(review): confirm whether another caller uses it.
	SourceKindLabel string
}
|
||||
|
||||
type deepseekPricingSignatureGuardResult struct {
|
||||
SnapshotPath string
|
||||
SignaturePath string
|
||||
BaselinePath string
|
||||
DriftDetected bool
|
||||
BaselineInitialized bool
|
||||
PreviousBaselineHash string
|
||||
CurrentSignature deepseekPricingStructureSignature
|
||||
}
|
||||
|
||||
// Default page addresses for the DeepSeek pricing guards.
const (
	// defaultDeepSeekPricingFetchURL is the platform pricing page.
	defaultDeepSeekPricingFetchURL = "https://platform.deepseek.com/pricing"
	// defaultDeepSeekAPIPricingFetchURL is the API pricing documentation page.
	defaultDeepSeekAPIPricingFetchURL = "https://platform.deepseek.com/docs/api-pricing"
)
|
||||
|
||||
func runDeepSeekPricingSignatureGuard(cfg deepseekPricingSignatureGuardConfig, now time.Time) (deepseekPricingSignatureGuardResult, error) {
|
||||
snapshotDir := cfg.SnapshotDir
|
||||
if snapshotDir == "" {
|
||||
snapshotDir = filepath.Join("logs", cfg.SnapshotBase+"-snapshots")
|
||||
}
|
||||
if err := os.MkdirAll(snapshotDir, 0o755); err != nil {
|
||||
return deepseekPricingSignatureGuardResult{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
snapshotPath, signaturePath := resolveDeepSeekPricingSnapshotPaths("", "", snapshotDir, cfg.SnapshotBase, now)
|
||||
baselinePath := cfg.BaselinePath
|
||||
if baselinePath == "" {
|
||||
baselinePath = filepath.Join(snapshotDir, "baseline.signature.json")
|
||||
}
|
||||
client := &http.Client{Timeout: cfg.Timeout}
|
||||
raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
|
||||
if err != nil {
|
||||
return deepseekPricingSignatureGuardResult{}, err
|
||||
}
|
||||
current, err := writeDeepSeekPricingSnapshotArtifacts(raw, cfg.URL, snapshotPath, signaturePath, now)
|
||||
if err != nil {
|
||||
return deepseekPricingSignatureGuardResult{}, err
|
||||
}
|
||||
result := deepseekPricingSignatureGuardResult{
|
||||
SnapshotPath: snapshotPath,
|
||||
SignaturePath: signaturePath,
|
||||
BaselinePath: baselinePath,
|
||||
CurrentSignature: current,
|
||||
}
|
||||
previous, err := readDeepSeekPricingStructureSignature(baselinePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if !cfg.AllowBootstrap {
|
||||
return result, fmt.Errorf("%s baseline missing: %s", cfg.SourceKey, baselinePath)
|
||||
}
|
||||
if err := copyFileCommon(signaturePath, baselinePath); err != nil {
|
||||
return result, fmt.Errorf("initialize baseline: %w", err)
|
||||
}
|
||||
result.BaselineInitialized = true
|
||||
return result, nil
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
result.PreviousBaselineHash = previous.StructureSHA256
|
||||
if previous.StructureSHA256 != current.StructureSHA256 {
|
||||
result.DriftDetected = true
|
||||
return result, fmt.Errorf(
|
||||
"%s structure drift detected: baseline=%s current=%s baseline_path=%s signature_path=%s snapshot_path=%s",
|
||||
cfg.SourceKey, previous.StructureSHA256, current.StructureSHA256, baselinePath, signaturePath, snapshotPath,
|
||||
)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func formatDeepSeekPricingSignatureGuardSummary(sourceKey string, result deepseekPricingSignatureGuardResult) string {
|
||||
return fmt.Sprintf(
|
||||
"source=%s drift=%t baseline_initialized=%t structure_sha256=%s previous_baseline_sha256=%s snapshot_out=%s signature_out=%s baseline_path=%s",
|
||||
sourceKey,
|
||||
result.DriftDetected,
|
||||
result.BaselineInitialized,
|
||||
result.CurrentSignature.StructureSHA256,
|
||||
emptyIfBlank(result.PreviousBaselineHash),
|
||||
result.SnapshotPath,
|
||||
result.SignaturePath,
|
||||
result.BaselinePath,
|
||||
)
|
||||
}
|
||||
|
||||
func buildDeepSeekPricingSignatureAuditRecord(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) officialImportSignatureAuditRecord {
|
||||
record := officialImportSignatureAuditRecord{
|
||||
SourceKey: cfg.SourceKey,
|
||||
CheckedAt: checkedAt,
|
||||
Status: officialImportSignatureAuditStatus(result.DriftDetected, result.BaselineInitialized, runErr),
|
||||
DriftDetected: result.DriftDetected,
|
||||
BaselineInitialized: result.BaselineInitialized,
|
||||
SourceURL: strings.TrimSpace(cfg.URL),
|
||||
FixturePath: strings.TrimSpace(cfg.Fixture),
|
||||
SnapshotPath: strings.TrimSpace(result.SnapshotPath),
|
||||
SignaturePath: strings.TrimSpace(result.SignaturePath),
|
||||
BaselinePath: strings.TrimSpace(result.BaselinePath),
|
||||
StructureSHA256: strings.TrimSpace(result.CurrentSignature.StructureSHA256),
|
||||
PreviousStructureSHA256: strings.TrimSpace(result.PreviousBaselineHash),
|
||||
ByteSize: result.CurrentSignature.ByteSize,
|
||||
ErrorMessage: errorMessageText(runErr),
|
||||
}
|
||||
if hasDeepSeekPricingStructureSignature(result.CurrentSignature) {
|
||||
signatureCopy := result.CurrentSignature
|
||||
record.SignaturePayload = &signatureCopy
|
||||
}
|
||||
return record
|
||||
}
|
||||
|
||||
func persistDeepSeekPricingSignatureAuditIfConfigured(cfg deepseekPricingSignatureGuardConfig, result deepseekPricingSignatureGuardResult, checkedAt time.Time, runErr error) error {
|
||||
return persistOfficialImportSignatureAuditIfConfigured(buildDeepSeekPricingSignatureAuditRecord(cfg, result, checkedAt, runErr))
|
||||
}
|
||||
96
scripts/deepseek_pricing_signature_guard_test.go
Normal file
96
scripts/deepseek_pricing_signature_guard_test.go
Normal file
@@ -0,0 +1,96 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRunDeepSeekPricingSignatureGuardInitializesBaseline(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||
fixture := filepath.Join(tempDir, "pricing.html")
|
||||
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
|
||||
t.Fatalf("写入 fixture 失败: %v", err)
|
||||
}
|
||||
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
URL: defaultDeepSeekPricingFetchURL,
|
||||
Fixture: fixture,
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: true,
|
||||
SnapshotBase: "deepseek-pricing",
|
||||
}, time.Date(2026, 5, 27, 22, 0, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err != nil {
|
||||
t.Fatalf("runDeepSeekPricingSignatureGuard 返回错误: %v", err)
|
||||
}
|
||||
if !result.BaselineInitialized {
|
||||
t.Fatal("期望初始化 baseline")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRunDeepSeekPricingSignatureGuardDetectsDrift(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
baselinePath := filepath.Join(tempDir, "baseline.signature.json")
|
||||
fixture := filepath.Join(tempDir, "pricing.html")
|
||||
if err := os.WriteFile(fixture, []byte(`<html><head><title>DeepSeek</title><meta name="description" content="Join DeepSeek API platform"><meta name="commit-id" content="abc123"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing</body></html>`), 0o644); err != nil {
|
||||
t.Fatalf("写入 fixture 失败: %v", err)
|
||||
}
|
||||
_, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
URL: defaultDeepSeekPricingFetchURL,
|
||||
Fixture: fixture,
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: true,
|
||||
SnapshotBase: "deepseek-pricing",
|
||||
}, time.Date(2026, 5, 27, 22, 1, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err != nil {
|
||||
t.Fatalf("初始化 baseline 失败: %v", err)
|
||||
}
|
||||
driftFixture := filepath.Join(tempDir, "pricing-drift.html")
|
||||
if err := os.WriteFile(driftFixture, []byte(`<html><head><title>DeepSeek Pricing</title><meta name="description" content="Updated DeepSeek pricing"><meta name="commit-id" content="def456"><meta property="og:url" content="https://platform.deepseek.com/pricing"></head><body>pricing update</body></html>`), 0o644); err != nil {
|
||||
t.Fatalf("写入 drift fixture 失败: %v", err)
|
||||
}
|
||||
result, err := runDeepSeekPricingSignatureGuard(deepseekPricingSignatureGuardConfig{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
URL: defaultDeepSeekPricingFetchURL,
|
||||
Fixture: driftFixture,
|
||||
SnapshotDir: tempDir,
|
||||
BaselinePath: baselinePath,
|
||||
Timeout: time.Second,
|
||||
AllowBootstrap: false,
|
||||
SnapshotBase: "deepseek-pricing",
|
||||
}, time.Date(2026, 5, 27, 22, 2, 0, 0, time.FixedZone("CST", 8*3600)))
|
||||
if err == nil {
|
||||
t.Fatal("期望结构漂移时报错")
|
||||
}
|
||||
if !result.DriftDetected {
|
||||
t.Fatal("期望 driftDetected=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormatDeepSeekPricingSignatureGuardSummary(t *testing.T) {
|
||||
result := deepseekPricingSignatureGuardResult{
|
||||
SnapshotPath: "/tmp/deepseek-pricing.html",
|
||||
SignaturePath: "/tmp/deepseek-pricing.signature.json",
|
||||
BaselinePath: "/tmp/baseline.signature.json",
|
||||
BaselineInitialized: true,
|
||||
CurrentSignature: deepseekPricingStructureSignature{
|
||||
StructureSHA256: "abc123",
|
||||
},
|
||||
}
|
||||
summary := formatDeepSeekPricingSignatureGuardSummary("deepseek_pricing_signature", result)
|
||||
for _, want := range []string{"source=deepseek_pricing_signature", "baseline_initialized=true", "structure_sha256=abc123"} {
|
||||
if !strings.Contains(summary, want) {
|
||||
t.Fatalf("summary 缺少 %q,实际: %q", want, summary)
|
||||
}
|
||||
}
|
||||
}
|
||||
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
183
scripts/deepseek_pricing_snapshot_lib.go
Normal file
@@ -0,0 +1,183 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// deepseekPricingStructureSignature is the JSON-serialisable fingerprint of one
// fetched DeepSeek pricing page.
type deepseekPricingStructureSignature struct {
	// Fingerprint of the raw page bytes.
	ByteSize int    `json:"byte_size"`
	SHA256   string `json:"sha256"`

	// Hash over the structural fields below (title, meta description,
	// commit id, canonical URL and the contains flags).
	StructureSHA256 string `json:"structure_sha256"`

	// Values extracted from the page markup.
	Title           string          `json:"title"`
	MetaDescription string          `json:"meta_description"`
	CommitID        string          `json:"commit_id"`
	CanonicalURL    string          `json:"canonical_url"`
	Contains        map[string]bool `json:"contains"`

	// Provenance metadata; omitted from JSON when empty.
	GeneratedAt  string `json:"generated_at,omitempty"`
	SourceURL    string `json:"source_url,omitempty"`
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
||||
|
||||
// deepseekPricingContainsNeedles maps a signature flag name to the substring
// whose presence in the page is recorded under that flag.
var deepseekPricingContainsNeedles = map[string]string{
	"api_docs":  "api",
	"deepseek":  "deepseek",
	"developer": "developer resources",
	"platform":  "platform",
	"pricing":   "pricing",
}
|
||||
|
||||
// Patterns used to pull structural fields out of the pricing page HTML.
// The three <meta> patterns only match when the identifying attribute
// (name / property) appears before the content attribute.
var (
	// deepseekPricingTitleRe captures the inner text of <title>.
	deepseekPricingTitleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
	// deepseekPricingMetaDescRe captures the content of <meta name="description">.
	deepseekPricingMetaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingCommitRe captures the content of <meta name="commit-id">.
	deepseekPricingCommitRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingCanonicalRe captures the content of <meta property="og:url">.
	deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingHTMLTagRe matches any single HTML tag.
	deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
)
|
||||
|
||||
func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature {
|
||||
title := firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw)
|
||||
meta := firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw)
|
||||
commitID := firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw)
|
||||
canonicalURL := firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw)
|
||||
contains := make(map[string]bool, len(deepseekPricingContainsNeedles))
|
||||
lower := strings.ToLower(raw)
|
||||
for key, needle := range deepseekPricingContainsNeedles {
|
||||
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
||||
}
|
||||
signature := deepseekPricingStructureSignature{
|
||||
ByteSize: len([]byte(raw)),
|
||||
SHA256: deepseekPricingSHA256Hex(raw),
|
||||
Title: title,
|
||||
MetaDescription: meta,
|
||||
CommitID: commitID,
|
||||
CanonicalURL: canonicalURL,
|
||||
Contains: contains,
|
||||
}
|
||||
signature.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(signature))
|
||||
return signature
|
||||
}
|
||||
|
||||
func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) {
|
||||
if strings.TrimSpace(snapshotPath) == "" {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
||||
}
|
||||
if strings.TrimSpace(signaturePath) == "" {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
||||
}
|
||||
signature := buildDeepSeekPricingStructureSignature(raw)
|
||||
signature.GeneratedAt = now.Format(time.RFC3339)
|
||||
signature.SourceURL = sourceURL
|
||||
signature.SnapshotPath = snapshotPath
|
||||
payload, err := json.MarshalIndent(signature, "", " ")
|
||||
if err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
||||
}
|
||||
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
// resolveDeepSeekPricingSnapshotPaths fills in whichever of the snapshot and
// signature paths the caller left blank.
//
// A blank snapshotPath becomes "<snapshotDir>/<baseName>-<YYYYMMDD-HHMMSS>.html",
// where a blank snapshotDir defaults to "logs/<baseName>-snapshots". A blank
// signaturePath becomes the snapshot path with its extension replaced by
// ".signature.json". Paths supplied by the caller are returned unchanged.
func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	needSnapshot := strings.TrimSpace(snapshotPath) == ""
	needSignature := strings.TrimSpace(signaturePath) == ""

	if needSnapshot {
		dir := snapshotDir
		if strings.TrimSpace(dir) == "" {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stem := baseName + "-" + now.Format("20060102-150405")
		snapshotPath = filepath.Join(dir, stem) + ".html"
	}

	if needSignature {
		// Derive the signature file from the (possibly just generated) snapshot path.
		withoutExt := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = withoutExt + ".signature.json"
	}
	return snapshotPath, signaturePath
}
|
||||
|
||||
func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return deepseekPricingStructureSignature{}, err
|
||||
}
|
||||
var signature deepseekPricingStructureSignature
|
||||
if err := json.Unmarshal(data, &signature); err != nil {
|
||||
return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
||||
}
|
||||
return signature, nil
|
||||
}
|
||||
|
||||
func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool {
|
||||
return signature.ByteSize > 0 ||
|
||||
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
||||
strings.TrimSpace(signature.SHA256) != "" ||
|
||||
strings.TrimSpace(signature.Title) != "" ||
|
||||
strings.TrimSpace(signature.CommitID) != "" ||
|
||||
len(signature.Contains) > 0
|
||||
}
|
||||
|
||||
func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string {
|
||||
type containsEntry struct {
|
||||
Name string `json:"name"`
|
||||
Value bool `json:"value"`
|
||||
}
|
||||
keys := make([]string, 0, len(signature.Contains))
|
||||
for key := range signature.Contains {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
entries := make([]containsEntry, 0, len(keys))
|
||||
for _, key := range keys {
|
||||
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
||||
}
|
||||
payload := struct {
|
||||
Title string `json:"title"`
|
||||
MetaDescription string `json:"meta_description"`
|
||||
CommitID string `json:"commit_id"`
|
||||
CanonicalURL string `json:"canonical_url"`
|
||||
Contains []containsEntry `json:"contains"`
|
||||
}{
|
||||
Title: signature.Title,
|
||||
MetaDescription: signature.MetaDescription,
|
||||
CommitID: signature.CommitID,
|
||||
CanonicalURL: signature.CanonicalURL,
|
||||
Contains: entries,
|
||||
}
|
||||
bytes, _ := json.Marshal(payload)
|
||||
return string(bytes)
|
||||
}
|
||||
|
||||
// deepseekPricingSHA256Hex returns the lowercase hex SHA-256 digest of raw.
func deepseekPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	hasher.Write([]byte(raw)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
||||
|
||||
func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string {
|
||||
match := re.FindStringSubmatch(raw)
|
||||
if len(match) < 2 {
|
||||
return ""
|
||||
}
|
||||
text := deepseekPricingHTMLTagRe.ReplaceAllString(match[1], " ")
|
||||
text = strings.ReplaceAll(text, "&", "&")
|
||||
text = strings.ReplaceAll(text, " ", " ")
|
||||
text = strings.Join(strings.Fields(text), " ")
|
||||
return strings.TrimSpace(text)
|
||||
}
|
||||
@@ -154,19 +154,14 @@ func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) err
|
||||
}
|
||||
|
||||
func buildIntradayQueries(date string, providerLimit int) []string {
|
||||
providers := []string{
|
||||
"OpenAI", "Anthropic", "Google Gemini", "xAI", "DeepSeek",
|
||||
"DashScope", "Qwen", "智谱", "百度文心", "腾讯混元", "火山方舟", "MiniMax",
|
||||
}
|
||||
keywords := []string{"pricing release announcement", "模型 降价 发布 活动"}
|
||||
if providerLimit > 0 && providerLimit < len(providers) {
|
||||
providers = providers[:providerLimit]
|
||||
}
|
||||
queries := make([]string, 0, len(providers)*len(keywords))
|
||||
for _, provider := range providers {
|
||||
for _, keyword := range keywords {
|
||||
queries = append(queries, strings.TrimSpace(date+" "+provider+" "+keyword))
|
||||
queries := []string{
|
||||
"site:platform.deepseek.com DeepSeek pricing",
|
||||
"site:api-docs.deepseek.com DeepSeek release news",
|
||||
"site:docs.anthropic.com Claude Sonnet 4 announcement",
|
||||
"site:openrouter.ai OpenRouter models",
|
||||
}
|
||||
if providerLimit > 0 && providerLimit < len(queries) {
|
||||
return queries[:providerLimit]
|
||||
}
|
||||
return queries
|
||||
}
|
||||
@@ -217,8 +212,18 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m
|
||||
Status: "candidate",
|
||||
VerificationConfidence: "candidate",
|
||||
}
|
||||
matchedSearch := false
|
||||
filteredURLs := make([]string, 0, len(candidate.CandidateURLs))
|
||||
for _, url := range candidate.CandidateURLs {
|
||||
if searchRecord, ok := searchIndex[url]; ok {
|
||||
searchRecord, ok := searchIndex[url]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if !searchRecordMatchesDate(searchRecord, date) {
|
||||
continue
|
||||
}
|
||||
matchedSearch = true
|
||||
filteredURLs = append(filteredURLs, url)
|
||||
candidate.DiscoverySource = "web_search+llm"
|
||||
candidate.DiscoveryQuery = searchRecord.Title
|
||||
candidate.DiscoveryEvidence["search_record"] = searchRecord
|
||||
@@ -232,7 +237,11 @@ func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex m
|
||||
candidate.Summary = strings.TrimSpace(searchRecord.Summary)
|
||||
}
|
||||
}
|
||||
if !matchedSearch {
|
||||
candidate.CandidateURLs = nil
|
||||
return candidate
|
||||
}
|
||||
candidate.CandidateURLs = dedupeStrings(filteredURLs)
|
||||
return candidate
|
||||
}
|
||||
|
||||
@@ -294,6 +303,36 @@ func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string {
|
||||
}, "|")
|
||||
}
|
||||
|
||||
func searchRecordMatchesDate(record intradaySearchRecord, date string) bool {
|
||||
published := strings.TrimSpace(record.PublishedAt)
|
||||
if published == "" {
|
||||
return false
|
||||
}
|
||||
if ts, ok := parseSearchPublishedAt(published); ok {
|
||||
return ts == date
|
||||
}
|
||||
return strings.Contains(published, date)
|
||||
}
|
||||
|
||||
// parseSearchPublishedAt converts a search-result timestamp to "2006-01-02".
//
// It accepts RFC 3339, a bare date, and RFC 1123-style dates with one- or
// two-digit days. If none of those parse, Chinese weekday ("周一") and month
// ("5月") names are translated to their English abbreviations and the RFC
// 1123-style layouts are tried again. The second result is false when no layout
// matches.
func parseSearchPublishedAt(value string) (string, bool) {
	const (
		dayOnly       = "2006-01-02"
		rfc1123Padded = "Mon, 02 Jan 2006 15:04:05 MST"
		rfc1123Short  = "Mon, 2 Jan 2006 15:04:05 MST"
	)

	// firstParse tries each layout in order and formats the first success.
	firstParse := func(input string, layouts ...string) (string, bool) {
		for i := range layouts {
			parsed, err := time.Parse(layouts[i], input)
			if err != nil {
				continue
			}
			return parsed.Format(dayOnly), true
		}
		return "", false
	}

	if day, ok := firstParse(value, time.RFC3339, dayOnly, rfc1123Padded, rfc1123Short); ok {
		return day, true
	}

	translated := strings.NewReplacer(
		"周一", "Mon", "周二", "Tue", "周三", "Wed", "周四", "Thu", "周五", "Fri", "周六", "Sat", "周日", "Sun",
		"1月", "Jan", "2月", "Feb", "3月", "Mar", "4月", "Apr", "5月", "May", "6月", "Jun",
		"7月", "Jul", "8月", "Aug", "9月", "Sep", "10月", "Oct", "11月", "Nov", "12月", "Dec",
	).Replace(value)
	return firstParse(translated, rfc1123Short, rfc1123Padded)
}
|
||||
|
||||
|
||||
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
|
||||
eventTypeCounts := make(map[string]int)
|
||||
|
||||
@@ -50,6 +50,7 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
||||
Summary: "Search summary",
|
||||
URL: "https://openai.example.com/news/gpt-5-6-pricing",
|
||||
Provider: "OpenAI",
|
||||
PublishedAt: "2026-05-25",
|
||||
}}
|
||||
llmRecords := []intradayLLMRecord{
|
||||
{
|
||||
@@ -80,6 +81,29 @@ func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeIntradayCandidatesDropsOutdatedSearchMatches(t *testing.T) {
|
||||
searchRecords := []intradaySearchRecord{{
|
||||
Title: "Old DeepSeek pricing article",
|
||||
Summary: "Yesterday record",
|
||||
URL: "https://deepseek.example.com/pricing",
|
||||
Provider: "DeepSeek",
|
||||
PublishedAt: "2026-05-24",
|
||||
}}
|
||||
llmRecords := []intradayLLMRecord{{
|
||||
EventType: "price_cut",
|
||||
ProviderName: "DeepSeek",
|
||||
ModelName: "DeepSeek-V4-Flash",
|
||||
ProviderCountry: "CN",
|
||||
Title: "DeepSeek V4 Flash price cut",
|
||||
Summary: "Should be dropped because search evidence is stale",
|
||||
CandidateURLs: []string{"https://deepseek.example.com/pricing"},
|
||||
}}
|
||||
candidates := normalizeIntradayCandidates("2026-05-25", searchRecords, llmRecords)
|
||||
if len(candidates) != 0 {
|
||||
t.Fatalf("旧闻搜索结果不应进入候选池, got=%d", len(candidates))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
||||
llmRecords := []intradayLLMRecord{{
|
||||
EventType: "promo_campaign",
|
||||
@@ -94,6 +118,13 @@ func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchRecordMatchesLocalizedBingDate(t *testing.T) {
|
||||
record := intradaySearchRecord{PublishedAt: "周一, 25 5月 2026 14:08:00 GMT"}
|
||||
if !searchRecordMatchesDate(record, "2026-05-25") {
|
||||
t.Fatal("应识别本地化 Bing pubDate 为当天")
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
|
||||
if err := validateIntradayProviderConfig("search", intradayProviderConfig{Mode: "command_json"}); err == nil {
|
||||
t.Fatal("缺少 command 时应报错")
|
||||
|
||||
@@ -95,6 +95,10 @@ type materializeDailySignalsConfig struct {
|
||||
var signalLogger *slog.Logger
|
||||
|
||||
const (
	// signalUSDToCNY is a fixed USD→CNY conversion rate.
	signalUSDToCNY = 7.25

	// Official DeepSeek page URLs attached to signature-drift signal events.
	defaultDeepSeekNewsSignalURL       = "https://api-docs.deepseek.com/news/news250120"
	defaultDeepSeekPricingSignalURL    = "https://platform.deepseek.com/pricing"
	defaultDeepSeekAPIPricingSignalURL = "https://platform.deepseek.com/docs/api-pricing"
)
|
||||
|
||||
|
||||
func init() {
|
||||
signalLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||
@@ -373,6 +377,11 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error)
|
||||
return nil, err
|
||||
}
|
||||
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
|
||||
deepseekDriftEvents, err := loadDeepSeekNewsDriftSignalEvents(db)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
events = mergeVerifiedDiscoveryEvents(events, deepseekDriftEvents)
|
||||
|
||||
sort.Slice(events, func(i, j int) bool {
|
||||
if events[i].Priority != events[j].Priority {
|
||||
@@ -954,6 +963,110 @@ func firstString(values []string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func loadDeepSeekNewsDriftSignalEvents(db *sql.DB) ([]signalModelEvent, error) {
|
||||
return loadDeepSeekSignatureSignalEvents(db, []deepseekSignatureEventConfig{
|
||||
{
|
||||
SourceKey: "deepseek_news_signature",
|
||||
ModelName: "DeepSeek 官方新闻页",
|
||||
SourceKindLabel: "官方新闻页结构变化",
|
||||
PrimaryURL: defaultDeepSeekNewsSignalURL,
|
||||
Audience: "适合需要尽快复查 DeepSeek 路线图与默认选型的团队",
|
||||
EvidenceTemplate: "DeepSeek 官方新闻页结构签名发生变化:sha=%s previous=%s",
|
||||
Baseline: "官方新闻页结构漂移",
|
||||
Summary: "DeepSeek 官方新闻页结构发生变化,需优先确认是否出现新发布或路线图更新。",
|
||||
Priority: 117,
|
||||
},
|
||||
{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
ModelName: "DeepSeek 官方价格页",
|
||||
SourceKindLabel: "官方价格页结构变化",
|
||||
PrimaryURL: defaultDeepSeekPricingSignalURL,
|
||||
Audience: "适合需要尽快复查 DeepSeek 价格策略与默认成本模型的团队",
|
||||
EvidenceTemplate: "DeepSeek 官方价格页结构签名发生变化:sha=%s previous=%s",
|
||||
Baseline: "官方价格页结构漂移",
|
||||
Summary: "DeepSeek 官方价格页结构发生变化,需优先确认是否出现价格策略更新。",
|
||||
Priority: 116,
|
||||
},
|
||||
{
|
||||
SourceKey: "deepseek_api_pricing_signature",
|
||||
ModelName: "DeepSeek API 定价页",
|
||||
SourceKindLabel: "官方 API 定价页结构变化",
|
||||
PrimaryURL: defaultDeepSeekAPIPricingSignalURL,
|
||||
Audience: "适合需要尽快复查 DeepSeek API 定价与预算预期的团队",
|
||||
EvidenceTemplate: "DeepSeek API 定价页结构签名发生变化:sha=%s previous=%s",
|
||||
Baseline: "官方 API 定价页结构漂移",
|
||||
Summary: "DeepSeek API 定价页结构发生变化,需优先确认是否出现定价或套餐更新。",
|
||||
Priority: 115,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// deepseekSignatureEventConfig describes how a drift detected on one audited
// source is presented as a signal event.
type deepseekSignatureEventConfig struct {
	// SourceKey selects the audit rows this config applies to.
	SourceKey string

	// Presentation fields copied onto the generated event.
	ModelName       string
	SourceKindLabel string
	PrimaryURL      string
	Audience        string

	// EvidenceTemplate is a fmt template with two %s verbs: the current
	// structure hash and the previously observed hash.
	EvidenceTemplate string

	Baseline string
	Summary  string
	Priority int
}
|
||||
|
||||
func loadDeepSeekSignatureSignalEvents(db *sql.DB, configs []deepseekSignatureEventConfig) ([]signalModelEvent, error) {
|
||||
if len(configs) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
var events []signalModelEvent
|
||||
for _, cfg := range configs {
|
||||
_, rows, err := queryOfficialImportSignatureAuditWindow(db, 5, cfg.SourceKey, false)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), `relation "official_import_signature_audit_recent_view" does not exist`) ||
|
||||
strings.Contains(err.Error(), `relation "official_import_signature_audit" does not exist`) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
for _, row := range rows {
|
||||
if row.RecentRank != 1 {
|
||||
continue
|
||||
}
|
||||
if event, ok := buildDeepSeekSignatureSignalEvent(row, cfg); ok {
|
||||
events = append(events, event)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return events, nil
|
||||
}
|
||||
|
||||
func buildDeepSeekSignatureSignalEvent(row officialImportSignatureAuditViewRow, cfg deepseekSignatureEventConfig) (signalModelEvent, bool) {
|
||||
if row.SourceKey != cfg.SourceKey || !row.DriftDetected {
|
||||
return signalModelEvent{}, false
|
||||
}
|
||||
updatedAt := row.CheckedAt.Format("2006-01-02 15:04")
|
||||
primarySource := nullStringOrNone(row.SnapshotPath)
|
||||
if primarySource == "none" {
|
||||
primarySource = cfg.PrimaryURL
|
||||
}
|
||||
return signalModelEvent{
|
||||
EventType: "official_release",
|
||||
ModelName: cfg.ModelName,
|
||||
ProviderName: "DeepSeek",
|
||||
OperatorName: "DeepSeek",
|
||||
Audience: cfg.Audience,
|
||||
TrustLabel: "官方来源 / 结构漂移告警",
|
||||
SourceKindLabel: cfg.SourceKindLabel,
|
||||
PrimarySource: primarySource,
|
||||
SourceURL: cfg.PrimaryURL,
|
||||
UpdatedAt: updatedAt,
|
||||
EvidenceDetail: fmt.Sprintf(cfg.EvidenceTemplate, row.StructureSHA256, nullStringOrNone(row.PreviousObservedSHA256)),
|
||||
Baseline: cfg.Baseline,
|
||||
Summary: cfg.Summary,
|
||||
Priority: cfg.Priority,
|
||||
}, true
|
||||
}
|
||||
|
||||
func signalNormalizeIntradayEventType(value string) string {
|
||||
switch strings.TrimSpace(strings.ToLower(value)) {
|
||||
case "price_cut":
|
||||
|
||||
@@ -92,3 +92,108 @@ func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T)
|
||||
t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDeepSeekNewsDriftEvent(t *testing.T) {
|
||||
row := officialImportSignatureAuditViewRow{
|
||||
SourceKey: "deepseek_news_signature",
|
||||
Status: "drift_detected",
|
||||
StructureState: "changed",
|
||||
StructureChanged: true,
|
||||
DriftDetected: true,
|
||||
BaselineInitialized: false,
|
||||
StructureSHA256: "abc123",
|
||||
}
|
||||
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||
SourceKey: "deepseek_news_signature",
|
||||
ModelName: "DeepSeek 官方新闻页",
|
||||
SourceKindLabel: "官方新闻页结构变化",
|
||||
PrimaryURL: defaultDeepSeekNewsSignalURL,
|
||||
Audience: "a",
|
||||
EvidenceTemplate: "news drift %s %s",
|
||||
Baseline: "官方新闻页结构漂移",
|
||||
Summary: "summary",
|
||||
Priority: 117,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("期望为 drift 行生成正式信号事件")
|
||||
}
|
||||
if event.EventType != "official_release" {
|
||||
t.Fatalf("DeepSeek drift 应映射为 official_release, got=%q", event.EventType)
|
||||
}
|
||||
if event.ProviderName != "DeepSeek" || event.ModelName != "DeepSeek 官方新闻页" {
|
||||
t.Fatalf("DeepSeek drift 事件主体错误: %+v", event)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDeepSeekPricingDriftEvent(t *testing.T) {
|
||||
row := officialImportSignatureAuditViewRow{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
Status: "drift_detected",
|
||||
StructureState: "changed",
|
||||
StructureChanged: true,
|
||||
DriftDetected: true,
|
||||
BaselineInitialized: false,
|
||||
StructureSHA256: "pricing123",
|
||||
}
|
||||
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||
SourceKey: "deepseek_pricing_signature",
|
||||
ModelName: "DeepSeek 官方价格页",
|
||||
SourceKindLabel: "官方价格页结构变化",
|
||||
PrimaryURL: defaultDeepSeekPricingSignalURL,
|
||||
Audience: "a",
|
||||
EvidenceTemplate: "pricing drift %s %s",
|
||||
Baseline: "官方价格页结构漂移",
|
||||
Summary: "pricing summary",
|
||||
Priority: 116,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("期望为 pricing drift 行生成正式信号事件")
|
||||
}
|
||||
if event.ModelName != "DeepSeek 官方价格页" || event.SourceKindLabel != "官方价格页结构变化" {
|
||||
t.Fatalf("pricing drift 事件映射错误: %+v", event)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDeepSeekAPIPricingDriftEvent(t *testing.T) {
|
||||
row := officialImportSignatureAuditViewRow{
|
||||
SourceKey: "deepseek_api_pricing_signature",
|
||||
Status: "drift_detected",
|
||||
StructureState: "changed",
|
||||
StructureChanged: true,
|
||||
DriftDetected: true,
|
||||
BaselineInitialized: false,
|
||||
StructureSHA256: "api123",
|
||||
}
|
||||
event, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{
|
||||
SourceKey: "deepseek_api_pricing_signature",
|
||||
ModelName: "DeepSeek API 定价页",
|
||||
SourceKindLabel: "官方 API 定价页结构变化",
|
||||
PrimaryURL: defaultDeepSeekAPIPricingSignalURL,
|
||||
Audience: "a",
|
||||
EvidenceTemplate: "api drift %s %s",
|
||||
Baseline: "官方 API 定价页结构漂移",
|
||||
Summary: "api pricing summary",
|
||||
Priority: 115,
|
||||
})
|
||||
if !ok {
|
||||
t.Fatal("期望为 api pricing drift 行生成正式信号事件")
|
||||
}
|
||||
if event.ModelName != "DeepSeek API 定价页" || event.SourceKindLabel != "官方 API 定价页结构变化" {
|
||||
t.Fatalf("api pricing drift 事件映射错误: %+v", event)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDeepSeekNewsDriftEventSkipsBaselineOnly(t *testing.T) {
|
||||
row := officialImportSignatureAuditViewRow{
|
||||
SourceKey: "deepseek_news_signature",
|
||||
Status: "baseline_initialized",
|
||||
StructureState: "initial",
|
||||
StructureChanged: false,
|
||||
DriftDetected: false,
|
||||
BaselineInitialized: true,
|
||||
StructureSHA256: "abc123",
|
||||
}
|
||||
if _, ok := buildDeepSeekSignatureSignalEvent(row, deepseekSignatureEventConfig{SourceKey: "deepseek_news_signature"}); ok {
|
||||
t.Fatal("baseline 初始化不应直接进入正式信号")
|
||||
}
|
||||
}
|
||||
|
||||
59
scripts/real_intraday_llm_provider.py
Normal file
59
scripts/real_intraday_llm_provider.py
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
"""Intraday discovery LLM provider.

Reads a JSON request ({"date": ..., "search_results": [...]}) from stdin, asks
an OpenRouter-hosted model to turn the search results into candidate events,
and prints a JSON array of candidates to stdout. Exits with status 1 when
OPENROUTER_API_KEY is not set.
"""
import json
import os
import sys
import urllib.request


def extract_candidates(parsed):
    """Return the candidate list contained in the model's decoded JSON reply.

    The request uses JSON-object mode, so the model usually wraps the array in
    an object. 'items' and 'candidates' are preferred; otherwise the first
    list-valued field is used so that a differently named wrapper key does not
    silently discard every candidate. Anything else yields an empty list.
    """
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        for key in ('items', 'candidates'):
            if isinstance(parsed.get(key), list):
                return parsed[key]
        for value in parsed.values():
            if isinstance(value, list):
                return value
    return []


api_key = os.environ.get('OPENROUTER_API_KEY', '').strip()
if not api_key:
    print('OPENROUTER_API_KEY missing', file=sys.stderr)
    raise SystemExit(1)

payload = sys.stdin.read()
# Empty stdin is treated as an empty request object.
request_data = json.loads(payload or '{}')
search_results = request_data.get('search_results', [])
date = request_data.get('date', '')

prompt = {
    "role": "user",
    "content": (
        "你是大模型情报候选发现器。根据给定搜索结果,只输出 JSON 数组,不要输出 markdown。"
        "每项字段必须包含 event_type, provider_name, model_name, provider_country, title, summary, candidate_urls。"
        "event_type 只能是 price_cut, price_increase, official_release, promo_campaign, leak_or_rumor, unknown。"
        "只有当搜索结果明确像是当天消息时才输出。没有 URL 的候选不要输出。"
        f"\n日期: {date}\n搜索结果:\n" + json.dumps(search_results, ensure_ascii=False)
    )
}

req_body = json.dumps({
    "model": "deepseek/deepseek-v4-flash",
    "messages": [prompt],
    "temperature": 0,
    "max_tokens": 1200,
    "response_format": {"type": "json_object"}
}).encode('utf-8')
req = urllib.request.Request(
    'https://openrouter.ai/api/v1/chat/completions',
    data=req_body,
    headers={
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
        'HTTP-Referer': 'https://github.com/phamnazage-jpg/llm-intelligence',
        'X-Title': 'llm-intelligence intraday discovery'
    },
    method='POST'
)
with urllib.request.urlopen(req, timeout=60) as resp:
    data = json.loads(resp.read().decode('utf-8'))

content = data['choices'][0]['message']['content']
parsed = extract_candidates(json.loads(content))
print(json.dumps(parsed, ensure_ascii=False))
|
||||
65
scripts/real_intraday_search_provider.py
Normal file
65
scripts/real_intraday_search_provider.py
Normal file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
import email.utils
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
def normalize_pubdate(value: str) -> str:
    """Normalise an RFC 2822-style date string to 'YYYY-MM-DD'.

    Returns '' for an empty value. If the value cannot be parsed, it is
    returned unchanged.
    """
    if value:
        try:
            return email.utils.parsedate_to_datetime(value).strftime('%Y-%m-%d')
        except Exception:
            # Unparseable dates are passed through verbatim.
            return value
    return ''
|
||||
|
||||
|
||||
def infer_provider(title: str, link: str) -> str:
    """Guess the provider name from a result's title and link.

    Keywords are checked in a fixed order against the lower-cased
    "title link" text; the first keyword found decides the provider.
    Returns '' when no keyword matches.
    """
    keyword_to_provider = (
        ('openai', 'OpenAI'),
        ('anthropic', 'Anthropic'),
        ('claude', 'Anthropic'),
        ('gemini', 'Google'),
        ('google', 'Google'),
        ('deepseek', 'DeepSeek'),
        ('qwen', 'Qwen'),
        ('dashscope', 'DashScope'),
        ('zhipu', '智谱'),
        ('baidu', '百度'),
        ('tencent', '腾讯'),
        ('minimax', 'MiniMax'),
        ('x.ai', 'xAI'),
        ('xai', 'xAI'),
    )
    haystack = f"{title} {link}".lower()
    return next(
        (provider for keyword, provider in keyword_to_provider if keyword in haystack),
        '',
    )
|
||||
|
||||
|
||||
# Script body: run the query from INTRADAY_DISCOVERY_QUERY against Bing's RSS
# search endpoint and print the results as a JSON array of search records.
query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip()
if not query:
    # No query configured: emit an empty result set and exit successfully.
    print("[]")
    raise SystemExit(0)


def _child_text(node, tag):
    """Return the stripped text of a child element, or '' when it is absent."""
    return (node.findtext(tag) or '').strip()


def _origin_of(link):
    """Return 'scheme://host' for link, or '' if either part is missing."""
    if not link:
        return ''
    parts = urllib.parse.urlparse(link)
    if parts.scheme and parts.netloc:
        return f"{parts.scheme}://{parts.netloc}"
    return ''


url = "https://www.bing.com/search?format=rss&q=" + urllib.parse.quote(query)
req = urllib.request.Request(url, headers={
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
})
with urllib.request.urlopen(req, timeout=20) as resp:
    body = resp.read().decode("utf-8", errors="ignore")

root = ET.fromstring(body)
items = []
for entry in root.findall('./channel/item'):
    headline = _child_text(entry, 'title')
    href = _child_text(entry, 'link')
    items.append({
        "title": headline,
        "summary": _child_text(entry, 'description'),
        "url": href,
        "provider": infer_provider(headline, href),
        "provider_url": _origin_of(href),
        "published_at": normalize_pubdate(_child_text(entry, 'pubDate')),
    })
print(json.dumps(items, ensure_ascii=False))
|
||||
@@ -41,6 +41,30 @@ if [[ "$DRY_RUN" == "true" ]]; then
|
||||
materialize_args+=(--dry-run)
|
||||
fi
|
||||
|
||||
go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"
|
||||
go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"
|
||||
deepseek_guard_args=()
|
||||
if [[ "$DRY_RUN" == "true" ]]; then
|
||||
deepseek_guard_args+=(--allow-bootstrap=false)
|
||||
fi
|
||||
|
||||
if ! go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${discovery_args[@]}"; then
|
||||
exit 1
|
||||
fi
|
||||
if ! go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${verification_args[@]}"; then
|
||||
exit 1
|
||||
fi
|
||||
if ! go run -tags llm_script ./scripts/deepseek_news_signature_guard.go ./scripts/deepseek_news_signature_guard_lib.go ./scripts/deepseek_news_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go "${deepseek_guard_args[@]}"; then
|
||||
if [[ "$DRY_RUN" != "true" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_pricing_signature --snapshot-base deepseek-pricing --url https://platform.deepseek.com/pricing "${deepseek_guard_args[@]}"; then
|
||||
if [[ "$DRY_RUN" != "true" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
if ! go run -tags llm_script ./scripts/deepseek_pricing_signature_guard.go ./scripts/deepseek_pricing_signature_guard_lib.go ./scripts/deepseek_pricing_snapshot_lib.go ./scripts/subscription_import_common.go ./scripts/official_import_signature_audit_lib.go ./scripts/pricing_markdown_snapshot_lib.go ./scripts/signature_guard_common.go --source-key deepseek_api_pricing_signature --snapshot-base deepseek-api-pricing --url https://platform.deepseek.com/docs/api-pricing "${deepseek_guard_args[@]}"; then
|
||||
if [[ "$DRY_RUN" != "true" ]]; then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${materialize_args[@]}"
|
||||
|
||||
Reference in New Issue
Block a user