feat(intraday): add discovery and verification watch pipeline
Some checks failed
CI / go-test (push) Has been cancelled
CI / scripts-regression (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / docker-build (push) Has been cancelled

This commit is contained in:
phamnazage-jpg
2026-05-27 18:54:32 +08:00
parent 32858bfec4
commit 475401bcbe
21 changed files with 2444 additions and 13 deletions

View File

@@ -0,0 +1,410 @@
//go:build llm_script
package main
import (
"context"
"database/sql"
"encoding/json"
"flag"
"fmt"
"log/slog"
"os"
"sort"
"strings"
"time"
_ "github.com/lib/pq"
)
// intradayNewsCandidate is one deduplicated news candidate produced by a
// discovery run and written to the intraday_news_candidate table.
type intradayNewsCandidate struct {
// CandidateDate is the discovery date; the upsert casts it to a SQL date.
CandidateDate string
// EventType is stored after passing through normalizeIntradayEventType.
EventType string
ProviderName string
ModelName string
ProviderCountry string
Title string
Summary string
// CandidateURLs holds the trimmed, de-duplicated source URLs. Records that
// end up with no URL are dropped during normalization.
CandidateURLs []string
// DiscoverySource is "llm_answer", or "web_search+llm" once one of the
// candidate URLs also appears in the search results.
DiscoverySource string
// DiscoveryQuery is filled from the title of the matching search record.
DiscoveryQuery string
// DiscoveryEvidence keeps the raw "llm_record" and, when a search hit
// matched, the "search_record"; it is serialized to JSON for storage.
DiscoveryEvidence map[string]any
// NormalizedKey is the key built by buildIntradayNormalizedKey. It is used
// for in-memory merging and as the ON CONFLICT target of the upsert.
NormalizedKey string
// Status and VerificationConfidence are both initialised to "candidate".
Status string
VerificationConfidence string
VerificationNotes string
}
// intradayDiscoveryConfig carries everything one discovery run needs. It is
// assembled by loadIntradayDiscoveryConfig from command-line flags and
// environment variables.
type intradayDiscoveryConfig struct {
// Date is the discovery date (-date flag); an empty value aborts the run.
Date string
// DryRun (-dry-run flag) prints the summary without opening the database.
DryRun bool
// Search and LLM describe how the search and LLM providers are reached.
Search intradayProviderConfig
LLM intradayProviderConfig
// DatabaseURL is the DSN passed to sql.Open with the "postgres" driver.
DatabaseURL string
// Timeout is copied into both provider configurations.
Timeout time.Duration
// ProviderLimit (-provider-limit flag) caps how many built-in providers are
// turned into search queries; values <= 0 keep the full list.
ProviderLimit int
}
// intradayDiscoverySummary is the JSON document printed on stdout at the end
// of a run (for both dry runs and real runs).
type intradayDiscoverySummary struct {
// CandidateTotal is the number of candidates after de-duplication.
CandidateTotal int `json:"candidate_total"`
// ProviderHitCount is the number of distinct non-empty provider names.
ProviderHitCount int `json:"provider_hit_count"`
// EventTypeCounts maps each event type to its candidate count.
EventTypeCounts map[string]int `json:"event_type_counts"`
// DiscoverySourceSet lists the distinct discovery sources, sorted.
DiscoverySourceSet []string `json:"discovery_source_set"`
// DryRun echoes whether the run skipped the database write.
DryRun bool `json:"dry_run"`
}
// intradayDiscoveryLogger emits JSON-formatted records on stderr at info level
// and above. It is initialised together with the package's other variables.
var intradayDiscoveryLogger *slog.Logger = slog.New(
	slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}),
)
// main applies .env overrides, builds the run configuration from flags and
// environment variables, and executes the discovery run. A failure is
// reported on stderr and turns into exit status 1.
func main() {
	loadIntradayEnv()
	err := runIntradayCandidateDiscovery(loadIntradayDiscoveryConfig())
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "discover_intraday_news_candidates: %v\n", err)
	os.Exit(1)
}
// loadIntradayDiscoveryConfig parses the command-line flags and then fills in
// the database URL, timeout and both provider configurations from the
// environment. Environment values for the providers are whitespace-trimmed.
func loadIntradayDiscoveryConfig() intradayDiscoveryConfig {
	// env reads one environment variable with surrounding whitespace removed.
	env := func(name string) string {
		return strings.TrimSpace(os.Getenv(name))
	}

	cfg := intradayDiscoveryConfig{}
	flag.StringVar(&cfg.Date, "date", intradayDateValue(), "候选发现日期,格式 YYYY-MM-DD")
	flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅输出摘要,不写数据库")
	flag.IntVar(&cfg.ProviderLimit, "provider-limit", 10, "最大 provider 数")
	flag.Parse()

	cfg.DatabaseURL = intradayDefaultDSN()
	timeout := discoveryTimeoutFromEnv()
	cfg.Timeout = timeout

	// Both providers share the same timeout.
	cfg.Search = intradayProviderConfig{
		Mode:    env("INTRADAY_DISCOVERY_SEARCH_PROVIDER"),
		Command: env("INTRADAY_DISCOVERY_SEARCH_COMMAND"),
		URL:     env("INTRADAY_DISCOVERY_SEARCH_URL"),
		Fixture: env("INTRADAY_DISCOVERY_SEARCH_FIXTURE"),
		Timeout: timeout,
	}
	cfg.LLM = intradayProviderConfig{
		Mode:    env("INTRADAY_DISCOVERY_LLM_PROVIDER"),
		Command: env("INTRADAY_DISCOVERY_LLM_COMMAND"),
		URL:     env("INTRADAY_DISCOVERY_LLM_URL"),
		Fixture: env("INTRADAY_DISCOVERY_LLM_FIXTURE"),
		Timeout: timeout,
	}
	return cfg
}
// runIntradayCandidateDiscovery executes one discovery pass: validate the
// configuration, collect search and LLM records, normalize them into
// candidates, persist them unless cfg.DryRun is set, and print the summary.
// The first error encountered is returned and nothing further is done.
func runIntradayCandidateDiscovery(cfg intradayDiscoveryConfig) error {
	if strings.TrimSpace(cfg.Date) == "" {
		return fmt.Errorf("date 未设置")
	}

	// Validate the search provider first, then the LLM provider.
	providers := []struct {
		label  string
		config intradayProviderConfig
	}{
		{"search", cfg.Search},
		{"llm", cfg.LLM},
	}
	for _, provider := range providers {
		if err := validateIntradayProviderConfig(provider.label, provider.config); err != nil {
			return err
		}
	}

	searchRecords, err := loadIntradaySearchRecords(
		cfg.Search, cfg.Date, buildIntradayQueries(cfg.Date, cfg.ProviderLimit),
	)
	if err != nil {
		return err
	}
	llmRecords, err := loadIntradayLLMRecords(cfg.LLM, cfg.Date, searchRecords)
	if err != nil {
		return err
	}

	candidates := normalizeIntradayCandidates(cfg.Date, searchRecords, llmRecords)
	summary := summarizeIntradayCandidates(candidates, cfg.DryRun)

	if !cfg.DryRun {
		db, openErr := sql.Open("postgres", cfg.DatabaseURL)
		if openErr != nil {
			return fmt.Errorf("open db: %w", openErr)
		}
		// The handle stays open until the summary has been printed.
		defer db.Close()
		if upsertErr := upsertIntradayCandidates(context.Background(), db, candidates); upsertErr != nil {
			return upsertErr
		}
	}
	return printIntradayDiscoverySummary(summary)
}
// validateIntradayProviderConfig checks that cfg selects a supported mode and
// that the setting required by that mode is present. name ("search", "llm")
// only prefixes the error message.
//
// The mode is compared after trimming whitespace, which matches how
// loadIntradayProviderPayload dispatches on it. Previously the emptiness check
// trimmed the mode but the switch did not, so a padded mode was rejected here
// even though the loader would have accepted it.
func validateIntradayProviderConfig(name string, cfg intradayProviderConfig) error {
	mode := strings.TrimSpace(cfg.Mode)
	if mode == "" {
		return fmt.Errorf("%s provider 未设置", name)
	}
	switch mode {
	case "fixture":
		if strings.TrimSpace(cfg.Fixture) == "" {
			return fmt.Errorf("%s provider fixture 未设置", name)
		}
	case "command_json":
		if strings.TrimSpace(cfg.Command) == "" {
			return fmt.Errorf("%s provider command 未设置", name)
		}
	case "http_json":
		if strings.TrimSpace(cfg.URL) == "" {
			return fmt.Errorf("%s provider url 未设置", name)
		}
	default:
		return fmt.Errorf("%s provider mode 不支持: %s", name, mode)
	}
	return nil
}
// buildIntradayQueries returns the search queries for a date: every selected
// provider combined with every keyword set, formatted as
// "<date> <provider> <keywords>" and trimmed. providerLimit keeps only the
// first providerLimit providers when it is positive and smaller than the
// built-in list; any other value keeps all of them.
func buildIntradayQueries(date string, providerLimit int) []string {
	keywords := [...]string{"pricing release announcement", "模型 降价 发布 活动"}
	providers := [...]string{
		"OpenAI", "Anthropic", "Google Gemini", "xAI", "DeepSeek",
		"DashScope", "Qwen", "智谱", "百度文心", "腾讯混元", "火山方舟", "MiniMax",
	}

	selected := len(providers)
	if providerLimit > 0 && providerLimit < selected {
		selected = providerLimit
	}

	queries := make([]string, 0, selected*len(keywords))
	for p := 0; p < selected; p++ {
		for k := range keywords {
			query := strings.Join([]string{date, providers[p], keywords[k]}, " ")
			queries = append(queries, strings.TrimSpace(query))
		}
	}
	return queries
}
// normalizeIntradayCandidates converts LLM records into candidates, enriches
// them with matching search records, drops records without any URL, merges
// candidates that share a normalized key, and returns them ordered by
// provider name, then event type, then normalized key.
func normalizeIntradayCandidates(date string, searchRecords []intradaySearchRecord, llmRecords []intradayLLMRecord) []intradayNewsCandidate {
	byURL := indexSearchRecordsByURL(searchRecords)
	merged := make(map[string]intradayNewsCandidate)

	for i := range llmRecords {
		item := candidateFromLLMRecord(date, llmRecords[i], byURL)
		if len(item.CandidateURLs) > 0 {
			// Fall back to guessing the provider from the title.
			if item.ProviderName == "" {
				item.ProviderName = inferProviderFromTitle(item.Title)
			}
			item.EventType = normalizeIntradayEventType(item.EventType)
			item.NormalizedKey = buildIntradayNormalizedKey(item)
			mergeIntradayCandidate(merged, item)
		}
	}

	ordered := make([]intradayNewsCandidate, 0, len(merged))
	for _, item := range merged {
		ordered = append(ordered, item)
	}
	sort.Slice(ordered, func(a, b int) bool {
		left, right := ordered[a], ordered[b]
		switch {
		case left.ProviderName != right.ProviderName:
			return left.ProviderName < right.ProviderName
		case left.EventType != right.EventType:
			return left.EventType < right.EventType
		default:
			return left.NormalizedKey < right.NormalizedKey
		}
	})
	return ordered
}
// candidateFromLLMRecord builds a candidate from one LLM record. Text fields
// are trimmed and URLs de-duplicated. For every candidate URL present in
// searchIndex the candidate is marked "web_search+llm", the search record is
// attached as evidence, and empty provider/title/summary fields are filled
// from that search record. When several URLs match, the last match supplies
// the discovery query and the stored search record.
func candidateFromLLMRecord(date string, record intradayLLMRecord, searchIndex map[string]intradaySearchRecord) intradayNewsCandidate {
	evidence := map[string]any{"llm_record": record}
	candidate := intradayNewsCandidate{
		CandidateDate:          date,
		EventType:              record.EventType,
		ProviderName:           strings.TrimSpace(record.ProviderName),
		ModelName:              strings.TrimSpace(record.ModelName),
		ProviderCountry:        strings.TrimSpace(record.ProviderCountry),
		Title:                  strings.TrimSpace(record.Title),
		Summary:                strings.TrimSpace(record.Summary),
		CandidateURLs:          dedupeStrings(record.CandidateURLs),
		DiscoverySource:        "llm_answer",
		DiscoveryEvidence:      evidence,
		Status:                 "candidate",
		VerificationConfidence: "candidate",
	}

	for _, candidateURL := range candidate.CandidateURLs {
		hit, found := searchIndex[candidateURL]
		if !found {
			continue
		}
		candidate.DiscoverySource = "web_search+llm"
		candidate.DiscoveryQuery = hit.Title
		evidence["search_record"] = hit
		if candidate.ProviderName == "" {
			candidate.ProviderName = strings.TrimSpace(hit.Provider)
		}
		if candidate.Title == "" {
			candidate.Title = strings.TrimSpace(hit.Title)
		}
		if candidate.Summary == "" {
			candidate.Summary = strings.TrimSpace(hit.Summary)
		}
	}
	return candidate
}
// indexSearchRecordsByURL maps each trimmed, non-empty URL to its search
// record. When a URL occurs more than once the later record wins.
func indexSearchRecordsByURL(records []intradaySearchRecord) map[string]intradaySearchRecord {
	byURL := make(map[string]intradaySearchRecord, len(records))
	for i := range records {
		if key := strings.TrimSpace(records[i].URL); key != "" {
			byURL[key] = records[i]
		}
	}
	return byURL
}
// mergeIntradayCandidate stores candidate in target under its normalized key.
// A candidate with an empty key is ignored. If the key already exists, the
// stored entry keeps its own fields and is extended: URLs are unioned, blank
// summary/country are filled from candidate, a differing non-empty discovery
// source upgrades the entry to "web_search+llm", and candidate's evidence
// entries overwrite those of the stored entry.
func mergeIntradayCandidate(target map[string]intradayNewsCandidate, candidate intradayNewsCandidate) {
	key := candidate.NormalizedKey
	if key == "" {
		return
	}
	current, seen := target[key]
	if !seen {
		target[key] = candidate
		return
	}

	current.CandidateURLs = dedupeStrings(append(current.CandidateURLs, candidate.CandidateURLs...))
	if strings.TrimSpace(current.Summary) == "" {
		current.Summary = candidate.Summary
	}
	if strings.TrimSpace(current.ProviderCountry) == "" {
		current.ProviderCountry = candidate.ProviderCountry
	}
	if candidate.DiscoverySource != "" && candidate.DiscoverySource != current.DiscoverySource {
		current.DiscoverySource = "web_search+llm"
	}

	if current.DiscoveryEvidence == nil {
		current.DiscoveryEvidence = map[string]any{}
	}
	for _, field := range []string{"llm_record", "search_record"} {
		if value, present := candidate.DiscoveryEvidence[field]; present {
			current.DiscoveryEvidence[field] = value
		}
	}
	target[key] = current
}
// buildIntradayNormalizedKey returns the de-duplication key
// "date|event-type|provider|subject", where subject is the normalized model
// name, or the normalized title when the candidate has no model name.
//
// The blank-model check is done on the raw field. normalizeWord maps empty
// input to "unknown" and never returns "", so testing its result (as the
// previous version did) made the title fallback unreachable and collapsed all
// model-less candidates of one provider and event type onto a single key.
func buildIntradayNormalizedKey(candidate intradayNewsCandidate) string {
	subject := candidate.ModelName
	if strings.TrimSpace(subject) == "" {
		subject = candidate.Title
	}
	return strings.Join([]string{
		candidate.CandidateDate,
		normalizeWord(candidate.EventType),
		normalizeWord(candidate.ProviderName),
		normalizeWord(subject),
	}, "|")
}
// summarizeIntradayCandidates aggregates run statistics: the candidate total,
// the number of distinct non-empty provider names, per-event-type counts and
// the sorted set of non-empty discovery sources. dryRun is echoed unchanged.
func summarizeIntradayCandidates(candidates []intradayNewsCandidate, dryRun bool) intradayDiscoverySummary {
	summary := intradayDiscoverySummary{
		CandidateTotal:  len(candidates),
		EventTypeCounts: map[string]int{},
		DryRun:          dryRun,
	}

	providers := make(map[string]struct{})
	sources := make(map[string]struct{})
	for i := range candidates {
		item := &candidates[i]
		summary.EventTypeCounts[item.EventType]++
		if item.ProviderName != "" {
			providers[item.ProviderName] = struct{}{}
		}
		if item.DiscoverySource != "" {
			sources[item.DiscoverySource] = struct{}{}
		}
	}
	summary.ProviderHitCount = len(providers)

	// Allocated even when empty so the JSON output is [] rather than null.
	summary.DiscoverySourceSet = make([]string, 0, len(sources))
	for name := range sources {
		summary.DiscoverySourceSet = append(summary.DiscoverySourceSet, name)
	}
	sort.Strings(summary.DiscoverySourceSet)
	return summary
}
// printIntradayDiscoverySummary writes summary to stdout as a single line of
// JSON. It returns the marshalling error, if any, and prints nothing then.
func printIntradayDiscoverySummary(summary intradayDiscoverySummary) error {
	encoded, err := json.Marshal(summary)
	if err == nil {
		fmt.Printf("%s\n", encoded)
	}
	return err
}
// upsertIntradayCandidates writes each candidate to intraday_news_candidate,
// one statement per candidate, stopping at the first failure. A row with the
// same normalized_key is updated instead of inserted; the update refreshes
// title, URLs, discovery source and evidence, keeps the stored summary, query
// and country when the new value is empty, and leaves every column not named
// in the SET list untouched. A nil db is rejected with an error.
func upsertIntradayCandidates(ctx context.Context, db *sql.DB, candidates []intradayNewsCandidate) error {
	if db == nil {
		return fmt.Errorf("db is nil")
	}

	const upsertStatement = `
INSERT INTO intraday_news_candidate (
candidate_date, event_type, provider_name, model_name, provider_country,
title, summary, candidate_urls, discovery_source, discovery_query,
discovery_evidence, normalized_key, status, verification_confidence, verification_notes
) VALUES (
$1::date, $2, $3, NULLIF($4, ''), NULLIF($5, ''),
$6, NULLIF($7, ''), $8::jsonb, $9, NULLIF($10, ''),
$11::jsonb, $12, $13, $14, NULLIF($15, '')
)
ON CONFLICT (normalized_key) DO UPDATE SET
title = EXCLUDED.title,
summary = COALESCE(NULLIF(EXCLUDED.summary, ''), intraday_news_candidate.summary),
candidate_urls = EXCLUDED.candidate_urls,
discovery_source = EXCLUDED.discovery_source,
discovery_query = COALESCE(NULLIF(EXCLUDED.discovery_query, ''), intraday_news_candidate.discovery_query),
discovery_evidence = EXCLUDED.discovery_evidence,
provider_country = COALESCE(NULLIF(EXCLUDED.provider_country, ''), intraday_news_candidate.provider_country),
updated_at = CURRENT_TIMESTAMP`

	for i := range candidates {
		item := candidates[i]

		urlsJSON, err := json.Marshal(item.CandidateURLs)
		if err != nil {
			return fmt.Errorf("marshal candidate urls: %w", err)
		}
		evidenceJSON, err := json.Marshal(item.DiscoveryEvidence)
		if err != nil {
			return fmt.Errorf("marshal discovery evidence: %w", err)
		}

		// Argument order follows the $1..$15 placeholders above.
		args := []any{
			item.CandidateDate,
			item.EventType,
			item.ProviderName,
			item.ModelName,
			item.ProviderCountry,
			item.Title,
			item.Summary,
			string(urlsJSON),
			item.DiscoverySource,
			item.DiscoveryQuery,
			string(evidenceJSON),
			item.NormalizedKey,
			item.Status,
			item.VerificationConfidence,
			item.VerificationNotes,
		}
		if _, err := db.ExecContext(ctx, upsertStatement, args...); err != nil {
			return fmt.Errorf("upsert intraday candidate %s: %w", item.NormalizedKey, err)
		}
	}
	return nil
}
// inferProviderFromTitle guesses a provider name from a headline by
// case-insensitive substring matching. Rules are tried in the listed order
// and the first hit wins; "" is returned when nothing matches.
func inferProviderFromTitle(title string) string {
	// Each rule is {substring to look for, provider name to return}.
	rules := [][2]string{
		{"openai", "OpenAI"},
		{"anthropic", "Anthropic"},
		{"gemini", "Google"},
		{"deepseek", "DeepSeek"},
		{"qwen", "Qwen"},
		{"dashscope", "DashScope"},
		{"xai", "xAI"},
		{"minimax", "MiniMax"},
		{"智谱", "智谱"},
		{"百度", "百度"},
		{"腾讯", "腾讯"},
	}
	haystack := strings.ToLower(title)
	for i := 0; i < len(rules); i++ {
		if strings.Contains(haystack, rules[i][0]) {
			return rules[i][1]
		}
	}
	return ""
}

View File

@@ -0,0 +1,127 @@
//go:build llm_script
package main
import (
"context"
"database/sql"
"path/filepath"
"strings"
"testing"
)
func TestLoadIntradaySearchRecordsFromFixture(t *testing.T) {
cfg := intradayProviderConfig{
Mode: "fixture",
Fixture: filepath.Join("testdata", "intraday_discovery_search_sample.json"),
}
records, err := loadIntradaySearchRecords(cfg, "2026-05-25", []string{"OpenAI pricing release"})
if err != nil {
t.Fatalf("loadIntradaySearchRecords 返回错误: %v", err)
}
if len(records) != 2 {
t.Fatalf("搜索样例条数错误: got=%d", len(records))
}
if records[0].URL == "" || records[0].Provider == "" {
t.Fatalf("搜索样例未保留 URL/provider: %+v", records[0])
}
}
func TestLoadIntradayLLMRecordsFromFixture(t *testing.T) {
cfg := intradayProviderConfig{
Mode: "fixture",
Fixture: filepath.Join("testdata", "intraday_discovery_llm_sample.json"),
}
records, err := loadIntradayLLMRecords(cfg, "2026-05-25", nil)
if err != nil {
t.Fatalf("loadIntradayLLMRecords 返回错误: %v", err)
}
if len(records) != 2 {
t.Fatalf("LLM 样例条数错误: got=%d", len(records))
}
if records[0].EventType != "official_release" {
t.Fatalf("LLM 事件类型错误: %+v", records[0])
}
}
func TestNormalizeIntradayCandidatesDedupesEquivalentEvents(t *testing.T) {
searchRecords := []intradaySearchRecord{{
Title: "OpenAI announces GPT-5.6 preview pricing update",
Summary: "Search summary",
URL: "https://openai.example.com/news/gpt-5-6-pricing",
Provider: "OpenAI",
}}
llmRecords := []intradayLLMRecord{
{
EventType: "official_release",
ProviderName: "OpenAI",
ModelName: "GPT-5.6",
ProviderCountry: "US",
Title: "GPT-5.6 preview pricing update",
Summary: "First summary",
CandidateURLs: []string{"https://openai.example.com/news/gpt-5-6-pricing"},
},
{
EventType: "official_release",
ProviderName: "OpenAI",
ModelName: "GPT 5.6",
ProviderCountry: "US",
Title: "OpenAI GPT 5.6 preview pricing update",
Summary: "Second summary",
CandidateURLs: []string{"https://openai.example.com/news/gpt-5-6-pricing"},
},
}
candidates := normalizeIntradayCandidates("2026-05-25", searchRecords, llmRecords)
if len(candidates) != 1 {
t.Fatalf("期望去重后只剩 1 条候选, got=%d", len(candidates))
}
if candidates[0].DiscoverySource != "web_search+llm" {
t.Fatalf("期望 discovery source 合并, got=%q", candidates[0].DiscoverySource)
}
}
// TestNormalizeIntradayCandidatesDropsURLlessRecords verifies that an LLM
// record carrying no candidate URL never becomes a candidate.
func TestNormalizeIntradayCandidatesDropsURLlessRecords(t *testing.T) {
	withoutURL := intradayLLMRecord{
		EventType:    "promo_campaign",
		ProviderName: "DeepSeek",
		ModelName:    "DeepSeek-V4-Flash",
		Title:        "No URL candidate",
		Summary:      "Should be dropped",
	}
	got := normalizeIntradayCandidates("2026-05-25", nil, []intradayLLMRecord{withoutURL})
	if count := len(got); count != 0 {
		t.Fatalf("无 URL 候选应被丢弃, got=%d", count)
	}
}
// TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture checks that
// each mode insists on its own required setting and that a complete fixture
// configuration is accepted.
func TestValidateIntradayProviderConfigRequiresCommandOrURLOrFixture(t *testing.T) {
	incomplete := []struct {
		provider string
		mode     string
		failure  string
	}{
		{"search", "command_json", "缺少 command 时应报错"},
		{"llm", "http_json", "缺少 url 时应报错"},
	}
	for _, tc := range incomplete {
		if validateIntradayProviderConfig(tc.provider, intradayProviderConfig{Mode: tc.mode}) == nil {
			t.Fatal(tc.failure)
		}
	}

	complete := intradayProviderConfig{Mode: "fixture", Fixture: "fixture.json"}
	if err := validateIntradayProviderConfig("search", complete); err != nil {
		t.Fatalf("fixture provider 不应报错: %v", err)
	}
}
func TestBuildIntradayNormalizedKeyUsesProviderModelAndDate(t *testing.T) {
key := buildIntradayNormalizedKey(intradayNewsCandidate{
CandidateDate: "2026-05-25",
EventType: "official_release",
ProviderName: "OpenAI",
ModelName: "GPT-5.6",
})
if !strings.Contains(key, "2026-05-25") || !strings.Contains(key, "openai") || !strings.Contains(key, "gpt-5-6") {
t.Fatalf("normalized key 不符合预期: %q", key)
}
}
// TestUpsertIntradayCandidatesRequiresDB verifies that a nil database handle
// is rejected with an error.
func TestUpsertIntradayCandidatesRequiresDB(t *testing.T) {
	if err := upsertIntradayCandidates(context.Background(), (*sql.DB)(nil), nil); err == nil {
		t.Fatal("nil db 时应报错")
	}
}

View File

@@ -0,0 +1,111 @@
//go:build llm_script
package main
import (
"fmt"
"os"
"regexp"
"strings"
"time"
)
// loadIntradayEnv reads KEY=VALUE pairs from ".env.local" and then ".env" in
// the working directory and exports them to the process environment. Files
// that cannot be read are skipped. Blank lines, lines starting with "#",
// lines without "=", and lines with an empty key are ignored. A variable that
// is already set is never overwritten, so the real environment wins over
// ".env.local", which in turn wins over ".env". Surrounding quote characters
// are stripped from values.
func loadIntradayEnv() {
	// apply exports one line if it is a usable assignment.
	apply := func(raw string) {
		entry := strings.TrimSpace(raw)
		if entry == "" || strings.HasPrefix(entry, "#") {
			return
		}
		name, rest, found := strings.Cut(entry, "=")
		if !found {
			return
		}
		if name = strings.TrimSpace(name); name == "" {
			return
		}
		if _, present := os.LookupEnv(name); present {
			return
		}
		// Best effort: a failing Setenv leaves the variable unset.
		_ = os.Setenv(name, strings.Trim(strings.TrimSpace(rest), `"'`))
	}

	for _, file := range [...]string{".env.local", ".env"} {
		content, err := os.ReadFile(file)
		if err != nil {
			continue
		}
		for _, raw := range strings.Split(string(content), "\n") {
			apply(raw)
		}
	}
}
// intradayDefaultDSN returns the DATABASE_URL environment variable, or a
// local unix-socket PostgreSQL DSN when that variable is unset or empty.
func intradayDefaultDSN() string {
	const fallback = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
	dsn := os.Getenv("DATABASE_URL")
	if dsn == "" {
		return fallback
	}
	return dsn
}
// intradayDateValue returns the trimmed REPORT_DATE environment variable, or
// today's date in the process's local time zone (YYYY-MM-DD) when it is blank.
func intradayDateValue() string {
	override := strings.TrimSpace(os.Getenv("REPORT_DATE"))
	if override == "" {
		return time.Now().Format("2006-01-02")
	}
	return override
}
// discoveryTimeoutFromEnv reads INTRADAY_DISCOVERY_TIMEOUT_SEC as a whole
// number of seconds. A missing, unparsable, zero or negative value yields the
// 20-second default.
func discoveryTimeoutFromEnv() time.Duration {
	const fallback = 20 * time.Second

	raw := strings.TrimSpace(os.Getenv("INTRADAY_DISCOVERY_TIMEOUT_SEC"))
	if raw == "" {
		return fallback
	}

	seconds := 0
	_, err := fmt.Sscanf(raw, "%d", &seconds)
	switch {
	case err != nil, seconds <= 0:
		return fallback
	}
	return time.Duration(seconds) * time.Second
}
// normalizeIntradayEventType lower-cases and trims value and returns it when
// it is one of the recognised event types; anything else becomes "unknown".
func normalizeIntradayEventType(value string) string {
	canonical := strings.TrimSpace(strings.ToLower(value))
	switch canonical {
	case "price_cut", "price_increase", "official_release", "promo_campaign", "leak_or_rumor":
		return canonical
	}
	return "unknown"
}
// intradaySlugSeparatorPattern matches every run of characters that is not a
// Unicode letter, combining mark, digit or hyphen. It is compiled once at
// package initialisation instead of on every normalizeWord call.
var intradaySlugSeparatorPattern = regexp.MustCompile(`[^\p{L}\p{M}\p{N}\-]+`)

// normalizeWord turns free text into a lower-case, hyphen-separated slug for
// use in de-duplication keys: underscores become hyphens, every other run of
// non-letter/non-digit characters becomes a single hyphen, and leading and
// trailing hyphens are removed. Input with no usable characters yields
// "unknown".
//
// Letters and digits of any script are kept. The previous ASCII-only pattern
// reduced names such as "智谱" or "百度文心" to "unknown", which made distinct
// providers and titles collide on the same key. Results for ASCII input are
// unchanged.
func normalizeWord(value string) string {
	slug := strings.ToLower(strings.TrimSpace(value))
	slug = strings.ReplaceAll(slug, "_", "-")
	slug = intradaySlugSeparatorPattern.ReplaceAllString(slug, "-")
	slug = strings.Trim(slug, "-")
	if slug == "" {
		return "unknown"
	}
	return slug
}
// dedupeStrings trims every value, drops blanks, and removes repeats while
// keeping first-seen order. The result is never nil, even for nil input.
func dedupeStrings(values []string) []string {
	unique := make([]string, 0, len(values))
	visited := make(map[string]struct{})
	for i := range values {
		item := strings.TrimSpace(values[i])
		if item == "" {
			continue
		}
		if _, duplicate := visited[item]; !duplicate {
			visited[item] = struct{}{}
			unique = append(unique, item)
		}
	}
	return unique
}

View File

@@ -0,0 +1,188 @@
//go:build llm_script
package main
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"strings"
	"time"
)
// intradayProviderConfig describes how one provider (search or LLM) is
// reached. Only the field that belongs to the selected Mode is used.
type intradayProviderConfig struct {
// Mode selects the transport: "fixture", "command_json" or "http_json".
Mode string
// Command is a shell command line (run through "sh -c") for command_json
// mode; "{{date}}" and "{{query}}" placeholders are substituted.
Command string
// URL is the endpoint for http_json mode; it accepts the same placeholders.
URL string
// Fixture is the path of a JSON file returned verbatim in fixture mode.
Fixture string
// Timeout is used as the HTTP client timeout in http_json mode.
Timeout time.Duration
}
// intradaySearchRecord is one entry of the JSON array returned by the search
// provider. The same struct is forwarded to the LLM provider inside
// intradayLLMRequest.
type intradaySearchRecord struct {
Title string `json:"title"`
Summary string `json:"summary"`
// URL is the key used to match search records against LLM candidate URLs.
URL string `json:"url"`
Provider string `json:"provider"`
ProviderURL string `json:"provider_url"`
PublishedAt string `json:"published_at"`
}
// intradayLLMRecord is one entry of the JSON array returned by the LLM
// provider; each record is turned into a news candidate.
type intradayLLMRecord struct {
EventType string `json:"event_type"`
ProviderName string `json:"provider_name"`
ModelName string `json:"model_name"`
ProviderCountry string `json:"provider_country"`
Title string `json:"title"`
Summary string `json:"summary"`
// CandidateURLs lists the source URLs; records without any are discarded
// during normalization.
CandidateURLs []string `json:"candidate_urls"`
}
// intradayLLMRequest is the JSON request body handed to the LLM provider: on
// stdin in command_json mode, as the POST body in http_json mode.
type intradayLLMRequest struct {
Date string `json:"date"`
SearchResults []intradaySearchRecord `json:"search_results"`
}
// loadIntradaySearchRecords asks the search provider once per query and
// concatenates the decoded results in query order. Blank payloads are
// skipped. In fixture mode the file does not depend on the query, so loading
// stops after the first non-blank payload. Any provider or decoding error
// aborts the whole load.
func loadIntradaySearchRecords(cfg intradayProviderConfig, date string, queries []string) ([]intradaySearchRecord, error) {
	var collected []intradaySearchRecord
	fixtureMode := cfg.Mode == "fixture"

	for i := range queries {
		query := queries[i]
		raw, err := loadIntradayProviderPayload(cfg, intradayProviderPayloadInput{Date: date, Query: query})
		if err != nil {
			return nil, err
		}
		if len(bytes.TrimSpace(raw)) > 0 {
			var batch []intradaySearchRecord
			if err := json.Unmarshal(raw, &batch); err != nil {
				return nil, fmt.Errorf("unmarshal search records for query %q: %w", query, err)
			}
			collected = append(collected, batch...)
			if fixtureMode {
				break
			}
		}
	}
	return collected, nil
}
// loadIntradayLLMRecords sends the date and the collected search results to
// the LLM provider as one JSON request and decodes the JSON array it returns.
// A blank response yields (nil, nil).
func loadIntradayLLMRecords(cfg intradayProviderConfig, date string, searchResults []intradaySearchRecord) ([]intradayLLMRecord, error) {
	requestBody, err := json.Marshal(intradayLLMRequest{Date: date, SearchResults: searchResults})
	if err != nil {
		return nil, fmt.Errorf("marshal llm request: %w", err)
	}

	raw, err := loadIntradayProviderPayload(cfg, intradayProviderPayloadInput{
		Date:        date,
		RequestBody: requestBody,
	})
	switch {
	case err != nil:
		return nil, err
	case len(bytes.TrimSpace(raw)) == 0:
		return nil, nil
	}

	var parsed []intradayLLMRecord
	if err := json.Unmarshal(raw, &parsed); err != nil {
		return nil, fmt.Errorf("unmarshal llm records: %w", err)
	}
	return parsed, nil
}
// intradayProviderPayloadInput carries the per-call values handed to a
// provider.
type intradayProviderPayloadInput struct {
// Date replaces the "{{date}}" placeholder and is exported to commands as
// INTRADAY_DISCOVERY_DATE.
Date string
// Query replaces the "{{query}}" placeholder and is exported to commands as
// INTRADAY_DISCOVERY_QUERY.
Query string
// RequestBody, when non-empty, is fed to the command's stdin or sent as the
// body of an HTTP POST.
RequestBody []byte
}
// loadIntradayProviderPayload returns the raw provider response for the
// configured mode: the fixture file's contents, the stdout of the provider
// command, or the body of the HTTP response. The mode is compared after
// trimming whitespace; a missing per-mode setting or an unknown mode is an
// error.
func loadIntradayProviderPayload(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
	blank := func(s string) bool { return strings.TrimSpace(s) == "" }

	mode := strings.TrimSpace(cfg.Mode)
	if mode == "fixture" {
		if blank(cfg.Fixture) {
			return nil, fmt.Errorf("provider fixture 未设置")
		}
		return os.ReadFile(cfg.Fixture)
	}
	if mode == "command_json" {
		if blank(cfg.Command) {
			return nil, fmt.Errorf("provider command 未设置")
		}
		return runIntradayCommand(cfg, input)
	}
	if mode == "http_json" {
		if blank(cfg.URL) {
			return nil, fmt.Errorf("provider url 未设置")
		}
		return fetchIntradayHTTP(cfg, input)
	}
	return nil, fmt.Errorf("unsupported provider mode %q", mode)
}
// runIntradayCommand runs the configured provider command through "sh -c" and
// returns its stdout.
//
// "{{date}}" is replaced verbatim and "{{query}}" is replaced with a
// single-quoted shell argument. Both values are also exported to the command
// as INTRADAY_DISCOVERY_DATE and INTRADAY_DISCOVERY_QUERY, and a non-empty
// request body is supplied on stdin.
//
// cfg.Timeout bounds the run when it is positive (it used to be ignored, so a
// hung command blocked the whole discovery run). A non-positive timeout means
// no limit, matching the HTTP client's convention. A non-zero exit status is
// reported together with the command's stderr.
func runIntradayCommand(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
	command := strings.TrimSpace(cfg.Command)
	command = strings.ReplaceAll(command, "{{date}}", input.Date)
	command = strings.ReplaceAll(command, "{{query}}", shellEscapeSingleArg(input.Query))

	ctx := context.Background()
	if cfg.Timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cfg.Timeout)
		defer cancel()
	}

	cmd := exec.CommandContext(ctx, "sh", "-c", command)
	if cfg.Timeout > 0 {
		// Killing the shell does not close pipes inherited by its children;
		// WaitDelay keeps Output from blocking on them after the deadline.
		cmd.WaitDelay = time.Second
	}
	cmd.Env = append(os.Environ(),
		"INTRADAY_DISCOVERY_DATE="+input.Date,
		"INTRADAY_DISCOVERY_QUERY="+input.Query,
	)
	if len(input.RequestBody) > 0 {
		cmd.Stdin = bytes.NewReader(input.RequestBody)
	}

	out, err := cmd.Output()
	if err != nil {
		// Report an expired deadline explicitly instead of "signal: killed".
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("run provider command: timed out after %s: %w", cfg.Timeout, ctxErr)
		}
		if exitErr, ok := err.(*exec.ExitError); ok {
			return nil, fmt.Errorf("run provider command: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return nil, fmt.Errorf("run provider command: %w", err)
	}
	return out, nil
}
// fetchIntradayHTTP calls the provider URL and returns the response body.
//
// "{{date}}" and "{{query}}" in the URL are replaced with query-escaped
// values. Generated queries contain spaces and non-ASCII text, which
// previously ended up raw in the request line. The request is a GET, or a
// POST with a JSON content type when a request body is present. cfg.Timeout
// bounds the whole exchange. Any status outside 2xx is an error that includes
// the response body.
func fetchIntradayHTTP(cfg intradayProviderConfig, input intradayProviderPayloadInput) ([]byte, error) {
	client := &http.Client{Timeout: cfg.Timeout}

	rawURL := strings.TrimSpace(cfg.URL)
	rawURL = strings.ReplaceAll(rawURL, "{{date}}", url.QueryEscape(input.Date))
	rawURL = strings.ReplaceAll(rawURL, "{{query}}", url.QueryEscape(input.Query))

	method := http.MethodGet
	var body io.Reader
	if len(input.RequestBody) > 0 {
		method = http.MethodPost
		body = bytes.NewReader(input.RequestBody)
	}

	req, err := http.NewRequest(method, rawURL, body)
	if err != nil {
		return nil, fmt.Errorf("build provider request: %w", err)
	}
	if len(input.RequestBody) > 0 {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("call provider url: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the body is only used to enrich the error message.
		payload, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("call provider url: unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(payload)))
	}

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read provider response: %w", err)
	}
	return payload, nil
}
// shellEscapeSingleArg wraps value in single quotes so a POSIX shell treats
// it as one literal argument. Each embedded single quote is written as
// '"'"' (close the quote, emit a double-quoted quote, reopen). The empty
// string becomes ''.
func shellEscapeSingleArg(value string) string {
	const quote = "'"
	if len(value) == 0 {
		return quote + quote
	}
	escaped := strings.Join(strings.Split(value, quote), `'"'"'`)
	return quote + escaped + quote
}

View File

@@ -46,6 +46,7 @@ type signalModelEvent struct {
TrustLabel string `json:"trust_label"`
SourceKindLabel string `json:"source_kind_label"`
PrimarySource string `json:"primary_source"`
SourceURL string `json:"source_url"`
UpdatedAt string `json:"updated_at"`
EvidenceDetail string `json:"evidence_detail"`
Baseline string `json:"baseline"`
@@ -367,6 +368,12 @@ func loadSignalModelEvents(db *sql.DB, date string) ([]signalModelEvent, error)
}
events = append(events, priceEvents...)
discoveryEvents, err := loadVerifiedDiscoverySignalEvents(db, date)
if err != nil {
return nil, err
}
events = mergeVerifiedDiscoveryEvents(events, discoveryEvents)
sort.Slice(events, func(i, j int) bool {
if events[i].Priority != events[j].Priority {
return events[i].Priority > events[j].Priority
@@ -409,6 +416,7 @@ func loadSignalPromoCampaignEvents(date string) ([]signalModelEvent, error) {
TrustLabel: signalFirstNonEmpty(definition.TrustLabel, "官方来源 / 一级证据"),
SourceKindLabel: signalFirstNonEmpty(definition.SourceKindLabel, "官方活动页"),
PrimarySource: definition.PrimarySource,
SourceURL: definition.PrimarySource,
UpdatedAt: signalFormatEventUpdatedAt("", definition.Date),
EvidenceDetail: definition.EvidenceDetail,
Baseline: signalFirstNonEmpty(definition.Baseline, "活动窗口开启"),
@@ -520,6 +528,7 @@ func loadSignalOfficialReleaseEvents(db *sql.DB, date string) ([]signalModelEven
TrustLabel: buildSignalReleaseTrustLabel(model, dateConfidence),
SourceKindLabel: buildSignalReleaseSourceKindLabel(dateSourceKind, dateConfidence),
PrimarySource: sourceURL,
SourceURL: sourceURL,
UpdatedAt: releaseDate.Format("2006-01-02 15:04"),
EvidenceDetail: buildSignalReleaseEvidenceDetail(dateSourceKind, dateConfidence),
Baseline: "官方首次发布",
@@ -610,6 +619,7 @@ func loadSignalNewModelEvents(db *sql.DB, date string) ([]signalModelEvent, erro
TrustLabel: buildSignalTrustLabel(model),
SourceKindLabel: "模型快照",
PrimarySource: buildSignalPrimarySource("region_pricing", model.OperatorName),
SourceURL: buildSignalPrimarySource("region_pricing", model.OperatorName),
UpdatedAt: createdAt.Format("2006-01-02 15:04"),
EvidenceDetail: "models.created_at = 今日,且已存在最新价格快照",
Baseline: "首次出现",
@@ -709,6 +719,7 @@ func loadSignalPriceChangeEvents(db *sql.DB, date string) ([]signalModelEvent, e
TrustLabel: buildSignalTrustLabel(model),
SourceKindLabel: "价格快照",
PrimarySource: "pricing_history",
SourceURL: buildSignalPrimarySource("region_pricing", model.OperatorName),
UpdatedAt: changedAt.Format("2006-01-02 15:04"),
EvidenceDetail: buildSignalPriceEvidenceDetail(changePct, oldInputPrice, newInputPrice, model.Currency),
Baseline: fmt.Sprintf("较昨日 %+.0f%%", changePct),
@@ -747,6 +758,241 @@ func dedupeSignalEvents(events []signalModelEvent) []signalModelEvent {
return result
}
// loadVerifiedDiscoverySignalEvents loads the discovery candidates for date
// whose status is 'verified' and whose confidence is 'official_confirmed',
// newest first, and converts them to signal events. Rows without a usable URL
// are skipped, and only event types accepted by
// filterVerifiedDiscoverySignalEvents are returned. If the candidate table
// does not exist the function returns (nil, nil).
func loadVerifiedDiscoverySignalEvents(db *sql.DB, date string) ([]signalModelEvent, error) {
	const verifiedCandidatesQuery = `
SELECT
event_type,
provider_name,
COALESCE(model_name, ''),
COALESCE(provider_country, ''),
title,
COALESCE(summary, ''),
COALESCE(candidate_urls::text, '[]'),
COALESCE(verification_notes, ''),
updated_at
FROM intraday_news_candidate
WHERE candidate_date = $1::date
AND status = 'verified'
AND verification_confidence = 'official_confirmed'
ORDER BY updated_at DESC, id DESC
`
	rows, err := db.Query(verifiedCandidatesQuery, date)
	if err != nil {
		// A missing table is treated as "no discovery events".
		if strings.Contains(err.Error(), `relation "intraday_news_candidate" does not exist`) {
			return nil, nil
		}
		return nil, err
	}
	defer rows.Close()

	// candidateRow mirrors the selected columns in order.
	type candidateRow struct {
		eventType, provider, model, country string
		title, summary, urlsJSON, notes     string
		updatedAt                           time.Time
	}

	var events []signalModelEvent
	for rows.Next() {
		var row candidateRow
		if err := rows.Scan(
			&row.eventType, &row.provider, &row.model, &row.country,
			&row.title, &row.summary, &row.urlsJSON, &row.notes, &row.updatedAt,
		); err != nil {
			return nil, err
		}

		var urls []string
		if err := json.Unmarshal([]byte(row.urlsJSON), &urls); err != nil {
			return nil, fmt.Errorf("unmarshal discovery candidate urls: %w", err)
		}
		sourceURL := firstString(urls)
		if strings.TrimSpace(sourceURL) == "" {
			continue
		}

		kind := signalNormalizeIntradayEventType(row.eventType)
		events = append(events, signalModelEvent{
			EventType:       kind,
			ModelName:       signalFirstNonEmpty(row.model, row.title),
			ProviderName:    row.provider,
			OperatorName:    row.provider,
			Audience:        buildDiscoveryAudience(kind),
			TrustLabel:      "官方来源 / discovery 验证",
			SourceKindLabel: buildDiscoverySourceKind(kind),
			PrimarySource:   sourceURL,
			SourceURL:       sourceURL,
			UpdatedAt:       row.updatedAt.Format("2006-01-02 15:04"),
			EvidenceDetail:  signalFirstNonEmpty(row.notes, row.summary),
			Baseline:        buildDiscoveryBaseline(kind),
			Summary:         signalFirstNonEmpty(row.summary, row.title),
			Priority:        buildDiscoveryPriority(kind),
		})
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return filterVerifiedDiscoverySignalEvents(events), nil
}
// filterVerifiedDiscoverySignalEvents keeps only official releases, promo
// campaigns, price cuts and price increases, preserving input order. The
// result is a new, non-nil slice.
func filterVerifiedDiscoverySignalEvents(events []signalModelEvent) []signalModelEvent {
	kept := make([]signalModelEvent, 0, len(events))
	for i := range events {
		kind := events[i].EventType
		if kind == "official_release" || kind == "promo_campaign" || kind == "price_cut" || kind == "price_increase" {
			kept = append(kept, events[i])
		}
	}
	return kept
}
// mergeVerifiedDiscoveryEvents returns the native events followed by those
// discovery events that describe something new. A discovery event with the
// same merge key as an existing event only fills that event's blank evidence
// fields. Discovery events are filtered to the accepted event types first.
// nativeEvents itself is not modified.
func mergeVerifiedDiscoveryEvents(nativeEvents, discoveryEvents []signalModelEvent) []signalModelEvent {
	combined := make([]signalModelEvent, len(nativeEvents))
	copy(combined, nativeEvents)

	// positions maps a merge key to its index in combined; for duplicate
	// native keys the later index wins.
	positions := make(map[string]int, len(combined))
	for pos := range combined {
		positions[signalEventMergeKey(combined[pos])] = pos
	}

	for _, found := range filterVerifiedDiscoverySignalEvents(discoveryEvents) {
		key := signalEventMergeKey(found)
		pos, known := positions[key]
		if !known {
			positions[key] = len(combined)
			combined = append(combined, found)
			continue
		}
		combined[pos] = mergeSignalEventEvidence(combined[pos], found)
	}
	return combined
}
// mergeSignalEventEvidence returns native with its blank evidence fields
// (source kind label, source URL, primary source, evidence detail, trust
// label) filled from discovery. Fields already set on native are kept.
func mergeSignalEventEvidence(native, discovery signalModelEvent) signalModelEvent {
	// fillBlank overwrites *dst only when it holds nothing but whitespace.
	fillBlank := func(dst *string, fallback string) {
		if strings.TrimSpace(*dst) == "" {
			*dst = fallback
		}
	}

	result := native
	fillBlank(&result.SourceKindLabel, discovery.SourceKindLabel)
	fillBlank(&result.SourceURL, discovery.SourceURL)
	fillBlank(&result.PrimarySource, discovery.PrimarySource)
	fillBlank(&result.EvidenceDetail, discovery.EvidenceDetail)
	fillBlank(&result.TrustLabel, discovery.TrustLabel)
	return result
}
// signalEventMergeKey builds the "event-type|provider|model" key used to
// decide whether two signal events describe the same fact.
func signalEventMergeKey(event signalModelEvent) string {
	kind := signalNormalizeIntradayEventType(event.EventType)
	provider := signalNormalizeWord(event.ProviderName)
	model := signalNormalizeWord(event.ModelName)
	return kind + "|" + provider + "|" + model
}
// buildDiscoveryAudience returns the audience blurb shown for a discovery
// event of the given type, with a generic blurb for unrecognised types.
func buildDiscoveryAudience(eventType string) string {
	audiences := map[string]string{
		"official_release": "适合需要尽快复查默认选型与路线图影响的团队",
		"promo_campaign":   "适合想利用活动窗口压低成本的团队",
		"price_cut":        "适合准备趁降价重排默认模型的团队",
		"price_increase":   "适合提前准备替代模型和预算回退方案的团队",
	}
	if text, known := audiences[eventType]; known {
		return text
	}
	return "适合关注日内情报变化的读者"
}
// buildDiscoverySourceKind returns the source-kind label for a discovery
// event; both price event types share one label.
func buildDiscoverySourceKind(eventType string) string {
	if eventType == "official_release" {
		return "discovery 验证 / 官方发布页"
	}
	if eventType == "promo_campaign" {
		return "discovery 验证 / 官方活动页"
	}
	if eventType == "price_cut" || eventType == "price_increase" {
		return "discovery 验证 / 官方价格页"
	}
	return "discovery 验证"
}
// buildDiscoveryBaseline returns the baseline text attached to a discovery
// event of the given type.
func buildDiscoveryBaseline(eventType string) string {
	baselines := map[string]string{
		"official_release": "discovery 验证通过",
		"promo_campaign":   "活动窗口已验证",
		"price_cut":        "official_confirmed",
		"price_increase":   "official_confirmed",
	}
	if text, known := baselines[eventType]; known {
		return text
	}
	return "discovery verified"
}
// buildDiscoveryPriority returns the sort priority of a discovery event:
// releases rank highest, then promos, price cuts and price increases; any
// other type gets 80.
func buildDiscoveryPriority(eventType string) int {
	priorities := map[string]int{
		"official_release": 118,
		"promo_campaign":   112,
		"price_cut":        96,
		"price_increase":   94,
	}
	if priority, known := priorities[eventType]; known {
		return priority
	}
	return 80
}
// firstString returns the first element that is not blank, exactly as stored
// (untrimmed), or "" when every element is blank or the slice is empty.
func firstString(values []string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; strings.TrimSpace(candidate) != "" {
			return candidate
		}
	}
	return ""
}
// signalNormalizeIntradayEventType lower-cases and trims value and returns it
// when it is one of the four event types surfaced as signals; every other
// value becomes "unknown".
func signalNormalizeIntradayEventType(value string) string {
	canonical := strings.TrimSpace(strings.ToLower(value))
	switch canonical {
	case "price_cut", "price_increase", "official_release", "promo_campaign":
		return canonical
	}
	return "unknown"
}
// signalNormalizeWord reduces value to a lower-case slug made of ASCII
// letters and digits. Every run of other characters (underscores and hyphens
// included) collapses to a single hyphen, and leading and trailing hyphens
// are dropped. Input without any ASCII letter or digit yields "unknown".
func signalNormalizeWord(value string) string {
	lowered := strings.ReplaceAll(strings.ToLower(strings.TrimSpace(value)), "_", "-")

	var slug strings.Builder
	prevWasDash := false
	for _, r := range lowered {
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9':
			slug.WriteRune(r)
			prevWasDash = false
		case !prevWasDash:
			// First separator of a run; later ones are swallowed.
			slug.WriteByte('-')
			prevWasDash = true
		}
	}

	if trimmed := strings.Trim(slug.String(), "-"); trimmed != "" {
		return trimmed
	}
	return "unknown"
}
func classifySignalFreeSource(model signalModelInfo) string {
switch model.OperatorType {
case "official", "cloud":

View File

@@ -31,3 +31,64 @@ func TestBuildSignalPageMode(t *testing.T) {
t.Fatalf("官方发布日 page_mode 错误: %q", got)
}
}
func TestBuildSignalPageModeTreatsVerifiedDiscoveryPromoAsHot(t *testing.T) {
got := buildSignalPageMode(signalDailySignals{}, []signalModelEvent{{EventType: "promo_campaign", ModelName: "GPT-5.6"}})
if got != "hot" {
t.Fatalf("已验证活动事件应触发 hot, got=%q", got)
}
}
// Of an official release, a rumor and an unknown-type event, only the
// official release may survive filtering.
func TestFilterDiscoveryEventsDropsLeakAndCandidateOnly(t *testing.T) {
	release := signalModelEvent{EventType: "official_release", ModelName: "GPT-5.6", Priority: 120}
	rumor := signalModelEvent{EventType: "leak_or_rumor", ModelName: "GPT-5.6", Priority: 200}
	mystery := signalModelEvent{EventType: "unknown", ModelName: "Mystery", Priority: 50}

	kept := filterVerifiedDiscoverySignalEvents([]signalModelEvent{release, rumor, mystery})
	if count := len(kept); count != 1 {
		t.Fatalf("期望仅保留 1 条正式事实事件, got=%d", count)
	}
	if kept[0].EventType != "official_release" {
		t.Fatalf("错误保留了非正式事件: %+v", kept)
	}
}
// When a native and a discovery event describe the same release, the merge
// must keep the native record and only fill its gaps from discovery.
func TestMergeVerifiedDiscoveryEventsPrefersNativeFact(t *testing.T) {
	nativeEvent := signalModelEvent{
		EventType:      "official_release",
		ModelName:      "GPT-5.6",
		ProviderName:   "OpenAI",
		PrimarySource:  "native_release",
		EvidenceDetail: "native evidence",
		Priority:       120,
	}
	discoveryEvent := signalModelEvent{
		EventType:       "official_release",
		ModelName:       "GPT-5.6",
		ProviderName:    "OpenAI",
		PrimarySource:   "discovery_release",
		EvidenceDetail:  "discovery evidence",
		SourceKindLabel: "官方博客",
		Priority:        110,
	}

	merged := mergeVerifiedDiscoveryEvents(
		[]signalModelEvent{nativeEvent},
		[]signalModelEvent{discoveryEvent},
	)
	if count := len(merged); count != 1 {
		t.Fatalf("期望去重后只剩 1 条事件, got=%d", count)
	}

	survivor := merged[0]
	if survivor.PrimarySource != "native_release" {
		t.Fatalf("原生事实不应被 discovery 覆盖: %+v", survivor)
	}
	if survivor.SourceKindLabel != "官方博客" {
		t.Fatalf("原生事实应补入 discovery 证据缺口: %+v", survivor)
	}
}
// A leak_or_rumor discovery event must not be merged in, even when its
// priority is higher than the native event's.
func TestMergeVerifiedDiscoveryEventsDropsUnverifiedPriceNarrative(t *testing.T) {
	nativeEvents := []signalModelEvent{
		{EventType: "new_model", ModelName: "DeepSeek-V4-Flash", ProviderName: "DeepSeek", Priority: 80},
	}
	rumorEvents := []signalModelEvent{
		{EventType: "leak_or_rumor", ModelName: "DeepSeek-V4-Flash", ProviderName: "DeepSeek", Priority: 130},
	}

	merged := mergeVerifiedDiscoveryEvents(nativeEvents, rumorEvents)
	onlyNativeLeft := len(merged) == 1 && merged[0].EventType == "new_model"
	if !onlyNativeLeft {
		t.Fatalf("非正式 discovery 事件不应进入正式快照: %+v", merged)
	}
}

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Runs the intraday pipeline for one report date, in order:
# candidate discovery -> candidate verification -> daily signal materialization.
#
# Usage: pass --dry-run as the first argument to forward --dry-run to every step.
# REPORT_DATE (YYYY-MM-DD) selects the date and defaults to today.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Source local env files when present. ".env" is sourced last, so an
# assignment there overrides the same variable from ".env.local".
for env_file in ".env.local" ".env"; do
  if [[ -f "$env_file" ]]; then
    # shellcheck disable=SC1090,SC1091
    source "$env_file"
  fi
done

# Fail fast when a required setting is missing or empty.
for required_var in DATABASE_URL INTRADAY_DISCOVERY_SEARCH_PROVIDER INTRADAY_DISCOVERY_LLM_PROVIDER; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "${required_var} 未设置" >&2
    exit 1
  fi
done

REPORT_DATE="${REPORT_DATE:-$(date +%F)}"

case "${1:-}" in
  --dry-run) DRY_RUN="true" ;;
  *) DRY_RUN="false" ;;
esac

# All three steps take the same flags.
step_args=(--date "$REPORT_DATE")
if [[ "$DRY_RUN" == "true" ]]; then
  step_args+=(--dry-run)
fi

go run -tags llm_script ./scripts/discover_intraday_news_candidates.go ./scripts/intraday_discovery_provider.go ./scripts/intraday_discovery_common.go "${step_args[@]}"
go run -tags llm_script ./scripts/verify_intraday_news_candidates.go ./scripts/intraday_discovery_common.go "${step_args[@]}"
REPORT_TRIGGER_SOURCE="intraday_discovery" go run -tags llm_script ./scripts/materialize_daily_signals.go "${step_args[@]}"

View File

@@ -0,0 +1,24 @@
[
{
"event_type": "official_release",
"provider_name": "OpenAI",
"model_name": "GPT-5.6",
"provider_country": "US",
"title": "GPT-5.6 preview pricing update",
"summary": "OpenAI preview material indicates GPT-5.6 entered a preview pricing window.",
"candidate_urls": [
"https://openai.example.com/news/gpt-5-6-pricing"
]
},
{
"event_type": "promo_campaign",
"provider_name": "DeepSeek",
"model_name": "DeepSeek-V4-Flash",
"provider_country": "CN",
"title": "DeepSeek V4 Flash campaign",
"summary": "Official campaign page shows a temporary promotional window for DeepSeek-V4-Flash.",
"candidate_urls": [
"https://deepseek.example.com/campaign/v4-flash"
]
}
]

View File

@@ -0,0 +1,18 @@
[
{
"title": "OpenAI announces GPT-5.6 preview pricing update",
"summary": "OpenAI preview announcement mentions GPT-5.6 and updated API pricing references.",
"url": "https://openai.example.com/news/gpt-5-6-pricing",
"provider": "OpenAI",
"provider_url": "https://openai.example.com",
"published_at": "2026-05-25T09:00:00Z"
},
{
"title": "DeepSeek launches V4 Flash campaign",
"summary": "Campaign page suggests temporary promotional pricing for DeepSeek-V4-Flash.",
"url": "https://deepseek.example.com/campaign/v4-flash",
"provider": "DeepSeek",
"provider_url": "https://deepseek.example.com",
"published_at": "2026-05-25T10:00:00Z"
}
]

View File

@@ -0,0 +1,7 @@
<html><body>
<article>
<h1>OpenAI announces GPT-5.6 preview pricing update</h1>
<p>GPT-5.6 preview is now available in official preview channels.</p>
<p>Published 2026-05-25.</p>
</article>
</body></html>

View File

@@ -0,0 +1,8 @@
<html><body>
<section>
<h1>DeepSeek-V4-Flash pricing</h1>
<p>Old price: $10</p>
<p>New price: $6</p>
<p>Campaign window active now.</p>
</section>
</body></html>

View File

@@ -0,0 +1,6 @@
<html><body>
<article>
<h1>Industry blog discusses GPT-5.6 leak</h1>
<p>Writers speculate GPT-5.6 may appear soon based on references.</p>
</article>
</body></html>

View File

@@ -0,0 +1,501 @@
//go:build llm_script
package main
import (
"context"
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"regexp"
"strings"
"time"
_ "github.com/lib/pq"
)
// verificationCandidateRow holds the columns read from intraday_news_candidate
// for one candidate that is waiting to be verified.
type verificationCandidateRow struct {
ID int64
// CandidateDate is the candidate_date column rendered as text.
CandidateDate string
EventType string
ProviderName string
// ModelName, ProviderCountry and Summary are "" when the column is NULL.
ModelName string
ProviderCountry string
Title string
Summary string
// CandidateURLs is decoded from the candidate_urls JSON column; each URL is
// fetched in turn during verification.
CandidateURLs []string
Status string
VerificationConfidence string
}
// intradayVerificationConfig carries the settings for one verification run.
type intradayVerificationConfig struct {
// Date is the candidate date to verify (-date flag, YYYY-MM-DD).
Date string
// DryRun skips all database writes and only prints the summary (-dry-run flag).
DryRun bool
// DatabaseURL is the DSN passed to sql.Open with the "postgres" driver.
DatabaseURL string
// Timeout is used as the HTTP client timeout when fetching candidate URLs.
Timeout time.Duration
}
// intradayVerificationResult is the outcome of verifying one candidate.
// CandidateStatus, VerificationConfidence and Notes are written back to
// intraday_news_candidate; the Verifier* fields, ExtractedFacts and Notes are
// inserted into intraday_news_verification.
type intradayVerificationResult struct {
CandidateID int64
// CandidateStatus values used in this file: "candidate", "verified"
// ("rejected" is only scored and counted, never assigned here).
CandidateStatus string
// VerificationConfidence values assigned in this file: "candidate",
// "secondary_confirmed", "official_confirmed", or the candidate's prior value.
VerificationConfidence string
// VerifierSource is the source classification of VerifierURL.
VerifierSource string
VerifierURL string
// VerifierStatus values assigned in this file: "insufficient", "matched", "error".
VerifierStatus string
// ExtractedFacts is stored as JSON; it holds price facts when a price pair was found.
ExtractedFacts map[string]any
Notes string
}
// intradayVerificationSummary is the per-run tally printed to stdout as JSON.
type intradayVerificationSummary struct {
// CandidateTotal is the number of results produced in the run.
CandidateTotal int `json:"candidate_total"`
// VerifiedTotal counts results whose candidate status is "verified".
VerifiedTotal int `json:"verified_total"`
// OfficialConfirmedTotal and SecondaryConfirmedTotal count results by
// verification confidence.
OfficialConfirmedTotal int `json:"official_confirmed_total"`
SecondaryConfirmedTotal int `json:"secondary_confirmed_total"`
// RejectedTotal counts results whose candidate status is "rejected".
RejectedTotal int `json:"rejected_total"`
DryRun bool `json:"dry_run"`
}
// main parses the -date and -dry-run flags, resolves the DSN and timeout, and
// runs candidate verification. Any failure is reported on stderr with exit
// code 1.
func main() {
	loadIntradayEnv()

	var (
		date   string
		dryRun bool
	)
	flag.StringVar(&date, "date", intradayDateValue(), "验证日期,格式 YYYY-MM-DD")
	flag.BoolVar(&dryRun, "dry-run", false, "仅输出摘要,不写数据库")
	flag.Parse()

	cfg := intradayVerificationConfig{
		Date:        date,
		DryRun:      dryRun,
		DatabaseURL: intradayDefaultDSN(),
		Timeout:     discoveryTimeoutFromEnv(),
	}
	err := runIntradayCandidateVerification(cfg)
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "verify_intraday_news_candidates: %v\n", err)
	os.Exit(1)
}
// runIntradayCandidateVerification loads the pending candidates for cfg.Date,
// verifies each one, persists the outcomes unless cfg.DryRun is set, and
// prints a JSON summary. It returns an error when the date is blank or when
// loading, persisting or printing fails.
func runIntradayCandidateVerification(cfg intradayVerificationConfig) error {
	if len(strings.TrimSpace(cfg.Date)) == 0 {
		return fmt.Errorf("date 未设置")
	}

	db, err := sql.Open("postgres", cfg.DatabaseURL)
	if err != nil {
		return fmt.Errorf("open db: %w", err)
	}
	defer db.Close()

	pending, err := loadIntradayVerificationCandidates(context.Background(), db, cfg.Date)
	if err != nil {
		return err
	}

	outcomes := make([]intradayVerificationResult, len(pending))
	for i, candidate := range pending {
		outcome, verifyErr := verifyIntradayCandidate(candidate, cfg.Timeout)
		if verifyErr != nil {
			// Keep the candidate pending and record why verification failed.
			outcome = intradayVerificationResult{
				CandidateID:            candidate.ID,
				CandidateStatus:        "candidate",
				VerificationConfidence: candidate.VerificationConfidence,
				VerifierStatus:         "error",
				Notes:                  verifyErr.Error(),
			}
		}
		outcomes[i] = outcome
	}

	if !cfg.DryRun {
		persistErr := persistIntradayVerificationResults(context.Background(), db, outcomes)
		if persistErr != nil {
			return persistErr
		}
	}

	summary := summarizeIntradayVerification(outcomes, cfg.DryRun)
	return printIntradayVerificationSummary(summary)
}
// loadIntradayVerificationCandidates returns the intraday_news_candidate rows
// for date whose status is "candidate" or "verifying", newest first. NULL text
// columns are read as "" and candidate_urls is decoded from JSON.
func loadIntradayVerificationCandidates(ctx context.Context, db *sql.DB, date string) ([]verificationCandidateRow, error) {
	const query = `
SELECT id, candidate_date::text, event_type, provider_name, COALESCE(model_name, ''), COALESCE(provider_country, ''),
title, COALESCE(summary, ''), COALESCE(candidate_urls::text, '[]'), status, verification_confidence
FROM intraday_news_candidate
WHERE candidate_date = $1::date
AND status IN ('candidate', 'verifying')
ORDER BY discovered_at DESC, id DESC`

	rows, err := db.QueryContext(ctx, query, date)
	if err != nil {
		return nil, fmt.Errorf("query intraday candidates: %w", err)
	}
	defer rows.Close()

	var loaded []verificationCandidateRow
	for rows.Next() {
		var (
			item     verificationCandidateRow
			urlsJSON string
		)
		scanErr := rows.Scan(
			&item.ID,
			&item.CandidateDate,
			&item.EventType,
			&item.ProviderName,
			&item.ModelName,
			&item.ProviderCountry,
			&item.Title,
			&item.Summary,
			&urlsJSON,
			&item.Status,
			&item.VerificationConfidence,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan intraday candidate: %w", scanErr)
		}
		if decodeErr := json.Unmarshal([]byte(urlsJSON), &item.CandidateURLs); decodeErr != nil {
			return nil, fmt.Errorf("unmarshal candidate urls: %w", decodeErr)
		}
		loaded = append(loaded, item)
	}
	return loaded, rows.Err()
}
// verifyIntradayCandidate fetches each candidate URL in order, evaluates the
// document, and returns the best-ranked outcome. It stops early once an
// officially confirmed verification is found. The returned error is always
// nil; fetch failures become "error" outcomes that compete in the ranking.
//
// An outcome replaces the current best only when preferVerificationResult
// ranks it strictly higher, so the initial "insufficient" placeholder is
// returned when no URL improves on it.
func verifyIntradayCandidate(candidate verificationCandidateRow, timeout time.Duration) (intradayVerificationResult, error) {
	// unresolved builds an outcome that leaves the candidate pending.
	unresolved := func(verifierURL, verifierStatus, notes string) intradayVerificationResult {
		return intradayVerificationResult{
			CandidateID:            candidate.ID,
			CandidateStatus:        "candidate",
			VerificationConfidence: candidate.VerificationConfidence,
			VerifierURL:            verifierURL,
			VerifierStatus:         verifierStatus,
			Notes:                  notes,
			ExtractedFacts:         map[string]any{},
		}
	}

	httpClient := &http.Client{Timeout: timeout}
	winner := unresolved("", "insufficient", "未找到足够证据")

	for _, link := range candidate.CandidateURLs {
		document, fetchErr := fetchVerificationDocument(link, httpClient)
		if fetchErr != nil {
			winner = preferVerificationResult(winner, unresolved(link, "error", fetchErr.Error()))
			continue
		}

		outcome := verifyCandidateDocument(candidate, link, document)
		if outcome.CandidateID == 0 {
			outcome.CandidateID = candidate.ID
		}
		winner = preferVerificationResult(winner, outcome)

		officiallyConfirmed := winner.CandidateStatus == "verified" &&
			winner.VerificationConfidence == "official_confirmed"
		if officiallyConfirmed {
			return winner, nil
		}
	}
	return winner, nil
}
// fetchVerificationDocument GETs rawURL with client and returns the response
// body as a string.
//
// The URLs come from discovery results, so the remote side is untrusted. At
// most 4 MiB of a successful response is read (longer documents are
// truncated), and a non-2xx response contributes only a short, UTF-8-safe
// snippet to the returned error rather than its whole body.
//
// It returns an error when the request cannot be built, the fetch fails, the
// status is outside 2xx, or the body cannot be read.
func fetchVerificationDocument(rawURL string, client *http.Client) (string, error) {
	const (
		maxDocumentBytes     = 4 << 20
		maxErrorSnippetBytes = 512
	)

	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
	if err != nil {
		return "", fmt.Errorf("build verification request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; llm-intelligence intraday verifier)")

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("fetch verification document: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the snippet only adds context, so a read error is ignored.
		raw, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorSnippetBytes))
		// The byte limit may cut a multi-byte character; drop the broken tail.
		snippet := strings.TrimSpace(strings.ToValidUTF8(string(raw), ""))
		return "", fmt.Errorf("fetch verification document: unexpected status %d: %s", resp.StatusCode, snippet)
	}

	payload, err := io.ReadAll(io.LimitReader(resp.Body, maxDocumentBytes))
	if err != nil {
		return "", fmt.Errorf("read verification document: %w", err)
	}
	return string(payload), nil
}
// verifyCandidateDocument evaluates one fetched document against a candidate.
//
// A document from a non-official source can at most yield "verified" with
// "secondary_confirmed" confidence, and only when the body mentions the
// model name or title. A document from an official source is judged by event
// type:
//   - official_release: confirmed when the model is mentioned
//   - promo_campaign:   confirmed when the model and a promo marker are mentioned
//   - price_cut/price_increase: confirmed when a matching price change is extracted
//   - leak_or_rumor:    kept at "secondary_confirmed" when the model is mentioned
//
// Everything else stays "candidate" with verifier status "insufficient".
func verifyCandidateDocument(candidate verificationCandidateRow, candidateURL, body string) intradayVerificationResult {
	source := classifyVerificationSource(candidate.ProviderName, candidateURL)
	facts := extractVerificationFacts(body)
	modelMatched := bodyMentionsModel(body, candidate.ModelName, candidate.Title)

	outcome := intradayVerificationResult{
		CandidateID:            candidate.ID,
		CandidateStatus:        "candidate",
		VerificationConfidence: "candidate",
		VerifierSource:         source,
		VerifierURL:            candidateURL,
		VerifierStatus:         "insufficient",
		ExtractedFacts:         facts,
		Notes:                  "证据不足",
	}
	// confirm marks the outcome as a verified match at the given confidence.
	confirm := func(confidence, notes string) {
		outcome.CandidateStatus = "verified"
		outcome.VerificationConfidence = confidence
		outcome.VerifierStatus = "matched"
		outcome.Notes = notes
	}

	if !isOfficialVerificationSource(source) {
		if modelMatched {
			confirm("secondary_confirmed", "仅二手来源命中,不能进入正式事实层")
		}
		return outcome
	}

	switch normalizeIntradayEventType(candidate.EventType) {
	case "official_release":
		if modelMatched {
			confirm("official_confirmed", "官方页面命中模型发布线索")
		}
	case "promo_campaign":
		if modelMatched && bodyMentionsPromo(body) {
			confirm("official_confirmed", "官方页面命中活动窗口或促销语义")
		}
	case "price_cut", "price_increase":
		priceFacts, priceConfirmed := deriveVerifiedPriceEvent(candidate.EventType, facts)
		switch {
		case priceConfirmed:
			confirm("official_confirmed", "官方价格页命中真实价格变化")
			outcome.ExtractedFacts = priceFacts
		case modelMatched:
			outcome.VerifierStatus = "insufficient"
			outcome.Notes = "命中模型但缺少可计算的价格变化事实"
		}
	case "leak_or_rumor":
		if modelMatched {
			confirm("secondary_confirmed", "保留为待确认情报,不进入正式事实层")
		}
	}
	return outcome
}
// isOfficialVerificationSource reports whether source is one of the
// classifications that count as a first-party provider page.
func isOfficialVerificationSource(source string) bool {
	return source == "official_page" ||
		source == "official_docs" ||
		source == "official_blog" ||
		source == "pricing_page"
}
// classifyVerificationSource labels rawURL relative to providerName. URLs
// that fail to parse or whose host is not recognized as the provider's are
// "secondary_media". Official hosts are refined, in this order, into
// "official_docs", "pricing_page", "official_blog" or "official_page".
func classifyVerificationSource(providerName, rawURL string) string {
	const secondary = "secondary_media"

	parsed, err := url.Parse(rawURL)
	if err != nil {
		return secondary
	}
	host, path := strings.ToLower(parsed.Host), strings.ToLower(parsed.Path)
	if !isOfficialProviderHost(providerName, host) {
		return secondary
	}

	// pathHasAny reports whether the URL path contains any of the fragments.
	pathHasAny := func(fragments ...string) bool {
		for _, fragment := range fragments {
			if strings.Contains(path, fragment) {
				return true
			}
		}
		return false
	}

	if strings.Contains(host, "docs.") || pathHasAny("/docs") {
		return "official_docs"
	}
	if strings.Contains(host, "pricing") || pathHasAny("pricing", "price") {
		return "pricing_page"
	}
	if pathHasAny("blog", "news", "announcement") {
		return "official_blog"
	}
	return "official_page"
}
// isOfficialProviderHost reports whether host belongs to providerName.
//
// host may carry a port (as url.URL.Host does) and any letter case. Tokens
// that look like domains (contain a dot) must match on a domain boundary:
// the host equals the token or ends with "."+token. Plain substring matching
// would accept look-alike hosts such as "openai.com.evil.example" or
// "notopenai.com", and would let "minimax.ai" pass for the token "x.ai".
//
// Tokens without a dot only occur for providers that have no registered
// domain (the fallback in providerHostTokens). They keep the best-effort
// substring heuristic, so such providers are matched loosely.
func isOfficialProviderHost(providerName, host string) bool {
	host = strings.ToLower(strings.TrimSpace(host))
	// Strip a trailing ":port" but leave a bracketed IPv6 literal intact.
	if i := strings.LastIndexByte(host, ':'); i >= 0 && !strings.Contains(host[i:], "]") {
		host = host[:i]
	}
	host = strings.TrimSuffix(host, ".")
	if host == "" {
		return false
	}

	for _, token := range providerHostTokens(providerName) {
		switch {
		case token == "":
			continue
		case strings.Contains(token, "."):
			if host == token || strings.HasSuffix(host, "."+token) {
				return true
			}
		case strings.Contains(host, token):
			return true
		}
	}
	return false
}

// providerHostTokens returns the host tokens treated as official for
// providerName (case-insensitive, surrounding whitespace ignored). Known
// providers map to their registered domains; any other non-blank name is
// returned lower-cased as a single token; a blank name yields nil.
func providerHostTokens(providerName string) []string {
	switch strings.ToLower(strings.TrimSpace(providerName)) {
	case "openai":
		return []string{"openai.com"}
	case "anthropic":
		return []string{"anthropic.com"}
	case "google", "google gemini", "gemini":
		return []string{"google.com", "google.dev", "ai.google.dev"}
	case "deepseek":
		return []string{"deepseek.com", "deepseek.ai"}
	case "qwen", "dashscope":
		return []string{"aliyun.com", "dashscope.com"}
	case "xai":
		return []string{"x.ai"}
	case "智谱":
		return []string{"zhipuai.cn"}
	case "百度", "百度文心":
		return []string{"baidu.com", "cloud.baidu.com"}
	case "腾讯", "腾讯混元":
		return []string{"tencent.com", "cloud.tencent.com"}
	case "minimax":
		return []string{"minimax.io", "minimax.chat"}
	default:
		clean := strings.ToLower(strings.TrimSpace(providerName))
		if clean == "" {
			return nil
		}
		return []string{clean}
	}
}
// bodyMentionsModel reports whether the normalized body contains the
// normalized model name or the normalized title. Inputs that normalize to ""
// are ignored.
func bodyMentionsModel(body, modelName, title string) bool {
	haystack := normalizeEvidenceText(body)
	needles := [...]string{modelName, title}
	for i := range needles {
		needle := normalizeEvidenceText(needles[i])
		if needle == "" {
			continue
		}
		if strings.Contains(haystack, needle) {
			return true
		}
	}
	return false
}
// bodyMentionsPromo reports whether body contains any promotion marker
// (English markers are matched case-insensitively).
func bodyMentionsPromo(body string) bool {
	markers := [...]string{"campaign", "promo", "promotion", "discount", "活动", "优惠", "限时", "窗口"}
	haystack := strings.ToLower(body)
	for i := 0; i < len(markers); i++ {
		if strings.Contains(haystack, markers[i]) {
			return true
		}
	}
	return false
}
// extractVerificationFacts returns the price facts found in body. The map is
// empty when no old/new price pair is found; otherwise it holds
// "old_input_price" and "new_input_price", plus "price_change_pct" when the
// old price is non-zero.
func extractVerificationFacts(body string) map[string]any {
	before, after, found := extractPricePair(body)
	if !found {
		return map[string]any{}
	}
	facts := map[string]any{
		"old_input_price": before,
		"new_input_price": after,
	}
	// Skip the percentage when it would divide by zero.
	if before != 0 {
		facts["price_change_pct"] = (after - before) / before * 100
	}
	return facts
}
// deriveVerifiedPriceEvent checks that facts describe a real price change in
// the direction eventType claims. It returns (facts, true) when both prices
// and the percentage are present as float64, both prices are positive, and
// the change is negative for "price_cut" or positive for "price_increase".
// Otherwise it returns (nil, false).
func deriveVerifiedPriceEvent(eventType string, facts map[string]any) (map[string]any, bool) {
	before, hasBefore := facts["old_input_price"].(float64)
	after, hasAfter := facts["new_input_price"].(float64)
	changePct, hasPct := facts["price_change_pct"].(float64)

	complete := hasBefore && hasAfter && hasPct
	if !complete || before <= 0 || after <= 0 {
		return nil, false
	}

	switch normalizeIntradayEventType(eventType) {
	case "price_cut":
		if changePct >= 0 {
			return nil, false
		}
	case "price_increase":
		if changePct <= 0 {
			return nil, false
		}
	}
	return facts, true
}
// Patterns for the "old/from price" and "new/to price" statements. They are
// compiled once at package scope. The leading \b keeps the keyword from
// matching inside a longer word ("Threshold price", "Photo price").
var (
	verificationOldPriceRe = regexp.MustCompile(`(?i)\b(?:old|from)\s*price[^0-9$¥]*[$¥]?([0-9]+(?:\.[0-9]+)?)`)
	verificationNewPriceRe = regexp.MustCompile(`(?i)\b(?:new|to)\s*price[^0-9$¥]*[$¥]?([0-9]+(?:\.[0-9]+)?)`)
)

// extractPricePair finds the first "old price"/"from price" amount and the
// first "new price"/"to price" amount in body. It returns (old, new, true)
// when both are present and parse as numbers, and (0, 0, false) otherwise.
// An optional "$" or "¥" before the amount is accepted.
func extractPricePair(body string) (float64, float64, bool) {
	oldMatch := verificationOldPriceRe.FindStringSubmatch(body)
	newMatch := verificationNewPriceRe.FindStringSubmatch(body)
	if len(oldMatch) < 2 || len(newMatch) < 2 {
		return 0, 0, false
	}

	var oldValue, newValue float64
	if _, err := fmt.Sscanf(oldMatch[1], "%f", &oldValue); err != nil {
		return 0, 0, false
	}
	if _, err := fmt.Sscanf(newMatch[1], "%f", &newValue); err != nil {
		return 0, 0, false
	}
	return oldValue, newValue, true
}
// evidenceNoiseRe matches every run of characters that is not a lower-case
// ASCII letter, a digit or a Han character. It is compiled once here because
// normalizeEvidenceText runs several times per verified document.
var evidenceNoiseRe = regexp.MustCompile(`[^a-z0-9\p{Han}]+`)

// normalizeEvidenceText lower-cases value and strips everything except
// ASCII letters, digits and Han characters, so that "GPT-5.6" and "gpt 5 6"
// compare equal. No whitespace can remain after the strip, so no trim is
// needed.
func normalizeEvidenceText(value string) string {
	return evidenceNoiseRe.ReplaceAllString(strings.ToLower(value), "")
}
// preferVerificationResult returns next only when it scores strictly higher
// than current; on a tie the current result is kept.
func preferVerificationResult(current, next intradayVerificationResult) intradayVerificationResult {
	if verificationScore(next) <= verificationScore(current) {
		return current
	}
	return next
}
// verificationScore ranks a result by adding points for its candidate status,
// verification confidence and verifier status. Values not listed score 0.
func verificationScore(result intradayVerificationResult) int {
	statusPoints := map[string]int{"verified": 20, "rejected": 5}
	confidencePoints := map[string]int{"official_confirmed": 10, "secondary_confirmed": 5}
	verifierPoints := map[string]int{"matched": 3, "contradicted": 1}

	// Lookups of unknown keys return the zero value, i.e. no points.
	return statusPoints[result.CandidateStatus] +
		confidencePoints[result.VerificationConfidence] +
		verifierPoints[result.VerifierStatus]
}
// persistIntradayVerificationResults writes each result to the database in
// order and stops at the first failure. Results written before the failure
// stay committed.
func persistIntradayVerificationResults(ctx context.Context, db *sql.DB, results []intradayVerificationResult) error {
	for _, result := range results {
		if err := persistIntradayVerificationResult(ctx, db, result); err != nil {
			return err
		}
	}
	return nil
}

// persistIntradayVerificationResult stores one result: it inserts the audit
// row into intraday_news_verification and updates the matching
// intraday_news_candidate row. Both statements run in one transaction so a
// failure cannot leave an audit row next to a stale candidate status.
func persistIntradayVerificationResult(ctx context.Context, db *sql.DB, result intradayVerificationResult) error {
	facts, err := json.Marshal(result.ExtractedFacts)
	if err != nil {
		return fmt.Errorf("marshal extracted facts: %w", err)
	}

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("begin intraday verification tx: %w", err)
	}
	// Rollback after a successful Commit is a no-op that returns sql.ErrTxDone.
	defer func() { _ = tx.Rollback() }()

	_, err = tx.ExecContext(ctx, `
INSERT INTO intraday_news_verification (
candidate_id, verifier_source, verifier_url, verifier_status, extracted_facts, notes
) VALUES ($1, NULLIF($2, ''), NULLIF($3, ''), $4, $5::jsonb, NULLIF($6, ''))`,
		result.CandidateID,
		result.VerifierSource,
		result.VerifierURL,
		result.VerifierStatus,
		string(facts),
		result.Notes,
	)
	if err != nil {
		return fmt.Errorf("insert intraday verification: %w", err)
	}

	_, err = tx.ExecContext(ctx, `
UPDATE intraday_news_candidate
SET status = $2,
verification_confidence = $3,
verification_notes = NULLIF($4, ''),
updated_at = CURRENT_TIMESTAMP
WHERE id = $1`,
		result.CandidateID,
		result.CandidateStatus,
		result.VerificationConfidence,
		result.Notes,
	)
	if err != nil {
		return fmt.Errorf("update intraday candidate: %w", err)
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit intraday verification: %w", err)
	}
	return nil
}
// summarizeIntradayVerification tallies results by candidate status and by
// verification confidence, and records whether the run was a dry run.
func summarizeIntradayVerification(results []intradayVerificationResult, dryRun bool) intradayVerificationSummary {
	var verified, rejected, official, secondary int
	for i := range results {
		item := &results[i]

		switch item.CandidateStatus {
		case "verified":
			verified++
		case "rejected":
			rejected++
		}

		if item.VerificationConfidence == "official_confirmed" {
			official++
		} else if item.VerificationConfidence == "secondary_confirmed" {
			secondary++
		}
	}

	return intradayVerificationSummary{
		CandidateTotal:          len(results),
		VerifiedTotal:           verified,
		OfficialConfirmedTotal:  official,
		SecondaryConfirmedTotal: secondary,
		RejectedTotal:           rejected,
		DryRun:                  dryRun,
	}
}
// printIntradayVerificationSummary writes summary to stdout as a single line
// of JSON. It returns the marshalling error, if any.
func printIntradayVerificationSummary(summary intradayVerificationSummary) error {
	encoded, err := json.Marshal(summary)
	if err == nil {
		fmt.Printf("%s\n", encoded)
	}
	return err
}

View File

@@ -0,0 +1,99 @@
//go:build llm_script
package main
import (
"os"
"path/filepath"
"testing"
)
// An official_release candidate whose model appears on an official provider
// page must be verified with official confidence.
func TestVerifyCandidateDocumentOfficialRelease(t *testing.T) {
	fixturePath := filepath.Join("testdata", "intraday_verification_official_release.html")
	html, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 official release fixture 失败: %v", err)
	}

	releaseCandidate := verificationCandidateRow{
		ID:           1,
		EventType:    "official_release",
		ProviderName: "OpenAI",
		ModelName:    "GPT-5.6",
		Title:        "GPT-5.6 preview pricing update",
	}
	got := verifyCandidateDocument(releaseCandidate, "https://openai.com/news/gpt-5-6-preview", string(html))

	confirmed := got.CandidateStatus == "verified" && got.VerificationConfidence == "official_confirmed"
	if !confirmed {
		t.Fatalf("官方发布应被确认: %+v", got)
	}
}
// A price_cut candidate must be confirmed when the official pricing page
// states an old and a lower new price.
func TestVerifyCandidateDocumentPriceCutNeedsRealPriceFacts(t *testing.T) {
	fixturePath := filepath.Join("testdata", "intraday_verification_pricing_page.html")
	html, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 pricing fixture 失败: %v", err)
	}

	priceCutCandidate := verificationCandidateRow{
		ID:           2,
		EventType:    "price_cut",
		ProviderName: "DeepSeek",
		ModelName:    "DeepSeek-V4-Flash",
		Title:        "DeepSeek-V4-Flash price cut",
	}
	got := verifyCandidateDocument(priceCutCandidate, "https://deepseek.com/pricing/v4-flash", string(html))

	confirmed := got.CandidateStatus == "verified" && got.VerificationConfidence == "official_confirmed"
	if !confirmed {
		t.Fatalf("价格页命中真实价格变化后应确认: %+v", got)
	}
}
// A promo_campaign candidate must be confirmed when an official page mentions
// both the model and a campaign marker. The pricing fixture is reused because
// it contains the "Campaign window" text.
func TestVerifyCandidateDocumentPromoCampaignOfficial(t *testing.T) {
	fixturePath := filepath.Join("testdata", "intraday_verification_pricing_page.html")
	html, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 promo fixture 失败: %v", err)
	}

	promoCandidate := verificationCandidateRow{
		ID:           3,
		EventType:    "promo_campaign",
		ProviderName: "DeepSeek",
		ModelName:    "DeepSeek-V4-Flash",
		Title:        "DeepSeek V4 Flash campaign",
	}
	got := verifyCandidateDocument(promoCandidate, "https://deepseek.com/campaign/v4-flash", string(html))

	confirmed := got.CandidateStatus == "verified" && got.VerificationConfidence == "official_confirmed"
	if !confirmed {
		t.Fatalf("官方活动页应被确认: %+v", got)
	}
}
// An official_release candidate backed only by a third-party blog must be
// downgraded to secondary confidence.
func TestVerifyCandidateDocumentSecondaryMediaDowngrades(t *testing.T) {
	fixturePath := filepath.Join("testdata", "intraday_verification_secondary_media.html")
	html, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 secondary fixture 失败: %v", err)
	}

	blogBackedCandidate := verificationCandidateRow{
		ID:           4,
		EventType:    "official_release",
		ProviderName: "OpenAI",
		ModelName:    "GPT-5.6",
		Title:        "GPT-5.6 leak discussion",
	}
	got := verifyCandidateDocument(blogBackedCandidate, "https://someblog.example.com/gpt-5-6-leak", string(html))

	if confidence := got.VerificationConfidence; confidence != "secondary_confirmed" {
		t.Fatalf("二手媒体应降级为 secondary_confirmed: %+v", got)
	}
}
// A leak_or_rumor candidate must never reach official confidence.
func TestVerifyCandidateDocumentLeakStaysOutOfOfficialFacts(t *testing.T) {
	fixturePath := filepath.Join("testdata", "intraday_verification_secondary_media.html")
	html, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("读取 leak fixture 失败: %v", err)
	}

	leakCandidate := verificationCandidateRow{
		ID:           5,
		EventType:    "leak_or_rumor",
		ProviderName: "OpenAI",
		ModelName:    "GPT-5.6",
		Title:        "GPT-5.6 leak discussion",
	}
	got := verifyCandidateDocument(leakCandidate, "https://someblog.example.com/gpt-5-6-leak", string(html))

	if confidence := got.VerificationConfidence; confidence == "official_confirmed" {
		t.Fatalf("泄露类不应升级为正式事实: %+v", got)
	}
}