Files
llm-intelligence/scripts/import_xfyun_pricing.go

218 lines
6.1 KiB
Go

//go:build llm_script
package main
import (
"context"
"database/sql"
"flag"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"regexp"
"strings"
"time"
)
// defaultXfyunPricingURL is the default value of the -url flag. The parser in
// this file also records it as the operator website, source URL and model
// source URL of every pricing record it builds.
const defaultXfyunPricingURL = "https://xinghuo.xfyun.cn/sparkapi?scr=price"
// xfyunPricingImportConfig carries the command-line options of the xfyun
// pricing import.
type xfyunPricingImportConfig struct {
// URL is the pricing page fetched when no fixture is configured.
URL string
// Fixture is the path of a local file read instead of fetching URL; it is
// used whenever it is not blank.
Fixture string
// DryRun makes the import parse and print a summary without touching the
// database.
DryRun bool
// Timeout bounds both the plain HTTP fetch and the Chromium render.
Timeout time.Duration
}
// xfyunPricingCardPattern matches one pricing card in the page markup: a
// title <div> immediately followed by a price <div>. Capture group 1 is the
// card title text and capture group 2 is the decimal price that precedes the
// "元/百万tokens" unit label. The class names are matched by prefix only; the
// part after "__" is presumably a build-generated suffix — verify against the
// live page if the pattern stops matching.
var xfyunPricingCardPattern = regexp.MustCompile(`(?s)<div class="apiprice_cardTitle__[^"]+">([^<]+)</div><div class="apiprice_cardPrice__[^"]+"><span>([0-9]+(?:\.[0-9]+)?)</span><span class="[^"]+">元/百万tokens</span>`)
// main parses the command-line flags, opens the database unless -dry-run is
// set, and runs the xfyun pricing import, writing the summary to stdout.
//
// Any failure is reported on stderr and the process exits with status 1.
// The database handle is closed explicitly rather than with defer, because
// os.Exit terminates the process without running deferred calls.
func main() {
	loadSubscriptionImportEnv()

	var (
		cfg            xfyunPricingImportConfig
		timeoutSeconds int
	)
	flag.StringVar(&cfg.URL, "url", defaultXfyunPricingURL, "讯飞官方价格页")
	flag.StringVar(&cfg.Fixture, "fixture", "", "讯飞价格样例文件")
	flag.BoolVar(&cfg.DryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", 30, "请求超时(秒)")
	flag.Parse()
	cfg.Timeout = time.Duration(timeoutSeconds) * time.Second

	// A dry run never touches the database, so db stays nil in that mode.
	var db *sql.DB
	if !cfg.DryRun {
		var err error
		db, err = subscriptionImportDB()
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			os.Exit(1)
		}
	}

	runErr := runXfyunPricingImport(cfg, db, os.Stdout)

	// Close before deciding the exit status so the handle is released on the
	// failure path as well. A close error is reported but does not change the
	// outcome of the import itself.
	if db != nil {
		if closeErr := db.Close(); closeErr != nil {
			fmt.Fprintf(os.Stderr, "close db: %v\n", closeErr)
		}
	}

	if runErr != nil {
		fmt.Fprintf(os.Stderr, "import_xfyun_pricing: %v\n", runErr)
		os.Exit(1)
	}
}
// runXfyunPricingImport fetches the pricing page described by cfg, parses it
// into pricing records, passes them through dedupeOfficialPricingRecords and
// writes a one-line summary to out.
//
// In dry-run mode nothing is written to the database and db may be nil.
// Otherwise db is required: the records are upserted and the summary also
// reports the total row count of the region_pricing table.
//
// It returns the first error from fetching, parsing, upserting, counting or
// writing the summary, and an error when no records remain.
func runXfyunPricingImport(cfg xfyunPricingImportConfig, db *sql.DB, out io.Writer) error {
	page, err := fetchXfyunPricingPage(cfg)
	if err != nil {
		return err
	}

	records, err := parseXfyunPricingCatalog(page)
	if err != nil {
		return err
	}

	records = dedupeOfficialPricingRecords(records)
	if len(records) == 0 {
		return fmt.Errorf("unexpected xfyun pricing content: no records")
	}

	switch {
	case cfg.DryRun:
		_, writeErr := fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
		return writeErr
	case db == nil:
		return fmt.Errorf("db is required when dry-run=false")
	}

	if upsertErr := upsertOfficialPricingRecords(db, records, "xfyun-pricing-import"); upsertErr != nil {
		return upsertErr
	}

	// The count covers the whole table, not only the rows written above.
	var tableRows int
	countRow := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`)
	if scanErr := countRow.Scan(&tableRows); scanErr != nil {
		return fmt.Errorf("count region_pricing: %w", scanErr)
	}

	_, writeErr := fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
	return writeErr
}
// fetchXfyunPricingPage returns the pricing page markup for cfg.
//
// When cfg.Fixture is not blank, the fixture file is read and returned as is.
// Otherwise the page is fetched over HTTP with cfg.Timeout; if that fetch
// fails, or the response does not contain the "apiprice_cardTitle__" marker,
// the page is rendered with a headless Chromium instead. When both attempts
// fail, the returned error reports both causes.
func fetchXfyunPricingPage(cfg xfyunPricingImportConfig) (string, error) {
	if fixturePath := cfg.Fixture; strings.TrimSpace(fixturePath) != "" {
		contents, readErr := os.ReadFile(fixturePath)
		if readErr != nil {
			return "", fmt.Errorf("read fixture %s: %w", fixturePath, readErr)
		}
		return string(contents), nil
	}

	httpClient := &http.Client{Timeout: cfg.Timeout}
	static, staticErr := fetchRawPricingPage(cfg.URL, "", httpClient)
	if staticErr == nil && strings.Contains(static, "apiprice_cardTitle__") {
		return static, nil
	}

	// The static response was unusable (error, or no pricing cards in the
	// markup), so fall back to a browser render.
	rendered, renderErr := fetchXfyunPricingPageWithChromium(cfg.URL, cfg.Timeout)
	switch {
	case renderErr == nil:
		return rendered, nil
	case staticErr != nil:
		return "", fmt.Errorf("fetch shell failed: %v; chromium render failed: %w", staticErr, renderErr)
	default:
		return "", renderErr
	}
}
// fetchXfyunPricingPageWithChromium renders url in a headless
// Chromium-compatible browser found on PATH and returns the DOM printed by
// --dump-dom.
//
// A positive timeout bounds the browser process. A zero or negative timeout
// means "no deadline", matching how http.Client treats its Timeout field;
// previously such a value produced an already-expired context and the render
// could never succeed.
//
// It returns an error when no browser is found, when the deadline is
// exceeded, when the browser exits unsuccessfully, or when it prints nothing.
//
// NOTE(review): --no-sandbox disables Chromium's sandbox; this is only
// appropriate while url is operator-supplied and trusted.
func fetchXfyunPricingPageWithChromium(url string, timeout time.Duration) (string, error) {
	browserPath, err := lookupChromiumBinary()
	if err != nil {
		return "", err
	}

	ctx := context.Background()
	var cancel context.CancelFunc = func() {}
	if timeout > 0 {
		ctx, cancel = context.WithTimeout(ctx, timeout)
	}
	defer cancel()

	cmd := exec.CommandContext(ctx, browserPath,
		"--headless",
		"--no-sandbox",
		"--disable-gpu",
		"--dump-dom",
		url,
	)
	// Browser diagnostics on stderr are deliberately dropped; only the DOM
	// on stdout is of interest.
	cmd.Stderr = io.Discard

	out, err := cmd.Output()
	// Check the deadline first: a killed process also yields a non-nil err,
	// and the timeout message is the more useful one.
	if ctx.Err() == context.DeadlineExceeded {
		return "", fmt.Errorf("chromium render timeout after %s", timeout)
	}
	if err != nil {
		return "", fmt.Errorf("chromium dump-dom: %w", err)
	}
	if len(out) == 0 {
		return "", fmt.Errorf("chromium dump-dom returned empty output")
	}
	return string(out), nil
}
// lookupChromiumBinary returns the resolved path of the first
// Chromium-compatible browser found on PATH, trying the known executable
// names in a fixed order. It returns an error when none of them resolves.
func lookupChromiumBinary() (string, error) {
	candidates := [...]string{
		"chromium",
		"chromium-browser",
		"google-chrome",
		"google-chrome-stable",
	}
	for i := 0; i < len(candidates); i++ {
		resolved, lookErr := exec.LookPath(candidates[i])
		if lookErr != nil {
			continue
		}
		return resolved, nil
	}
	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
}
// parseXfyunPricingCatalog extracts pricing records from the page markup in
// raw using xfyunPricingCardPattern.
//
// Each matched card contributes one record when its title maps to a known
// model name via xfyunCanonicalModelName; cards with unknown titles are
// skipped. The single price shown on a card is used as both the input and the
// output price, and a price of zero marks the record as free.
//
// It returns an error when no card matches, or when every matched card is
// skipped.
func parseXfyunPricingCatalog(raw string) ([]officialPricingRecord, error) {
	cards := xfyunPricingCardPattern.FindAllStringSubmatch(raw, -1)
	if len(cards) == 0 {
		return nil, fmt.Errorf("unexpected xfyun pricing content: no pricing cards found")
	}

	nameCn, country, website := providerMetadata("iFlytek")

	// Fields that are identical for every record on this page.
	template := officialPricingRecord{
		ProviderName:    "iFlytek",
		ProviderNameCn:  nameCn,
		ProviderCountry: country,
		ProviderWebsite: website,
		OperatorName:    "Spark API",
		OperatorNameCn:  "讯飞星火 API",
		OperatorCountry: "CN",
		OperatorWebsite: defaultXfyunPricingURL,
		OperatorType:    "official",
		Region:          "CN",
		Currency:        "CNY",
		SourceURL:       defaultXfyunPricingURL,
		ModelSourceURL:  defaultXfyunPricingURL,
		DateConfidence:  "unknown",
		DateSourceKind:  "official_pricing",
		Modality:        "text",
	}

	records := make([]officialPricingRecord, 0, len(cards))
	for i := range cards {
		card := cards[i]
		// Expect the full match plus the two capture groups.
		if len(card) != 3 {
			continue
		}
		modelName := xfyunCanonicalModelName(strings.TrimSpace(card[1]))
		if modelName == "" {
			continue
		}
		// The pattern only admits plain decimal numerals here.
		price := mustParseSubscriptionPrice(card[2])

		record := template
		record.ModelID = normalizeExternalID("xfyun", modelName)
		record.ModelName = modelName
		record.InputPrice = price
		record.OutputPrice = price
		record.IsFree = price == 0
		records = append(records, record)
	}

	if len(records) == 0 {
		return nil, fmt.Errorf("unexpected xfyun pricing content: empty records after canonical mapping")
	}
	return records, nil
}
// xfyunCanonicalModelName maps a pricing-card title to its canonical model
// name. Surrounding whitespace in title is ignored; the comparison is
// otherwise exact. It returns "" for titles it does not recognise.
func xfyunCanonicalModelName(title string) string {
	canonicalByTitle := map[string]string{
		"X2/X1.5模型": "Spark X2/X1.5",
		"Ultra模型":   "Spark Ultra",
		"Pro模型":     "Spark Pro",
		"Lite模型":    "Spark Lite",
	}
	if canonical, known := canonicalByTitle[strings.TrimSpace(title)]; known {
		return canonical
	}
	return ""
}