218 lines
6.1 KiB
Go
218 lines
6.1 KiB
Go
//go:build llm_script
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// defaultXfyunPricingURL is the default value of the -url flag. It is also
// stamped on every parsed record as the operator website, the pricing source
// URL and the model source URL.
const defaultXfyunPricingURL = "https://xinghuo.xfyun.cn/sparkapi?scr=price"
|
|
|
|
// xfyunPricingImportConfig holds the options for a single import run; main
// fills it from the command-line flags.
type xfyunPricingImportConfig struct {
	// URL is the pricing page fetched when no fixture is configured.
	URL string

	// Fixture, when not blank, names a local file that is read instead of
	// fetching URL.
	Fixture string

	// DryRun makes the run parse and print a summary without touching the
	// database.
	DryRun bool

	// Timeout bounds both the plain HTTP fetch and the headless-browser
	// render fallback.
	Timeout time.Duration
}
|
|
|
|
// xfyunPricingCardPattern matches one pricing card in the page HTML: a title
// <div> whose class starts with "apiprice_cardTitle__", immediately followed
// (no whitespace between tags) by a price <div> whose class starts with
// "apiprice_cardPrice__" and which contains a numeric <span> and a unit <span>
// reading "元/百万tokens".
//
// Capture group 1 is the raw card title; capture group 2 is the price as an
// integer or decimal string.
var xfyunPricingCardPattern = regexp.MustCompile(`(?s)<div class="apiprice_cardTitle__[^"]+">([^<]+)</div><div class="apiprice_cardPrice__[^"]+"><span>([0-9]+(?:\.[0-9]+)?)</span><span class="[^"]+">元/百万tokens</span>`)
|
|
|
|
func main() {
|
|
loadSubscriptionImportEnv()
|
|
|
|
var url string
|
|
var fixture string
|
|
var dryRun bool
|
|
var timeoutSeconds int
|
|
|
|
flag.StringVar(&url, "url", defaultXfyunPricingURL, "讯飞官方价格页")
|
|
flag.StringVar(&fixture, "fixture", "", "讯飞价格样例文件")
|
|
flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
|
|
flag.IntVar(&timeoutSeconds, "timeout", 30, "请求超时(秒)")
|
|
flag.Parse()
|
|
|
|
cfg := xfyunPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second}
|
|
|
|
var db *sql.DB
|
|
var err error
|
|
if !cfg.DryRun {
|
|
db, err = subscriptionImportDB()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "open db: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
defer db.Close()
|
|
}
|
|
|
|
if err := runXfyunPricingImport(cfg, db, os.Stdout); err != nil {
|
|
fmt.Fprintf(os.Stderr, "import_xfyun_pricing: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func runXfyunPricingImport(cfg xfyunPricingImportConfig, db *sql.DB, out io.Writer) error {
|
|
raw, err := fetchXfyunPricingPage(cfg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
records, err := parseXfyunPricingCatalog(raw)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
records = dedupeOfficialPricingRecords(records)
|
|
if len(records) == 0 {
|
|
return fmt.Errorf("unexpected xfyun pricing content: no records")
|
|
}
|
|
if cfg.DryRun {
|
|
_, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
|
|
return err
|
|
}
|
|
if db == nil {
|
|
return fmt.Errorf("db is required when dry-run=false")
|
|
}
|
|
if err := upsertOfficialPricingRecords(db, records, "xfyun-pricing-import"); err != nil {
|
|
return err
|
|
}
|
|
var tableRows int
|
|
if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
|
|
return fmt.Errorf("count region_pricing: %w", err)
|
|
}
|
|
_, err = fmt.Fprintf(out, "source=xfyun-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
|
|
return err
|
|
}
|
|
|
|
func fetchXfyunPricingPage(cfg xfyunPricingImportConfig) (string, error) {
|
|
if strings.TrimSpace(cfg.Fixture) != "" {
|
|
data, err := os.ReadFile(cfg.Fixture)
|
|
if err != nil {
|
|
return "", fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
|
|
}
|
|
return string(data), nil
|
|
}
|
|
|
|
client := &http.Client{Timeout: cfg.Timeout}
|
|
raw, err := fetchRawPricingPage(cfg.URL, "", client)
|
|
if err == nil && strings.Contains(raw, "apiprice_cardTitle__") {
|
|
return raw, nil
|
|
}
|
|
|
|
rendered, renderErr := fetchXfyunPricingPageWithChromium(cfg.URL, cfg.Timeout)
|
|
if renderErr != nil {
|
|
if err != nil {
|
|
return "", fmt.Errorf("fetch shell failed: %v; chromium render failed: %w", err, renderErr)
|
|
}
|
|
return "", renderErr
|
|
}
|
|
return rendered, nil
|
|
}
|
|
|
|
func fetchXfyunPricingPageWithChromium(url string, timeout time.Duration) (string, error) {
|
|
browserPath, err := lookupChromiumBinary()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
|
defer cancel()
|
|
cmd := exec.CommandContext(ctx, browserPath,
|
|
"--headless",
|
|
"--no-sandbox",
|
|
"--disable-gpu",
|
|
"--dump-dom",
|
|
url,
|
|
)
|
|
cmd.Stderr = io.Discard
|
|
out, err := cmd.Output()
|
|
if ctx.Err() == context.DeadlineExceeded {
|
|
return "", fmt.Errorf("chromium render timeout after %s", timeout)
|
|
}
|
|
if err != nil {
|
|
return "", fmt.Errorf("chromium dump-dom: %w", err)
|
|
}
|
|
if len(out) == 0 {
|
|
return "", fmt.Errorf("chromium dump-dom returned empty output")
|
|
}
|
|
return string(out), nil
|
|
}
|
|
|
|
// lookupChromiumBinary returns the path of the first Chromium-compatible
// browser executable found on PATH. Candidates are tried in a fixed order:
// chromium, chromium-browser, google-chrome, google-chrome-stable. An error is
// returned when none of them resolves.
func lookupChromiumBinary() (string, error) {
	candidates := [...]string{
		"chromium",
		"chromium-browser",
		"google-chrome",
		"google-chrome-stable",
	}
	for i := range candidates {
		resolved, err := exec.LookPath(candidates[i])
		if err != nil {
			continue
		}
		return resolved, nil
	}
	return "", fmt.Errorf("no chromium-compatible browser found in PATH")
}
|
|
|
|
func parseXfyunPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
|
matches := xfyunPricingCardPattern.FindAllStringSubmatch(raw, -1)
|
|
if len(matches) == 0 {
|
|
return nil, fmt.Errorf("unexpected xfyun pricing content: no pricing cards found")
|
|
}
|
|
|
|
providerNameCn, providerCountry, providerWebsite := providerMetadata("iFlytek")
|
|
records := make([]officialPricingRecord, 0, len(matches))
|
|
for _, match := range matches {
|
|
if len(match) != 3 {
|
|
continue
|
|
}
|
|
title := strings.TrimSpace(match[1])
|
|
modelName := xfyunCanonicalModelName(title)
|
|
if modelName == "" {
|
|
continue
|
|
}
|
|
price := mustParseSubscriptionPrice(match[2])
|
|
records = append(records, officialPricingRecord{
|
|
ModelID: normalizeExternalID("xfyun", modelName),
|
|
ModelName: modelName,
|
|
ProviderName: "iFlytek",
|
|
ProviderNameCn: providerNameCn,
|
|
ProviderCountry: providerCountry,
|
|
ProviderWebsite: providerWebsite,
|
|
OperatorName: "Spark API",
|
|
OperatorNameCn: "讯飞星火 API",
|
|
OperatorCountry: "CN",
|
|
OperatorWebsite: defaultXfyunPricingURL,
|
|
OperatorType: "official",
|
|
Region: "CN",
|
|
Currency: "CNY",
|
|
InputPrice: price,
|
|
OutputPrice: price,
|
|
IsFree: price == 0,
|
|
SourceURL: defaultXfyunPricingURL,
|
|
ModelSourceURL: defaultXfyunPricingURL,
|
|
DateConfidence: "unknown",
|
|
DateSourceKind: "official_pricing",
|
|
Modality: "text",
|
|
})
|
|
}
|
|
if len(records) == 0 {
|
|
return nil, fmt.Errorf("unexpected xfyun pricing content: empty records after canonical mapping")
|
|
}
|
|
return records, nil
|
|
}
|
|
|
|
// xfyunCanonicalModelName maps a pricing-card title, as shown on the pricing
// page, to the canonical model name stored in the catalog. Surrounding
// whitespace in title is ignored. It returns "" for any title that is not one
// of the known cards.
func xfyunCanonicalModelName(title string) string {
	canonicalByTitle := map[string]string{
		"X2/X1.5模型": "Spark X2/X1.5",
		"Ultra模型":   "Spark Ultra",
		"Pro模型":     "Spark Pro",
		"Lite模型":    "Spark Lite",
	}
	// A missing key yields the zero value "", which is the "unknown" result.
	return canonicalByTitle[strings.TrimSpace(title)]
}
|