//go:build llm_script && !scripts_pkg package main import ( "database/sql" "flag" "fmt" "html" "io" "net/http" "os" "regexp" "strings" "time" ) const defaultCUCloudPricingURL = "https://support.cucloud.cn/document/127/591/2357.html?id=2357&folderid=3236" type cucloudPricingImportConfig struct { URL string Fixture string DryRun bool Timeout time.Duration } type cucloudPricingSummary struct { Models int Records int Regions int PaygModeConfirmed bool PaygPriceTablePublic bool } var cucloudRequiredModels = []string{"DeepSeek-V4-Pro", "DeepSeek-V4-Flash", "MiniMax-M2.5"} func main() { loadSubscriptionImportEnv() var url string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&url, "url", defaultCUCloudPricingURL, "联通云 AISP Token Plan 页面") flag.StringVar(&fixture, "fixture", "", "联通云价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") flag.Parse() cfg := cucloudPricingImportConfig{URL: url, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runCUCloudPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_cucloud_pricing: %v\n", err) os.Exit(1) } } func runCUCloudPricingImport(cfg cucloudPricingImportConfig, db *sql.DB, out io.Writer) error { client := &http.Client{Timeout: cfg.Timeout} raw, err := fetchRawPricingPage(cfg.URL, cfg.Fixture, client) if err != nil { return err } records, summary, err := parseCUCloudPricingCatalog(raw, cfg.URL) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if cfg.DryRun { _, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s payg_mode_confirmed=%t payg_price_table_public=%t dry_run=true\n", summary.Models, summary.Records, summary.Regions, records[0].OperatorName, summary.PaygModeConfirmed, summary.PaygPriceTablePublic) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "cucloud-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=cucloud-pricing-import models=%d records=%d regions=%d operator=%s table_rows=%d payg_mode_confirmed=%t payg_price_table_public=%t dry_run=false\n", summary.Models, summary.Records, summary.Regions, records[0].OperatorName, tableRows, summary.PaygModeConfirmed, summary.PaygPriceTablePublic) return err } func parseCUCloudPricingCatalog(raw string, sourceURL string) ([]officialPricingRecord, cucloudPricingSummary, error) { normalized := normalizeCUCloudRaw(raw) priceMap, err := extractCUCloudBlendedPrices(normalized) if err != nil { return nil, cucloudPricingSummary{}, err } regionMap, err := extractCUCloudRegionSupport(normalized) if err != nil { return nil, cucloudPricingSummary{}, err } records := make([]officialPricingRecord, 0) modelSet := make(map[string]struct{}) regionSet := make(map[string]struct{}) for _, modelName := range cucloudRequiredModels { price, ok := priceMap[modelName] if !ok { return nil, cucloudPricingSummary{}, fmt.Errorf("missing blended price for %s", modelName) } regions := regionMap[modelName] if len(regions) == 0 { return nil, cucloudPricingSummary{}, fmt.Errorf("missing supported regions for %s", modelName) } providerName := cucloudProviderName(modelName) providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) for _, region := range regions { records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("cucloud", "aisp", modelName), ModelName: modelName, ProviderName: providerName, ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Unicom AISP", OperatorNameCn: "联通云 AI服务平台AISP", OperatorCountry: "CN", OperatorWebsite: "https://www.cucloud.cn", OperatorType: "official", Region: region, Currency: "CNY", InputPrice: price, OutputPrice: price, SourceURL: sourceURL, ModelSourceURL: sourceURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), }) regionSet[region] = struct{}{} } modelSet[modelName] = struct{}{} } if len(records) == 0 { return nil, cucloudPricingSummary{}, fmt.Errorf("no cucloud pricing records found") } summary := cucloudPricingSummary{ Models: len(modelSet), Records: len(records), Regions: len(regionSet), PaygModeConfirmed: cucloudPaygModeConfirmed(normalized), PaygPriceTablePublic: cucloudHasPublicPaygPriceTable(normalized), } return records, summary, nil } func normalizeCUCloudRaw(raw string) string { raw = strings.ReplaceAll(raw, `\u003c`, "<") raw = strings.ReplaceAll(raw, `\u003e`, ">") raw = strings.ReplaceAll(raw, `\u0026nbsp;`, " ") raw = strings.ReplaceAll(raw, `\n`, "\n") raw = strings.ReplaceAll(raw, `\t`, " ") raw = strings.ReplaceAll(raw, `\r`, "\n") raw = html.UnescapeString(raw) return raw } func extractCUCloudBlendedPrices(raw string) (map[string]float64, error) { for _, table := range cucloudTableBlocks(raw) { rows := cucloudTableRows(table) if len(rows) == 0 { continue } prices := make(map[string]float64) for _, cell := range rows[0] { modelName, price, ok := cucloudBlendedPriceCell(cell) if ok { prices[modelName] = price } } if cucloudHasAllRequiredModels(prices) { return prices, nil } } return nil, fmt.Errorf("unexpected cucloud blended price table") } func cucloudBlendedPriceCell(raw string) (string, float64, bool) { cleaned := strings.TrimSpace(cleanHTMLText(raw)) match := regexp.MustCompile(`^(.*?)\s*综合单价\s*([0-9]+(?:\.[0-9]+)?)元/百万tokens$`).FindStringSubmatch(cleaned) if len(match) != 3 { return "", 0, false } modelName := strings.TrimSpace(match[1]) if modelName == "" { return "", 0, false } return modelName, mustParseSubscriptionPrice(match[2]), true } func extractCUCloudRegionSupport(raw string) (map[string][]string, error) { for _, table := range cucloudTableBlocks(raw) { rows := cucloudTableRows(table) if len(rows) < 2 { continue } headers := rows[0] if len(headers) < 2 || strings.TrimSpace(headers[0]) != "模型" { continue } if !strings.Contains(strings.Join(headers, "|"), "贵阳基地二区") { continue } regionMap := make(map[string][]string) regions := headers[1:] for _, row := range rows[1:] { if len(row) < len(regions)+1 { continue } modelName := strings.TrimSpace(row[0]) if modelName == "" { continue } supported := make([]string, 0) for idx, region := range regions { if strings.Contains(strings.TrimSpace(row[idx+1]), "支持") { supported = append(supported, strings.TrimSpace(region)) } } if len(supported) > 0 { regionMap[modelName] = supported } } if cucloudHasAllRequiredRegionRows(regionMap) { return regionMap, nil } } return nil, fmt.Errorf("unexpected cucloud region support table") } func cucloudTableBlocks(raw string) []string { pattern := regexp.MustCompile(`(?is)