//go:build llm_script package main import ( "database/sql" "encoding/json" "flag" "fmt" "html" "io" "net/http" "os" "regexp" "strings" "time" ) const ( defaultMobileCloudOutlineTreeURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/outline/tree?outlineId=972" defaultMobileCloudArticleInfoURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/info/%d" defaultMobileCloudArticleContentURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/content/%s" defaultMobileCloudDocURLPattern = "https://ecloud.10086.cn/op-help-center/doc/article/%d" mobileCloudPricingArticleTitle = "预置模型服务-token按量计费" ) type mobileCloudPricingImportConfig struct { OutlineTreeURL string Fixture string DryRun bool Timeout time.Duration } type mobileCloudOutlineEnvelope struct { Code int `json:"code"` Data mobileCloudOutlineNode `json:"data"` } type mobileCloudOutlineNode struct { ArticleID int `json:"articleId"` ArticleTitle string `json:"articleTitle"` ArticleContentPublished string `json:"articleContentPublished"` Children []mobileCloudOutlineNode `json:"children"` } type mobileCloudArticleInfoEnvelope struct { Code int `json:"code"` Data mobileCloudArticleInfo `json:"data"` } type mobileCloudArticleInfo struct { ID int `json:"id"` Title string `json:"title"` ContentPublished string `json:"contentPublished"` } type mobileCloudArticlePayload struct { ArticleID int Title string ContentPublished string DocURL string ContentHTML string } func main() { loadSubscriptionImportEnv() var outlineTreeURL string var fixture string var dryRun bool var timeoutSeconds int flag.StringVar(&outlineTreeURL, "outline-tree-url", defaultMobileCloudOutlineTreeURL, "移动云 MoMA 文档大纲树接口") flag.StringVar(&fixture, "fixture", "", "移动云 MoMA 价格样例文件") flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库") flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)") flag.Parse() cfg := mobileCloudPricingImportConfig{OutlineTreeURL: outlineTreeURL, Fixture: fixture, DryRun: dryRun, Timeout: time.Duration(timeoutSeconds) * time.Second} var db *sql.DB var err error if !cfg.DryRun { db, err = subscriptionImportDB() if err != nil { fmt.Fprintf(os.Stderr, "open db: %v\n", err) os.Exit(1) } defer db.Close() } if err := runMobileCloudPricingImport(cfg, db, os.Stdout); err != nil { fmt.Fprintf(os.Stderr, "import_mobile_cloud_pricing: %v\n", err) os.Exit(1) } } func runMobileCloudPricingImport(cfg mobileCloudPricingImportConfig, db *sql.DB, out io.Writer) error { client := &http.Client{Timeout: cfg.Timeout} payload, err := fetchMobileCloudArticlePayload(cfg, client) if err != nil { return err } records, err := parseMobileCloudPricingHTML(payload.ContentHTML, payload.DocURL) if err != nil { return err } records = dedupeOfficialPricingRecords(records) if cfg.DryRun { _, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName) return err } if db == nil { return fmt.Errorf("db is required when dry-run=false") } if err := upsertOfficialPricingRecords(db, records, "mobile-cloud-pricing-import"); err != nil { return err } var tableRows int if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil { return fmt.Errorf("count region_pricing: %w", err) } _, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows) return err } func fetchMobileCloudArticlePayload(cfg mobileCloudPricingImportConfig, client *http.Client) (mobileCloudArticlePayload, error) { if cfg.Fixture != "" { data, err := os.ReadFile(cfg.Fixture) if err != nil { return mobileCloudArticlePayload{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err) } return mobileCloudArticlePayload{ ArticleID: 91592, Title: mobileCloudPricingArticleTitle, DocURL: fmt.Sprintf(defaultMobileCloudDocURLPattern, 91592), ContentHTML: string(data), }, nil } if client == nil { client = &http.Client{Timeout: 20 * time.Second} } outlineRaw, err := fetchRawPricingPage(cfg.OutlineTreeURL, "", client) if err != nil { return mobileCloudArticlePayload{}, err } articleID, contentPublished, err := resolveMobileCloudPricingArticle(outlineRaw) if err != nil { return mobileCloudArticlePayload{}, err } infoURL := fmt.Sprintf(defaultMobileCloudArticleInfoURL, articleID) infoRaw, err := fetchRawPricingPage(infoURL, "", client) if err != nil { return mobileCloudArticlePayload{}, err } articleInfo, err := parseMobileCloudArticleInfo(infoRaw) if err != nil { return mobileCloudArticlePayload{}, err } if strings.TrimSpace(contentPublished) == "" { contentPublished = articleInfo.ContentPublished } contentURL := fmt.Sprintf(defaultMobileCloudArticleContentURL, contentPublished) contentHTML, err := fetchRawPricingPage(contentURL, "", client) if err != nil { return mobileCloudArticlePayload{}, err } return mobileCloudArticlePayload{ ArticleID: articleInfo.ID, Title: articleInfo.Title, ContentPublished: contentPublished, DocURL: fmt.Sprintf(defaultMobileCloudDocURLPattern, articleInfo.ID), ContentHTML: contentHTML, }, nil } func resolveMobileCloudPricingArticle(raw string) (int, string, error) { var envelope mobileCloudOutlineEnvelope if err := json.Unmarshal([]byte(raw), &envelope); err != nil { return 0, "", fmt.Errorf("parse mobile cloud outline tree: %w", err) } articleID, contentPublished, ok := findMobileCloudPricingArticle(envelope.Data) if !ok { return 0, "", fmt.Errorf("mobile cloud pricing article %q not found in outline tree", mobileCloudPricingArticleTitle) } return articleID, contentPublished, nil } func findMobileCloudPricingArticle(node mobileCloudOutlineNode) (int, string, bool) { if strings.TrimSpace(node.ArticleTitle) == mobileCloudPricingArticleTitle && node.ArticleID > 0 { return node.ArticleID, strings.TrimSpace(node.ArticleContentPublished), true } for _, child := range node.Children { if articleID, contentPublished, ok := findMobileCloudPricingArticle(child); ok { return articleID, contentPublished, true } } return 0, "", false } func parseMobileCloudArticleInfo(raw string) (mobileCloudArticleInfo, error) { var envelope mobileCloudArticleInfoEnvelope if err := json.Unmarshal([]byte(raw), &envelope); err != nil { return mobileCloudArticleInfo{}, fmt.Errorf("parse mobile cloud article info: %w", err) } if envelope.Data.ID == 0 { return mobileCloudArticleInfo{}, fmt.Errorf("unexpected mobile cloud article info content") } return envelope.Data, nil } func parseMobileCloudPricingHTML(raw string, docURL string) ([]officialPricingRecord, error) { sections := mobileCloudRegionSections(raw) if len(sections) == 0 { return nil, fmt.Errorf("no mobile cloud pricing regions found") } records := make([]officialPricingRecord, 0) for _, section := range sections { for _, table := range mobileCloudTableBlocks(section.Body) { rows := mobileCloudTableRows(table) if len(rows) < 2 { continue } switch { case isMobileCloudTokenPricingHeader(rows[0]): records = append(records, buildMobileCloudRecordsFromTable(section.Region, rows[1:], docURL)...) case isMobileCloudVoicePricingHeader(rows[0]): records = append(records, buildMobileCloudVoiceRecordsFromTable(section.Region, rows[1:], docURL)...) } } } if len(records) == 0 { return nil, fmt.Errorf("no mobile cloud token pricing rows found") } return records, nil } type mobileCloudRegionSection struct { Region string Body string } func mobileCloudRegionSections(raw string) []mobileCloudRegionSection { headingPattern := regexp.MustCompile(`(?is)]*>(.*?)`) matches := headingPattern.FindAllStringSubmatchIndex(raw, -1) sections := make([]mobileCloudRegionSection, 0, len(matches)) for i, match := range matches { heading := cleanMobileCloudHTMLText(raw[match[2]:match[3]]) if !strings.Contains(heading, "支持订购模型") { continue } start := match[1] end := len(raw) if i+1 < len(matches) { end = matches[i+1][0] } region := strings.TrimSpace(strings.TrimSuffix(heading, "资源池支持订购模型")) if region == heading { region = strings.TrimSpace(strings.TrimSuffix(heading, "支持订购模型")) } sections = append(sections, mobileCloudRegionSection{Region: region, Body: raw[start:end]}) } return sections } func mobileCloudTableBlocks(raw string) []string { return regexp.MustCompile(`(?is)`).FindAllString(raw, -1) } func mobileCloudTableRows(raw string) [][]string { rowMatches := regexp.MustCompile(`(?is)]*>(.*?)`).FindAllStringSubmatch(raw, -1) rows := make([][]string, 0, len(rowMatches)) for _, rowMatch := range rowMatches { cellMatches := regexp.MustCompile(`(?is)]*>(.*?)`).FindAllStringSubmatch(rowMatch[1], -1) cells := make([]string, 0, len(cellMatches)) for _, cellMatch := range cellMatches { cells = append(cells, cleanMobileCloudHTMLText(cellMatch[1])) } if len(cells) > 0 { rows = append(rows, cells) } } return rows } func cleanMobileCloudHTMLText(raw string) string { raw = strings.ReplaceAll(raw, "
", " ") raw = strings.ReplaceAll(raw, "
", " ") raw = strings.ReplaceAll(raw, "
", " ") raw = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(raw, " ") raw = html.UnescapeString(raw) raw = regexp.MustCompile(`\s+`).ReplaceAllString(raw, " ") return strings.TrimSpace(raw) } func isMobileCloudTokenPricingHeader(cells []string) bool { if len(cells) < 4 { return false } return cells[0] == "规格名称" && cells[1] == "输入/输出tokens" && cells[2] == "单价(元/百万tokens)" && cells[3] == "包含模型" } func isMobileCloudVoicePricingHeader(cells []string) bool { if len(cells) < 5 { return false } return cells[0] == "规格名称" && cells[1] == "模型类别" && cells[2] == "资费场景" && cells[3] == "单价" && cells[4] == "包含模型" } func buildMobileCloudRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord { records := make([]officialPricingRecord, 0) currentModels := make([]string, 0) currentInputPrice := 0.0 for _, row := range rows { switch { case len(row) >= 4: billingKind := strings.TrimSpace(row[1]) price := mustParseSubscriptionPrice(row[2]) currentModels = mobileCloudModelNames(row[3]) switch billingKind { case "输入tokens": currentInputPrice = price case "tokens资费": records = append(records, buildMobileCloudFlatTokenRecords(region, currentModels, price, docURL)...) currentInputPrice = 0 default: currentInputPrice = 0 } case len(row) >= 2 && strings.TrimSpace(row[0]) == "输出tokens": if currentInputPrice <= 0 || len(currentModels) == 0 { continue } outputPrice := mustParseSubscriptionPrice(row[1]) records = append(records, buildMobileCloudInputOutputRecords(region, currentModels, currentInputPrice, outputPrice, docURL)...) currentInputPrice = 0 } } return records } func buildMobileCloudInputOutputRecords(region string, modelNames []string, inputPrice float64, outputPrice float64, docURL string) []officialPricingRecord { records := make([]officialPricingRecord, 0, len(modelNames)) for _, modelName := range modelNames { providerName := mobileCloudProviderName(modelName) providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName), ModelName: modelName, ProviderName: providerName, ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Mobile Cloud", OperatorNameCn: "移动云", OperatorCountry: "CN", OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS", OperatorType: "official", Region: region, Currency: "CNY", InputPrice: inputPrice, OutputPrice: outputPrice, SourceURL: docURL, ModelSourceURL: docURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), }) } return records } func buildMobileCloudFlatTokenRecords(region string, modelNames []string, price float64, docURL string) []officialPricingRecord { records := make([]officialPricingRecord, 0, len(modelNames)) for _, modelName := range modelNames { providerName := mobileCloudProviderName(modelName) providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName), ModelName: modelName, ProviderName: providerName, ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Mobile Cloud", OperatorNameCn: "移动云", OperatorCountry: "CN", OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS", OperatorType: "official", Region: region, Currency: "CNY", InputPrice: price, OutputPrice: price, SourceURL: docURL, ModelSourceURL: docURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), }) } return records } func buildMobileCloudVoiceRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord { records := make([]officialPricingRecord, 0, len(rows)) for _, row := range rows { if len(row) < 5 { continue } modelNames := mobileCloudModelNames(row[4]) if len(modelNames) == 0 { modelNames = []string{strings.TrimSpace(row[0])} } flatPrice := mobileCloudInlinePrice(row[3]) priceUnit := mobileCloudVoicePriceUnit(row[2], row[3]) for _, modelName := range modelNames { providerName := mobileCloudProviderName(modelName) providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName) records = append(records, officialPricingRecord{ ModelID: normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), modelName), ModelName: modelName, ProviderName: providerName, ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Mobile Cloud", OperatorNameCn: "移动云", OperatorCountry: "CN", OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS", OperatorType: "official", Region: region, Currency: "CNY", PricingMode: "flat", PriceUnit: priceUnit, FlatPrice: flatPrice, SourceURL: docURL, ModelSourceURL: docURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: "audio", }) } } return records } func mobileCloudVoicePriceUnit(scene string, price string) string { text := strings.ToLower(strings.TrimSpace(scene + " " + price)) switch { case strings.Contains(text, "万字符"), strings.Contains(text, "字符"): return "10k_characters" case strings.Contains(text, "元/秒"), strings.Contains(text, "秒"): return "second" default: return "flat" } } func mobileCloudInlinePrice(raw string) float64 { matches := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?)`).FindStringSubmatch(raw) if len(matches) != 2 { return 0 } return mustParseSubscriptionPrice(matches[1]) } func mobileCloudModelNames(raw string) []string { parts := strings.Fields(strings.TrimSpace(raw)) models := make([]string, 0, len(parts)) for _, part := range parts { cleaned := strings.TrimSpace(strings.TrimSuffix(part, "、")) if cleaned != "" { models = append(models, cleaned) } } return models } func mobileCloudProviderName(modelName string) string { lower := strings.ToLower(strings.TrimSpace(modelName)) switch { case strings.HasPrefix(lower, "minimax"): return "MiniMax" case strings.HasPrefix(lower, "deepseek"): return "DeepSeek" case strings.HasPrefix(lower, "qwen"), strings.HasPrefix(lower, "qwq"): return "Qwen" case strings.HasPrefix(lower, "bge"): return "BAAI" case strings.HasPrefix(lower, "cosyvoice"), strings.HasPrefix(lower, "sensevoice"): return "Alibaba" default: return "China Mobile" } } func mobileCloudRegionCode(region string) string { switch strings.TrimSpace(region) { case "华北-呼和浩特": return "huabei-huhehaote" case "东北-哈尔滨": return "dongbei-haerbin" case "华中-郑州": return "huazhong-zhengzhou" case "黑龙江-哈尔滨": return "heilongjiang-haerbin" case "华东-上海5": return "huadong-shanghai5" case "江西-南昌": return "jiangxi-nanchang" case "湖北-武汉": return "hubei-wuhan" case "华南-广州8": return "huanan-guangzhou8" default: return normalizeExternalID(region) } }