Files
llm-intelligence/scripts/import_mobile_cloud_pricing.go
phamnazage-jpg 5c5578a19b
Some checks failed
CI / go-test (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / docker-build (push) Has been cancelled
feat(region_pricing): 扩展非 token 统一计费字段,支持语音按字符/秒计费
- 新增 region_pricing.pricing_mode / price_unit / flat_price 字段
- 新增 migration 016_region_pricing_non_token_units.sql
- officialPricingRecord 新增 PricingMode/PriceUnit/FlatPrice 字段
- detectModality 新增 audio 模态检测(voice/audio/speech)
- providerMetadata 新增 BAAI/ByteDance/China Mobile 元数据
- import_mobile_cloud_pricing.go: 解析语音计费表(CosyVoice/SenseVoice)
  - CosyVoice: 2元/万字符 → pricingMode=flat, priceUnit=10k_characters
  - SenseVoice: 0.0007元/秒 → pricingMode=flat, priceUnit=second
- mobileCloudProviderName 新增 cosyvoice/sensevoice → Alibaba 映射
- cmd/server: modelResponse 新增 pricingMode/priceUnit/flatPrice,API 字段说明同步更新
- 新增 TestModelsHandlerReturnsFlatPricingFields 测试
2026-05-22 14:51:38 +08:00

518 lines
17 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//go:build llm_script
package main
import (
"database/sql"
"encoding/json"
"flag"
"fmt"
"html"
"io"
"net/http"
"os"
"regexp"
"strings"
"time"
)
// Mobile Cloud (ecloud.10086.cn) help-center endpoints used by this importer,
// plus the article title used to locate the pricing article in the outline tree.
const (
// defaultMobileCloudOutlineTreeURL is the default value of the -outline-tree-url flag.
defaultMobileCloudOutlineTreeURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/outline/tree?outlineId=972"
// defaultMobileCloudArticleInfoURL is a format string; %d receives the numeric article ID.
defaultMobileCloudArticleInfoURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/info/%d"
// defaultMobileCloudArticleContentURL is a format string; %s receives the article's
// "content published" reference.
defaultMobileCloudArticleContentURL = "https://ecloud.10086.cn/op-help-center/request-api/service-api/article/content/%s"
// defaultMobileCloudDocURLPattern is a format string; %d receives the numeric article ID.
// The resulting URL is recorded as the source URL of every imported record.
defaultMobileCloudDocURLPattern = "https://ecloud.10086.cn/op-help-center/doc/article/%d"
// mobileCloudPricingArticleTitle is the exact (trimmed) article title searched for in the outline tree.
mobileCloudPricingArticleTitle = "预置模型服务-token按量计费"
)
// mobileCloudPricingImportConfig carries the command-line options of the importer.
type mobileCloudPricingImportConfig struct {
// OutlineTreeURL is the outline-tree endpoint queried to find the pricing article.
OutlineTreeURL string
// Fixture, when non-empty, is a file path whose contents are used as the
// article HTML instead of fetching it over the network.
Fixture string
// DryRun makes the import parse and print a summary without writing to the database.
DryRun bool
// Timeout is applied to the HTTP client used for fetching.
Timeout time.Duration
}
// mobileCloudOutlineEnvelope is the JSON wrapper decoded from the outline-tree response.
type mobileCloudOutlineEnvelope struct {
// Code is decoded from the "code" key; the code visible in this file does not inspect it.
Code int `json:"code"`
// Data is the root node of the outline tree.
Data mobileCloudOutlineNode `json:"data"`
}
// mobileCloudOutlineNode is one node of the help-center outline tree.
type mobileCloudOutlineNode struct {
// ArticleID must be positive for the node to be accepted as the pricing article.
ArticleID int `json:"articleId"`
// ArticleTitle is compared (after trimming) against mobileCloudPricingArticleTitle.
ArticleTitle string `json:"articleTitle"`
// ArticleContentPublished is the reference later substituted into the article content URL.
ArticleContentPublished string `json:"articleContentPublished"`
// Children are the nested nodes, searched recursively.
Children []mobileCloudOutlineNode `json:"children"`
}
// mobileCloudArticleInfoEnvelope is the JSON wrapper decoded from the article-info response.
type mobileCloudArticleInfoEnvelope struct {
// Code is decoded from the "code" key; the code visible in this file does not inspect it.
Code int `json:"code"`
// Data holds the article metadata.
Data mobileCloudArticleInfo `json:"data"`
}
// mobileCloudArticleInfo is the article metadata returned by the article-info endpoint.
type mobileCloudArticleInfo struct {
// ID is the numeric article ID; a zero value is treated as an invalid response.
ID int `json:"id"`
// Title is the article title as reported by the endpoint.
Title string `json:"title"`
// ContentPublished is used as a fallback content reference when the outline
// tree did not provide one.
ContentPublished string `json:"contentPublished"`
}
// mobileCloudArticlePayload bundles the pricing article's HTML with the
// metadata needed to attribute imported records to it.
type mobileCloudArticlePayload struct {
// ArticleID is the numeric article ID.
ArticleID int
// Title is the article title.
Title string
// ContentPublished is the reference used to fetch the content; it is left
// empty when the payload is loaded from a fixture file.
ContentPublished string
// DocURL is the public documentation URL built from the article ID.
DocURL string
// ContentHTML is the raw article HTML that gets parsed for pricing tables.
ContentHTML string
}
// main parses the command-line flags, opens the database unless -dry-run is
// set, runs the Mobile Cloud pricing import, and exits with status 1 when
// either opening the database or the import itself fails.
func main() {
	loadSubscriptionImportEnv()

	var (
		outlineTreeURL string
		fixture        string
		dryRun         bool
		timeoutSeconds int
	)
	flag.StringVar(&outlineTreeURL, "outline-tree-url", defaultMobileCloudOutlineTreeURL, "移动云 MoMA 文档大纲树接口")
	flag.StringVar(&fixture, "fixture", "", "移动云 MoMA 价格样例文件")
	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
	flag.Parse()

	cfg := mobileCloudPricingImportConfig{
		OutlineTreeURL: outlineTreeURL,
		Fixture:        fixture,
		DryRun:         dryRun,
		Timeout:        time.Duration(timeoutSeconds) * time.Second,
	}

	// The database is only needed when records are actually written.
	var db *sql.DB
	if !cfg.DryRun {
		var err error
		db, err = subscriptionImportDB()
		if err != nil {
			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
			os.Exit(1)
		}
	}

	err := runMobileCloudPricingImport(cfg, db, os.Stdout)

	// Close explicitly instead of deferring: os.Exit below terminates the
	// process without running deferred calls, which would skip the Close.
	if db != nil {
		_ = db.Close() // best effort; the import result is what determines the exit code
	}

	if err != nil {
		fmt.Fprintf(os.Stderr, "import_mobile_cloud_pricing: %v\n", err)
		os.Exit(1)
	}
}
// runMobileCloudPricingImport fetches the pricing article, parses it into
// pricing records, deduplicates them, and either prints a dry-run summary or
// upserts the records and prints a summary including the region_pricing row
// count.
//
// db may be nil only when cfg.DryRun is true. The one-line summary is written
// to out; any fetch, parse, database, or write error is returned.
func runMobileCloudPricingImport(cfg mobileCloudPricingImportConfig, db *sql.DB, out io.Writer) error {
	client := &http.Client{Timeout: cfg.Timeout}
	payload, err := fetchMobileCloudArticlePayload(cfg, client)
	if err != nil {
		return err
	}
	records, err := parseMobileCloudPricingHTML(payload.ContentHTML, payload.DocURL)
	if err != nil {
		return err
	}
	records = dedupeOfficialPricingRecords(records)
	// The summaries below read records[0]; guard against an empty slice so an
	// unexpected dedupe result surfaces as an error rather than a panic.
	if len(records) == 0 {
		return fmt.Errorf("no mobile cloud pricing records left after deduplication")
	}
	operatorName := records[0].OperatorName

	if cfg.DryRun {
		_, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s dry_run=true\n", len(records), operatorName)
		return err
	}
	if db == nil {
		return fmt.Errorf("db is required when dry-run=false")
	}
	if err := upsertOfficialPricingRecords(db, records, "mobile-cloud-pricing-import"); err != nil {
		return err
	}
	var tableRows int
	if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
		return fmt.Errorf("count region_pricing: %w", err)
	}
	_, err = fmt.Fprintf(out, "source=mobile-cloud-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), operatorName, tableRows)
	return err
}
// fetchMobileCloudArticlePayload returns the pricing article HTML together
// with its identifying metadata.
//
// When cfg.Fixture is set, the HTML is read from that file and no network
// request is made. Otherwise the outline tree is fetched to locate the pricing
// article, the article info is fetched for its metadata, and finally the
// article content is fetched. A nil client is replaced by one with a
// 20-second timeout.
//
// An error is returned when any fetch or decode step fails, or when neither
// the outline tree nor the article info yields a content reference.
func fetchMobileCloudArticlePayload(cfg mobileCloudPricingImportConfig, client *http.Client) (mobileCloudArticlePayload, error) {
	if cfg.Fixture != "" {
		// Article ID stamped on payloads loaded from a fixture file.
		const fixtureArticleID = 91592
		data, err := os.ReadFile(cfg.Fixture)
		if err != nil {
			return mobileCloudArticlePayload{}, fmt.Errorf("read fixture %s: %w", cfg.Fixture, err)
		}
		return mobileCloudArticlePayload{
			ArticleID:   fixtureArticleID,
			Title:       mobileCloudPricingArticleTitle,
			DocURL:      fmt.Sprintf(defaultMobileCloudDocURLPattern, fixtureArticleID),
			ContentHTML: string(data),
		}, nil
	}
	if client == nil {
		client = &http.Client{Timeout: 20 * time.Second}
	}

	outlineRaw, err := fetchRawPricingPage(cfg.OutlineTreeURL, "", client)
	if err != nil {
		return mobileCloudArticlePayload{}, err
	}
	articleID, contentPublished, err := resolveMobileCloudPricingArticle(outlineRaw)
	if err != nil {
		return mobileCloudArticlePayload{}, err
	}

	infoRaw, err := fetchRawPricingPage(fmt.Sprintf(defaultMobileCloudArticleInfoURL, articleID), "", client)
	if err != nil {
		return mobileCloudArticlePayload{}, err
	}
	articleInfo, err := parseMobileCloudArticleInfo(infoRaw)
	if err != nil {
		return mobileCloudArticlePayload{}, err
	}

	// Prefer the reference from the outline tree; fall back to the one in the
	// article info. Both are trimmed so stray whitespace cannot end up in the URL.
	contentPublished = strings.TrimSpace(contentPublished)
	if contentPublished == "" {
		contentPublished = strings.TrimSpace(articleInfo.ContentPublished)
	}
	// Without a reference the content URL would be malformed; fail clearly
	// instead of requesting it.
	if contentPublished == "" {
		return mobileCloudArticlePayload{}, fmt.Errorf("mobile cloud article %d has no published content reference", articleInfo.ID)
	}

	contentHTML, err := fetchRawPricingPage(fmt.Sprintf(defaultMobileCloudArticleContentURL, contentPublished), "", client)
	if err != nil {
		return mobileCloudArticlePayload{}, err
	}
	return mobileCloudArticlePayload{
		ArticleID:        articleInfo.ID,
		Title:            articleInfo.Title,
		ContentPublished: contentPublished,
		DocURL:           fmt.Sprintf(defaultMobileCloudDocURLPattern, articleInfo.ID),
		ContentHTML:      contentHTML,
	}, nil
}
// resolveMobileCloudPricingArticle decodes the outline-tree JSON in raw and
// returns the article ID and content reference of the pricing article.
// It fails when the JSON cannot be decoded or no matching article exists.
func resolveMobileCloudPricingArticle(raw string) (int, string, error) {
	outline := mobileCloudOutlineEnvelope{}
	if decodeErr := json.Unmarshal([]byte(raw), &outline); decodeErr != nil {
		return 0, "", fmt.Errorf("parse mobile cloud outline tree: %w", decodeErr)
	}

	id, published, found := findMobileCloudPricingArticle(outline.Data)
	if found {
		return id, published, nil
	}
	return 0, "", fmt.Errorf("mobile cloud pricing article %q not found in outline tree", mobileCloudPricingArticleTitle)
}
// findMobileCloudPricingArticle walks the outline tree rooted at node in
// pre-order (a node before its children, children left to right) and returns
// the ID and trimmed content reference of the first node whose trimmed title
// equals mobileCloudPricingArticleTitle and whose article ID is positive.
// The boolean result reports whether such a node was found.
func findMobileCloudPricingArticle(node mobileCloudOutlineNode) (int, string, bool) {
	// Explicit stack instead of recursion; children are pushed in reverse so
	// they are popped in their original order.
	pending := []mobileCloudOutlineNode{node}
	for len(pending) > 0 {
		last := len(pending) - 1
		current := pending[last]
		pending = pending[:last]

		titleMatches := strings.TrimSpace(current.ArticleTitle) == mobileCloudPricingArticleTitle
		if titleMatches && current.ArticleID > 0 {
			return current.ArticleID, strings.TrimSpace(current.ArticleContentPublished), true
		}
		for i := len(current.Children) - 1; i >= 0; i-- {
			pending = append(pending, current.Children[i])
		}
	}
	return 0, "", false
}
// parseMobileCloudArticleInfo decodes the article-info JSON in raw and returns
// its data section. A decode failure or a zero article ID is reported as an error.
func parseMobileCloudArticleInfo(raw string) (mobileCloudArticleInfo, error) {
	response := mobileCloudArticleInfoEnvelope{}
	decodeErr := json.Unmarshal([]byte(raw), &response)
	switch {
	case decodeErr != nil:
		return mobileCloudArticleInfo{}, fmt.Errorf("parse mobile cloud article info: %w", decodeErr)
	case response.Data.ID == 0:
		// A zero ID means the payload did not carry a usable article.
		return mobileCloudArticleInfo{}, fmt.Errorf("unexpected mobile cloud article info content")
	}
	return response.Data, nil
}
// parseMobileCloudPricingHTML extracts pricing records from the article HTML.
// The document is split into region sections; within each section every table
// whose header row is recognised as a token or voice pricing table contributes
// records. docURL is recorded as the source of every record.
//
// An error is returned when no region section or no pricing record is found.
func parseMobileCloudPricingHTML(raw string, docURL string) ([]officialPricingRecord, error) {
	regions := mobileCloudRegionSections(raw)
	if len(regions) == 0 {
		return nil, fmt.Errorf("no mobile cloud pricing regions found")
	}

	var collected []officialPricingRecord
	for _, regionSection := range regions {
		for _, tableHTML := range mobileCloudTableBlocks(regionSection.Body) {
			tableRows := mobileCloudTableRows(tableHTML)
			// A usable table needs a header row plus at least one data row.
			if len(tableRows) < 2 {
				continue
			}
			header, body := tableRows[0], tableRows[1:]
			if isMobileCloudTokenPricingHeader(header) {
				collected = append(collected, buildMobileCloudRecordsFromTable(regionSection.Region, body, docURL)...)
			} else if isMobileCloudVoicePricingHeader(header) {
				collected = append(collected, buildMobileCloudVoiceRecordsFromTable(regionSection.Region, body, docURL)...)
			}
		}
	}

	if len(collected) == 0 {
		return nil, fmt.Errorf("no mobile cloud token pricing rows found")
	}
	return collected, nil
}
// mobileCloudRegionSection is the part of the pricing article that belongs to
// one region heading.
type mobileCloudRegionSection struct {
// Region is the heading text with the "…支持订购模型" suffix removed.
Region string
// Body is the raw HTML from the end of the region's <h2> up to the next <h2>
// (or the end of the document).
Body string
}
// mobileCloudRegionSections splits the article HTML into per-region sections.
// Every <h2> whose cleaned text contains "支持订购模型" opens a section that
// runs from the end of that heading to the start of the next <h2> of any kind
// (or to the end of the document). The region name is the heading text with
// its "资源池支持订购模型" or "支持订购模型" suffix stripped.
func mobileCloudRegionSections(raw string) []mobileCloudRegionSection {
	h2Pattern := regexp.MustCompile(`(?is)<h2[^>]*>(.*?)</h2>`)
	spans := h2Pattern.FindAllStringSubmatchIndex(raw, -1)

	result := make([]mobileCloudRegionSection, 0, len(spans))
	for idx := 0; idx < len(spans); idx++ {
		span := spans[idx]
		// span[2]:span[3] is the heading's inner HTML; span[1] is the end of </h2>.
		title := cleanMobileCloudHTMLText(raw[span[2]:span[3]])
		if strings.Contains(title, "支持订购模型") {
			bodyEnd := len(raw)
			if next := idx + 1; next < len(spans) {
				bodyEnd = spans[next][0]
			}

			// Try the longer suffix first; if nothing changed, fall back to the shorter one.
			name := strings.TrimSpace(strings.TrimSuffix(title, "资源池支持订购模型"))
			if name == title {
				name = strings.TrimSpace(strings.TrimSuffix(title, "支持订购模型"))
			}

			result = append(result, mobileCloudRegionSection{
				Region: name,
				Body:   raw[span[1]:bodyEnd],
			})
		}
	}
	return result
}
// mobileCloudTableBlocks returns every <table>…</table> fragment found in raw,
// matched case-insensitively and non-greedily, in document order.
func mobileCloudTableBlocks(raw string) []string {
	tablePattern := regexp.MustCompile(`(?is)<table.*?</table>`)
	tables := tablePattern.FindAllString(raw, -1)
	return tables
}
// mobileCloudTableRows converts one HTML table fragment into rows of cleaned
// cell text. Each <tr> yields one row made of its <td>/<th> cells in order;
// rows without any cell are dropped.
func mobileCloudTableRows(raw string) [][]string {
	// Both patterns are loop-invariant, so compile them once per call instead
	// of recompiling the cell pattern for every row.
	rowPattern := regexp.MustCompile(`(?is)<tr[^>]*>(.*?)</tr>`)
	cellPattern := regexp.MustCompile(`(?is)<t[dh][^>]*>(.*?)</t[dh]>`)

	rowMatches := rowPattern.FindAllStringSubmatch(raw, -1)
	rows := make([][]string, 0, len(rowMatches))
	for _, rowMatch := range rowMatches {
		cellMatches := cellPattern.FindAllStringSubmatch(rowMatch[1], -1)
		if len(cellMatches) == 0 {
			continue
		}
		cells := make([]string, 0, len(cellMatches))
		for _, cellMatch := range cellMatches {
			cells = append(cells, cleanMobileCloudHTMLText(cellMatch[1]))
		}
		rows = append(rows, cells)
	}
	return rows
}
// mobileCloudHTMLTagPattern matches any single HTML tag. It is compiled once
// because cleanMobileCloudHTMLText runs for every heading and table cell.
var mobileCloudHTMLTagPattern = regexp.MustCompile(`(?is)<[^>]+>`)

// cleanMobileCloudHTMLText reduces an HTML fragment to plain text: every tag
// (including <br> variants) is replaced by a space, HTML entities are decoded,
// and each run of whitespace is collapsed to a single space with leading and
// trailing whitespace removed.
//
// Whitespace is recognised with Unicode rules, so the non-breaking space
// produced by decoding "&nbsp;" and the ideographic space U+3000 are
// normalised too. An ASCII-only `\s` pattern would leave them in place and
// break exact comparisons against expected header text.
func cleanMobileCloudHTMLText(raw string) string {
	raw = mobileCloudHTMLTagPattern.ReplaceAllString(raw, " ")
	raw = html.UnescapeString(raw)
	// Fields splits on unicode.IsSpace and drops empty fields, which both
	// collapses and trims in one step.
	return strings.Join(strings.Fields(raw), " ")
}
// isMobileCloudTokenPricingHeader reports whether cells is the header row of a
// token pricing table: its first four cells must equal the expected column
// titles exactly. Additional trailing cells are ignored.
func isMobileCloudTokenPricingHeader(cells []string) bool {
	expected := [...]string{"规格名称", "输入/输出tokens", "单价(元/百万tokens", "包含模型"}
	if len(cells) < len(expected) {
		return false
	}
	for i, title := range expected {
		if cells[i] != title {
			return false
		}
	}
	return true
}
// isMobileCloudVoicePricingHeader reports whether cells is the header row of a
// voice pricing table: its first five cells must equal the expected column
// titles exactly. Additional trailing cells are ignored.
func isMobileCloudVoicePricingHeader(cells []string) bool {
	expected := [...]string{"规格名称", "模型类别", "资费场景", "单价", "包含模型"}
	if len(cells) < len(expected) {
		return false
	}
	for i, title := range expected {
		if cells[i] != title {
			return false
		}
	}
	return true
}
// buildMobileCloudRecordsFromTable turns the data rows of a token pricing
// table into pricing records for region.
//
// A row with at least four cells starts a new spec: cell 1 is the billing
// kind, cell 2 the price, and cell 3 the model list. A "tokens资费" row emits
// records immediately with the same price for input and output. An
// "输入tokens" row only remembers its price and models; the records are
// emitted when a following shorter row whose first cell is "输出tokens"
// supplies the output price. Any other row is ignored.
func buildMobileCloudRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord {
	var (
		out           = make([]officialPricingRecord, 0)
		pendingModels = make([]string, 0)
		pendingInput  float64
	)
	for _, cells := range rows {
		if len(cells) >= 4 {
			kind := strings.TrimSpace(cells[1])
			unitPrice := mustParseSubscriptionPrice(cells[2])
			pendingModels = mobileCloudModelNames(cells[3])
			// Only an input-price row leaves a pending price behind.
			pendingInput = 0
			if kind == "输入tokens" {
				pendingInput = unitPrice
			} else if kind == "tokens资费" {
				out = append(out, buildMobileCloudFlatTokenRecords(region, pendingModels, unitPrice, docURL)...)
			}
			continue
		}

		if len(cells) < 2 || strings.TrimSpace(cells[0]) != "输出tokens" {
			continue
		}
		// An output row is only meaningful right after an input row.
		if pendingInput <= 0 || len(pendingModels) == 0 {
			continue
		}
		outputPrice := mustParseSubscriptionPrice(cells[1])
		out = append(out, buildMobileCloudInputOutputRecords(region, pendingModels, pendingInput, outputPrice, docURL)...)
		pendingInput = 0
	}
	return out
}
// buildMobileCloudInputOutputRecords creates one pricing record per model name
// for region, using inputPrice and outputPrice (CNY) and docURL as both the
// pricing and model source URL. Provider fields and modality are derived from
// each model name.
func buildMobileCloudInputOutputRecords(region string, modelNames []string, inputPrice float64, outputPrice float64, docURL string) []officialPricingRecord {
	// Fields shared by every record produced from this call.
	shared := officialPricingRecord{
		OperatorName:    "Mobile Cloud",
		OperatorNameCn:  "移动云",
		OperatorCountry: "CN",
		OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS",
		OperatorType:    "official",
		Region:          region,
		Currency:        "CNY",
		InputPrice:      inputPrice,
		OutputPrice:     outputPrice,
		SourceURL:       docURL,
		ModelSourceURL:  docURL,
		DateConfidence:  "unknown",
		DateSourceKind:  "official_pricing",
	}

	out := make([]officialPricingRecord, 0, len(modelNames))
	for _, name := range modelNames {
		rec := shared
		rec.ProviderName = mobileCloudProviderName(name)
		rec.ProviderNameCn, rec.ProviderCountry, rec.ProviderWebsite = providerMetadata(rec.ProviderName)
		rec.ModelID = normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), name)
		rec.ModelName = name
		rec.Modality = detectModality(name)
		out = append(out, rec)
	}
	return out
}
// buildMobileCloudFlatTokenRecords creates one pricing record per model name
// for models billed at a single per-token price: the same price is stored as
// both the input and the output price. All other fields are populated exactly
// as buildMobileCloudInputOutputRecords does, so the work is delegated to it.
func buildMobileCloudFlatTokenRecords(region string, modelNames []string, price float64, docURL string) []officialPricingRecord {
	return buildMobileCloudInputOutputRecords(region, modelNames, price, price, docURL)
}
// buildMobileCloudVoiceRecordsFromTable turns the data rows of a voice pricing
// table into flat-priced audio records for region. Rows with fewer than five
// cells are skipped. The model list comes from cell 4, falling back to the
// spec name in cell 0 when that list is empty; the price is the first number
// in cell 3, and the price unit is derived from cells 2 and 3.
func buildMobileCloudVoiceRecordsFromTable(region string, rows [][]string, docURL string) []officialPricingRecord {
	out := make([]officialPricingRecord, 0, len(rows))
	for _, cells := range rows {
		if len(cells) < 5 {
			continue
		}

		names := mobileCloudModelNames(cells[4])
		if len(names) == 0 {
			names = []string{strings.TrimSpace(cells[0])}
		}

		// Fields shared by every model listed in this row.
		shared := officialPricingRecord{
			OperatorName:    "Mobile Cloud",
			OperatorNameCn:  "移动云",
			OperatorCountry: "CN",
			OperatorWebsite: "https://ecloud.10086.cn/portal/product/MaaS",
			OperatorType:    "official",
			Region:          region,
			Currency:        "CNY",
			PricingMode:     "flat",
			SourceURL:       docURL,
			ModelSourceURL:  docURL,
			DateConfidence:  "unknown",
			DateSourceKind:  "official_pricing",
			Modality:        "audio",
		}
		shared.FlatPrice = mobileCloudInlinePrice(cells[3])
		shared.PriceUnit = mobileCloudVoicePriceUnit(cells[2], cells[3])

		for _, name := range names {
			rec := shared
			rec.ProviderName = mobileCloudProviderName(name)
			rec.ProviderNameCn, rec.ProviderCountry, rec.ProviderWebsite = providerMetadata(rec.ProviderName)
			rec.ModelID = normalizeExternalID("mobile-cloud", mobileCloudRegionCode(region), name)
			rec.ModelName = name
			out = append(out, rec)
		}
	}
	return out
}
// mobileCloudVoicePriceUnit derives the price-unit label from the billing
// scene and price text: "10k_characters" when either mentions "字符",
// otherwise "second" when either mentions "秒", otherwise "flat".
func mobileCloudVoicePriceUnit(scene string, price string) string {
	combined := strings.ToLower(strings.TrimSpace(scene + " " + price))
	// "万字符" always contains "字符" and "元/秒" always contains "秒", so one
	// check per unit covers both spellings.
	if strings.Contains(combined, "字符") {
		return "10k_characters"
	}
	if strings.Contains(combined, "秒") {
		return "second"
	}
	return "flat"
}
// mobileCloudInlinePrice returns the first decimal number embedded in raw
// (for example the "2" in "2元/万字符"), parsed by mustParseSubscriptionPrice.
// It returns 0 when raw contains no digits.
func mobileCloudInlinePrice(raw string) float64 {
	numberPattern := regexp.MustCompile(`([0-9]+(?:\.[0-9]+)?)`)
	// The pattern needs at least one digit, so an empty result means no match.
	number := numberPattern.FindString(raw)
	if number == "" {
		return 0
	}
	return mustParseSubscriptionPrice(number)
}
// mobileCloudModelNames splits the text of a "包含模型" cell into individual
// model names. Names may be separated by whitespace, by the enumeration comma
// "、", or by both; empty entries are dropped. The result is empty when raw
// contains no names.
//
// Treating "、" as a separator (rather than only trimming it from the end of
// whitespace-separated parts) keeps lists such as "A、B" from being returned
// as a single bogus model name.
func mobileCloudModelNames(raw string) []string {
	parts := strings.Fields(strings.ReplaceAll(raw, "、", " "))
	models := make([]string, 0, len(parts))
	models = append(models, parts...)
	return models
}
// mobileCloudProviderName maps a model name to the provider that publishes the
// model, based on a case-insensitive prefix of the trimmed name. Names with no
// known prefix are attributed to "China Mobile".
func mobileCloudProviderName(modelName string) string {
	normalized := strings.ToLower(strings.TrimSpace(modelName))
	// Checked in order; the first matching prefix wins.
	families := []struct {
		prefix   string
		provider string
	}{
		{"minimax", "MiniMax"},
		{"deepseek", "DeepSeek"},
		{"qwen", "Qwen"},
		{"qwq", "Qwen"},
		{"bge", "BAAI"},
		{"cosyvoice", "Alibaba"},
		{"sensevoice", "Alibaba"},
	}
	for _, family := range families {
		if strings.HasPrefix(normalized, family.prefix) {
			return family.provider
		}
	}
	return "China Mobile"
}
// mobileCloudRegionCode returns the ASCII code used in model IDs for a region
// name. Known region names (compared after trimming) map to fixed pinyin
// codes; any other name is passed, untrimmed, to normalizeExternalID.
func mobileCloudRegionCode(region string) string {
	knownCodes := map[string]string{
		"华北-呼和浩特": "huabei-huhehaote",
		"东北-哈尔滨":  "dongbei-haerbin",
		"华中-郑州":   "huazhong-zhengzhou",
		"黑龙江-哈尔滨": "heilongjiang-haerbin",
		"华东-上海5":  "huadong-shanghai5",
		"江西-南昌":   "jiangxi-nanchang",
		"湖北-武汉":   "hubei-wuhan",
		"华南-广州8":  "huanan-guangzhou8",
	}
	if code, known := knownCodes[strings.TrimSpace(region)]; known {
		return code
	}
	return normalizeExternalID(region)
}