//go:build llm_script && !scripts_pkg package main import ( "database/sql" "encoding/json" "fmt" "log" "os" "strings" "time" _ "github.com/lib/pq" ) type RawData struct { Zhipu []struct { Model string `json:"model"` Context string `json:"context"` InputPrice string `json:"inputPrice"` OutputPrice string `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"zhipu"` Baidu []struct { Model string `json:"model"` Type string `json:"type"` InputPrice *float64 `json:"inputPrice"` OutputPrice *float64 `json:"outputPrice"` Operator string `json:"operator"` Region string `json:"region"` Currency string `json:"currency"` } `json:"baidu"` } type ModelPricing struct { ModelID string ModelName string ProviderName string ProviderCountry string OperatorName string OperatorType string Region string Currency string InputPrice float64 OutputPrice float64 ContextLength int IsFree bool SourceURL string ModelSourceURL string ReleaseDate string DateConfidence string DateSourceKind string Modality string SceneTags []string } func releaseDateValue(raw string) any { if strings.TrimSpace(raw) == "" { return nil } parsed, err := time.Parse("2006-01-02", raw) if err != nil { return nil } return parsed } type baiduModelMetadata struct { Prefix string ReleaseDate string ModelSourceURL string DateConfidence string DateSourceKind string } var baiduModelMetadataRules = []baiduModelMetadata{ { Prefix: "baidu-ernie-5.0", ReleaseDate: "2026-01-22", ModelSourceURL: "https://cloud.baidu.com/news/news_eacd0f0b-0ca3-4963-aec8-5e6b9ebef9ba", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-x1.1", ReleaseDate: "2025-09-09", ModelSourceURL: "https://cloud.baidu.com/news/news_be713ff4-8477-4852-88f1-9cc56c406d6a", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-5.1", ReleaseDate: "2026-05-09", ModelSourceURL: "https://ernie.baidu.com/blog/posts/ernie-5.1-0508-release/", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-4.5-turbo-vl", ReleaseDate: "2025-08-07", ModelSourceURL: "https://cloud.baidu.com/product/wenxinworkshop.html", DateConfidence: "secondary_authoritative", DateSourceKind: "secondary_authoritative_report", }, { Prefix: "baidu-ernie-4.5-turbo", ReleaseDate: "2025-04-25", ModelSourceURL: "https://cloud.baidu.com/article/3887765", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-x1-turbo", ReleaseDate: "2025-04-25", ModelSourceURL: "https://cloud.baidu.com/article/3887765", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-4.5", ReleaseDate: "2025-03-16", ModelSourceURL: "https://cloud.baidu.com/article/3835921", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-x1", ReleaseDate: "2025-03-16", ModelSourceURL: "https://cloud.baidu.com/article/3835921", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-character", ReleaseDate: "2024-03-22", ModelSourceURL: "https://cloud.baidu.com/news/news_667c065f-0bd7-475d-98c2-901763d0ee77", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-lite-pro", ReleaseDate: "2024-03-22", ModelSourceURL: "https://cloud.baidu.com/news/news_667c065f-0bd7-475d-98c2-901763d0ee77", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-ernie-speed-pro", ReleaseDate: "2024-03-22", ModelSourceURL: "https://cloud.baidu.com/news/news_667c065f-0bd7-475d-98c2-901763d0ee77", DateConfidence: "official_primary", DateSourceKind: "official_announcement", }, { Prefix: "baidu-qianfan-", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-deepseek-", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-glm-", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-qwen", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-minimax-", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-kimi-", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, { Prefix: "baidu-internvl", DateConfidence: "unknown", DateSourceKind: "catalog_backfill", }, } func enrichBaiduModelMetadata(model ModelPricing) ModelPricing { normalizedID := strings.ToLower(model.ModelID) for _, metadata := range baiduModelMetadataRules { if strings.HasPrefix(normalizedID, metadata.Prefix) { if metadata.ReleaseDate != "" { model.ReleaseDate = metadata.ReleaseDate } if metadata.ModelSourceURL != "" { model.ModelSourceURL = metadata.ModelSourceURL } if metadata.DateConfidence != "" { model.DateConfidence = metadata.DateConfidence } if metadata.DateSourceKind != "" { model.DateSourceKind = metadata.DateSourceKind } return model } } if model.ModelSourceURL == "" { model.ModelSourceURL = model.SourceURL } if model.DateConfidence == "" { model.DateConfidence = "unknown" } if model.DateSourceKind == "" { model.DateSourceKind = "unknown" } return model } func hasExplicitModelMetadata(model ModelPricing) bool { return strings.TrimSpace(model.ReleaseDate) != "" || firstNonEmpty(model.ModelSourceURL) != "" && model.ModelSourceURL != model.SourceURL || strings.TrimSpace(model.DateConfidence) != "" && model.DateConfidence != "unknown" || strings.TrimSpace(model.DateSourceKind) != "" && model.DateSourceKind != "unknown" } func parseZhipuPrice(s string) float64 { // Extract price from strings like "6元", "免费", "限时免费" if strings.Contains(s, "免费") { return 0 } var f float64 fmt.Sscanf(s, "%f", &f) return f } func extractContextLength(context string) int { if strings.Contains(context, "1M") || strings.Contains(context, "1000K") { return 1000000 } if strings.Contains(context, "200K") { return 200000 } if strings.Contains(context, "128K") { return 128000 } if strings.Contains(context, "32K") { return 32000 } if strings.Contains(context, "8K") { return 8000 } if strings.Contains(context, "262144") || strings.Contains(context, "256K") { return 262144 } if strings.Contains(context, "8192") { return 8192 } return 0 } func main() { dsn := os.Getenv("DATABASE_URL") if dsn == "" { dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" } db, err := sql.Open("postgres", dsn) if err != nil { log.Fatal(err) } defer db.Close() // Read raw data data, err := os.ReadFile("/tmp/phase2_raw_data.json") if err != nil { log.Fatal("Failed to read raw data:", err) } var raw RawData if err := json.Unmarshal(data, &raw); err != nil { log.Fatal("Failed to parse raw data:", err) } var prices []ModelPricing batchID := "manual-seed" // Process Baidu data modelPrices := make(map[string]map[string]float64) // model -> type -> price for _, b := range raw.Baidu { if modelPrices[b.Model] == nil { modelPrices[b.Model] = make(map[string]float64) } if b.InputPrice != nil { if strings.Contains(b.Type, "输入") { modelPrices[b.Model]["input"] = *b.InputPrice * 1000000 // Convert to per 1M } if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.InputPrice * 1000000 } } if b.OutputPrice != nil { if strings.Contains(b.Type, "输出") { modelPrices[b.Model]["output"] = *b.OutputPrice * 1000000 } } } for model, pricesMap := range modelPrices { prices = append(prices, enrichBaiduModelMetadata(ModelPricing{ ModelID: "baidu-" + strings.ToLower(strings.ReplaceAll(model, " ", "-")), ModelName: model, ProviderName: "Baidu", ProviderCountry: "CN", OperatorName: "Baidu Qianfan", OperatorType: "official", Region: "CN", Currency: "CNY", InputPrice: pricesMap["input"], OutputPrice: pricesMap["output"], IsFree: pricesMap["input"] == 0 && pricesMap["output"] == 0, SourceURL: "https://cloud.baidu.com/doc/qianfan/s/wmh4sv6ya", Modality: "text", })) } log.Printf("Parsed %d unique models from Baidu", len(prices)) // Save to database for _, p := range prices { // Find or create provider var providerID int64 err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id", p.ProviderName, p.ProviderCountry, "", ).Scan(&providerID) } if err != nil { log.Printf("Provider error: %v", err) continue } // Find or create operator var operatorID int64 err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID) if err == sql.ErrNoRows { err = db.QueryRow( "INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id", p.OperatorName, p.ProviderCountry, ).Scan(&operatorID) } if err != nil { log.Printf("Operator error: %v", err) continue } // Find or create model var modelID int64 err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID) if err == sql.ErrNoRows { err = db.QueryRow( `INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id, source_url, release_date, date_confidence, date_source_kind) VALUES ($1, $2, $3, $4, $5, 'active', $6, $7, $8, $9, $10, $11) RETURNING id`, p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate), p.DateConfidence, p.DateSourceKind, ).Scan(&modelID) } if err != nil { log.Printf("Model error: %v", err) continue } if _, err := db.Exec( `UPDATE models SET source_url = CASE WHEN $4 THEN $2 ELSE COALESCE(NULLIF(source_url, ''), $2) END, release_date = CASE WHEN $4 THEN $3::date ELSE COALESCE(release_date, $3::date) END, date_confidence = CASE WHEN $4 THEN $5 ELSE COALESCE(NULLIF(date_confidence, ''), $5, 'unknown') END, date_source_kind = CASE WHEN $4 THEN $6 ELSE COALESCE(NULLIF(date_source_kind, ''), $6, 'unknown') END, updated_at = CURRENT_TIMESTAMP WHERE id = $1`, modelID, firstNonEmpty(p.ModelSourceURL, p.SourceURL), releaseDateValue(p.ReleaseDate), hasExplicitModelMetadata(p), p.DateConfidence, p.DateSourceKind, ); err != nil { log.Printf("Model metadata update error for %s: %v", p.ModelID, err) } // Insert pricing sourceType := p.OperatorType freeQuota := "" freeLimitations := "[]" rateLimit := "{}" if p.IsFree { sourceType = "free_tier" freeQuota = "Imported free-tier pricing entry" freeLimitations = `["See source_url for current quota and policy"]` } _, err = db.Exec( `INSERT INTO region_pricing (model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit) VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12) ON CONFLICT (model_id, operator_id, region, currency, effective_date) DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok, output_price_per_mtok = EXCLUDED.output_price_per_mtok, is_free = EXCLUDED.is_free, source_type = EXCLUDED.source_type, free_quota = EXCLUDED.free_quota, free_limitations = EXCLUDED.free_limitations, rate_limit = EXCLUDED.rate_limit, updated_at = CURRENT_TIMESTAMP`, modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL, sourceType, freeQuota, freeLimitations, rateLimit, ) if err != nil { log.Printf("Pricing error for %s: %v", p.ModelID, err) continue } } log.Printf("Successfully imported %d models into database", len(prices)) } func firstNonEmpty(values ...string) string { for _, value := range values { if value != "" { return value } } return "" }