Files
llm-intelligence/scripts/fetch_multi_source.go
2026-05-29 18:48:48 +08:00

669 lines
18 KiB
Go

//go:build llm_script && !scripts_pkg
// fetch_multi_source.go - 多源 LLM 定价采集器
// 支持: OpenRouter, Moonshot, DeepSeek, OpenAI 等
package main
import (
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"sort"
"strings"
"time"
_ "github.com/lib/pq"
)
// logger is the package-wide structured logger. It is assigned in init and
// writes JSON records at Info level and above to standard error.
var logger *slog.Logger

// init installs the JSON logger before main runs.
func init() {
	handlerOpts := &slog.HandlerOptions{Level: slog.LevelInfo}
	jsonHandler := slog.NewJSONHandler(os.Stderr, handlerOpts)
	logger = slog.New(jsonHandler)
}
// ============ 统一数据接口 ============
// ModelPricing is the normalized pricing record that every data source
// produces, one value per model/operator/region combination.
type ModelPricing struct {
	ModelID         string   // model identifier as reported by the source
	ModelName       string   // human-readable model name
	ProviderName    string   // name of the model's provider
	ProviderCountry string   // provider country code, e.g. "CN" or "US"
	OperatorName    string   // name of the operator offering the model
	OperatorType    string   // official / reseller / cloud
	Region          string   // region the price applies to, e.g. "global" or "CN"
	Currency        string   // currency code, e.g. "USD" or "CNY"
	InputPrice      float64  // per 1M tokens
	OutputPrice     float64  // per 1M tokens
	ContextLength   int      // context length of the model (presumably in tokens)
	IsFree          bool     // whether the model is offered at no cost
	SourceURL       string   // URL recorded as the origin of the pricing data
	Modality        string   // e.g. "text", "code" or "multimodal"
	SceneTags       []string // optional usage-scenario tags
}
// DataSource is the uniform collection interface implemented by every
// pricing source.
type DataSource interface {
	// Name returns the display name of the source.
	Name() string
	// SourceType reports the source category: official / reseller.
	SourceType() string
	// FetchPricing collects the pricing records this source currently offers.
	FetchPricing() ([]ModelPricing, error)
}
// runConfig carries the options that control a single collector run.
type runConfig struct {
	// DryRun makes the run collect and summarize without saving anything.
	DryRun bool
}
// sourceDefinition is one entry of the source registry: a selectable key, a
// display name, and a constructor for the corresponding DataSource.
type sourceDefinition struct {
	Key     string            // lowercase key used on the command line
	Name    string            // display name of the source
	Factory func() DataSource // builds a fresh instance of the source
}
// runSummary aggregates the outcome of a collector run for reporting.
type runSummary struct {
	SelectedSources      int            // number of sources that were attempted
	SelectedSourceKeys   []string       // keys of the attempted sources
	SuccessfulSources    int            // number of sources that returned data
	SuccessfulSourceKeys []string       // keys of the sources that succeeded
	FailedSourceKeys     []string       // keys of the sources that failed
	TotalModels          int            // number of pricing records collected
	DomesticModels       int            // records whose provider country is "CN"
	CurrencyCounts       map[string]int // record count per upper-cased currency
}
// pricingMetadataFields holds the descriptive columns stored next to a price
// row in region_pricing.
type pricingMetadataFields struct {
	SourceType      string // operator type, or "free_tier" for free models
	FreeQuota       string // free-quota description; empty for paid models
	FreeLimitations string // JSON array text describing free-tier limits
	RateLimit       string // JSON object text describing rate limits
}
// ============ OpenRouter 采集器 ============
// OpenRouterSource collects pricing from the OpenRouter model listing.
type OpenRouterSource struct {
	// APIKey is the optional OpenRouter API key.
	APIKey string
}

// Name returns the display name of this source.
func (s *OpenRouterSource) Name() string {
	return "OpenRouter"
}

// SourceType reports the source category, which is "reseller" for OpenRouter.
func (s *OpenRouterSource) SourceType() string {
	return "reseller"
}
// FetchPricing downloads the OpenRouter model list and converts every entry
// into a ModelPricing record.
//
// The API reports prices as decimal strings; they are multiplied by 1,000,000
// here so that the stored values are per 1M tokens. When APIKey is set it is
// sent as a Bearer token.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200 OK, when the body cannot be
// read, or when the body is not the expected JSON document.
func (s *OpenRouterSource) FetchPricing() ([]ModelPricing, error) {
	const (
		endpoint        = "https://openrouter.ai/api/v1/models"
		tokensPerMillion = 1000000
	)
	req, err := http.NewRequest(http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	if s.APIKey != "" {
		req.Header.Set("Authorization", "Bearer "+s.APIKey)
	}
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// An error response is usually valid JSON without a "data" array; without
	// this check it would decode to zero models and look like a success.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %s from %s", resp.Status, endpoint)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read response body: %w", err)
	}
	var payload struct {
		Data []struct {
			ID      string `json:"id"`
			Name    string `json:"name"`
			Pricing struct {
				Prompt     string `json:"prompt"`
				Completion string `json:"completion"`
			} `json:"pricing"`
			ContextLength int `json:"context_length"`
		} `json:"data"`
	}
	if err := json.Unmarshal(body, &payload); err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}
	prices := make([]ModelPricing, 0, len(payload.Data))
	for _, m := range payload.Data {
		// NOTE(review): parsePrice clamps negative prices to 0, so an entry
		// whose upstream price is negative is recorded as free — confirm this
		// is intended.
		inputPrice := parsePrice(m.Pricing.Prompt)
		outputPrice := parsePrice(m.Pricing.Completion)
		prices = append(prices, ModelPricing{
			ModelID:         m.ID,
			ModelName:       m.Name,
			ProviderName:    extractProvider(m.ID),
			ProviderCountry: "US",
			OperatorName:    "OpenRouter",
			OperatorType:    "reseller",
			Region:          "global",
			Currency:        "USD",
			InputPrice:      inputPrice * tokensPerMillion,
			OutputPrice:     outputPrice * tokensPerMillion,
			ContextLength:   m.ContextLength,
			IsFree:          inputPrice == 0 && outputPrice == 0,
			SourceURL:       "https://openrouter.ai/docs#models",
			Modality:        "text",
		})
	}
	logger.Info("OpenRouter采集完成", "models", len(prices))
	return prices, nil
}
// parsePrice converts a decimal price string into a float64. Input from which
// no number can be scanned yields 0, and negative values are clamped to 0.
func parsePrice(s string) float64 {
	price := 0.0
	// The scan result is deliberately discarded: when nothing can be scanned
	// price simply stays at 0.
	_, _ = fmt.Sscanf(s, "%f", &price)
	if price < 0 {
		price = 0
	}
	return price
}
// extractProvider returns the part of modelID that precedes the first "/".
// IDs without a "/" yield "unknown".
func extractProvider(modelID string) string {
	if vendor, _, hasSlash := strings.Cut(modelID, "/"); hasSlash {
		return vendor
	}
	return "unknown"
}
// sourceDefinitions returns the registry of supported sources in their
// canonical order: openrouter, moonshot, deepseek, openai. apiKey is handed to
// the OpenRouter source; the other sources take no configuration.
func sourceDefinitions(apiKey string) []sourceDefinition {
	newOpenRouter := func() DataSource { return &OpenRouterSource{APIKey: apiKey} }
	newMoonshot := func() DataSource { return &MoonshotSource{} }
	newDeepSeek := func() DataSource { return &DeepSeekSource{} }
	newOpenAI := func() DataSource { return &OpenAISource{} }

	return []sourceDefinition{
		{Key: "openrouter", Name: "OpenRouter", Factory: newOpenRouter},
		{Key: "moonshot", Name: "Moonshot", Factory: newMoonshot},
		{Key: "deepseek", Name: "DeepSeek", Factory: newDeepSeek},
		{Key: "openai", Name: "OpenAI", Factory: newOpenAI},
	}
}
// parseSourceList splits a comma-separated list of source keys. Each entry is
// trimmed and lower-cased; empty entries and repeats are dropped while the
// order of first appearance is kept. A blank input returns nil.
func parseSourceList(raw string) []string {
	if strings.TrimSpace(raw) == "" {
		return nil
	}
	fields := strings.Split(raw, ",")
	alreadyAdded := make(map[string]bool, len(fields))
	keys := make([]string, 0, len(fields))
	for i := range fields {
		key := strings.TrimSpace(fields[i])
		key = strings.ToLower(key)
		if key == "" || alreadyAdded[key] {
			continue
		}
		alreadyAdded[key] = true
		keys = append(keys, key)
	}
	return keys
}
// buildSources instantiates the data sources for a run. With no requested
// keys every registered source is built in registry order; otherwise exactly
// the requested sources are built, in the order given. An unregistered key
// yields an error and no sources.
func buildSources(apiKey string, requested []string) ([]DataSource, error) {
	defs := sourceDefinitions(apiKey)
	if len(requested) > 0 {
		byKey := make(map[string]sourceDefinition, len(defs))
		for i := range defs {
			byKey[defs[i].Key] = defs[i]
		}
		picked := make([]DataSource, 0, len(requested))
		for _, key := range requested {
			def, known := byKey[key]
			if !known {
				return nil, fmt.Errorf("unknown source %q", key)
			}
			picked = append(picked, def.Factory())
		}
		return picked, nil
	}
	all := make([]DataSource, 0, len(defs))
	for _, def := range defs {
		all = append(all, def.Factory())
	}
	return all, nil
}
// listSourceKeys returns the keys of all registered sources in registry order.
func listSourceKeys(apiKey string) []string {
	defs := sourceDefinitions(apiKey)
	keys := make([]string, len(defs))
	for i := range defs {
		keys[i] = defs[i].Key
	}
	return keys
}
// summarizePrices builds the run summary from the attempted, successful and
// failed source keys plus all collected prices. The key slices are copied so
// the summary does not alias the caller's slices. Records whose provider
// country equals "CN" (case-insensitively) count as domestic, and currencies
// are tallied by their upper-cased code.
func summarizePrices(selectedSourceKeys []string, successfulSourceKeys []string, failedSourceKeys []string, prices []ModelPricing) runSummary {
	copyKeys := func(keys []string) []string {
		return append([]string(nil), keys...)
	}
	domestic := 0
	byCurrency := make(map[string]int)
	for i := range prices {
		entry := &prices[i]
		if strings.EqualFold(entry.ProviderCountry, "CN") {
			domestic++
		}
		byCurrency[strings.ToUpper(entry.Currency)]++
	}
	return runSummary{
		SelectedSources:      len(selectedSourceKeys),
		SelectedSourceKeys:   copyKeys(selectedSourceKeys),
		SuccessfulSources:    len(successfulSourceKeys),
		SuccessfulSourceKeys: copyKeys(successfulSourceKeys),
		FailedSourceKeys:     copyKeys(failedSourceKeys),
		TotalModels:          len(prices),
		DomesticModels:       domestic,
		CurrencyCounts:       byCurrency,
	}
}
// sourceKey derives the lowercase key of a source from its display name:
// surrounding whitespace is trimmed, interior spaces become underscores and
// the result is lower-cased. For the built-in sources this yields
// "openrouter", "moonshot", "deepseek" and "openai".
func sourceKey(src DataSource) string {
	name := strings.TrimSpace(src.Name())
	return strings.ToLower(strings.ReplaceAll(name, " ", "_"))
}
// formatCountMap renders counts as "key:count" pairs joined by commas, with
// keys in ascending order so the output is deterministic. An empty map
// renders as "none".
func formatCountMap(counts map[string]int) string {
	if len(counts) == 0 {
		return "none"
	}
	names := make([]string, 0, len(counts))
	for name := range counts {
		names = append(names, name)
	}
	sort.Strings(names)
	var out strings.Builder
	for i, name := range names {
		if i > 0 {
			out.WriteByte(',')
		}
		fmt.Fprintf(&out, "%s:%d", name, counts[name])
	}
	return out.String()
}
// formatKeyList joins keys with commas; an empty list renders as "none".
func formatKeyList(keys []string) string {
	switch len(keys) {
	case 0:
		return "none"
	default:
		return strings.Join(keys, ",")
	}
}
// printSummary writes the summary to w as a single line of key=value pairs.
// A nil writer disables output and is not an error. The write error, if any,
// is returned.
func printSummary(w io.Writer, summary runSummary) error {
	if w == nil {
		return nil
	}
	const layout = "sources=%d successful_sources=%d models=%d domestic_models=%d selected_source_keys=%s successful_source_keys=%s failed_source_keys=%s currencies=%s\n"
	selected := formatKeyList(summary.SelectedSourceKeys)
	successful := formatKeyList(summary.SuccessfulSourceKeys)
	failed := formatKeyList(summary.FailedSourceKeys)
	currencies := formatCountMap(summary.CurrencyCounts)
	if _, err := fmt.Fprintf(w, layout,
		summary.SelectedSources, summary.SuccessfulSources,
		summary.TotalModels, summary.DomesticModels,
		selected, successful, failed, currencies,
	); err != nil {
		return err
	}
	return nil
}
// pricingMetadata derives the descriptive columns stored with a price row.
// Free models are tagged "free_tier" and carry placeholder quota and
// limitation text pointing at source_url. All other models use their
// normalized (lower-cased, trimmed) operator type, defaulting to "official"
// when it is blank.
func pricingMetadata(p ModelPricing) pricingMetadataFields {
	if p.IsFree {
		return pricingMetadataFields{
			SourceType:      "free_tier",
			FreeQuota:       "See source_url for provider free-tier details",
			FreeLimitations: `["See source_url for current quota and policy"]`,
			RateLimit:       "{}",
		}
	}
	kind := strings.ToLower(p.OperatorType)
	kind = strings.TrimSpace(kind)
	if kind == "" {
		kind = "official"
	}
	return pricingMetadataFields{
		SourceType:      kind,
		FreeLimitations: "[]",
		RateLimit:       "{}",
	}
}
// ============ Moonshot 采集器 ============
// MoonshotSource provides the built-in Moonshot price list.
type MoonshotSource struct{}

// Name returns the display name of this source.
func (s *MoonshotSource) Name() string {
	return "Moonshot"
}

// SourceType reports the source category, which is "official" for Moonshot.
func (s *MoonshotSource) SourceType() string {
	return "official"
}
// FetchPricing returns the Moonshot price list that is hard-coded in this
// function; it performs no network request and never returns an error.
func (s *MoonshotSource) FetchPricing() ([]ModelPricing, error) {
	// catalog lists only the fields that differ between models; the shared
	// provider, operator, region and currency are filled in below.
	catalog := []struct {
		id, name      string
		input, output float64
		contextLen    int
		sourceURL     string
		modality      string
		tags          []string
	}{
		{
			id:         "kimi-k2.6",
			name:       "Kimi K2.6",
			input:      6.50,
			output:     27.00,
			contextLen: 262144,
			sourceURL:  "https://platform.kimi.com/docs/pricing/chat-k26",
			modality:   "multimodal",
			tags:       []string{"对话", "视觉", "代码"},
		},
		{
			id:         "kimi-k2-0905-preview",
			name:       "Kimi K2 0905 Preview",
			input:      4.00,
			output:     16.00,
			contextLen: 262144,
			sourceURL:  "https://platform.kimi.com/docs/pricing/chat-k2",
			modality:   "text",
			tags:       []string{"代码", "对话"},
		},
		{
			id:         "moonshot-v1-8k",
			name:       "Moonshot V1 8K",
			input:      2.00,
			output:     10.00,
			contextLen: 8192,
			sourceURL:  "https://platform.kimi.com/docs/pricing/chat-v1",
			modality:   "text",
			tags:       []string{"对话"},
		},
	}
	prices := make([]ModelPricing, 0, len(catalog))
	for _, m := range catalog {
		prices = append(prices, ModelPricing{
			ModelID:         m.id,
			ModelName:       m.name,
			ProviderName:    "Moonshot AI",
			ProviderCountry: "CN",
			OperatorName:    "Moonshot",
			OperatorType:    "official",
			Region:          "CN",
			Currency:        "CNY",
			InputPrice:      m.input,
			OutputPrice:     m.output,
			ContextLength:   m.contextLen,
			IsFree:          false,
			SourceURL:       m.sourceURL,
			Modality:        m.modality,
			SceneTags:       m.tags,
		})
	}
	logger.Info("Moonshot采集完成", "models", len(prices))
	return prices, nil
}
// ============ DeepSeek 采集器 ============
// DeepSeekSource provides the built-in DeepSeek price list.
type DeepSeekSource struct{}

// Name returns the display name of this source.
func (s *DeepSeekSource) Name() string {
	return "DeepSeek"
}

// SourceType reports the source category, which is "official" for DeepSeek.
func (s *DeepSeekSource) SourceType() string {
	return "official"
}
// FetchPricing returns the DeepSeek price list that is hard-coded in this
// function; it performs no network request and never returns an error.
func (s *DeepSeekSource) FetchPricing() ([]ModelPricing, error) {
	// Both models share one pricing page.
	const pricingURL = "https://api-docs.deepseek.com/quick_start/pricing"
	// catalog lists only the fields that differ between models; the shared
	// provider, operator, region and currency are filled in below.
	catalog := []struct {
		id, name      string
		input, output float64
		contextLen    int
		modality      string
		tags          []string
	}{
		{
			id:         "deepseek-v4-flash",
			name:       "DeepSeek V4 Flash",
			input:      0.14,
			output:     0.28,
			contextLen: 1000000,
			modality:   "text",
			tags:       []string{"对话", "推理"},
		},
		{
			id:         "deepseek-v4-pro",
			name:       "DeepSeek V4 Pro",
			input:      0.435,
			output:     0.87,
			contextLen: 1000000,
			modality:   "code",
			tags:       []string{"对话", "推理", "代码"},
		},
	}
	prices := make([]ModelPricing, 0, len(catalog))
	for _, m := range catalog {
		prices = append(prices, ModelPricing{
			ModelID:         m.id,
			ModelName:       m.name,
			ProviderName:    "DeepSeek",
			ProviderCountry: "CN",
			OperatorName:    "DeepSeek",
			OperatorType:    "official",
			Region:          "global",
			Currency:        "USD",
			InputPrice:      m.input,
			OutputPrice:     m.output,
			ContextLength:   m.contextLen,
			IsFree:          false,
			SourceURL:       pricingURL,
			Modality:        m.modality,
			SceneTags:       m.tags,
		})
	}
	logger.Info("DeepSeek采集完成", "models", len(prices))
	return prices, nil
}
// ============ OpenAI 采集器 ============
// OpenAISource provides the built-in OpenAI price list.
type OpenAISource struct{}

// Name returns the display name of this source.
func (s *OpenAISource) Name() string {
	return "OpenAI"
}

// SourceType reports the source category, which is "official" for OpenAI.
func (s *OpenAISource) SourceType() string {
	return "official"
}
// FetchPricing returns the OpenAI price list that is hard-coded in this
// function; it performs no network request and never returns an error.
func (s *OpenAISource) FetchPricing() ([]ModelPricing, error) {
	// All models share one pricing page and context length.
	const (
		pricingURL = "https://openai.com/api/pricing/"
		contextLen = 200000
	)
	// catalog lists only the fields that differ between models; the shared
	// provider, operator, region and currency are filled in below.
	catalog := []struct {
		id, name      string
		input, output float64
		modality      string
		tags          []string
	}{
		{
			id:       "gpt-5.5",
			name:     "GPT-5.5",
			input:    5.00,
			output:   30.00,
			modality: "code",
			tags:     []string{"代码", "推理", "对话"},
		},
		{
			id:       "gpt-5.4",
			name:     "GPT-5.4",
			input:    2.50,
			output:   15.00,
			modality: "text",
			tags:     []string{"代码", "对话"},
		},
		{
			id:       "gpt-5.4-mini",
			name:     "GPT-5.4 Mini",
			input:    0.75,
			output:   4.50,
			modality: "text",
			tags:     []string{"对话"},
		},
	}
	prices := make([]ModelPricing, 0, len(catalog))
	for _, m := range catalog {
		prices = append(prices, ModelPricing{
			ModelID:         m.id,
			ModelName:       m.name,
			ProviderName:    "OpenAI",
			ProviderCountry: "US",
			OperatorName:    "OpenAI",
			OperatorType:    "official",
			Region:          "global",
			Currency:        "USD",
			InputPrice:      m.input,
			OutputPrice:     m.output,
			ContextLength:   contextLen,
			IsFree:          false,
			SourceURL:       pricingURL,
			Modality:        m.modality,
			SceneTags:       m.tags,
		})
	}
	logger.Info("OpenAI采集完成", "models", len(prices))
	return prices, nil
}
// ============ 数据库写入 ============
// saveToDatabase persists each pricing record, creating the provider,
// operator and model rows it refers to when they do not exist yet, and then
// upserting the price into region_pricing for the current date.
//
// Records are written independently and best-effort: a failure is logged and
// the loop continues with the next record. An error is returned only when
// prices is non-empty and not a single record could be written, so that a
// completely failed run (for example an unusable connection) is no longer
// reported as success. batchID is stored on newly created model rows.
func saveToDatabase(db *sql.DB, prices []ModelPricing, batchID string) error {
	// The multi-line SQL literals keep their original line layout so the
	// statement text sent to the server is unchanged.
	const insertModelSQL = `INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`
	const upsertPricingSQL = `INSERT INTO region_pricing
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
is_free = EXCLUDED.is_free,
source_type = EXCLUDED.source_type,
free_quota = EXCLUDED.free_quota,
free_limitations = EXCLUDED.free_limitations,
rate_limit = EXCLUDED.rate_limit,
updated_at = CURRENT_TIMESTAMP`

	saved := 0
	for _, p := range prices {
		// Find or create the provider.
		providerID, err := lookupOrInsertID(db,
			"SELECT id FROM model_provider WHERE name = $1",
			[]any{p.ProviderName},
			"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
			[]any{p.ProviderName, p.ProviderCountry, ""},
		)
		if err != nil {
			logger.Warn("provider error", "name", p.ProviderName, "error", err)
			continue
		}
		// Find or create the operator. The operator row reuses the provider's
		// country because ModelPricing carries no separate operator country.
		operatorID, err := lookupOrInsertID(db,
			"SELECT id FROM operator WHERE name = $1",
			[]any{p.OperatorName},
			"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
			[]any{p.OperatorName, p.ProviderCountry},
		)
		if err != nil {
			logger.Warn("operator error", "name", p.OperatorName, "error", err)
			continue
		}
		// Find or create the model, keyed by its external_id.
		modelID, err := lookupOrInsertID(db,
			"SELECT id FROM models WHERE external_id = $1",
			[]any{p.ModelID},
			insertModelSQL,
			[]any{p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID},
		)
		if err != nil {
			logger.Warn("model error", "id", p.ModelID, "error", err)
			continue
		}
		// Insert the price, or refresh today's row if it already exists.
		metadata := pricingMetadata(p)
		if _, err := db.Exec(
			upsertPricingSQL,
			modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
			metadata.SourceType, metadata.FreeQuota, metadata.FreeLimitations, metadata.RateLimit,
		); err != nil {
			logger.Warn("pricing insert error", "model", p.ModelID, "error", err)
			continue
		}
		saved++
	}
	if failed := len(prices) - saved; failed > 0 {
		logger.Warn("pricing records skipped", "batch_id", batchID, "saved", saved, "failed", failed)
	}
	if len(prices) > 0 && saved == 0 {
		return fmt.Errorf("save pricing batch %s: all %d records failed", batchID, len(prices))
	}
	return nil
}

// lookupOrInsertID returns the id produced by selectSQL, or, when that query
// matches no row, the id returned by running insertSQL. Both statements must
// yield a single integer column.
func lookupOrInsertID(db *sql.DB, selectSQL string, selectArgs []any, insertSQL string, insertArgs []any) (int64, error) {
	var id int64
	err := db.QueryRow(selectSQL, selectArgs...).Scan(&id)
	if err == sql.ErrNoRows {
		err = db.QueryRow(insertSQL, insertArgs...).Scan(&id)
	}
	return id, err
}
// savePricesToDatabase opens the PostgreSQL database identified by dsn,
// verifies that it is reachable, and stores prices under a freshly generated
// batch id of the form "phase2-YYYYMMDD-HHMMSS".
//
// It returns an error when the database cannot be opened or reached, or when
// saveToDatabase reports a failure.
func savePricesToDatabase(dsn string, prices []ModelPricing) error {
	db, err := sql.Open("postgres", dsn)
	if err != nil {
		return fmt.Errorf("open database: %w", err)
	}
	defer db.Close()
	// sql.Open may only validate its arguments; Ping forces a real connection
	// so an unreachable database fails once here instead of once per record.
	if err := db.Ping(); err != nil {
		return fmt.Errorf("connect to database: %w", err)
	}
	batchID := fmt.Sprintf("phase2-%s", time.Now().Format("20060102-150405"))
	return saveToDatabase(db, prices, batchID)
}
// defaultDSN returns the database connection string: the DATABASE_URL
// environment variable when it is set to a non-empty value, otherwise a
// built-in DSN for the local llm_intelligence database over a Unix socket.
func defaultDSN() string {
	const fallback = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
	if fromEnv, ok := os.LookupEnv("DATABASE_URL"); ok && fromEnv != "" {
		return fromEnv
	}
	return fallback
}
// runCollector fetches pricing from every source, prints a summary to out and,
// unless cfg.DryRun is set, hands the collected prices to saveFn.
//
// A source that fails is logged and recorded as failed; the run continues
// with the remaining sources. The function returns an error when the summary
// cannot be written, when no source succeeded, when saving is required but
// saveFn is nil, or when saveFn itself fails.
func runCollector(cfg runConfig, sources []DataSource, saveFn func([]ModelPricing) error, out io.Writer) error {
	var (
		collected = []ModelPricing{}
		selected  = make([]string, 0, len(sources))
		succeeded = make([]string, 0, len(sources))
		failed    = []string{}
	)
	for _, src := range sources {
		key := sourceKey(src)
		selected = append(selected, key)
		prices, err := src.FetchPricing()
		if err == nil {
			succeeded = append(succeeded, key)
			collected = append(collected, prices...)
			continue
		}
		logger.Error("采集失败", "source", src.Name(), "error", err)
		failed = append(failed, key)
	}

	summary := summarizePrices(selected, succeeded, failed, collected)
	if err := printSummary(out, summary); err != nil {
		return err
	}

	// The summary is printed first so it is visible even for failed runs.
	switch {
	case summary.SuccessfulSources == 0:
		return fmt.Errorf("no data source collected successfully")
	case cfg.DryRun:
		return nil
	case saveFn == nil:
		return fmt.Errorf("save function is required when dry-run is disabled")
	}
	if err := saveFn(collected); err != nil {
		return err
	}
	logger.Info("多源采集完成", "total_models", len(collected), "sources", summary.SuccessfulSources)
	return nil
}
// ============ 主程序 ============
// main parses the command-line flags, optionally lists the available source
// keys, and otherwise runs the collector against the selected sources. The
// process exits with status 1 when the source selection is invalid or the
// run fails.
func main() {
	sourcesFlag := flag.String("sources", "", "comma-separated source keys: openrouter,moonshot,deepseek,openai")
	dryRun := flag.Bool("dry-run", false, "collect and print summary without writing to database")
	listSources := flag.Bool("list-sources", false, "print available source keys and exit")
	flag.Parse()

	apiKey := os.Getenv("OPENROUTER_API_KEY")
	if *listSources {
		fmt.Println(strings.Join(listSourceKeys(apiKey), ","))
		return
	}

	sources, err := buildSources(apiKey, parseSourceList(*sourcesFlag))
	if err != nil {
		logger.Error("数据源参数非法", "error", err)
		os.Exit(1)
	}

	// The DSN is resolved only when prices are actually saved.
	persist := func(prices []ModelPricing) error {
		return savePricesToDatabase(defaultDSN(), prices)
	}
	if err := runCollector(runConfig{DryRun: *dryRun}, sources, persist, os.Stdout); err != nil {
		logger.Error("多源采集失败", "error", err)
		os.Exit(1)
	}
}