forked from niuniu/llm-intelligence
632 lines
17 KiB
Go
632 lines
17 KiB
Go
//go:build llm_script
|
|
|
|
// fetch_multi_source.go - 多源 LLM 定价采集器
|
|
// 支持: OpenRouter, Moonshot, DeepSeek, OpenAI 等
|
|
package main
|
|
|
|
import (
	"database/sql"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log/slog"
	"math"
	"net/http"
	"os"
	"sort"
	"strconv"
	"strings"
	"time"

	_ "github.com/lib/pq"
)
|
|
|
|
// logger is the package-wide structured logger. It is assigned in init.
var logger *slog.Logger

// init installs a JSON slog handler that writes to stderr and emits records
// at Info level and above.
func init() {
	opts := &slog.HandlerOptions{Level: slog.LevelInfo}
	handler := slog.NewJSONHandler(os.Stderr, opts)
	logger = slog.New(handler)
}
|
|
|
|
// ============ 统一数据接口 ============
|
|
|
|
// ModelPricing is the normalized pricing record that every data source
// returns: one model, offered by one operator, in one region and currency.
type ModelPricing struct {
	// ModelID is the source's identifier for the model.
	ModelID string
	// ModelName is the display name of the model.
	ModelName string
	// ProviderName is the model's provider, e.g. "OpenAI".
	ProviderName string
	// ProviderCountry is the provider's country code, e.g. "US" or "CN".
	ProviderCountry string
	// OperatorName is the party selling access at this price.
	OperatorName string
	// OperatorType is one of: official / reseller / cloud.
	OperatorType string
	// Region is the sales region, e.g. "global" or "CN".
	Region string
	// Currency is the currency code of both prices, e.g. "USD" or "CNY".
	Currency string
	// InputPrice is the input price per 1M tokens.
	InputPrice float64
	// OutputPrice is the output price per 1M tokens.
	OutputPrice float64
	// ContextLength is the context length reported for the model.
	ContextLength int
	// IsFree marks a listing as free.
	IsFree bool
	// SourceURL points at the page or document the prices were taken from.
	SourceURL string
	// Modality is a label such as "text", "multimodal" or "code".
	Modality string
	// SceneTags are free-form usage tags attached to the model.
	SceneTags []string
}
|
|
|
|
// DataSource 统一采集接口
|
|
type DataSource interface {
|
|
Name() string
|
|
FetchPricing() ([]ModelPricing, error)
|
|
SourceType() string // official / reseller
|
|
}
|
|
|
|
// runConfig carries the options that control a single collector run.
type runConfig struct {
	// DryRun, when true, makes the run stop after printing the summary
	// instead of saving the collected prices.
	DryRun bool
}
|
|
|
|
type sourceDefinition struct {
|
|
Key string
|
|
Name string
|
|
Factory func() DataSource
|
|
}
|
|
|
|
// runSummary aggregates the outcome of one collector run for reporting.
type runSummary struct {
	// SelectedSources is the number of sources that were asked to run.
	SelectedSources int
	// SuccessfulSources is the number of sources that returned without error.
	SuccessfulSources int
	// TotalModels is the number of pricing records collected overall.
	TotalModels int
	// DomesticModels is the number of records whose provider country is CN.
	DomesticModels int
	// CurrencyCounts maps an upper-cased currency code to its record count.
	CurrencyCounts map[string]int
}
|
|
|
|
// pricingMetadataFields holds the descriptive columns stored alongside a
// price row.
type pricingMetadataFields struct {
	// SourceType is the operator type, or "free_tier" for free listings.
	SourceType string
	// FreeQuota is a free-text description of the free quota, if any.
	FreeQuota string
	// FreeLimitations is a JSON array encoded as a string.
	FreeLimitations string
	// RateLimit is a JSON object encoded as a string.
	RateLimit string
}
|
|
|
|
// ============ OpenRouter 采集器 ============
|
|
|
|
// OpenRouterSource collects pricing from the OpenRouter models API.
type OpenRouterSource struct {
	// APIKey is sent as a bearer token when non-empty.
	APIKey string
}

// Name returns the display name of this source.
func (s *OpenRouterSource) Name() string {
	return "OpenRouter"
}

// SourceType reports that OpenRouter is a reseller.
func (s *OpenRouterSource) SourceType() string {
	return "reseller"
}
|
|
|
|
func (s *OpenRouterSource) FetchPricing() ([]ModelPricing, error) {
|
|
url := "https://openrouter.ai/api/v1/models"
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if s.APIKey != "" {
|
|
req.Header.Set("Authorization", "Bearer "+s.APIKey)
|
|
}
|
|
|
|
client := &http.Client{Timeout: 30 * time.Second}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
body, _ := io.ReadAll(resp.Body)
|
|
|
|
var result struct {
|
|
Data []struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
Pricing struct {
|
|
Prompt string `json:"prompt"`
|
|
Completion string `json:"completion"`
|
|
} `json:"pricing"`
|
|
ContextLength int `json:"context_length"`
|
|
} `json:"data"`
|
|
}
|
|
|
|
if err := json.Unmarshal(body, &result); err != nil {
|
|
return nil, fmt.Errorf("parse error: %w", err)
|
|
}
|
|
|
|
var prices []ModelPricing
|
|
for _, m := range result.Data {
|
|
inputPrice := parsePrice(m.Pricing.Prompt)
|
|
outputPrice := parsePrice(m.Pricing.Completion)
|
|
|
|
prices = append(prices, ModelPricing{
|
|
ModelID: m.ID,
|
|
ModelName: m.Name,
|
|
ProviderName: extractProvider(m.ID),
|
|
ProviderCountry: "US",
|
|
OperatorName: "OpenRouter",
|
|
OperatorType: "reseller",
|
|
Region: "global",
|
|
Currency: "USD",
|
|
InputPrice: inputPrice * 1000000,
|
|
OutputPrice: outputPrice * 1000000,
|
|
ContextLength: m.ContextLength,
|
|
IsFree: inputPrice == 0 && outputPrice == 0,
|
|
SourceURL: "https://openrouter.ai/docs#models",
|
|
Modality: "text",
|
|
})
|
|
}
|
|
|
|
logger.Info("OpenRouter采集完成", "models", len(prices))
|
|
return prices, nil
|
|
}
|
|
|
|
// parsePrice converts a decimal price string into a float64.
//
// Surrounding whitespace is ignored and the remaining text must be a complete
// number. Input that does not parse, is negative, or is NaN or infinite
// yields 0, so callers always receive a finite, non-negative value.
func parsePrice(s string) float64 {
	f, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
	if err != nil {
		return 0
	}
	// ParseFloat accepts the literals "NaN" and "Inf"; neither is a usable price.
	if math.IsNaN(f) || math.IsInf(f, 0) || f < 0 {
		return 0
	}
	return f
}
|
|
|
|
// extractProvider returns the provider prefix of a "provider/model" style
// identifier, i.e. everything before the first "/".
//
// It returns "unknown" when the identifier has no "/" or when the prefix
// before it is empty, so callers never receive an empty provider name.
func extractProvider(modelID string) string {
	provider, _, found := strings.Cut(modelID, "/")
	if !found || provider == "" {
		return "unknown"
	}
	return provider
}
|
|
|
|
func sourceDefinitions(apiKey string) []sourceDefinition {
|
|
return []sourceDefinition{
|
|
{
|
|
Key: "openrouter",
|
|
Name: "OpenRouter",
|
|
Factory: func() DataSource {
|
|
return &OpenRouterSource{APIKey: apiKey}
|
|
},
|
|
},
|
|
{
|
|
Key: "moonshot",
|
|
Name: "Moonshot",
|
|
Factory: func() DataSource {
|
|
return &MoonshotSource{}
|
|
},
|
|
},
|
|
{
|
|
Key: "deepseek",
|
|
Name: "DeepSeek",
|
|
Factory: func() DataSource {
|
|
return &DeepSeekSource{}
|
|
},
|
|
},
|
|
{
|
|
Key: "openai",
|
|
Name: "OpenAI",
|
|
Factory: func() DataSource {
|
|
return &OpenAISource{}
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// parseSourceList splits a comma-separated list of source keys.
//
// Each entry is trimmed and lower-cased; empty entries and repeats are
// dropped while first-seen order is kept. A blank input yields nil.
func parseSourceList(raw string) []string {
	if len(strings.TrimSpace(raw)) == 0 {
		return nil
	}

	fields := strings.Split(raw, ",")
	result := make([]string, 0, len(fields))
	known := make(map[string]bool, len(fields))

	for _, field := range fields {
		key := strings.ToLower(strings.TrimSpace(field))
		if key == "" || known[key] {
			continue
		}
		known[key] = true
		result = append(result, key)
	}
	return result
}
|
|
|
|
func buildSources(apiKey string, requested []string) ([]DataSource, error) {
|
|
definitions := sourceDefinitions(apiKey)
|
|
if len(requested) == 0 {
|
|
sources := make([]DataSource, 0, len(definitions))
|
|
for _, definition := range definitions {
|
|
sources = append(sources, definition.Factory())
|
|
}
|
|
return sources, nil
|
|
}
|
|
|
|
definitionByKey := make(map[string]sourceDefinition, len(definitions))
|
|
for _, definition := range definitions {
|
|
definitionByKey[definition.Key] = definition
|
|
}
|
|
|
|
sources := make([]DataSource, 0, len(requested))
|
|
for _, name := range requested {
|
|
definition, ok := definitionByKey[name]
|
|
if !ok {
|
|
return nil, fmt.Errorf("unknown source %q", name)
|
|
}
|
|
sources = append(sources, definition.Factory())
|
|
}
|
|
return sources, nil
|
|
}
|
|
|
|
func listSourceKeys(apiKey string) []string {
|
|
definitions := sourceDefinitions(apiKey)
|
|
keys := make([]string, 0, len(definitions))
|
|
for _, definition := range definitions {
|
|
keys = append(keys, definition.Key)
|
|
}
|
|
return keys
|
|
}
|
|
|
|
func summarizePrices(selectedSources int, successfulSources int, prices []ModelPricing) runSummary {
|
|
summary := runSummary{
|
|
SelectedSources: selectedSources,
|
|
SuccessfulSources: successfulSources,
|
|
TotalModels: len(prices),
|
|
CurrencyCounts: make(map[string]int),
|
|
}
|
|
for _, price := range prices {
|
|
if strings.EqualFold(price.ProviderCountry, "CN") {
|
|
summary.DomesticModels++
|
|
}
|
|
summary.CurrencyCounts[strings.ToUpper(price.Currency)]++
|
|
}
|
|
return summary
|
|
}
|
|
|
|
// formatCountMap renders counts as "key:count" pairs joined by commas, with
// keys in ascending order so the output is deterministic. An empty or nil
// map is rendered as "none".
func formatCountMap(counts map[string]int) string {
	if len(counts) == 0 {
		return "none"
	}

	names := make([]string, 0, len(counts))
	for name := range counts {
		names = append(names, name)
	}
	sort.Strings(names)

	var sb strings.Builder
	for i, name := range names {
		if i > 0 {
			sb.WriteByte(',')
		}
		fmt.Fprintf(&sb, "%s:%d", name, counts[name])
	}
	return sb.String()
}
|
|
|
|
func printSummary(w io.Writer, summary runSummary) error {
|
|
if w == nil {
|
|
return nil
|
|
}
|
|
_, err := fmt.Fprintf(
|
|
w,
|
|
"sources=%d successful_sources=%d models=%d domestic_models=%d currencies=%s\n",
|
|
summary.SelectedSources,
|
|
summary.SuccessfulSources,
|
|
summary.TotalModels,
|
|
summary.DomesticModels,
|
|
formatCountMap(summary.CurrencyCounts),
|
|
)
|
|
return err
|
|
}
|
|
|
|
func pricingMetadata(p ModelPricing) pricingMetadataFields {
|
|
sourceType := strings.TrimSpace(strings.ToLower(p.OperatorType))
|
|
if sourceType == "" {
|
|
sourceType = "official"
|
|
}
|
|
|
|
fields := pricingMetadataFields{
|
|
SourceType: sourceType,
|
|
FreeLimitations: "[]",
|
|
RateLimit: "{}",
|
|
}
|
|
if p.IsFree {
|
|
fields.SourceType = "free_tier"
|
|
fields.FreeQuota = "See source_url for provider free-tier details"
|
|
fields.FreeLimitations = `["See source_url for current quota and policy"]`
|
|
}
|
|
return fields
|
|
}
|
|
|
|
// ============ Moonshot 采集器 ============
|
|
|
|
// MoonshotSource provides the Moonshot price list.
type MoonshotSource struct{}

// Name returns the display name of this source.
func (s *MoonshotSource) Name() string {
	return "Moonshot"
}

// SourceType reports that prices come from the vendor itself.
func (s *MoonshotSource) SourceType() string {
	return "official"
}
|
|
|
|
func (s *MoonshotSource) FetchPricing() ([]ModelPricing, error) {
|
|
prices := []ModelPricing{
|
|
{
|
|
ModelID: "kimi-k2.6", ModelName: "Kimi K2.6",
|
|
ProviderName: "Moonshot AI", ProviderCountry: "CN",
|
|
OperatorName: "Moonshot", OperatorType: "official",
|
|
Region: "CN", Currency: "CNY",
|
|
InputPrice: 6.50, OutputPrice: 27.00,
|
|
ContextLength: 262144, IsFree: false,
|
|
SourceURL: "https://platform.kimi.com/docs/pricing/chat-k26",
|
|
Modality: "multimodal",
|
|
SceneTags: []string{"对话", "视觉", "代码"},
|
|
},
|
|
{
|
|
ModelID: "kimi-k2-0905-preview", ModelName: "Kimi K2 0905 Preview",
|
|
ProviderName: "Moonshot AI", ProviderCountry: "CN",
|
|
OperatorName: "Moonshot", OperatorType: "official",
|
|
Region: "CN", Currency: "CNY",
|
|
InputPrice: 4.00, OutputPrice: 16.00,
|
|
ContextLength: 262144, IsFree: false,
|
|
SourceURL: "https://platform.kimi.com/docs/pricing/chat-k2",
|
|
Modality: "text",
|
|
SceneTags: []string{"代码", "对话"},
|
|
},
|
|
{
|
|
ModelID: "moonshot-v1-8k", ModelName: "Moonshot V1 8K",
|
|
ProviderName: "Moonshot AI", ProviderCountry: "CN",
|
|
OperatorName: "Moonshot", OperatorType: "official",
|
|
Region: "CN", Currency: "CNY",
|
|
InputPrice: 2.00, OutputPrice: 10.00,
|
|
ContextLength: 8192, IsFree: false,
|
|
SourceURL: "https://platform.kimi.com/docs/pricing/chat-v1",
|
|
Modality: "text",
|
|
SceneTags: []string{"对话"},
|
|
},
|
|
}
|
|
|
|
logger.Info("Moonshot采集完成", "models", len(prices))
|
|
return prices, nil
|
|
}
|
|
|
|
// ============ DeepSeek 采集器 ============
|
|
|
|
// DeepSeekSource provides the DeepSeek price list.
type DeepSeekSource struct{}

// Name returns the display name of this source.
func (s *DeepSeekSource) Name() string {
	return "DeepSeek"
}

// SourceType reports that prices come from the vendor itself.
func (s *DeepSeekSource) SourceType() string {
	return "official"
}
|
|
|
|
func (s *DeepSeekSource) FetchPricing() ([]ModelPricing, error) {
|
|
prices := []ModelPricing{
|
|
{
|
|
ModelID: "deepseek-v4-flash", ModelName: "DeepSeek V4 Flash",
|
|
ProviderName: "DeepSeek", ProviderCountry: "CN",
|
|
OperatorName: "DeepSeek", OperatorType: "official",
|
|
Region: "global", Currency: "USD",
|
|
InputPrice: 0.14, OutputPrice: 0.28,
|
|
ContextLength: 1000000, IsFree: false,
|
|
SourceURL: "https://api-docs.deepseek.com/quick_start/pricing",
|
|
Modality: "text",
|
|
SceneTags: []string{"对话", "推理"},
|
|
},
|
|
{
|
|
ModelID: "deepseek-v4-pro", ModelName: "DeepSeek V4 Pro",
|
|
ProviderName: "DeepSeek", ProviderCountry: "CN",
|
|
OperatorName: "DeepSeek", OperatorType: "official",
|
|
Region: "global", Currency: "USD",
|
|
InputPrice: 0.435, OutputPrice: 0.87,
|
|
ContextLength: 1000000, IsFree: false,
|
|
SourceURL: "https://api-docs.deepseek.com/quick_start/pricing",
|
|
Modality: "code",
|
|
SceneTags: []string{"对话", "推理", "代码"},
|
|
},
|
|
}
|
|
|
|
logger.Info("DeepSeek采集完成", "models", len(prices))
|
|
return prices, nil
|
|
}
|
|
|
|
// ============ OpenAI 采集器 ============
|
|
|
|
// OpenAISource provides the OpenAI price list.
type OpenAISource struct{}

// Name returns the display name of this source.
func (s *OpenAISource) Name() string {
	return "OpenAI"
}

// SourceType reports that prices come from the vendor itself.
func (s *OpenAISource) SourceType() string {
	return "official"
}
|
|
|
|
func (s *OpenAISource) FetchPricing() ([]ModelPricing, error) {
|
|
prices := []ModelPricing{
|
|
{
|
|
ModelID: "gpt-5.5", ModelName: "GPT-5.5",
|
|
ProviderName: "OpenAI", ProviderCountry: "US",
|
|
OperatorName: "OpenAI", OperatorType: "official",
|
|
Region: "global", Currency: "USD",
|
|
InputPrice: 5.00, OutputPrice: 30.00,
|
|
ContextLength: 200000, IsFree: false,
|
|
SourceURL: "https://openai.com/api/pricing/",
|
|
Modality: "code",
|
|
SceneTags: []string{"代码", "推理", "对话"},
|
|
},
|
|
{
|
|
ModelID: "gpt-5.4", ModelName: "GPT-5.4",
|
|
ProviderName: "OpenAI", ProviderCountry: "US",
|
|
OperatorName: "OpenAI", OperatorType: "official",
|
|
Region: "global", Currency: "USD",
|
|
InputPrice: 2.50, OutputPrice: 15.00,
|
|
ContextLength: 200000, IsFree: false,
|
|
SourceURL: "https://openai.com/api/pricing/",
|
|
Modality: "text",
|
|
SceneTags: []string{"代码", "对话"},
|
|
},
|
|
{
|
|
ModelID: "gpt-5.4-mini", ModelName: "GPT-5.4 Mini",
|
|
ProviderName: "OpenAI", ProviderCountry: "US",
|
|
OperatorName: "OpenAI", OperatorType: "official",
|
|
Region: "global", Currency: "USD",
|
|
InputPrice: 0.75, OutputPrice: 4.50,
|
|
ContextLength: 200000, IsFree: false,
|
|
SourceURL: "https://openai.com/api/pricing/",
|
|
Modality: "text",
|
|
SceneTags: []string{"对话"},
|
|
},
|
|
}
|
|
|
|
logger.Info("OpenAI采集完成", "models", len(prices))
|
|
return prices, nil
|
|
}
|
|
|
|
// ============ 数据库写入 ============
|
|
|
|
func saveToDatabase(db *sql.DB, prices []ModelPricing, batchID string) error {
|
|
for _, p := range prices {
|
|
// 查找或创建 provider
|
|
var providerID int64
|
|
err := db.QueryRow(
|
|
"SELECT id FROM model_provider WHERE name = $1",
|
|
p.ProviderName,
|
|
).Scan(&providerID)
|
|
|
|
if err == sql.ErrNoRows {
|
|
err = db.QueryRow(
|
|
"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
|
|
p.ProviderName, p.ProviderCountry, "",
|
|
).Scan(&providerID)
|
|
}
|
|
if err != nil {
|
|
logger.Warn("provider error", "name", p.ProviderName, "error", err)
|
|
continue
|
|
}
|
|
|
|
// 查找或创建 operator
|
|
var operatorID int64
|
|
err = db.QueryRow(
|
|
"SELECT id FROM operator WHERE name = $1",
|
|
p.OperatorName,
|
|
).Scan(&operatorID)
|
|
|
|
if err == sql.ErrNoRows {
|
|
err = db.QueryRow(
|
|
"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
|
|
p.OperatorName, p.ProviderCountry,
|
|
).Scan(&operatorID)
|
|
}
|
|
if err != nil {
|
|
logger.Warn("operator error", "name", p.OperatorName, "error", err)
|
|
continue
|
|
}
|
|
|
|
// 查找或创建 model (使用 external_id)
|
|
var modelID int64
|
|
err = db.QueryRow(
|
|
"SELECT id FROM models WHERE external_id = $1",
|
|
p.ModelID,
|
|
).Scan(&modelID)
|
|
|
|
if err == sql.ErrNoRows {
|
|
err = db.QueryRow(
|
|
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
|
|
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
|
|
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
|
|
).Scan(&modelID)
|
|
}
|
|
if err != nil {
|
|
logger.Warn("model error", "id", p.ModelID, "error", err)
|
|
continue
|
|
}
|
|
|
|
// 插入定价
|
|
metadata := pricingMetadata(p)
|
|
_, err = db.Exec(
|
|
`INSERT INTO region_pricing
|
|
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
|
|
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
|
|
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
|
|
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
|
|
is_free = EXCLUDED.is_free,
|
|
source_type = EXCLUDED.source_type,
|
|
free_quota = EXCLUDED.free_quota,
|
|
free_limitations = EXCLUDED.free_limitations,
|
|
rate_limit = EXCLUDED.rate_limit,
|
|
updated_at = CURRENT_TIMESTAMP`,
|
|
modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
|
|
metadata.SourceType, metadata.FreeQuota, metadata.FreeLimitations, metadata.RateLimit,
|
|
)
|
|
if err != nil {
|
|
logger.Warn("pricing insert error", "model", p.ModelID, "error", err)
|
|
continue
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func savePricesToDatabase(dsn string, prices []ModelPricing) error {
|
|
db, err := sql.Open("postgres", dsn)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer db.Close()
|
|
|
|
batchID := fmt.Sprintf("phase2-%s", time.Now().Format("20060102-150405"))
|
|
return saveToDatabase(db, prices, batchID)
|
|
}
|
|
|
|
// defaultDSN returns the DATABASE_URL environment variable when it is set to
// a non-empty value, and a built-in local Unix-socket DSN otherwise.
func defaultDSN() string {
	const fallback = "postgres://long@/llm_intelligence?host=/var/run/postgresql"

	if fromEnv := os.Getenv("DATABASE_URL"); fromEnv != "" {
		return fromEnv
	}
	return fallback
}
|
|
|
|
func runCollector(cfg runConfig, sources []DataSource, saveFn func([]ModelPricing) error, out io.Writer) error {
|
|
allPrices := make([]ModelPricing, 0)
|
|
successfulSources := 0
|
|
|
|
for _, src := range sources {
|
|
prices, err := src.FetchPricing()
|
|
if err != nil {
|
|
logger.Error("采集失败", "source", src.Name(), "error", err)
|
|
continue
|
|
}
|
|
successfulSources++
|
|
allPrices = append(allPrices, prices...)
|
|
}
|
|
|
|
summary := summarizePrices(len(sources), successfulSources, allPrices)
|
|
if err := printSummary(out, summary); err != nil {
|
|
return err
|
|
}
|
|
if successfulSources == 0 {
|
|
return fmt.Errorf("no data source collected successfully")
|
|
}
|
|
if cfg.DryRun {
|
|
return nil
|
|
}
|
|
if saveFn == nil {
|
|
return fmt.Errorf("save function is required when dry-run is disabled")
|
|
}
|
|
if err := saveFn(allPrices); err != nil {
|
|
return err
|
|
}
|
|
|
|
logger.Info("多源采集完成", "total_models", len(allPrices), "sources", successfulSources)
|
|
return nil
|
|
}
|
|
|
|
// ============ 主程序 ============
|
|
|
|
func main() {
|
|
var sourcesFlag string
|
|
var dryRun bool
|
|
var listSources bool
|
|
|
|
flag.StringVar(&sourcesFlag, "sources", "", "comma-separated source keys: openrouter,moonshot,deepseek,openai")
|
|
flag.BoolVar(&dryRun, "dry-run", false, "collect and print summary without writing to database")
|
|
flag.BoolVar(&listSources, "list-sources", false, "print available source keys and exit")
|
|
flag.Parse()
|
|
|
|
apiKey := os.Getenv("OPENROUTER_API_KEY")
|
|
if listSources {
|
|
fmt.Println(strings.Join(listSourceKeys(apiKey), ","))
|
|
return
|
|
}
|
|
|
|
sources, err := buildSources(apiKey, parseSourceList(sourcesFlag))
|
|
if err != nil {
|
|
logger.Error("数据源参数非法", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
cfg := runConfig{DryRun: dryRun}
|
|
if err := runCollector(cfg, sources, func(prices []ModelPricing) error {
|
|
return savePricesToDatabase(defaultDSN(), prices)
|
|
}, os.Stdout); err != nil {
|
|
logger.Error("多源采集失败", "error", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|