diff --git a/scripts/export_official_seed_json.go b/scripts/export_official_seed_json.go new file mode 100644 index 0000000..e4bafef --- /dev/null +++ b/scripts/export_official_seed_json.go @@ -0,0 +1,195 @@ +//go:build llm_script + +package main + +import ( + "database/sql" + "encoding/json" + "log" + "os" + + _ "github.com/lib/pq" +) + +type bytedanceSeedRow struct { + Model string `json:"model"` + InputPrice float64 `json:"inputPrice"` + OutputPrice float64 `json:"outputPrice"` + ContextLength int `json:"contextLength"` + Operator string `json:"operator"` + Region string `json:"region"` + Currency string `json:"currency"` +} + +type baiduSeedRow struct { + Model string `json:"model"` + Type string `json:"type"` + InputPrice *float64 `json:"inputPrice"` + OutputPrice *float64 `json:"outputPrice"` + Operator string `json:"operator"` + Region string `json:"region"` + Currency string `json:"currency"` +} + +func main() { + dsn := os.Getenv("DATABASE_URL") + if dsn == "" { + dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql" + } + + db, err := sql.Open("postgres", dsn) + if err != nil { + log.Fatal(err) + } + defer db.Close() + + bytedanceRows, err := loadBytedanceSeedRows(db) + if err != nil { + log.Fatal(err) + } + baiduRows, err := loadBaiduSeedRows(db) + if err != nil { + log.Fatal(err) + } + + if err := writeJSON("/tmp/bytedance_raw.json", map[string]any{"bytedance": bytedanceRows}); err != nil { + log.Fatal(err) + } + if err := writeJSON("/tmp/phase2_raw_data.json", map[string]any{"baidu": baiduRows, "zhipu": []any{}}); err != nil { + log.Fatal(err) + } + + log.Printf("Exported %d ByteDance rows to /tmp/bytedance_raw.json", len(bytedanceRows)) + log.Printf("Exported %d Baidu rows to /tmp/phase2_raw_data.json", len(baiduRows)) +} + +func loadBytedanceSeedRows(db *sql.DB) ([]bytedanceSeedRow, error) { + rows, err := db.Query(` + WITH latest AS ( + SELECT DISTINCT ON (rp.model_id, rp.operator_id, rp.region, rp.currency) + rp.model_id, + rp.operator_id, + rp.region, + rp.currency, + rp.input_price_per_mtok, + rp.output_price_per_mtok, + rp.effective_date + FROM region_pricing rp + JOIN models m ON m.id = rp.model_id + JOIN operator o ON o.id = rp.operator_id + WHERE m.external_id LIKE 'bytedance-%' + AND o.name = 'ByteDance Volcano' + ORDER BY rp.model_id, rp.operator_id, rp.region, rp.currency, rp.effective_date DESC, rp.updated_at DESC, rp.id DESC + ) + SELECT REPLACE(m.external_id, 'bytedance-', ''), + COALESCE(latest.input_price_per_mtok, 0), + COALESCE(latest.output_price_per_mtok, 0), + COALESCE(m.context_length, 0), + o.name, + latest.region, + latest.currency + FROM latest + JOIN models m ON m.id = latest.model_id + JOIN operator o ON o.id = latest.operator_id + ORDER BY m.external_id + `) + if err != nil { + return nil, err + } + defer rows.Close() + + var result []bytedanceSeedRow + for rows.Next() { + var row bytedanceSeedRow + if err := rows.Scan( + &row.Model, + &row.InputPrice, + &row.OutputPrice, + &row.ContextLength, + &row.Operator, + &row.Region, + &row.Currency, + ); err != nil { + return nil, err + } + result = append(result, row) + } + return result, rows.Err() +} + +func loadBaiduSeedRows(db *sql.DB) ([]baiduSeedRow, error) { + rows, err := db.Query(` + WITH latest AS ( + SELECT DISTINCT ON (rp.model_id, rp.operator_id, rp.region, rp.currency) + rp.model_id, + rp.operator_id, + rp.region, + rp.currency, + rp.input_price_per_mtok, + rp.output_price_per_mtok, + rp.effective_date + FROM region_pricing rp + JOIN models m ON m.id = rp.model_id + JOIN operator o ON o.id = rp.operator_id + WHERE m.external_id LIKE 'baidu-%' + AND o.name = 'Baidu Qianfan' + ORDER BY rp.model_id, rp.operator_id, rp.region, rp.currency, rp.effective_date DESC, rp.updated_at DESC, rp.id DESC + ) + SELECT m.name, + COALESCE(latest.input_price_per_mtok, 0), + COALESCE(latest.output_price_per_mtok, 0), + o.name, + latest.region, + latest.currency + FROM latest + JOIN models m ON m.id = latest.model_id + JOIN operator o ON o.id = latest.operator_id + ORDER BY m.external_id + `) + if err != nil { + return nil, err + } + defer rows.Close() + + var result []baiduSeedRow + for rows.Next() { + var ( + model string + inputPrice float64 + outputPrice float64 + operator string + region string + currency string + ) + if err := rows.Scan(&model, &inputPrice, &outputPrice, &operator, ®ion, ¤cy); err != nil { + return nil, err + } + inputPerToken := inputPrice / 1000000 + outputPerToken := outputPrice / 1000000 + result = append(result, baiduSeedRow{ + Model: model, + Type: "输入", + InputPrice: &inputPerToken, + Operator: operator, + Region: region, + Currency: currency, + }) + result = append(result, baiduSeedRow{ + Model: model, + Type: "输出", + OutputPrice: &outputPerToken, + Operator: operator, + Region: region, + Currency: currency, + }) + } + return result, rows.Err() +} + +func writeJSON(path string, value any) error { + data, err := json.MarshalIndent(value, "", " ") + if err != nil { + return err + } + return os.WriteFile(path, data, 0644) +}