This commit is contained in:
162
scripts/import_bytedance_data.go
Normal file
162
scripts/import_bytedance_data.go
Normal file
@@ -0,0 +1,162 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
|
||||
// ModelPricing is the normalized form of one pricing entry handled by this
// script: main fills one value per model read from the raw JSON file and uses
// its fields as the arguments of the model and pricing SQL statements.
type ModelPricing struct {
	// Model identity. main stores ModelID in models.external_id and
	// ModelName in models.name.
	ModelID   string
	ModelName string

	// Company that publishes the model.
	ProviderName    string
	ProviderCountry string

	// Operator offering the model, and the region/currency the prices
	// apply to. OperatorType is used as the pricing row's source_type
	// unless the entry is free.
	OperatorName string
	OperatorType string
	Region       string
	Currency     string

	// Prices, written to the input_price_per_mtok and
	// output_price_per_mtok columns.
	InputPrice  float64
	OutputPrice float64

	ContextLength int
	IsFree        bool   // selects the "free_tier" source type and free-quota text
	SourceURL     string // page the prices were taken from
	Modality      string
}
|
||||
|
||||
func main() {
|
||||
dsn := os.Getenv("DATABASE_URL")
|
||||
if dsn == "" {
|
||||
dsn = "postgres://long@/llm_intelligence?host=/var/run/postgresql"
|
||||
}
|
||||
|
||||
db, err := sql.Open("postgres", dsn)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// Read raw data
|
||||
data, err := os.ReadFile("/tmp/bytedance_raw.json")
|
||||
if err != nil {
|
||||
log.Fatal("Failed to read raw data:", err)
|
||||
}
|
||||
|
||||
var raw struct {
|
||||
Bytedance []struct {
|
||||
Model string `json:"model"`
|
||||
InputPrice float64 `json:"inputPrice"`
|
||||
OutputPrice float64 `json:"outputPrice"`
|
||||
ContextLength int `json:"contextLength"`
|
||||
Operator string `json:"operator"`
|
||||
Region string `json:"region"`
|
||||
Currency string `json:"currency"`
|
||||
} `json:"bytedance"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(data, &raw); err != nil {
|
||||
log.Fatal("Failed to parse raw data:", err)
|
||||
}
|
||||
|
||||
log.Printf("Importing %d ByteDance models...", len(raw.Bytedance))
|
||||
batchID := "manual-seed"
|
||||
|
||||
for _, b := range raw.Bytedance {
|
||||
p := ModelPricing{
|
||||
ModelID: "bytedance-" + b.Model,
|
||||
ModelName: b.Model,
|
||||
ProviderName: "ByteDance",
|
||||
ProviderCountry: "CN",
|
||||
OperatorName: "ByteDance Volcano",
|
||||
OperatorType: "official",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: b.InputPrice,
|
||||
OutputPrice: b.OutputPrice,
|
||||
ContextLength: b.ContextLength,
|
||||
IsFree: b.InputPrice == 0,
|
||||
SourceURL: "https://www.volcengine.com/docs/82379/1099320",
|
||||
Modality: "text",
|
||||
}
|
||||
|
||||
// Find or create provider
|
||||
var providerID int64
|
||||
err := db.QueryRow("SELECT id FROM model_provider WHERE name = $1", p.ProviderName).Scan(&providerID)
|
||||
if err == sql.ErrNoRows {
|
||||
err = db.QueryRow(
|
||||
"INSERT INTO model_provider (name, country, website, status) VALUES ($1, $2, $3, 'active') RETURNING id",
|
||||
p.ProviderName, p.ProviderCountry, "",
|
||||
).Scan(&providerID)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Provider error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Find or create operator
|
||||
var operatorID int64
|
||||
err = db.QueryRow("SELECT id FROM operator WHERE name = $1", p.OperatorName).Scan(&operatorID)
|
||||
if err == sql.ErrNoRows {
|
||||
err = db.QueryRow(
|
||||
"INSERT INTO operator (name, country, status) VALUES ($1, $2, 'active') RETURNING id",
|
||||
p.OperatorName, p.ProviderCountry,
|
||||
).Scan(&operatorID)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Operator error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Find or create model
|
||||
var modelID int64
|
||||
err = db.QueryRow("SELECT id FROM models WHERE external_id = $1", p.ModelID).Scan(&modelID)
|
||||
if err == sql.ErrNoRows {
|
||||
err = db.QueryRow(
|
||||
`INSERT INTO models (external_id, name, provider_id, modality, context_length, status, source, batch_id)
|
||||
VALUES ($1, $2, $3, $4, $5, 'active', $6, $7) RETURNING id`,
|
||||
p.ModelID, p.ModelName, providerID, p.Modality, p.ContextLength, p.OperatorName, batchID,
|
||||
).Scan(&modelID)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Model error for %s: %v", p.ModelID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Insert pricing
|
||||
sourceType := p.OperatorType
|
||||
freeQuota := ""
|
||||
freeLimitations := "[]"
|
||||
rateLimit := "{}"
|
||||
if p.IsFree {
|
||||
sourceType = "free_tier"
|
||||
freeQuota = "Imported free-tier pricing entry"
|
||||
freeLimitations = `["See source_url for current quota and policy"]`
|
||||
}
|
||||
_, err = db.Exec(
|
||||
`INSERT INTO region_pricing
|
||||
(model_id, operator_id, region, currency, input_price_per_mtok, output_price_per_mtok, is_free, effective_date, source_url, source_type, free_quota, free_limitations, rate_limit)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, CURRENT_DATE, $8, $9, $10, $11, $12)
|
||||
ON CONFLICT (model_id, operator_id, region, currency, effective_date)
|
||||
DO UPDATE SET input_price_per_mtok = EXCLUDED.input_price_per_mtok,
|
||||
output_price_per_mtok = EXCLUDED.output_price_per_mtok,
|
||||
is_free = EXCLUDED.is_free,
|
||||
source_type = EXCLUDED.source_type,
|
||||
free_quota = EXCLUDED.free_quota,
|
||||
free_limitations = EXCLUDED.free_limitations,
|
||||
rate_limit = EXCLUDED.rate_limit,
|
||||
updated_at = CURRENT_TIMESTAMP`,
|
||||
modelID, operatorID, p.Region, p.Currency, p.InputPrice, p.OutputPrice, p.IsFree, p.SourceURL,
|
||||
sourceType, freeQuota, freeLimitations, rateLimit,
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("Pricing error for %s: %v", p.ModelID, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Successfully imported %d ByteDance models", len(raw.Bytedance))
|
||||
}
|
||||
Reference in New Issue
Block a user