feat(import): add CoresHub pricing collector and importer
- coreshub_pricing_lib.go: CoresHub pricing data extraction and parsing
- import_coreshub_pricing.go: importer with dry_run support
- import_coreshub_pricing_test.go: unit tests for importer
- coreshub_pricing_sample.txt: test fixture
This commit is contained in:
81
scripts/coreshub_pricing_lib.go
Normal file
81
scripts/coreshub_pricing_lib.go
Normal file
@@ -0,0 +1,81 @@
|
||||
//go:build llm_script
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// defaultCoresHubPricingURL is the CoresHub documentation page that every
// parsed record cites as both its SourceURL and its ModelSourceURL.
const defaultCoresHubPricingURL = "https://docs.coreshub.cn/console/big_model_server/introduce/model_choose"

// Regular expressions used to pull DeepSeek pricing out of the CoresHub page.
// Each row pattern captures, in order: the model name, the input price cell and
// the output price cell. A price cell is either the literal "限时免费"
// (free for a limited time) or a "¥ <amount> / 千 tokens" quote, i.e. an amount
// in yuan per thousand tokens.
var (
	// coreshubPricingPattern matches a row in whitespace-separated plain text.
	coreshubPricingPattern = regexp.MustCompile(`(DeepSeek-[A-Za-z0-9.\-]+)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)`)

	// coreshubPricingHTMLRowPattern matches a row in raw HTML: a <tr> holding
	// three <td><p>…</p></td> cells. It is case-insensitive and lets "." span
	// newlines (flags "is").
	coreshubPricingHTMLRowPattern = regexp.MustCompile(`(?is)<tr>\s*<td[^>]*>\s*<p[^>]*>(DeepSeek-[^<]+)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*<td[^>]*>\s*<p[^>]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)</p>\s*</td>\s*</tr>`)

	// coreshubPriceValuePattern captures the first run of digits and dots in a
	// price cell.
	coreshubPriceValuePattern = regexp.MustCompile(`([\d.]+)`)
)
|
||||
|
||||
func parseCoresHubPricingCatalog(raw string) ([]officialPricingRecord, error) {
|
||||
raw = strings.ReplaceAll(raw, "¥", "¥")
|
||||
matches := coreshubPricingHTMLRowPattern.FindAllStringSubmatch(raw, -1)
|
||||
if len(matches) == 0 {
|
||||
normalized := cleanHTMLText(raw)
|
||||
normalized = strings.ReplaceAll(normalized, "¥", "¥")
|
||||
matches = coreshubPricingPattern.FindAllStringSubmatch(normalized, -1)
|
||||
}
|
||||
if len(matches) == 0 {
|
||||
return nil, fmt.Errorf("no coreshub pricing rows found")
|
||||
}
|
||||
|
||||
records := make([]officialPricingRecord, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
modelName := strings.TrimSpace(match[1])
|
||||
providerName := "DeepSeek"
|
||||
providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
|
||||
inputPrice, inputFree, err := parseCoresHubPrice(match[2])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse input price for %s: %w", modelName, err)
|
||||
}
|
||||
outputPrice, outputFree, err := parseCoresHubPrice(match[3])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse output price for %s: %w", modelName, err)
|
||||
}
|
||||
record := officialPricingRecord{
|
||||
ModelID: normalizeExternalID("coreshub", modelName),
|
||||
ModelName: modelName,
|
||||
ProviderName: providerName,
|
||||
ProviderNameCn: providerNameCn,
|
||||
ProviderCountry: providerCountry,
|
||||
ProviderWebsite: providerWebsite,
|
||||
OperatorName: "CoresHub",
|
||||
OperatorNameCn: "CoresHub",
|
||||
OperatorCountry: "CN",
|
||||
OperatorWebsite: "https://www.qingcloud.com/products/coreshub",
|
||||
OperatorType: "cloud",
|
||||
Region: "CN",
|
||||
Currency: "CNY",
|
||||
InputPrice: inputPrice,
|
||||
OutputPrice: outputPrice,
|
||||
SourceURL: defaultCoresHubPricingURL,
|
||||
ModelSourceURL: defaultCoresHubPricingURL,
|
||||
DateConfidence: "unknown",
|
||||
DateSourceKind: "official_product_page",
|
||||
Modality: detectModality(modelName),
|
||||
IsFree: inputFree && outputFree,
|
||||
}
|
||||
records = append(records, record)
|
||||
}
|
||||
return records, nil
|
||||
}
|
||||
|
||||
func parseCoresHubPrice(raw string) (float64, bool, error) {
|
||||
value := strings.TrimSpace(raw)
|
||||
if strings.Contains(value, "免费") {
|
||||
return 0, true, nil
|
||||
}
|
||||
match := coreshubPriceValuePattern.FindStringSubmatch(value)
|
||||
if len(match) != 2 {
|
||||
return 0, false, fmt.Errorf("price value not found in %q", raw)
|
||||
}
|
||||
price := mustParseSubscriptionPrice(match[1]) * 1000
|
||||
return price, false, nil
|
||||
}
|
||||
Reference in New Issue
Block a user