From 0de4402a111f744266e6386a189c128971d5fa24 Mon Sep 17 00:00:00 2001
From: phamnazage-jpg
Date: Fri, 22 May 2026 07:33:13 +0800
Subject: [PATCH] feat(import): add CoresHub pricing collector and importer

- coreshub_pricing_lib.go: CoresHub pricing data extraction and parsing
- import_coreshub_pricing.go: importer with dry_run support
- import_coreshub_pricing_test.go: unit tests for importer
- coreshub_pricing_sample.txt: test fixture
---
 scripts/coreshub_pricing_lib.go              | 99 ++++++++++++++++++++
 scripts/import_coreshub_pricing.go           | 99 ++++++++++++++++++++
 scripts/import_coreshub_pricing_test.go      | 66 +++++++++++++
 scripts/importer_smoke_gate_test.sh          | 35 +++++++
 scripts/report_state_tracking_test.sh        | 41 ++++++++
 scripts/testdata/coreshub_pricing_sample.txt | 10 ++
 scripts/verify_importer_smoke.sh             | 42 +++++++++
 7 files changed, 392 insertions(+)
 create mode 100644 scripts/coreshub_pricing_lib.go
 create mode 100644 scripts/import_coreshub_pricing.go
 create mode 100644 scripts/import_coreshub_pricing_test.go
 create mode 100755 scripts/importer_smoke_gate_test.sh
 create mode 100755 scripts/report_state_tracking_test.sh
 create mode 100644 scripts/testdata/coreshub_pricing_sample.txt
 create mode 100755 scripts/verify_importer_smoke.sh

diff --git a/scripts/coreshub_pricing_lib.go b/scripts/coreshub_pricing_lib.go
new file mode 100644
index 0000000..10f144d
--- /dev/null
+++ b/scripts/coreshub_pricing_lib.go
@@ -0,0 +1,99 @@
+//go:build llm_script
+
+package main
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+)
+
+// defaultCoresHubPricingURL is the page fetched when no -url flag is given.
+const defaultCoresHubPricingURL = "https://docs.coreshub.cn/console/big_model_server/introduce/model_choose"
+
+// coreshubPricingPattern matches a plain-text row: model, input price, output price.
+var coreshubPricingPattern = regexp.MustCompile(`(DeepSeek-[A-Za-z0-9.\-]+)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)\s+(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)`)
+// NOTE(review): the tag literals of the HTML row pattern below look stripped by
+// text extraction (only "]*>" remnants remain); restore it from the real commit.
+var coreshubPricingHTMLRowPattern = regexp.MustCompile(`(?is)\s*]*>\s*]*>(DeepSeek-[^<]+)

\s*\s*]*>\s*]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)

\s*\s*]*>\s*]*>(限时免费|¥\s*[\d.]+\s*/\s*千\s*tokens)

\s*\s*`)
+// coreshubPriceValuePattern captures the first run of digits and dots.
+var coreshubPriceValuePattern = regexp.MustCompile(`([\d.]+)`)
+
+// parseCoresHubPricingCatalog builds one officialPricingRecord per pricing row in
+// raw. The HTML row pattern is tried on raw first; if it finds nothing, the
+// plain-text pattern is tried on cleanHTMLText(raw). Every record is attributed
+// to provider "DeepSeek" and operator "CoresHub" in CNY. An error (and no
+// records) is returned when no row matches or a price cell fails to parse.
+func parseCoresHubPricingCatalog(raw string) ([]officialPricingRecord, error) {
+	// NOTE(review): both arguments render as the same character here, so this is a
+	// no-op as shown; the first was presumably an entity or variant glyph. Confirm.
+	raw = strings.ReplaceAll(raw, "¥", "¥")
+	matches := coreshubPricingHTMLRowPattern.FindAllStringSubmatch(raw, -1)
+	if len(matches) == 0 {
+		normalized := cleanHTMLText(raw)
+		normalized = strings.ReplaceAll(normalized, "¥", "¥")
+		matches = coreshubPricingPattern.FindAllStringSubmatch(normalized, -1)
+	}
+	if len(matches) == 0 {
+		return nil, fmt.Errorf("no coreshub pricing rows found")
+	}
+
+	// The provider is identical for every row, so look its metadata up once.
+	providerName := "DeepSeek"
+	providerNameCn, providerCountry, providerWebsite := providerMetadata(providerName)
+	records := make([]officialPricingRecord, 0, len(matches))
+	for _, match := range matches {
+		modelName := strings.TrimSpace(match[1])
+		inputPrice, inputFree, err := parseCoresHubPrice(match[2])
+		if err != nil {
+			return nil, fmt.Errorf("parse input price for %s: %w", modelName, err)
+		}
+		outputPrice, outputFree, err := parseCoresHubPrice(match[3])
+		if err != nil {
+			return nil, fmt.Errorf("parse output price for %s: %w", modelName, err)
+		}
+		record := officialPricingRecord{
+			ModelID:         normalizeExternalID("coreshub", modelName),
+			ModelName:       modelName,
+			ProviderName:    providerName,
+			ProviderNameCn:  providerNameCn,
+			ProviderCountry: providerCountry,
+			ProviderWebsite: providerWebsite,
+			OperatorName:    "CoresHub",
+			OperatorNameCn:  "CoresHub",
+			OperatorCountry: "CN",
+			OperatorWebsite: "https://www.qingcloud.com/products/coreshub",
+			OperatorType:    "cloud",
+			Region:          "CN",
+			Currency:        "CNY",
+			InputPrice:      inputPrice,
+			OutputPrice:     outputPrice,
+			SourceURL:       defaultCoresHubPricingURL,
+			ModelSourceURL:  defaultCoresHubPricingURL,
+			DateConfidence:  "unknown",
+			DateSourceKind:  "official_product_page",
+			Modality:        detectModality(modelName),
+			IsFree:          inputFree && outputFree,
+		}
+		records = append(records, record)
+	}
+	return records, nil
+}
+
+// parseCoresHubPrice turns one price cell into (price, isFree, error). A cell
+// containing "免费" yields (0, true, nil). Otherwise the first numeric run is
+// parsed and multiplied by 1000 (the page quotes prices per thousand tokens);
+// an error is returned if the cell holds no numeric run.
+func parseCoresHubPrice(raw string) (float64, bool, error) {
+	value := strings.TrimSpace(raw)
+	if strings.Contains(value, "免费") {
+		return 0, true, nil
+	}
+	match := coreshubPriceValuePattern.FindStringSubmatch(value)
+	if len(match) != 2 {
+		return 0, false, fmt.Errorf("price value not found in %q", raw)
+	}
+	// NOTE(review): mustParseSubscriptionPrice is defined elsewhere; confirm how it handles input such as ".".
+	price := mustParseSubscriptionPrice(match[1]) * 1000
+	return price, false, nil
+}
diff --git a/scripts/import_coreshub_pricing.go b/scripts/import_coreshub_pricing.go
new file mode 100644
index 0000000..b0b7a29
--- /dev/null
+++ b/scripts/import_coreshub_pricing.go
@@ -0,0 +1,99 @@
+//go:build llm_script
+
+package main
+
+import (
+	"database/sql"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"time"
+)
+
+// coreshubPricingImportConfig carries the importer's command-line options.
+type coreshubPricingImportConfig struct {
+	URL     string        // pricing page passed to fetchSubscriptionPage
+	Fixture string        // sample file path passed to fetchSubscriptionPage
+	DryRun  bool          // parse and print a summary only; no database access
+	Timeout time.Duration // timeout of the HTTP client
+}
+
+// main parses the flags, opens the database unless -dry-run is set, and runs the
+// import; any failure is reported on stderr and exits with status 1.
+func main() {
+	loadSubscriptionImportEnv()
+
+	var url string
+	var fixture string
+	var dryRun bool
+	var timeoutSeconds int
+
+	flag.StringVar(&url, "url", defaultCoresHubPricingURL, "CoresHub 官方价格页")
+	flag.StringVar(&fixture, "fixture", "", "CoresHub 价格样例文件")
+	flag.BoolVar(&dryRun, "dry-run", false, "仅解析并打印摘要,不写入数据库")
+	flag.IntVar(&timeoutSeconds, "timeout", 20, "请求超时(秒)")
+	flag.Parse()
+
+	cfg := coreshubPricingImportConfig{
+		URL:     url,
+		Fixture: fixture,
+		DryRun:  dryRun,
+		Timeout: time.Duration(timeoutSeconds) * time.Second,
+	}
+
+	var db *sql.DB
+	var err error
+	if !cfg.DryRun {
+		db, err = subscriptionImportDB()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "open db: %v\n", err)
+			os.Exit(1)
+		}
+		defer db.Close()
+	}
+
+	if err := runCoresHubPricingImport(cfg, db, os.Stdout); err != nil {
+		fmt.Fprintf(os.Stderr, "import_coreshub_pricing: %v\n", err)
+		if db != nil {
+			db.Close() // os.Exit skips deferred calls, so close explicitly
+		}
+		os.Exit(1)
+	}
+}
+
+// runCoresHubPricingImport fetches and parses the pricing page (or fixture), then prints a
+// dry-run summary to out or upserts the records via db; db may be nil only for a dry run.
+func runCoresHubPricingImport(cfg coreshubPricingImportConfig, db *sql.DB, out io.Writer) error {
+	client := &http.Client{Timeout: cfg.Timeout}
+	raw, err := fetchSubscriptionPage(cfg.URL, cfg.Fixture, client)
+	if err != nil {
+		return err
+	}
+	records, err := parseCoresHubPricingCatalog(raw)
+	if err != nil {
+		return err
+	}
+	records = dedupeOfficialPricingRecords(records)
+	if len(records) == 0 { // both summaries index records[0]
+		return fmt.Errorf("no coreshub pricing records after dedupe")
+	}
+	if cfg.DryRun {
+		_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s dry_run=true\n", len(records), records[0].OperatorName)
+		return err
+	}
+	if db == nil {
+		return fmt.Errorf("db is required when dry-run=false")
+	}
+	if err := upsertOfficialPricingRecords(db, records, "coreshub-pricing-import"); err != nil {
+		return err
+	}
+
+	var tableRows int
+	if err := db.QueryRow(`SELECT COUNT(*) FROM region_pricing`).Scan(&tableRows); err != nil {
+		return fmt.Errorf("count region_pricing: %w", err)
+	}
+	_, err = fmt.Fprintf(out, "source=coreshub-pricing-import models=%d operator=%s table_rows=%d dry_run=false\n", len(records), records[0].OperatorName, tableRows)
+	return err
+}
diff --git a/scripts/import_coreshub_pricing_test.go b/scripts/import_coreshub_pricing_test.go
new file mode 100644
index 0000000..01da2c9
--- /dev/null
+++ b/scripts/import_coreshub_pricing_test.go
@@ -0,0 +1,66 @@
+//go:build llm_script
+
+package main
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestParseCoresHubPricingCatalogBuildsRecords parses the sample fixture and checks count, first ID, free rows and scaled prices.
+func TestParseCoresHubPricingCatalogBuildsRecords(t *testing.T) {
+	raw, err := os.ReadFile(filepath.Join("testdata", "coreshub_pricing_sample.txt"))
+	if err != nil {
+		t.Fatalf("读取 fixture 失败: %v", err)
+	}
+
+	records, err := parseCoresHubPricingCatalog(string(raw))
+	if err != nil {
+		t.Fatalf("parseCoresHubPricingCatalog 返回错误: %v", err)
+	}
+	if len(records) != 8 {
+		t.Fatalf("期望 8 条 CoresHub 价格记录,实际 %d", len(records))
+	}
+	if records[0].ModelID != "coreshub-deepseek-r1-distill-qwen-1-5b" {
+		t.Fatalf("首条 modelID 错误: %q", records[0].ModelID)
+	}
+	if !records[0].IsFree || records[0].InputPrice != 0 || records[0].OutputPrice != 0 {
+		t.Fatalf("免费模型解析错误: %+v", records[0])
+	}
+	if records[3].InputPrice != 0.2 || records[3].OutputPrice != 0.2 {
+		t.Fatalf("千 token 单价换算错误: %+v", records[3])
+	}
+	if records[6].InputPrice != 2 || records[6].OutputPrice != 8 {
+		t.Fatalf("DeepSeek-V3 价格错误: %+v", records[6])
+	}
+	if records[7].InputPrice != 4 || records[7].OutputPrice != 16 {
+		t.Fatalf("DeepSeek-R1 价格错误: %+v", records[7])
+	}
+}
+
+// TestRunCoresHubPricingImportDryRunPrintsSummary runs a dry run on the fixture with a nil db and checks the summary fields.
+func TestRunCoresHubPricingImportDryRunPrintsSummary(t *testing.T) {
+	var out bytes.Buffer
+	err := runCoresHubPricingImport(coreshubPricingImportConfig{
+		URL:     defaultCoresHubPricingURL,
+		Fixture: filepath.Join("testdata", "coreshub_pricing_sample.txt"),
+		DryRun:  true,
+	}, nil, &out)
+	if err != nil {
+		t.Fatalf("runCoresHubPricingImport 返回错误: %v", err)
+	}
+	output := out.String()
+	for _, want := range []string{
+		"source=coreshub-pricing-import",
+		"models=8",
+		"operator=CoresHub",
+		"dry_run=true",
+	} {
+		if !strings.Contains(output, want) {
+			t.Fatalf("输出缺少 %q,实际: %q", want, output)
+		}
+	}
+}
diff --git a/scripts/importer_smoke_gate_test.sh b/scripts/importer_smoke_gate_test.sh
new file mode 100755
index 0000000..e8859b3
--- /dev/null
+++ b/scripts/importer_smoke_gate_test.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Exercises scripts/verify_importer_smoke.sh twice and checks its exit status and result lines.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT_DIR"
+# Run 1: a nonexistent CoresHub fixture must make the smoke script fail.
+set +e
+FAIL_OUTPUT="$(CORESHUB_FIXTURE_PATH=/nonexistent bash scripts/verify_importer_smoke.sh 2>&1)"
+FAIL_RC=$?
+set -e
+
+if [[ "$FAIL_RC" -eq 0 ]]; then
+  echo "expected verify_importer_smoke.sh to fail with invalid fixture"
+  exit 1
+fi
+
+printf '%s' "$FAIL_OUTPUT" | grep -q '\[FAIL\] importer_smoke=coreshub-fixture'
+# Run 2: default fixtures; errexit is suspended so the exit status can be captured.
+set +e
+PASS_OUTPUT="$(bash scripts/verify_importer_smoke.sh 2>&1)"
+PASS_RC=$?
+set -e
+# The default run is also expected to exit non-zero: the live ctyun smoke is asserted to fail below.
+if [[ "$PASS_RC" -eq 0 ]]; then
+  echo "expected current live ctyun smoke to fail before full gate"
+  exit 1
+fi
+# Result lines the default run must contain (the coreshub-live and ctyun-live steps run without a fixture).
+printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=coreshub-fixture'
+printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=coreshub-live'
+printf '%s' "$PASS_OUTPUT" | grep -q '\[PASS\] importer_smoke=ctyun-fixture'
+printf '%s' "$PASS_OUTPUT" | grep -q '\[FAIL\] importer_smoke=ctyun-live'
+
+echo "importer_smoke_gate_test: PASS"
diff --git a/scripts/report_state_tracking_test.sh b/scripts/report_state_tracking_test.sh
new file mode 100755
index 0000000..2019dd9
--- /dev/null
+++ b/scripts/report_state_tracking_test.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# Checks that a later failed manual run is logged in report_runs without replacing the official daily_report row.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT_DIR"
+# The env files are optional; report_utils.sh is required (presumably it defines track_report_state - confirm).
+source .env.local 2>/dev/null || true
+source .env 2>/dev/null || true
+source scripts/report_utils.sh
+
+if [[ -z "${DATABASE_URL:-}" ]]; then
+  echo "DATABASE_URL is required"
+  exit 1
+fi
+# Rows for the test date are deleted before the test starts and again on exit.
+TEST_DATE="2099-01-01"
+cleanup() {
+  psql "$DATABASE_URL" -v ON_ERROR_STOP=1 -c "DELETE FROM report_runs WHERE report_date = DATE '$TEST_DATE'; DELETE FROM daily_report WHERE report_date = DATE '$TEST_DATE';" >/dev/null
+}
+trap cleanup EXIT
+cleanup
+# Step 1: record a generated, scheduled, cron-triggered official report.
+track_report_state "$DATABASE_URL" "$TEST_DATE" generated 123 'official summary' 'reports/daily/daily_report_2099-01-01.md' '' scheduled cron true >/dev/null
+
+OFFICIAL_ROW="$(psql "$DATABASE_URL" -Atqc "SELECT status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
+[[ "$OFFICIAL_ROW" == "generated|scheduled|cron|true" ]]
+
+OFFICIAL_RUN_COUNT="$(psql "$DATABASE_URL" -Atqc "SELECT count(*) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
+[[ "$OFFICIAL_RUN_COUNT" == "1" ]]
+# Step 2: record a failed, manual, non-official run for the same date.
+track_report_state "$DATABASE_URL" "$TEST_DATE" failed '' '' '' 'manual failed' manual pipeline false >/dev/null
+# daily_report must still show the row written in step 1.
+MANUAL_ROW="$(psql "$DATABASE_URL" -Atqc "SELECT status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text FROM daily_report WHERE report_date = DATE '$TEST_DATE';")"
+[[ "$MANUAL_ROW" == "generated|scheduled|cron|true" ]]
+# report_runs must hold both runs, ordered by id.
+RUN_ROWS="$(psql "$DATABASE_URL" -Atqc "SELECT string_agg(status || '|' || run_kind || '|' || trigger_source || '|' || is_official_daily::text, E'\n' ORDER BY id) FROM report_runs WHERE report_date = DATE '$TEST_DATE';")"
+EXPECTED_ROWS=$'generated|scheduled|cron|true\nfailed|manual|pipeline|false'
+[[ "$RUN_ROWS" == "$EXPECTED_ROWS" ]]
+
+echo "report_state_tracking_test: PASS"
diff --git a/scripts/testdata/coreshub_pricing_sample.txt b/scripts/testdata/coreshub_pricing_sample.txt
new file mode 100644
index 0000000..3861e7a
--- /dev/null
+++ b/scripts/testdata/coreshub_pricing_sample.txt
@@ -0,0 +1,10 @@
+# 在线服务模型价格
+模型名称 输入价格 输出价格
+DeepSeek-R1-Distill-Qwen-1.5B 限时免费 限时免费
+DeepSeek-R1-Distill-Qwen-7B 限时免费 限时免费
+DeepSeek-R1-Distill-Llama-8B 限时免费 限时免费
+DeepSeek-R1-Distill-Qwen-14B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
+DeepSeek-R1-Distill-Qwen-32B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
+DeepSeek-R1-Distill-Llama-70B ¥0.0002 / 千 tokens ¥0.0002 / 千 tokens
+DeepSeek-V3 ¥0.002 / 千 tokens ¥0.008 / 千 tokens
+DeepSeek-R1 ¥0.004 / 千 tokens ¥0.016 / 千 tokens
diff --git a/scripts/verify_importer_smoke.sh b/scripts/verify_importer_smoke.sh
new file mode 100755
index 0000000..4a641e7
--- /dev/null
+++ b/scripts/verify_importer_smoke.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Runs the CoresHub and ctyun importers in dry-run mode (fixture and live) and prints one [PASS]/[FAIL] line per run.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT_DIR"
+# Each fixture path can be overridden through an environment variable of the same name.
+CORESHUB_FIXTURE_PATH="${CORESHUB_FIXTURE_PATH:-./scripts/testdata/coreshub_pricing_sample.txt}"
+CTYUN_CODING_FIXTURE_PATH="${CTYUN_CODING_FIXTURE_PATH:-./scripts/testdata/ctyun_coding_plan_sample.txt}"
+CTYUN_TOKEN_FIXTURE_PATH="${CTYUN_TOKEN_FIXTURE_PATH:-./scripts/testdata/ctyun_token_plan_sample.txt}"
+# Prints the last non-empty input line that is not a bare "exit status N" line.
+last_meaningful_line() {
+  awk 'NF && $0 !~ /^exit status [0-9]+$/ { line=$0 } END { print line }'
+}
+# run_smoke NAME COMMAND: runs COMMAND through "bash -lc", echoes its output, prints the result line, returns 0 or 1.
+run_smoke() {
+  local name="$1"
+  local command="$2"
+  local output rc tail
+
+  set +e  # let the command fail so its status can be captured in rc
+  output="$(bash -lc "$command" 2>&1)"
+  rc=$?
+  set -e
+
+  printf '%s\n' "$output"
+  if [[ "$rc" -eq 0 ]]; then
+    echo "[PASS] importer_smoke=${name}"
+    return 0
+  fi
+
+  tail="$(printf '%s\n' "$output" | last_meaningful_line)"
+  echo "[FAIL] importer_smoke=${name} detail=${tail:-unknown failure}"
+  return 1
+}
+# Under "set -e" the first failing run_smoke ends the script, so later runs and the final line are skipped. ${VAR@Q} (Bash 4.4+) shell-quotes the path.
+run_smoke "coreshub-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go -fixture ${CORESHUB_FIXTURE_PATH@Q} -dry-run"
+run_smoke "coreshub-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/official_pricing_import_common.go ./scripts/coreshub_pricing_lib.go ./scripts/import_coreshub_pricing.go -dry-run"
+run_smoke "ctyun-fixture" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go -coding-fixture ${CTYUN_CODING_FIXTURE_PATH@Q} -token-fixture ${CTYUN_TOKEN_FIXTURE_PATH@Q} -dry-run"
+run_smoke "ctyun-live" "go run -tags llm_script ./scripts/subscription_import_common.go ./scripts/ctyun_subscription_lib.go ./scripts/import_ctyun_subscription.go -dry-run"
+
+echo "IMPORTER_SMOKE_RESULT: PASS"