Files
llm-intelligence/scripts/subscription_import_common.go
phamnazage-jpg f5b373caf4
Some checks failed
CI / go-test (push) Has been cancelled
CI / scripts-regression (push) Has been cancelled
CI / frontend-build (push) Has been cancelled
CI / docker-build (push) Has been cancelled
feat(report): improve daily intelligence UX and price tracking
2026-05-27 17:23:08 +08:00

655 lines
18 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//go:build llm_script
package main
import (
	"database/sql"
	"encoding/json"
	"fmt"
	"html"
	"io"
	"math"
	"net/http"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	_ "github.com/lib/pq"
)
// subscriptionFetchMaxAttempts is the upper bound on how many times
// fetchSubscriptionPageWithRetry requests a single URL before giving up.
const subscriptionFetchMaxAttempts = 3
// subscriptionImportRecord is one subscription plan snapshot together with
// the provider and operator it belongs to. upsertSubscriptionImportRecords
// writes it into subscription_plan.
type subscriptionImportRecord struct {
	// Provider identity. ProviderName is the key used to look up (or create)
	// the model_provider row; blank ProviderNameCn/ProviderWebsite are stored
	// as NULL.
	ProviderName    string
	ProviderNameCn  string
	ProviderCountry string
	ProviderWebsite string

	// Operator identity. OperatorName is the key used to look up (or create)
	// the operator row; blank OperatorNameCn/OperatorWebsite are stored as NULL.
	OperatorName    string
	OperatorNameCn  string
	OperatorCountry string
	OperatorWebsite string
	OperatorType    string

	// Plan identity. PlanCode, together with the provider and the effective
	// date, forms the upsert conflict key.
	PlanFamily   string
	PlanCode     string
	PlanName     string
	Tier         string
	BillingCycle string

	// Pricing.
	Currency  string
	ListPrice float64
	PriceUnit string

	// Quota and scope. A zero QuotaValue or ContextWindow and a blank
	// QuotaUnit or PlanScope are written as NULL. ModelScope is stored as its
	// JSON encoding.
	QuotaValue    int64
	QuotaUnit     string
	ContextWindow int
	PlanScope     string
	ModelScope    []string

	// Provenance. PublishedAt must be formatted as "2006-01-02 15:04:05" and
	// EffectiveDate as "2006-01-02"; both are parsed with those layouts on
	// upsert. A blank Notes is written as NULL.
	SourceURL     string
	PublishedAt   string
	EffectiveDate string
	Notes         string

	// PublishedAtKnown is false when PublishedAt is only a placeholder; the
	// upsert then tries to reuse the dates of an identical stored snapshot.
	PublishedAtKnown bool
}
// loadSubscriptionImportEnv loads environment defaults from ".env.local" and
// then ".env" in the current working directory. Because the loader never
// overwrites a variable that is already set, values from ".env.local" win
// over ".env", and the real process environment wins over both.
func loadSubscriptionImportEnv() {
	loadSubscriptionEnvFile(".env.local")
	loadSubscriptionEnvFile(".env")
}
// loadSubscriptionEnvFile reads a dotenv-style file and exports every
// KEY=VALUE pair that is not already present in the process environment.
//
// Parsing rules:
//   - blank lines and lines starting with '#' are skipped;
//   - lines without '=' are skipped;
//   - a leading "export " before the key is accepted and dropped;
//   - the value is whitespace-trimmed and, when it is wrapped in one matching
//     pair of single or double quotes, that pair is removed. Quote characters
//     that are not a matching outer pair are part of the value and are kept.
//
// The function is best-effort: a missing or unreadable file is silently
// ignored so that callers can probe optional files.
func loadSubscriptionEnvFile(path string) {
	data, err := os.ReadFile(path)
	if err != nil {
		// Optional file: absence is not an error.
		return
	}
	for _, line := range strings.Split(string(data), "\n") {
		// TrimSpace also removes the '\r' left over from CRLF files.
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		key, value, ok := strings.Cut(line, "=")
		if !ok {
			continue
		}
		key = strings.TrimSpace(key)
		// Accept shell-style "export KEY=value" lines.
		if strings.HasPrefix(key, "export ") {
			key = strings.TrimSpace(key[len("export "):])
		}
		if key == "" {
			continue
		}
		value = strings.TrimSpace(value)
		// Strip exactly one matching pair of surrounding quotes; never strip
		// unbalanced or mixed quote characters that belong to the value.
		if n := len(value); n >= 2 {
			first, last := value[0], value[n-1]
			if first == last && (first == '"' || first == '\'') {
				value = value[1 : n-1]
			}
		}
		if _, exists := os.LookupEnv(key); exists {
			continue
		}
		// Setenv only fails for malformed keys; skipping such a line is the
		// intended best-effort behavior.
		_ = os.Setenv(key, value)
	}
}
// subscriptionImportDB opens a handle using the "postgres" driver. The DSN
// comes from the DATABASE_URL environment variable; when that is empty a
// local unix-socket DSN is used instead.
func subscriptionImportDB() (*sql.DB, error) {
	const fallbackDSN = "postgres://long@/llm_intelligence?host=/var/run/postgresql"

	if configured := os.Getenv("DATABASE_URL"); configured != "" {
		return sql.Open("postgres", configured)
	}
	return sql.Open("postgres", fallbackDSN)
}
// fetchSubscriptionPage returns the text of a subscription page.
//
// When fixture is non-empty the file at that path is returned verbatim and no
// network request is made. Otherwise url is fetched with retries; if that
// fails and markdownFallbackURL offers an alternative, the alternative is
// fetched (also with retries) and its result, success or error, is returned.
func fetchSubscriptionPage(url string, fixture string, client *http.Client) (string, error) {
	if fixture != "" {
		raw, readErr := os.ReadFile(fixture)
		if readErr == nil {
			return string(raw), nil
		}
		return "", fmt.Errorf("read fixture %s: %w", fixture, readErr)
	}

	page, fetchErr := fetchSubscriptionPageWithRetry(url, client)
	if fetchErr == nil {
		return page, nil
	}

	fallback, eligible := markdownFallbackURL(url, fetchErr)
	if !eligible {
		return "", fetchErr
	}
	return fetchSubscriptionPageWithRetry(fallback, client)
}
// fetchSubscriptionPageWithRetry calls fetchSubscriptionPageOnce up to
// subscriptionFetchMaxAttempts times. It stops early on success or on an
// error the single-shot fetch reports as not retryable. Between attempts it
// sleeps attempt*200ms (200ms after the first failure, 400ms after the
// second).
func fetchSubscriptionPageWithRetry(url string, client *http.Client) (string, error) {
	var failure error
	attempt := 1
	for attempt <= subscriptionFetchMaxAttempts {
		page, canRetry, err := fetchSubscriptionPageOnce(url, client)
		if err == nil {
			return page, nil
		}
		failure = err

		outOfAttempts := attempt == subscriptionFetchMaxAttempts
		if outOfAttempts || !canRetry {
			return "", failure
		}

		backoff := time.Duration(attempt) * 200 * time.Millisecond
		time.Sleep(backoff)
		attempt++
	}
	return "", failure
}
// fetchSubscriptionPageOnce performs a single GET of url with browser-like
// headers and returns the body after normalizeSubscriptionPage.
//
// The boolean result tells the caller whether a failed attempt is worth
// retrying: transport and read errors are classified by
// isRetriableSubscriptionFetchError; non-2xx responses are retryable only for
// 403, 429, 502, 503 and 504. It is always false on success and when the
// request itself cannot be built.
func fetchSubscriptionPageOnce(url string, client *http.Client) (string, bool, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", false, err
	}
	for _, header := range [][2]string{
		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"},
		{"Accept", "text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8"},
		{"Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"},
	} {
		req.Header.Set(header[0], header[1])
	}

	resp, err := client.Do(req)
	if err != nil {
		return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("fetch %s: %w", url, err)
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < 200 || code >= 300 {
		worthRetrying := false
		switch code {
		case http.StatusForbidden,
			http.StatusTooManyRequests,
			http.StatusBadGateway,
			http.StatusServiceUnavailable,
			http.StatusGatewayTimeout:
			worthRetrying = true
		}
		return "", worthRetrying, fmt.Errorf("fetch %s: unexpected status %d", url, code)
	}

	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", isRetriableSubscriptionFetchError(err), fmt.Errorf("read %s: %w", url, err)
	}
	return normalizeSubscriptionPage(string(payload)), false, nil
}
// markdownFallbackURL derives a ".md" variant of url to try after a fetch was
// rejected with HTTP 403.
//
// It returns ("", false) when url is blank, err is nil, err's message mentions
// neither "status 403" nor "forbidden" (case-insensitive), the path already
// ends in ".md", or the URL has a scheme but no path (appending ".md" there
// would alter the host name).
//
// The suffix is inserted at the end of the path: trailing slashes are dropped
// and any query string or fragment is preserved after the ".md".
func markdownFallbackURL(url string, err error) (string, bool) {
	if strings.TrimSpace(url) == "" || err == nil {
		return "", false
	}
	lower := strings.ToLower(err.Error())
	if !strings.Contains(lower, "status 403") && !strings.Contains(lower, "forbidden") {
		return "", false
	}

	// Separate the path part from "?query" / "#fragment" so the extension is
	// not glued onto the end of a query value.
	base, suffix := url, ""
	if cut := strings.IndexAny(url, "?#"); cut >= 0 {
		base, suffix = url[:cut], url[cut:]
	}
	if strings.HasSuffix(base, ".md") {
		return "", false
	}
	base = strings.TrimRight(base, "/")

	// "https://host" or "https://host/" has no document path to extend.
	if _, hostAndPath, hasScheme := strings.Cut(base, "://"); hasScheme && !strings.Contains(hostAndPath, "/") {
		return "", false
	}
	return base + ".md" + suffix, true
}
// isRetriableSubscriptionFetchError reports whether err looks transient. The
// decision is a case-insensitive substring match of err's message against a
// fixed list of markers; a nil error is never retryable.
func isRetriableSubscriptionFetchError(err error) bool {
	if err == nil {
		return false
	}
	transientMarkers := [...]string{
		"eof",
		"timeout",
		"temporarily unavailable",
		"transport closed",
		"connection reset",
		"connection refused",
		"tls handshake timeout",
		"i/o timeout",
		"too many requests",
		"no such host",
		"forbidden",
		"status 403",
	}
	message := strings.ToLower(err.Error())
	for i := range transientMarkers {
		if strings.Contains(message, transientMarkers[i]) {
			return true
		}
	}
	return false
}
// Patterns used by normalizeSubscriptionPage. They are compiled once at
// package initialization instead of on every call.
var (
	subscriptionPageScriptPattern = regexp.MustCompile(`(?is)<script[^>]*>.*?</script>`)
	subscriptionPageStylePattern  = regexp.MustCompile(`(?is)<style[^>]*>.*?</style>`)
	subscriptionPageTagPattern    = regexp.MustCompile(`(?is)<[^>]+>`)
	subscriptionPageSpacePattern  = regexp.MustCompile(`[ \t]+`)
)

// normalizeSubscriptionPage reduces an HTML (or plain-text) document to its
// visible text, one fragment per line.
//
// Script and style elements are removed together with their content, every
// remaining tag is replaced by a line break, HTML entities are decoded, line
// endings are unified to "\n", runs of spaces/tabs collapse to a single
// space, and empty lines are dropped. Because tags become line breaks, text
// separated by inline markup ends up on separate lines.
func normalizeSubscriptionPage(raw string) string {
	text := raw
	text = subscriptionPageScriptPattern.ReplaceAllString(text, "\n")
	text = subscriptionPageStylePattern.ReplaceAllString(text, "\n")
	text = subscriptionPageTagPattern.ReplaceAllString(text, "\n")
	// Decode entities only after tags are gone so that an escaped "&lt;b&gt;"
	// in the content is kept as literal text rather than stripped as a tag.
	text = html.UnescapeString(text)
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	lines := strings.Split(text, "\n")
	cleaned := make([]string, 0, len(lines))
	for _, line := range lines {
		line = subscriptionPageSpacePattern.ReplaceAllString(line, " ")
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		cleaned = append(cleaned, line)
	}
	return strings.Join(cleaned, "\n")
}
// subscriptionPublishedAtPatterns are tried in order by publishedAtFromText.
// The label may be followed by either an ASCII colon or the full-width colon
// U+FF1A (written as an escape to keep the source free of look-alike
// characters). Compiled once at package initialization.
var subscriptionPublishedAtPatterns = []*regexp.Regexp{
	regexp.MustCompile(`最近更新时间[:\x{FF1A}]\s*(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})`),
	regexp.MustCompile(`更新时间[:\x{FF1A}]?\s*(\d{4}-\d{2}-\d{2})`),
}

// publishedAtFromText extracts the page's "last updated" timestamp from raw.
//
// It returns the timestamp formatted as "2006-01-02 15:04:05" and true when a
// label is found; a date-only match is padded with " 00:00:00". When nothing
// matches it returns the current local time in the same layout and false, so
// callers always receive a parseable value and can tell it is a placeholder.
func publishedAtFromText(raw string) (string, bool) {
	for _, pattern := range subscriptionPublishedAtPatterns {
		matches := pattern.FindStringSubmatch(raw)
		if len(matches) != 2 {
			continue
		}
		if len(matches[1]) == len("2006-01-02") {
			return matches[1] + " 00:00:00", true
		}
		return matches[1], true
	}
	return time.Now().Format("2006-01-02 15:04:05"), false
}
// effectiveDateFromPublishedAt returns the leading "YYYY-MM-DD" portion of
// publishedAt. Input shorter than a date falls back to today's local date.
// The prefix is not validated here.
func effectiveDateFromPublishedAt(publishedAt string) string {
	const dateLayout = "2006-01-02"
	if len(publishedAt) < len(dateLayout) {
		return time.Now().Format(dateLayout)
	}
	return publishedAt[:len(dateLayout)]
}
// upsertSubscriptionImportRecords writes records into subscription_plan.
//
// For every record it resolves (creating if needed) the provider and operator
// rows, optionally reuses the dates of an identical stored snapshot when the
// record's publish time is only a placeholder, and then inserts the plan or
// updates the row that already exists for the same
// (provider_id, plan_code, effective_date). After all records are written,
// the snapshot history of each touched (provider, plan code) pair is
// compacted.
//
// Statements run directly on db, not inside a transaction, and the function
// returns on the first error: records processed before the failure stay
// written.
func upsertSubscriptionImportRecords(db *sql.DB, records []subscriptionImportRecord) error {
	type snapshotKey struct {
		providerID int64
		planCode   string
	}
	// Set of plans touched in this run; compacted once each at the end.
	historyKeys := make(map[snapshotKey]struct{})
	for _, record := range records {
		providerID, err := ensureSubscriptionProvider(db, record)
		if err != nil {
			return err
		}
		operatorID, err := ensureSubscriptionOperator(db, record)
		if err != nil {
			return err
		}
		if !record.PublishedAtKnown {
			// record is the loop's own copy, so rewriting its dates here does
			// not modify the caller's slice.
			history, err := loadSubscriptionSnapshotHistory(db, providerID, record.PlanCode)
			if err != nil {
				return err
			}
			if _, err := reuseExistingSnapshotDates(&record, history); err != nil {
				return err
			}
		}
		publishedAt, err := time.Parse("2006-01-02 15:04:05", record.PublishedAt)
		if err != nil {
			return fmt.Errorf("parse published_at for %s: %w", record.PlanCode, err)
		}
		effectiveDate, err := time.Parse("2006-01-02", record.EffectiveDate)
		if err != nil {
			return fmt.Errorf("parse effective_date for %s: %w", record.PlanCode, err)
		}
		modelScopeRaw, err := json.Marshal(record.ModelScope)
		if err != nil {
			return fmt.Errorf("marshal model_scope for %s: %w", record.PlanCode, err)
		}
		// On conflict every column except the key columns is overwritten with
		// the incoming values.
		_, err = db.Exec(
			`INSERT INTO subscription_plan (
provider_id, operator_id, plan_family, plan_code, plan_name, tier,
billing_cycle, currency, list_price, price_unit, quota_value, quota_unit,
context_window, plan_scope, model_scope, source_url, published_at, effective_date, notes
) VALUES (
$1, $2, $3, $4, $5, $6,
$7, $8, $9, $10, $11, $12,
$13, $14, $15, $16, $17, $18, $19
)
ON CONFLICT (provider_id, plan_code, effective_date)
DO UPDATE SET
operator_id = EXCLUDED.operator_id,
plan_family = EXCLUDED.plan_family,
plan_name = EXCLUDED.plan_name,
tier = EXCLUDED.tier,
billing_cycle = EXCLUDED.billing_cycle,
currency = EXCLUDED.currency,
list_price = EXCLUDED.list_price,
price_unit = EXCLUDED.price_unit,
quota_value = EXCLUDED.quota_value,
quota_unit = EXCLUDED.quota_unit,
context_window = EXCLUDED.context_window,
plan_scope = EXCLUDED.plan_scope,
model_scope = EXCLUDED.model_scope,
source_url = EXCLUDED.source_url,
published_at = EXCLUDED.published_at,
notes = EXCLUDED.notes,
updated_at = CURRENT_TIMESTAMP`,
			// Arguments follow the placeholder order $1..$19; zero/blank
			// optional values are sent as NULL.
			providerID, operatorID, record.PlanFamily, record.PlanCode, record.PlanName, record.Tier,
			record.BillingCycle, record.Currency, record.ListPrice, record.PriceUnit, nullIfZeroInt64(record.QuotaValue), nullIfBlank(record.QuotaUnit),
			nullIfZeroIntCommon(record.ContextWindow), nullIfBlank(record.PlanScope), string(modelScopeRaw), record.SourceURL, publishedAt, effectiveDate, nullIfBlank(record.Notes),
		)
		if err != nil {
			return fmt.Errorf("upsert subscription_plan %s: %w", record.PlanCode, err)
		}
		historyKeys[snapshotKey{providerID: providerID, planCode: record.PlanCode}] = struct{}{}
	}
	// Map iteration order is unspecified; each key is compacted independently.
	for key := range historyKeys {
		if err := compactSubscriptionSnapshotHistory(db, key.providerID, key.planCode); err != nil {
			return err
		}
	}
	return nil
}
// subscriptionSnapshotRow is one stored subscription_plan row as read by
// loadSubscriptionSnapshotHistory. Nullable quota_value, quota_unit,
// context_window, plan_scope and notes columns are read as 0 or "" (the query
// applies COALESCE to them).
type subscriptionSnapshotRow struct {
	// ID is the row's primary key, used when deleting redundant snapshots.
	ID int64

	// Content columns; together they form the signature compared when
	// detecting identical snapshots.
	PlanName      string
	Tier          string
	BillingCycle  string
	Currency      string
	ListPrice     float64
	PriceUnit     string
	QuotaValue    int64
	QuotaUnit     string
	ContextWindow int
	PlanScope     string
	// ModelScope holds the stored model_scope value as text.
	ModelScope string
	SourceURL  string
	Notes      string

	// Dates identifying when the snapshot was published and took effect.
	PublishedAt   time.Time
	EffectiveDate time.Time
}
// loadSubscriptionSnapshotHistory returns every stored snapshot of one plan
// (identified by providerID and planCode), newest first: ordered by
// effective_date, then published_at, then id, all descending. The result is
// an empty, non-nil slice when the plan has no rows.
func loadSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) ([]subscriptionSnapshotRow, error) {
	const historyQuery = `SELECT
id,
plan_name,
tier,
billing_cycle,
currency,
list_price,
price_unit,
COALESCE(quota_value, 0),
COALESCE(quota_unit, ''),
COALESCE(context_window, 0),
COALESCE(plan_scope, ''),
model_scope,
source_url,
COALESCE(notes, ''),
published_at,
effective_date
FROM subscription_plan
WHERE provider_id = $1 AND plan_code = $2
ORDER BY effective_date DESC, published_at DESC NULLS LAST, id DESC`

	cursor, queryErr := db.Query(historyQuery, providerID, planCode)
	if queryErr != nil {
		return nil, fmt.Errorf("load subscription snapshot history %s: %w", planCode, queryErr)
	}
	defer cursor.Close()

	snapshots := make([]subscriptionSnapshotRow, 0)
	for cursor.Next() {
		var snap subscriptionSnapshotRow
		// Destinations are listed in the same order as the SELECT columns.
		scanErr := cursor.Scan(
			&snap.ID,
			&snap.PlanName,
			&snap.Tier,
			&snap.BillingCycle,
			&snap.Currency,
			&snap.ListPrice,
			&snap.PriceUnit,
			&snap.QuotaValue,
			&snap.QuotaUnit,
			&snap.ContextWindow,
			&snap.PlanScope,
			&snap.ModelScope,
			&snap.SourceURL,
			&snap.Notes,
			&snap.PublishedAt,
			&snap.EffectiveDate,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan subscription snapshot history %s: %w", planCode, scanErr)
		}
		snapshots = append(snapshots, snap)
	}
	if iterErr := cursor.Err(); iterErr != nil {
		return nil, fmt.Errorf("iterate subscription snapshot history %s: %w", planCode, iterErr)
	}
	return snapshots, nil
}
// reuseExistingSnapshotDates looks for a stored snapshot whose content equals
// record and, if one exists, copies that snapshot's published/effective dates
// into record so the subsequent upsert refreshes the existing row instead of
// creating a new one dated "now".
//
// It does nothing and returns false when record is nil, when the record's
// publish time is known, or when history is empty. history is scanned in the
// order given and the first match wins. The returned bool reports whether
// record was modified.
func reuseExistingSnapshotDates(record *subscriptionImportRecord, history []subscriptionSnapshotRow) (bool, error) {
	if record == nil || record.PublishedAtKnown || len(history) == 0 {
		return false, nil
	}
	modelScopeRaw, err := json.Marshal(record.ModelScope)
	if err != nil {
		return false, fmt.Errorf("marshal model_scope for snapshot comparison %s: %w", record.PlanCode, err)
	}
	wantScope := string(modelScopeRaw)
	for _, existing := range history {
		// Optional text columns are compared against the trimmed record value
		// because blank values are stored as NULL and read back as "".
		if existing.PlanName != record.PlanName ||
			existing.Tier != record.Tier ||
			existing.BillingCycle != record.BillingCycle ||
			existing.Currency != record.Currency ||
			existing.ListPrice != record.ListPrice ||
			existing.PriceUnit != record.PriceUnit ||
			existing.QuotaValue != record.QuotaValue ||
			existing.QuotaUnit != strings.TrimSpace(record.QuotaUnit) ||
			existing.ContextWindow != record.ContextWindow ||
			existing.PlanScope != strings.TrimSpace(record.PlanScope) ||
			!subscriptionModelScopeMatches(existing.ModelScope, wantScope, record.ModelScope) ||
			existing.SourceURL != record.SourceURL ||
			existing.Notes != strings.TrimSpace(record.Notes) {
			continue
		}
		record.PublishedAt = existing.PublishedAt.Format("2006-01-02 15:04:05")
		record.EffectiveDate = existing.EffectiveDate.Format("2006-01-02")
		return true, nil
	}
	return false, nil
}

// subscriptionModelScopeMatches reports whether stored (the model_scope text
// read back from the database) denotes the same list as want, whose compact
// JSON encoding is wantRaw.
//
// Byte equality is tried first. If that fails, stored is decoded and compared
// element by element, so a value the database re-serialized with different
// whitespace still matches (NOTE(review): this matters if model_scope is a
// jsonb column — confirm against the schema). A JSON null and an empty array
// are still treated as different, as they are under byte comparison.
func subscriptionModelScopeMatches(stored string, wantRaw string, want []string) bool {
	if stored == wantRaw {
		return true
	}
	var got []string
	if err := json.Unmarshal([]byte(stored), &got); err != nil {
		return false
	}
	if (got == nil) != (want == nil) || len(got) != len(want) {
		return false
	}
	for i := range got {
		if got[i] != want[i] {
			return false
		}
	}
	return true
}
// compactSubscriptionSnapshotHistory reloads the stored history of one plan
// and deletes, one row at a time, every snapshot that
// redundantSnapshotRowIDs reports as a duplicate. It stops at the first
// failing delete.
func compactSubscriptionSnapshotHistory(db *sql.DB, providerID int64, planCode string) error {
	snapshots, err := loadSubscriptionSnapshotHistory(db, providerID, planCode)
	if err != nil {
		return err
	}

	const deleteStmt = `DELETE FROM subscription_plan WHERE id = $1`
	staleIDs := redundantSnapshotRowIDs(snapshots)
	for i := 0; i < len(staleIDs); i++ {
		_, execErr := db.Exec(deleteStmt, staleIDs[i])
		if execErr != nil {
			return fmt.Errorf("delete redundant subscription snapshot %d for %s: %w", staleIDs[i], planCode, execErr)
		}
	}
	return nil
}
// redundantSnapshotRowIDs returns, in ascending order, the IDs of snapshots
// that duplicate another snapshot's content.
//
// Rows are grouped by their content fields (everything except ID and the two
// dates). In each group exactly one row survives: the one with the earliest
// effective date, ties broken by earliest publish time and then by smallest
// ID. Every other row's ID is returned. The result is empty but non-nil when
// nothing is redundant.
//
// NOTE(review): grouping ignores chronology, so if a plan changes and later
// reverts to identical earlier content, the newer row is reported as
// redundant — confirm this is the intended history model.
func redundantSnapshotRowIDs(history []subscriptionSnapshotRow) []int64 {
	// contentSignature lists the fields that must all match for two rows to
	// count as the same snapshot.
	type contentSignature struct {
		PlanName      string
		Tier          string
		BillingCycle  string
		Currency      string
		ListPrice     float64
		PriceUnit     string
		QuotaValue    int64
		QuotaUnit     string
		ContextWindow int
		PlanScope     string
		ModelScope    string
		SourceURL     string
		Notes         string
	}
	// survivor is the row currently chosen to be kept for a signature.
	type survivor struct {
		id        int64
		effective time.Time
		published time.Time
	}

	// displaces reports whether row ranks ahead of the current survivor.
	displaces := func(row subscriptionSnapshotRow, keep survivor) bool {
		switch {
		case !row.EffectiveDate.Equal(keep.effective):
			return row.EffectiveDate.Before(keep.effective)
		case !row.PublishedAt.Equal(keep.published):
			return row.PublishedAt.Before(keep.published)
		default:
			return row.ID < keep.id
		}
	}

	survivors := make(map[contentSignature]survivor)
	dropped := make([]int64, 0)
	for i := range history {
		row := history[i]
		sig := contentSignature{
			PlanName:      row.PlanName,
			Tier:          row.Tier,
			BillingCycle:  row.BillingCycle,
			Currency:      row.Currency,
			ListPrice:     row.ListPrice,
			PriceUnit:     row.PriceUnit,
			QuotaValue:    row.QuotaValue,
			QuotaUnit:     row.QuotaUnit,
			ContextWindow: row.ContextWindow,
			PlanScope:     row.PlanScope,
			ModelScope:    row.ModelScope,
			SourceURL:     row.SourceURL,
			Notes:         row.Notes,
		}
		candidate := survivor{id: row.ID, effective: row.EffectiveDate, published: row.PublishedAt}

		incumbent, seen := survivors[sig]
		switch {
		case !seen:
			survivors[sig] = candidate
		case displaces(row, incumbent):
			dropped = append(dropped, incumbent.id)
			survivors[sig] = candidate
		default:
			dropped = append(dropped, row.ID)
		}
	}

	sort.Slice(dropped, func(a, b int) bool { return dropped[a] < dropped[b] })
	return dropped
}
// ensureSubscriptionProvider returns the id of the model_provider row whose
// name equals record.ProviderName, inserting a new 'active' row from the
// record's provider fields when none exists. Blank ProviderNameCn and
// ProviderWebsite are stored as NULL.
//
// Errors are wrapped with the failing step and provider name; the underlying
// driver error stays reachable through errors.Is / errors.As. On error the
// returned id is 0.
//
// The lookup and the insert are separate statements, so two concurrent
// importers could both miss the lookup and both attempt the insert.
func ensureSubscriptionProvider(db *sql.DB, record subscriptionImportRecord) (int64, error) {
	var providerID int64
	err := db.QueryRow(`SELECT id FROM model_provider WHERE name = $1`, record.ProviderName).Scan(&providerID)
	if err == nil {
		return providerID, nil
	}
	if err != sql.ErrNoRows {
		return 0, fmt.Errorf("lookup model_provider %s: %w", record.ProviderName, err)
	}
	err = db.QueryRow(
		`INSERT INTO model_provider (name, name_cn, country, website, status)
VALUES ($1, $2, $3, $4, 'active')
RETURNING id`,
		record.ProviderName, nullIfBlank(record.ProviderNameCn), record.ProviderCountry, nullIfBlank(record.ProviderWebsite),
	).Scan(&providerID)
	if err != nil {
		return 0, fmt.Errorf("insert model_provider %s: %w", record.ProviderName, err)
	}
	return providerID, nil
}
// ensureSubscriptionOperator returns the id of the operator row whose name
// equals record.OperatorName, inserting a new 'active' row from the record's
// operator fields when none exists. The new row's description is
// "<OperatorName> subscription import"; blank OperatorNameCn and
// OperatorWebsite are stored as NULL.
//
// Errors are wrapped with the failing step and operator name; the underlying
// driver error stays reachable through errors.Is / errors.As. On error the
// returned id is 0.
//
// The lookup and the insert are separate statements, so two concurrent
// importers could both miss the lookup and both attempt the insert.
func ensureSubscriptionOperator(db *sql.DB, record subscriptionImportRecord) (int64, error) {
	var operatorID int64
	err := db.QueryRow(`SELECT id FROM operator WHERE name = $1`, record.OperatorName).Scan(&operatorID)
	if err == nil {
		return operatorID, nil
	}
	if err != sql.ErrNoRows {
		return 0, fmt.Errorf("lookup operator %s: %w", record.OperatorName, err)
	}
	err = db.QueryRow(
		`INSERT INTO operator (name, name_cn, country, website, description, status, type)
VALUES ($1, $2, $3, $4, $5, 'active', $6)
RETURNING id`,
		record.OperatorName, nullIfBlank(record.OperatorNameCn), record.OperatorCountry, nullIfBlank(record.OperatorWebsite),
		fmt.Sprintf("%s subscription import", record.OperatorName), record.OperatorType,
	).Scan(&operatorID)
	if err != nil {
		return 0, fmt.Errorf("insert operator %s: %w", record.OperatorName, err)
	}
	return operatorID, nil
}
// summarizeSubscriptionImport counts records per key, where the key of a
// record is whatever getter returns for it, and renders the counts as
// "key:count" pairs joined by commas and sorted by key. It returns "" for an
// empty input.
func summarizeSubscriptionImport(records []subscriptionImportRecord, getter func(subscriptionImportRecord) string) string {
	tally := make(map[string]int)
	for i := range records {
		tally[getter(records[i])]++
	}

	labels := make([]string, 0, len(tally))
	for label := range tally {
		labels = append(labels, label)
	}
	// Sorting makes the output independent of map iteration order.
	sort.Strings(labels)

	for i, label := range labels {
		labels[i] = fmt.Sprintf("%s:%d", label, tally[label])
	}
	return strings.Join(labels, ",")
}
// nullIfBlank maps an empty or whitespace-only string to nil (SQL NULL when
// used as a query argument). Any other string is returned unchanged, with
// its surrounding whitespace intact.
func nullIfBlank(value string) any {
	if len(strings.TrimSpace(value)) > 0 {
		return value
	}
	return nil
}
// nullIfZeroInt64 maps 0 to nil (SQL NULL when used as a query argument) and
// returns every other value unchanged as an int64.
func nullIfZeroInt64(value int64) any {
	if value != 0 {
		return value
	}
	return nil
}
// nullIfZeroIntCommon maps 0 to nil (SQL NULL when used as a query argument)
// and returns every other value unchanged as an int.
func nullIfZeroIntCommon(value int) any {
	if value != 0 {
		return value
	}
	return nil
}
// mustParseSubscriptionPrice parses a price such as "1,299.50" after removing
// commas and spaces. Despite the name it never panics: the parse error is
// discarded and the value strconv.ParseFloat produced is returned as is,
// which is 0 for text that is not a number.
func mustParseSubscriptionPrice(raw string) float64 {
	digits := raw
	for _, separator := range []string{",", " "} {
		digits = strings.ReplaceAll(digits, separator, "")
	}
	price, _ := strconv.ParseFloat(digits, 64) // best-effort: error ignored on purpose
	return price
}
// mustParseSubscriptionInt64 parses a base-10 integer such as "1,000,000"
// after removing commas and spaces. Despite the name it never panics: the
// parse error is discarded and the value strconv.ParseInt produced is
// returned as is, which is 0 for text that is not an integer.
func mustParseSubscriptionInt64(raw string) int64 {
	digits := raw
	for _, separator := range []string{",", " "} {
		digits = strings.ReplaceAll(digits, separator, "")
	}
	number, _ := strconv.ParseInt(digits, 10, 64) // best-effort: error ignored on purpose
	return number
}
// firstNonEmptyText returns the first argument that contains something other
// than whitespace, exactly as passed (not trimmed). It returns "" when every
// argument is blank or none are given.
func firstNonEmptyText(values ...string) string {
	for i := 0; i < len(values); i++ {
		if len(strings.TrimSpace(values[i])) > 0 {
			return values[i]
		}
	}
	return ""
}
// parseDecimalMultiplier parses raw as a decimal number (spaces are ignored)
// and returns it multiplied by unit, rounded to the nearest integer.
//
// Rounding rather than truncating matters because most decimal fractions are
// not exactly representable in binary floating point: 0.29*100 evaluates to
// 28.999999999999996, which truncation would turn into 28.
//
// It returns 0 when raw is not a finite number (unparseable, out of range,
// NaN or Inf).
func parseDecimalMultiplier(raw string, unit int64) int64 {
	cleaned := strings.TrimSpace(strings.ReplaceAll(raw, " ", ""))
	value, err := strconv.ParseFloat(cleaned, 64)
	if err != nil || math.IsNaN(value) || math.IsInf(value, 0) {
		// Converting a non-finite float to int64 is not well defined; treat
		// it the same as text that is not a number.
		return 0
	}
	return int64(math.Round(value * float64(unit)))
}
// sliceSection returns the part of raw that follows the first occurrence of
// start and precedes the next occurrence of end. It returns "" when start is
// absent. When end is empty or does not occur after start, everything after
// start is returned.
func sliceSection(raw string, start string, end string) string {
	begin := strings.Index(raw, start)
	if begin == -1 {
		return ""
	}
	tail := raw[begin+len(start):]
	if end == "" {
		return tail
	}
	stop := strings.Index(tail, end)
	if stop == -1 {
		return tail
	}
	return tail[:stop]
}