//go:build llm_script package main import ( "fmt" "html" "io" "net/http" "os" "regexp" "sort" "strings" "time" ) const ( defaultTencentCatalogURL = "https://cloud.tencent.com/document/product/1823/130060" ) var defaultTencentCatalogTimeout = 20 * time.Second type fetchTencentCatalogConfig struct { URL string DryRun bool Timeout time.Duration Fixture string } type tencentCatalog struct { UpdatedAt string Plans []tencentPlan Models []tencentModel } type tencentPlan struct { Series string Tier string Quota string Price string BillingCycle string Scene string } type tencentModel struct { Series string Name string ModelID string ContextLength int Notes []string } func fetchTencentCatalogContent(cfg fetchTencentCatalogConfig, client *http.Client) (string, error) { if strings.TrimSpace(cfg.Fixture) != "" { data, err := os.ReadFile(cfg.Fixture) if err != nil { return "", err } return string(data), nil } req, err := http.NewRequest(http.MethodGet, cfg.URL, nil) if err != nil { return "", err } req.Header.Set("User-Agent", "llm-intelligence/tencent-catalog-fetcher") resp, err := client.Do(req) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("unexpected status %d", resp.StatusCode) } body, err := io.ReadAll(resp.Body) if err != nil { return "", err } return string(body), nil } func parseTencentCatalog(raw string) (tencentCatalog, error) { lines := normalizeTencentCatalogLines(raw) var catalog tencentCatalog var currentSeries string var currentMode string for i := 0; i < len(lines); i++ { line := lines[i] if catalog.UpdatedAt == "" { if updatedAt := extractUpdatedAt(line); updatedAt != "" { catalog.UpdatedAt = updatedAt continue } } if series := extractSeriesHeading(line); series != "" { currentSeries = series currentMode = "" continue } switch line { case "### 套餐详情": currentMode = "plans" continue case "### 可用模型": currentMode = "models" continue } switch currentMode { case "plans": plan, nextIndex, ok := tryParseTencentPlan(lines, i, currentSeries) if ok { catalog.Plans = append(catalog.Plans, plan) i = nextIndex } case "models": model, nextIndex, ok := tryParseTencentModel(lines, i, currentSeries) if ok { catalog.Models = append(catalog.Models, model) i = nextIndex } } } if catalog.UpdatedAt == "" { return tencentCatalog{}, fmt.Errorf("catalog updated_at not found") } if len(catalog.Plans) == 0 { return tencentCatalog{}, fmt.Errorf("catalog plans not found") } if len(catalog.Models) == 0 { return tencentCatalog{}, fmt.Errorf("catalog models not found") } return catalog, nil } func normalizeTencentCatalogLines(raw string) []string { text := html.UnescapeString(raw) replacements := []string{"
", "
", "
", "

", "", "", "", "", "", "", "", "", "", ""} for _, replacement := range replacements { text = strings.ReplaceAll(text, replacement, "\n") } tagPattern := regexp.MustCompile(`<[^>]+>`) text = tagPattern.ReplaceAllString(text, "") text = strings.ReplaceAll(text, "\r\n", "\n") text = strings.ReplaceAll(text, "\r", "\n") rawLines := strings.Split(text, "\n") lines := make([]string, 0, len(rawLines)) for _, rawLine := range rawLines { line := strings.TrimSpace(rawLine) if line == "" { continue } lines = append(lines, line) } return lines } func extractUpdatedAt(line string) string { const prefix = "最近更新时间：" if strings.HasPrefix(line, prefix) { return strings.TrimSpace(strings.TrimPrefix(line, prefix)) } return "" } func extractSeriesHeading(line string) string { if !strings.HasPrefix(line, "## ") { return "" } series := strings.TrimSpace(strings.TrimPrefix(line, "## ")) if strings.Contains(series, "Token Plan") || strings.Contains(series, "Coding Plan") { return strings.TrimSpace(strings.TrimSuffix(series, "套餐")) } return "" } func tryParseTencentPlan(lines []string, start int, series string) (tencentPlan, int, bool) { if start+4 >= len(lines) { return tencentPlan{}, start, false } if !isTencentPlanName(lines[start]) { return tencentPlan{}, start, false } if !isTencentPlanTier(lines[start+1]) { return tencentPlan{}, start, false } if !strings.Contains(lines[start+2], "订阅月") { return tencentPlan{}, start, false } if !strings.Contains(lines[start+3], "Tokens") { return tencentPlan{}, start, false } if !strings.Contains(lines[start+4], "元/月") { return tencentPlan{}, start, false } plan := tencentPlan{ Series: series, Tier: strings.Trim(lines[start+1], "（）() "), BillingCycle: lines[start+2], Quota: lines[start+3], Price: lines[start+4], } nextIndex := start + 4 if start+5 < len(lines) && !strings.HasPrefix(lines[start+5], "### ") && !isTencentPlanName(lines[start+5]) { plan.Scene = lines[start+5] nextIndex = start + 5 } return plan, nextIndex, true } func tryParseTencentModel(lines []string, start int, series string) (tencentModel, int, bool) { if start+1 >= len(lines) { return tencentModel{}, start, false } if !isTencentModelID(lines[start+1]) { return tencentModel{}, start, false } if isReservedTencentLine(lines[start]) { return tencentModel{}, start, false } model := tencentModel{ Series: series, Name: lines[start], ModelID: lines[start+1], } notes := make([]string, 0, 4) nextIndex := start + 1 for i := start + 2; i < len(lines); i++ { line := lines[i] if strings.HasPrefix(line, "## ") || strings.HasPrefix(line, "### ") { break } if isTencentPlanName(line) && i+1 < len(lines) && isTencentPlanTier(lines[i+1]) { break } if i+1 < len(lines) && isTencentModelID(lines[i+1]) && !isReservedTencentLine(line) { break } notes = append(notes, line) nextIndex = i } model.Notes = notes model.ContextLength = extractContextLength(strings.Join(notes, " ")) return model, nextIndex, true } func isTencentPlanName(line string) bool { switch line { case "体验套餐", "基础套餐", "进阶套餐", "专业套餐": return true default: return false } } func isTencentPlanTier(line string) bool { return strings.HasPrefix(line, "（") && strings.HasSuffix(line, "）") } func isReservedTencentLine(line string) bool { if strings.HasPrefix(line, "#") { return true } switch line { case "Token Plan 个人版套餐概览", "套餐详情", "可用模型": return true default: return false } } func isTencentModelID(line string) bool { modelIDPattern := regexp.MustCompile(`^[a-z0-9][a-z0-9._-]*$`) return modelIDPattern.MatchString(line) } func extractContextLength(text string) int { contextPattern := regexp.MustCompile(`(?i)(\d+)\s*([KM])\s*上下文`) matches := contextPattern.FindStringSubmatch(text) if len(matches) != 3 { return 0 } value := 0 fmt.Sscanf(matches[1], "%d", &value) switch strings.ToUpper(matches[2]) { case "K": return value * 1024 case "M": return value * 1024 * 1024 default: return 0 } } func formatSeriesSummary(plans []tencentPlan) string { counts := make(map[string]int) for _, plan := range plans { counts[plan.Series]++ } series := make([]string, 0, len(counts)) for name := range counts { series = append(series, name) } sort.Strings(series) parts := make([]string, 0, len(series)) for _, name := range series { parts = append(parts, fmt.Sprintf("%s:%d", name, counts[name])) } return strings.Join(parts, ",") }