343 lines
7.7 KiB
Go
343 lines
7.7 KiB
Go
//go:build llm_script
|
||
|
||
package main
|
||
|
||
import (
|
||
"fmt"
|
||
"html"
|
||
"io"
|
||
"net/http"
|
||
"os"
|
||
"regexp"
|
||
"sort"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// defaultTencentCatalogURL is the Tencent Cloud documentation page this script
// targets by default.
// NOTE(review): the code that applies this default is not in this part of the
// file — confirm where it is consumed.
const defaultTencentCatalogURL = "https://cloud.tencent.com/document/product/1823/130060"

// defaultTencentCatalogTimeout is the default fetch timeout (20 seconds). It is
// declared as a variable, not a constant.
var defaultTencentCatalogTimeout = 20 * time.Second
|
||
|
||
// fetchTencentCatalogConfig carries the settings for a single catalog fetch.
type fetchTencentCatalogConfig struct {
	// URL is the address requested with HTTP GET when Fixture is blank.
	URL string

	// DryRun is not read by the code visible in this part of the file.
	// NOTE(review): presumably suppresses side effects in the caller — confirm.
	DryRun bool

	// Timeout is the time budget for the fetch.
	// NOTE(review): presumably applied by the caller when it builds the HTTP
	// client — confirm.
	Timeout time.Duration

	// Fixture, when not blank, is the path of a local file whose contents are
	// used instead of performing the HTTP request.
	Fixture string
}
|
||
|
||
type tencentCatalog struct {
|
||
UpdatedAt string
|
||
Plans []tencentPlan
|
||
Models []tencentModel
|
||
}
|
||
|
||
// tencentPlan is one subscription plan row taken from a "套餐详情" section.
// All values are kept as the raw text lines found in the document.
type tencentPlan struct {
	// Series is the plan series heading the row was found under.
	Series string

	// Tier is the parenthesised line after the plan's first line, with the
	// surrounding parentheses and spaces removed.
	Tier string

	// Quota is the line that mentions "Tokens".
	Quota string

	// Price is the line that mentions "元/月".
	Price string

	// BillingCycle is the line that mentions "订阅月".
	BillingCycle string

	// Scene is the optional free-text line after the price; empty when absent.
	Scene string
}
|
||
|
||
// tencentModel is one model entry taken from a "可用模型" section.
type tencentModel struct {
	// Series is the plan series heading the model was listed under.
	Series string

	// Name is the display line that precedes the model identifier.
	Name string

	// ModelID is the identifier line (lowercase letters, digits, '.', '_', '-').
	ModelID string

	// ContextLength is the context size derived from the notes, or 0 when the
	// notes do not mention one.
	ContextLength int

	// Notes are the free-text lines that follow the identifier.
	Notes []string
}
|
||
|
||
func fetchTencentCatalogContent(cfg fetchTencentCatalogConfig, client *http.Client) (string, error) {
|
||
if strings.TrimSpace(cfg.Fixture) != "" {
|
||
data, err := os.ReadFile(cfg.Fixture)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return string(data), nil
|
||
}
|
||
|
||
req, err := http.NewRequest(http.MethodGet, cfg.URL, nil)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
req.Header.Set("User-Agent", "llm-intelligence/tencent-catalog-fetcher")
|
||
|
||
resp, err := client.Do(req)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return "", fmt.Errorf("unexpected status %d", resp.StatusCode)
|
||
}
|
||
|
||
body, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
return string(body), nil
|
||
}
|
||
|
||
func parseTencentCatalog(raw string) (tencentCatalog, error) {
|
||
lines := normalizeTencentCatalogLines(raw)
|
||
|
||
var catalog tencentCatalog
|
||
var currentSeries string
|
||
var currentMode string
|
||
|
||
for i := 0; i < len(lines); i++ {
|
||
line := lines[i]
|
||
|
||
if catalog.UpdatedAt == "" {
|
||
if updatedAt := extractUpdatedAt(line); updatedAt != "" {
|
||
catalog.UpdatedAt = updatedAt
|
||
continue
|
||
}
|
||
}
|
||
|
||
if series := extractSeriesHeading(line); series != "" {
|
||
currentSeries = series
|
||
currentMode = ""
|
||
continue
|
||
}
|
||
|
||
switch line {
|
||
case "### 套餐详情", "套餐详情":
|
||
if currentSeries == "" {
|
||
continue
|
||
}
|
||
currentMode = "plans"
|
||
continue
|
||
case "### 可用模型", "可用模型":
|
||
if currentSeries == "" {
|
||
continue
|
||
}
|
||
currentMode = "models"
|
||
continue
|
||
}
|
||
|
||
switch currentMode {
|
||
case "plans":
|
||
plan, nextIndex, ok := tryParseTencentPlan(lines, i, currentSeries)
|
||
if ok {
|
||
catalog.Plans = append(catalog.Plans, plan)
|
||
i = nextIndex
|
||
}
|
||
case "models":
|
||
model, nextIndex, ok := tryParseTencentModel(lines, i, currentSeries)
|
||
if ok {
|
||
catalog.Models = append(catalog.Models, model)
|
||
i = nextIndex
|
||
}
|
||
}
|
||
}
|
||
|
||
if catalog.UpdatedAt == "" {
|
||
return tencentCatalog{}, fmt.Errorf("catalog updated_at not found")
|
||
}
|
||
if len(catalog.Plans) == 0 {
|
||
return tencentCatalog{}, fmt.Errorf("catalog plans not found")
|
||
}
|
||
if len(catalog.Models) == 0 {
|
||
return tencentCatalog{}, fmt.Errorf("catalog models not found")
|
||
}
|
||
return catalog, nil
|
||
}
|
||
|
||
// tencentHTMLTagPattern matches any HTML tag left after line-ending tags have
// been turned into newlines. Compiled once instead of on every call.
var tencentHTMLTagPattern = regexp.MustCompile(`<[^>]+>`)

// tencentLineBreakReplacer turns tags that end a visual line into newlines so
// that each table cell, paragraph and heading lands on its own line.
var tencentLineBreakReplacer = strings.NewReplacer(
	"<br>", "\n",
	"<br/>", "\n",
	"<br />", "\n",
	"</p>", "\n",
	"</div>", "\n",
	"</li>", "\n",
	"</tr>", "\n",
	"</td>", "\n",
	"</h1>", "\n",
	"</h2>", "\n",
	"</h3>", "\n",
	"</h4>", "\n",
	"</pre>", "\n",
	"</main>", "\n",
)

// trimTencentLine removes surrounding whitespace and byte-order marks in any
// interleaving. A single TrimSpace followed by a BOM trim is not enough:
// "\uFEFF ## A" would keep the space that followed the BOM and no longer match
// any heading check.
func trimTencentLine(s string) string {
	for {
		next := strings.Trim(strings.TrimSpace(s), "\uFEFF")
		if next == s {
			return s
		}
		s = next
	}
}

// normalizeTencentCatalogLines converts the raw document (HTML or plain text)
// into a list of non-empty, trimmed text lines.
//
// Steps: HTML entities are unescaped, line-ending tags become newlines, all
// remaining tags are removed, CRLF/CR are normalised to LF, and every line is
// stripped of surrounding whitespace and byte-order marks. Lines that end up
// empty are dropped.
func normalizeTencentCatalogLines(raw string) []string {
	text := html.UnescapeString(raw)
	text = tencentLineBreakReplacer.Replace(text)
	text = tencentHTMLTagPattern.ReplaceAllString(text, "")
	text = strings.ReplaceAll(text, "\r\n", "\n")
	text = strings.ReplaceAll(text, "\r", "\n")

	rawLines := strings.Split(text, "\n")
	lines := make([]string, 0, len(rawLines))
	for _, rawLine := range rawLines {
		if line := trimTencentLine(rawLine); line != "" {
			lines = append(lines, line)
		}
	}
	return lines
}
|
||
|
||
// extractUpdatedAt returns the trimmed text after the "最近更新时间:" marker
// when line starts with that marker, and "" otherwise. A line holding only the
// marker also yields "".
func extractUpdatedAt(line string) string {
	const marker = "最近更新时间:"
	if !strings.HasPrefix(line, marker) {
		return ""
	}
	return strings.TrimSpace(line[len(marker):])
}
|
||
|
||
// extractSeriesHeading returns the plan series named by a heading line, or ""
// when the line is not a series heading.
//
// Two shapes are recognised:
//   - a "## " heading whose title contains "Token Plan" or "Coding Plan"; the
//     title is returned without a trailing "套餐";
//   - one of the two known plain titles (after trimming spaces and byte-order
//     marks), mapped to its series name.
func extractSeriesHeading(line string) string {
	const headingMarker = "## "

	if strings.HasPrefix(line, headingMarker) {
		title := strings.TrimSpace(line[len(headingMarker):])
		if !strings.Contains(title, "Token Plan") && !strings.Contains(title, "Coding Plan") {
			return ""
		}
		return strings.TrimSpace(strings.TrimSuffix(title, "套餐"))
	}

	bare := strings.Trim(line, "\uFEFF ")
	if bare == "通用 Token Plan 套餐" {
		return "通用 Token Plan"
	}
	if bare == "Hy Token Plan 套餐" {
		return "Hy Token Plan"
	}
	return ""
}
|
||
|
||
func tryParseTencentPlan(lines []string, start int, series string) (tencentPlan, int, bool) {
|
||
if !looksLikeTencentPlan(lines, start) {
|
||
return tencentPlan{}, start, false
|
||
}
|
||
|
||
plan := tencentPlan{
|
||
Series: series,
|
||
Tier: strings.Trim(lines[start+1], "()() "),
|
||
BillingCycle: lines[start+2],
|
||
Quota: lines[start+3],
|
||
Price: lines[start+4],
|
||
}
|
||
|
||
nextIndex := start + 4
|
||
if start+5 < len(lines) && !strings.HasPrefix(lines[start+5], "### ") && !looksLikeTencentPlan(lines, start+5) {
|
||
plan.Scene = lines[start+5]
|
||
nextIndex = start + 5
|
||
}
|
||
return plan, nextIndex, true
|
||
}
|
||
|
||
func tryParseTencentModel(lines []string, start int, series string) (tencentModel, int, bool) {
|
||
if start+1 >= len(lines) {
|
||
return tencentModel{}, start, false
|
||
}
|
||
if !isTencentModelID(lines[start+1]) {
|
||
return tencentModel{}, start, false
|
||
}
|
||
if isReservedTencentLine(lines[start]) {
|
||
return tencentModel{}, start, false
|
||
}
|
||
|
||
model := tencentModel{
|
||
Series: series,
|
||
Name: lines[start],
|
||
ModelID: lines[start+1],
|
||
}
|
||
|
||
notes := make([]string, 0, 4)
|
||
nextIndex := start + 1
|
||
for i := start + 2; i < len(lines); i++ {
|
||
line := lines[i]
|
||
if strings.HasPrefix(line, "## ") || strings.HasPrefix(line, "### ") {
|
||
break
|
||
}
|
||
if looksLikeTencentPlan(lines, i) {
|
||
break
|
||
}
|
||
if i+1 < len(lines) && isTencentModelID(lines[i+1]) && !isReservedTencentLine(line) {
|
||
break
|
||
}
|
||
notes = append(notes, line)
|
||
nextIndex = i
|
||
}
|
||
|
||
model.Notes = notes
|
||
model.ContextLength = extractContextLength(strings.Join(notes, " "))
|
||
return model, nextIndex, true
|
||
}
|
||
|
||
// isTencentPlanTier reports whether line is wrapped in parentheses, i.e. it
// starts with "(" and ends with ")".
func isTencentPlanTier(line string) bool {
	if !strings.HasPrefix(line, "(") {
		return false
	}
	return strings.HasSuffix(line, ")")
}
|
||
|
||
func looksLikeTencentPlan(lines []string, start int) bool {
|
||
if start+4 >= len(lines) {
|
||
return false
|
||
}
|
||
if isReservedTencentLine(lines[start]) {
|
||
return false
|
||
}
|
||
if isTencentModelID(lines[start]) {
|
||
return false
|
||
}
|
||
if !isTencentPlanTier(lines[start+1]) {
|
||
return false
|
||
}
|
||
if !strings.Contains(lines[start+2], "订阅月") {
|
||
return false
|
||
}
|
||
if !strings.Contains(lines[start+3], "Tokens") {
|
||
return false
|
||
}
|
||
return strings.Contains(lines[start+4], "元/月")
|
||
}
|
||
|
||
// isReservedTencentLine reports whether line is structural rather than data:
// any line starting with "#", or one of the fixed page and section titles.
func isReservedTencentLine(line string) bool {
	return strings.HasPrefix(line, "#") ||
		line == "Token Plan 个人版套餐概览" ||
		line == "套餐详情" ||
		line == "可用模型"
}
|
||
|
||
// tencentModelIDPattern describes a model identifier: a lowercase letter or
// digit followed by lowercase letters, digits, '.', '_' or '-'. It is compiled
// once at package scope because isTencentModelID is called for many lines.
var tencentModelIDPattern = regexp.MustCompile(`^[a-z0-9][a-z0-9._-]*$`)

// isTencentModelID reports whether the whole line is a model identifier.
func isTencentModelID(line string) bool {
	return tencentModelIDPattern.MatchString(line)
}
|
||
|
||
// tencentContextLengthPattern finds "<number><K|M>上下文" (unit is
// case-insensitive, optional spaces between the parts). Compiled once at
// package scope instead of on every call.
var tencentContextLengthPattern = regexp.MustCompile(`(?i)(\d+)\s*([KM])\s*上下文`)

// extractContextLength returns the context size mentioned in text, using the
// first "<number>K上下文" or "<number>M上下文" occurrence. K multiplies by 1024
// and M by 1024*1024. It returns 0 when no such mention exists or the number
// cannot be parsed (for example because it overflows int).
func extractContextLength(text string) int {
	matches := tencentContextLengthPattern.FindStringSubmatch(text)
	if matches == nil {
		return 0
	}

	var value int
	if _, err := fmt.Sscanf(matches[1], "%d", &value); err != nil {
		// The digits matched but did not fit an int; treat as unknown.
		return 0
	}

	switch strings.ToUpper(matches[2]) {
	case "K":
		return value * 1024
	case "M":
		return value * 1024 * 1024
	default:
		return 0
	}
}
|
||
|
||
func formatSeriesSummary(plans []tencentPlan) string {
|
||
counts := make(map[string]int)
|
||
for _, plan := range plans {
|
||
counts[plan.Series]++
|
||
}
|
||
|
||
series := make([]string, 0, len(counts))
|
||
for name := range counts {
|
||
series = append(series, name)
|
||
}
|
||
sort.Strings(series)
|
||
|
||
parts := make([]string, 0, len(series))
|
||
for _, name := range series {
|
||
parts = append(parts, fmt.Sprintf("%s:%d", name, counts[name]))
|
||
}
|
||
return strings.Join(parts, ",")
|
||
}
|