//go:build llm_script package main import ( "fmt" "html" "regexp" "strings" ) const defaultVertexPricingURL = "https://cloud.google.com/gemini-enterprise-agent-platform/generative-ai/pricing" var ( vertexRowPattern = regexp.MustCompile(`(?s)(.*?)`) vertexCellPattern = regexp.MustCompile(`(?s)]*>(.*?)`) vertexHeadingPattern = regexp.MustCompile(`(?is)]*>(.*?)`) vertexTablePattern = regexp.MustCompile(`(?is)]*>(.*?)`) vertexStandardHeadingPattern = regexp.MustCompile(`(?is)]*>\s*(standard|标准)\s*`) ) func parseVertexPricingCatalog(raw string) ([]officialPricingRecord, error) { familyBlocks := splitVertexFamilyBlocks(raw) records := make([]officialPricingRecord, 0) if len(familyBlocks) > 0 { for _, block := range familyBlocks { tableHTML := extractVertexStandardTable(block) if strings.TrimSpace(tableHTML) == "" { continue } records = append(records, parseVertexStandardTable(tableHTML)...) } } if len(records) > 0 { return records, nil } records = parseVertexStandardTextBlocks(raw) if len(records) > 0 { return records, nil } if len(familyBlocks) == 0 { return nil, fmt.Errorf("unexpected vertex pricing content") } return nil, fmt.Errorf("no vertex standard pricing rows found") } func parseVertexStandardTable(table string) []officialPricingRecord { rows := vertexRowPattern.FindAllStringSubmatch(table, -1) records := make([]officialPricingRecord, 0) currentModel := "" currentInput := 0.0 for _, row := range rows { cells := vertexCellPattern.FindAllStringSubmatch(row[1], -1) if len(cells) == 0 { continue } values := make([]string, 0, len(cells)) for _, cell := range cells { values = append(values, cleanHTMLText(cell[1])) } if len(values) == 1 && !strings.Contains(values[0], "Model") { currentModel = values[0] currentInput = 0 continue } if len(values) < 2 { continue } rowType := values[0] priceCell := values[1] if len(values) > 2 && strings.Contains(strings.ToLower(values[0]), "gemini") { currentModel = values[0] rowType = values[1] priceCell = values[2] } if strings.TrimSpace(currentModel) == "" || strings.EqualFold(currentModel, "Model") { continue } switch { case strings.HasPrefix(rowType, "Input (text"), strings.HasPrefix(rowType, "输入(文本"): price, ok := firstDollarPrice(priceCell) if ok { currentInput = price } case strings.HasPrefix(rowType, "Text output"), strings.HasPrefix(rowType, "文本输出"): outputPrice, ok := firstDollarPrice(priceCell) if !ok || currentInput == 0 { continue } providerNameCn, providerCountry, providerWebsite := providerMetadata("Google") record := officialPricingRecord{ ModelID: normalizeExternalID("vertex", currentModel), ModelName: currentModel, ProviderName: "Google", ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Google Cloud Vertex AI", OperatorNameCn: "Google Cloud Vertex AI", OperatorCountry: "US", OperatorWebsite: "https://cloud.google.com/vertex-ai", OperatorType: "cloud", Region: "global", Currency: "USD", InputPrice: currentInput, OutputPrice: outputPrice, SourceURL: defaultVertexPricingURL, ModelSourceURL: defaultVertexPricingURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(currentModel), } record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 records = append(records, record) } } return records } func splitVertexFamilyBlocks(raw string) []string { indices := make([]int, 0) matches := vertexHeadingPattern.FindAllStringSubmatchIndex(raw, -1) for _, match := range matches { label := cleanHTMLText(raw[match[2]:match[3]]) if !strings.Contains(strings.ToLower(label), "gemini") { continue } indices = append(indices, match[0]) } blocks := make([]string, 0, len(indices)) for i, start := range indices { end := len(raw) if i+1 < len(indices) { end = indices[i+1] } blocks = append(blocks, raw[start:end]) } return blocks } func extractVertexStandardTable(raw string) string { heading := vertexStandardHeadingPattern.FindStringIndex(raw) if heading == nil { return "" } segment := raw[heading[1]:] table := vertexTablePattern.FindStringSubmatch(segment) if len(table) != 2 { return "" } return table[1] } func parseVertexStandardTextBlocks(raw string) []officialPricingRecord { lines := htmlLines(raw) records := make([]officialPricingRecord, 0) currentModelParts := make([]string, 0) currentInput := 0.0 inStandard := false for _, line := range lines { lower := strings.ToLower(line) sectionTitle := normalizeVertexSectionTitle(lower) switch { case sectionTitle != "": inStandard = sectionTitle == "standard" || sectionTitle == "标准" currentModelParts = currentModelParts[:0] currentInput = 0 continue case !inStandard: continue case strings.Contains(lower, "model type price"): continue case strings.Contains(line, "$"): modelName := strings.TrimSpace(strings.Join(currentModelParts, " ")) if modelName == "" { continue } switch { case strings.HasPrefix(lower, "input (text"), strings.HasPrefix(lower, "1m input text tokens"): if price, ok := firstDollarPrice(line); ok { currentInput = price } case strings.HasPrefix(lower, "text output"), strings.HasPrefix(lower, "1m output text tokens"): outputPrice, ok := firstDollarPrice(line) if !ok || currentInput == 0 { continue } providerNameCn, providerCountry, providerWebsite := providerMetadata("Google") record := officialPricingRecord{ ModelID: normalizeExternalID("vertex", modelName), ModelName: modelName, ProviderName: "Google", ProviderNameCn: providerNameCn, ProviderCountry: providerCountry, ProviderWebsite: providerWebsite, OperatorName: "Google Cloud Vertex AI", OperatorNameCn: "Google Cloud Vertex AI", OperatorCountry: "US", OperatorWebsite: "https://cloud.google.com/vertex-ai", OperatorType: "cloud", Region: "global", Currency: "USD", InputPrice: currentInput, OutputPrice: outputPrice, SourceURL: defaultVertexPricingURL, ModelSourceURL: defaultVertexPricingURL, DateConfidence: "unknown", DateSourceKind: "official_pricing", Modality: detectModality(modelName), } record.IsFree = record.InputPrice == 0 && record.OutputPrice == 0 records = append(records, record) currentModelParts = currentModelParts[:0] currentInput = 0 } default: currentModelParts = append(currentModelParts, line) } } return dedupeOfficialPricingRecords(records) } func normalizeVertexSectionTitle(line string) string { title := strings.TrimSpace(strings.TrimLeft(line, "#")) title = strings.TrimSpace(title) switch title { case "standard", "标准", "priority", "优先级", "flex/batch", "灵活/批处理", "batch api", "live api": return title default: return "" } } func htmlLines(raw string) []string { replacer := strings.NewReplacer( "
", "\n", "
", "\n", "
", "\n", "

", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", "", "\n", ) withBreaks := replacer.Replace(raw) tagPattern := regexp.MustCompile(`(?is)<[^>]+>`) spacePattern := regexp.MustCompile(`[ \t]+`) cleaned := html.UnescapeString(withBreaks) cleaned = strings.ReplaceAll(cleaned, "\r\n", "\n") cleaned = strings.ReplaceAll(cleaned, "\r", "\n") cleaned = strings.ReplaceAll(cleaned, "\u00a0", " ") cleaned = tagPattern.ReplaceAllString(cleaned, "") rawLines := strings.Split(cleaned, "\n") lines := make([]string, 0, len(rawLines)) for _, line := range rawLines { line = strings.TrimSpace(spacePattern.ReplaceAllString(line, " ")) if line == "" { continue } lines = append(lines, line) } return lines }