184 lines
6.9 KiB
Go
184 lines
6.9 KiB
Go
//go:build llm_script
|
|
|
|
package main
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// deepseekPricingStructureSignature is the JSON-serialisable fingerprint of a
// captured pricing-page HTML document.
//
// The first group of fields is derived from the document itself by
// buildDeepSeekPricingStructureSignature. The trailing omitempty fields are
// provenance filled in by writeDeepSeekPricingSnapshotArtifacts and are left
// out of the JSON when empty.
//
// Field order is significant: encoding/json emits fields in declaration
// order, so reordering them changes the bytes of the written signature file.
type deepseekPricingStructureSignature struct {
	// ByteSize is the length of the raw document in bytes.
	ByteSize int `json:"byte_size"`
	// SHA256 is the hex-encoded SHA-256 of the raw document.
	SHA256 string `json:"sha256"`
	// StructureSHA256 is the hex-encoded SHA-256 of the digest payload built
	// from Title, MetaDescription, CommitID, CanonicalURL and Contains.
	StructureSHA256 string `json:"structure_sha256"`

	// Title is the cleaned text of the first <title> element.
	Title string `json:"title"`
	// MetaDescription is the content of the first description <meta> tag.
	MetaDescription string `json:"meta_description"`
	// CommitID is the content of the first commit-id <meta> tag.
	CommitID string `json:"commit_id"`
	// CanonicalURL is the content of the first og:url <meta> tag.
	CanonicalURL string `json:"canonical_url"`
	// Contains records, per needle key, whether the needle occurs anywhere in
	// the lower-cased document.
	Contains map[string]bool `json:"contains"`

	// GeneratedAt is the RFC 3339 timestamp at which the signature was written.
	GeneratedAt string `json:"generated_at,omitempty"`
	// SourceURL is the URL the document was reported to come from.
	SourceURL string `json:"source_url,omitempty"`
	// SnapshotPath is the file the raw document was written to.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
|
|
|
// deepseekPricingContainsNeedles maps each key reported in a signature's
// Contains field to the substring that is searched for in the page.
//
// Matching is a case-insensitive substring test over the whole raw document,
// markup included, so short needles such as "api" match very loosely.
var deepseekPricingContainsNeedles = map[string]string{
	"deepseek":  "deepseek",
	"platform":  "platform",
	"pricing":   "pricing",
	"api_docs":  "api",
	"developer": "developer resources",
}
|
|
|
|
// Patterns used to pull individual values out of the captured HTML. They are
// compiled once at package initialisation.
//
// The four extraction patterns are case-insensitive and let "." match
// newlines; each exposes the wanted value as capture group 1. The <meta>
// patterns expect the identifying attribute (name/property) to appear before
// content, and the captured value ends at the first quote of either kind.
var (
	// deepseekPricingTitleRe captures the inner text of the first <title>.
	deepseekPricingTitleRe = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
	// deepseekPricingMetaDescRe captures content of <meta name="description">.
	deepseekPricingMetaDescRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']description["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingCommitRe captures content of <meta name="commit-id">.
	deepseekPricingCommitRe = regexp.MustCompile(`(?is)<meta[^>]+name=["']commit-id["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingCanonicalRe captures content of <meta property="og:url">.
	deepseekPricingCanonicalRe = regexp.MustCompile(`(?is)<meta[^>]+property=["']og:url["'][^>]+content=["']([^"']+)["']`)
	// deepseekPricingHTMLTagRe matches any single tag, including tags that
	// span several lines.
	deepseekPricingHTMLTagRe = regexp.MustCompile(`(?s)<[^>]+>`)
)
|
|
|
|
func buildDeepSeekPricingStructureSignature(raw string) deepseekPricingStructureSignature {
|
|
title := firstDeepSeekPricingHTMLMatch(deepseekPricingTitleRe, raw)
|
|
meta := firstDeepSeekPricingHTMLMatch(deepseekPricingMetaDescRe, raw)
|
|
commitID := firstDeepSeekPricingHTMLMatch(deepseekPricingCommitRe, raw)
|
|
canonicalURL := firstDeepSeekPricingHTMLMatch(deepseekPricingCanonicalRe, raw)
|
|
contains := make(map[string]bool, len(deepseekPricingContainsNeedles))
|
|
lower := strings.ToLower(raw)
|
|
for key, needle := range deepseekPricingContainsNeedles {
|
|
contains[key] = strings.Contains(lower, strings.ToLower(needle))
|
|
}
|
|
signature := deepseekPricingStructureSignature{
|
|
ByteSize: len([]byte(raw)),
|
|
SHA256: deepseekPricingSHA256Hex(raw),
|
|
Title: title,
|
|
MetaDescription: meta,
|
|
CommitID: commitID,
|
|
CanonicalURL: canonicalURL,
|
|
Contains: contains,
|
|
}
|
|
signature.StructureSHA256 = deepseekPricingSHA256Hex(deepseekPricingStructureDigestPayload(signature))
|
|
return signature
|
|
}
|
|
|
|
func writeDeepSeekPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time) (deepseekPricingStructureSignature, error) {
|
|
if strings.TrimSpace(snapshotPath) == "" {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
|
}
|
|
if strings.TrimSpace(signaturePath) == "" {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
|
}
|
|
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
|
}
|
|
signature := buildDeepSeekPricingStructureSignature(raw)
|
|
signature.GeneratedAt = now.Format(time.RFC3339)
|
|
signature.SourceURL = sourceURL
|
|
signature.SnapshotPath = snapshotPath
|
|
payload, err := json.MarshalIndent(signature, "", " ")
|
|
if err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
|
}
|
|
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
|
}
|
|
return signature, nil
|
|
}
|
|
|
|
// resolveDeepSeekPricingSnapshotPaths fills in whichever of the snapshot and
// signature paths the caller left blank (empty or whitespace only) and
// returns them as (snapshotPath, signaturePath).
//
//   - A blank snapshotPath becomes
//     <snapshotDir>/<baseName>-<YYYYMMDD-HHMMSS>.html, where a blank
//     snapshotDir defaults to logs/<baseName>-snapshots and the timestamp is
//     taken from now.
//   - A blank signaturePath becomes the snapshot path with its extension
//     replaced by ".signature.json".
//
// Non-blank arguments are returned unchanged.
func resolveDeepSeekPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	if strings.TrimSpace(snapshotPath) == "" {
		dir := snapshotDir
		if strings.TrimSpace(dir) == "" {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stamped := fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405"))
		snapshotPath = filepath.Join(dir, stamped) + ".html"
	}

	if strings.TrimSpace(signaturePath) == "" {
		// For a generated snapshot this strips the ".html" added above, so the
		// signature sits next to the snapshot under the same stem.
		stem := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = stem + ".signature.json"
	}

	return snapshotPath, signaturePath
}
|
|
|
|
func readDeepSeekPricingStructureSignature(path string) (deepseekPricingStructureSignature, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return deepseekPricingStructureSignature{}, err
|
|
}
|
|
var signature deepseekPricingStructureSignature
|
|
if err := json.Unmarshal(data, &signature); err != nil {
|
|
return deepseekPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
|
}
|
|
return signature, nil
|
|
}
|
|
|
|
func hasDeepSeekPricingStructureSignature(signature deepseekPricingStructureSignature) bool {
|
|
return signature.ByteSize > 0 ||
|
|
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
|
strings.TrimSpace(signature.SHA256) != "" ||
|
|
strings.TrimSpace(signature.Title) != "" ||
|
|
strings.TrimSpace(signature.CommitID) != "" ||
|
|
len(signature.Contains) > 0
|
|
}
|
|
|
|
func deepseekPricingStructureDigestPayload(signature deepseekPricingStructureSignature) string {
|
|
type containsEntry struct {
|
|
Name string `json:"name"`
|
|
Value bool `json:"value"`
|
|
}
|
|
keys := make([]string, 0, len(signature.Contains))
|
|
for key := range signature.Contains {
|
|
keys = append(keys, key)
|
|
}
|
|
sort.Strings(keys)
|
|
entries := make([]containsEntry, 0, len(keys))
|
|
for _, key := range keys {
|
|
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
|
}
|
|
payload := struct {
|
|
Title string `json:"title"`
|
|
MetaDescription string `json:"meta_description"`
|
|
CommitID string `json:"commit_id"`
|
|
CanonicalURL string `json:"canonical_url"`
|
|
Contains []containsEntry `json:"contains"`
|
|
}{
|
|
Title: signature.Title,
|
|
MetaDescription: signature.MetaDescription,
|
|
CommitID: signature.CommitID,
|
|
CanonicalURL: signature.CanonicalURL,
|
|
Contains: entries,
|
|
}
|
|
bytes, _ := json.Marshal(payload)
|
|
return string(bytes)
|
|
}
|
|
|
|
// deepseekPricingSHA256Hex returns the SHA-256 digest of raw's bytes as a
// 64-character lower-case hexadecimal string.
func deepseekPricingSHA256Hex(raw string) string {
	digest := sha256.Sum256([]byte(raw))
	encoded := make([]byte, hex.EncodedLen(len(digest)))
	hex.Encode(encoded, digest[:])
	return string(encoded)
}
|
|
|
|
func firstDeepSeekPricingHTMLMatch(re *regexp.Regexp, raw string) string {
|
|
match := re.FindStringSubmatch(raw)
|
|
if len(match) < 2 {
|
|
return ""
|
|
}
|
|
text := deepseekPricingHTMLTagRe.ReplaceAllString(match[1], " ")
|
|
text = strings.ReplaceAll(text, "&", "&")
|
|
text = strings.ReplaceAll(text, " ", " ")
|
|
text = strings.Join(strings.Fields(text), " ")
|
|
return strings.TrimSpace(text)
|
|
}
|