Add snapshot, signature, and drift guard support for Vertex AI, Cloudflare Workers AI, and Perplexity API, backed by a queryable audit table and recent-window view. This commit also wires the audit query layer into daily signal materialization and report generation so structure drift becomes a first-class signal instead of a log-only artifact.
252 lines
7.9 KiB
Go
252 lines
7.9 KiB
Go
//go:build llm_script
|
|
|
|
package main
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// markdownPricingStructureSignature is the JSON-serializable fingerprint of a
// markdown pricing snapshot. It records both an exact content hash and a
// coarser structural hash, plus optional provenance metadata that is omitted
// from the JSON output when empty.
type markdownPricingStructureSignature struct {
	// ByteSize is the length of the raw snapshot in bytes.
	ByteSize int `json:"byte_size"`

	// SHA256 is the hex-encoded SHA-256 of the raw snapshot content.
	SHA256 string `json:"sha256"`

	// StructureSHA256 is the hex-encoded SHA-256 of the structure digest
	// payload (line count, headings, table headers, contains flags).
	StructureSHA256 string `json:"structure_sha256"`

	// NormalizedLineCount is the number of non-blank, trimmed lines.
	NormalizedLineCount int `json:"normalized_line_count"`

	// Headings holds the distinct markdown heading titles that were extracted.
	Headings []string `json:"headings"`

	// TableHeaders holds the table header rows that were extracted.
	TableHeaders []string `json:"table_headers"`

	// Contains maps a needle name to whether that needle occurs in the snapshot.
	Contains map[string]bool `json:"contains"`

	// GeneratedAt is the signature creation time as an RFC 3339 string.
	GeneratedAt string `json:"generated_at,omitempty"`

	// SourceURL is the location the snapshot was fetched from.
	SourceURL string `json:"source_url,omitempty"`

	// SnapshotPath is the file path the raw snapshot was written to.
	SnapshotPath string `json:"snapshot_path,omitempty"`
}
|
|
|
|
func buildMarkdownPricingStructureSignature(raw string, containsNeedles map[string]string) markdownPricingStructureSignature {
|
|
lines := markdownPricingLines(raw)
|
|
headings := extractMarkdownPricingHeadings(lines)
|
|
tableHeaders := extractMarkdownPricingTableHeaders(lines)
|
|
contains := make(map[string]bool, len(containsNeedles))
|
|
for key, needle := range containsNeedles {
|
|
contains[key] = strings.Contains(strings.ToLower(raw), strings.ToLower(needle))
|
|
}
|
|
|
|
signature := markdownPricingStructureSignature{
|
|
ByteSize: len([]byte(raw)),
|
|
SHA256: markdownPricingSHA256Hex(raw),
|
|
NormalizedLineCount: len(lines),
|
|
Headings: headings,
|
|
TableHeaders: tableHeaders,
|
|
Contains: contains,
|
|
}
|
|
signature.StructureSHA256 = markdownPricingSHA256Hex(markdownPricingStructureDigestPayload(signature))
|
|
return signature
|
|
}
|
|
|
|
func writeMarkdownPricingSnapshotArtifacts(raw string, sourceURL string, snapshotPath string, signaturePath string, now time.Time, containsNeedles map[string]string) (markdownPricingStructureSignature, error) {
|
|
if strings.TrimSpace(snapshotPath) == "" {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("snapshot path is required")
|
|
}
|
|
if strings.TrimSpace(signaturePath) == "" {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("signature path is required")
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(snapshotPath), 0o755); err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("mkdir snapshot dir: %w", err)
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(signaturePath), 0o755); err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("mkdir signature dir: %w", err)
|
|
}
|
|
if err := os.WriteFile(snapshotPath, []byte(raw), 0o644); err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("write snapshot: %w", err)
|
|
}
|
|
|
|
signature := buildMarkdownPricingStructureSignature(raw, containsNeedles)
|
|
signature.GeneratedAt = now.Format(time.RFC3339)
|
|
signature.SourceURL = sourceURL
|
|
signature.SnapshotPath = snapshotPath
|
|
payload, err := json.MarshalIndent(signature, "", " ")
|
|
if err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("marshal signature: %w", err)
|
|
}
|
|
if err := os.WriteFile(signaturePath, payload, 0o644); err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("write signature: %w", err)
|
|
}
|
|
return signature, nil
|
|
}
|
|
|
|
// resolveMarkdownPricingSnapshotPaths fills in any blank snapshot or signature
// path and returns the pair (snapshot path, signature path).
//
// A blank snapshotPath becomes "<snapshotDir>/<baseName>-<timestamp>.md",
// where the timestamp is now formatted as YYYYMMDD-HHMMSS and a blank
// snapshotDir defaults to "logs/<baseName>-snapshots". A blank signaturePath
// becomes the snapshot path with its extension replaced by ".signature.json".
// Paths that are already non-blank are returned untouched.
func resolveMarkdownPricingSnapshotPaths(snapshotPath string, signaturePath string, snapshotDir string, baseName string, now time.Time) (string, string) {
	if strings.TrimSpace(snapshotPath) == "" {
		dir := snapshotDir
		if strings.TrimSpace(dir) == "" {
			dir = filepath.Join("logs", baseName+"-snapshots")
		}
		stamped := fmt.Sprintf("%s-%s", baseName, now.Format("20060102-150405"))
		snapshotPath = filepath.Join(dir, stamped) + ".md"
	}

	if strings.TrimSpace(signaturePath) == "" {
		// For a generated snapshot path this strips the ".md" just added, so
		// the signature lands next to the snapshot with the same stem.
		stem := strings.TrimSuffix(snapshotPath, filepath.Ext(snapshotPath))
		signaturePath = stem + ".signature.json"
	}

	return snapshotPath, signaturePath
}
|
|
|
|
func readMarkdownPricingStructureSignature(path string) (markdownPricingStructureSignature, error) {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return markdownPricingStructureSignature{}, err
|
|
}
|
|
var signature markdownPricingStructureSignature
|
|
if err := json.Unmarshal(data, &signature); err != nil {
|
|
return markdownPricingStructureSignature{}, fmt.Errorf("unmarshal signature %s: %w", path, err)
|
|
}
|
|
return signature, nil
|
|
}
|
|
|
|
func hasMarkdownPricingStructureSignature(signature markdownPricingStructureSignature) bool {
|
|
return signature.ByteSize > 0 ||
|
|
strings.TrimSpace(signature.StructureSHA256) != "" ||
|
|
strings.TrimSpace(signature.SHA256) != "" ||
|
|
len(signature.Headings) > 0 ||
|
|
len(signature.TableHeaders) > 0 ||
|
|
len(signature.Contains) > 0
|
|
}
|
|
|
|
// markdownPricingLines splits raw into lines, treating "\r\n", "\r" and "\n"
// all as line breaks, trims surrounding whitespace from each line, and drops
// lines that are blank after trimming. The result is never nil.
func markdownPricingLines(raw string) []string {
	// Splitting on either break character yields the same non-blank lines as
	// normalizing to "\n" first: the empty piece inside a "\r\n" pair would be
	// discarded as blank anyway.
	pieces := strings.FieldsFunc(raw, func(r rune) bool {
		return r == '\n' || r == '\r'
	})

	kept := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if clean := strings.TrimSpace(piece); clean != "" {
			kept = append(kept, clean)
		}
	}
	return kept
}
|
|
|
|
// extractMarkdownPricingHeadings returns the distinct heading titles found in
// lines, in first-seen order, capped at 12 entries.
//
// A heading is any line beginning with "#"; its title is the text left after
// removing all leading "#" characters and surrounding whitespace. Headings
// with an empty title and repeated titles are skipped. The result is never
// nil.
func extractMarkdownPricingHeadings(lines []string) []string {
	const maxHeadings = 12

	titles := make([]string, 0, maxHeadings)
	known := make(map[string]struct{})
	for _, candidate := range lines {
		if len(titles) == maxHeadings {
			break
		}
		// If stripping leading '#' removed nothing, the line is not a heading.
		rest := strings.TrimLeft(candidate, "#")
		if len(rest) == len(candidate) {
			continue
		}
		title := strings.TrimSpace(rest)
		if title == "" {
			continue
		}
		if _, duplicate := known[title]; duplicate {
			continue
		}
		known[title] = struct{}{}
		titles = append(titles, title)
	}
	return titles
}
|
|
|
|
func extractMarkdownPricingTableHeaders(lines []string) []string {
|
|
headers := make([]string, 0, 6)
|
|
for i, line := range lines {
|
|
if !strings.HasPrefix(line, "|") {
|
|
continue
|
|
}
|
|
if i+1 >= len(lines) || !isMarkdownSnapshotTableSeparator(splitMarkdownSnapshotTableRow(lines[i+1])) {
|
|
continue
|
|
}
|
|
headers = append(headers, line)
|
|
if len(headers) >= 6 {
|
|
break
|
|
}
|
|
}
|
|
return headers
|
|
}
|
|
|
|
func markdownPricingStructureDigestPayload(signature markdownPricingStructureSignature) string {
|
|
type containsEntry struct {
|
|
Name string `json:"name"`
|
|
Value bool `json:"value"`
|
|
}
|
|
keys := make([]string, 0, len(signature.Contains))
|
|
for key := range signature.Contains {
|
|
keys = append(keys, key)
|
|
}
|
|
sort.Strings(keys)
|
|
entries := make([]containsEntry, 0, len(keys))
|
|
for _, key := range keys {
|
|
entries = append(entries, containsEntry{Name: key, Value: signature.Contains[key]})
|
|
}
|
|
payload := struct {
|
|
NormalizedLineCount int `json:"normalized_line_count"`
|
|
Headings []string `json:"headings"`
|
|
TableHeaders []string `json:"table_headers"`
|
|
Contains []containsEntry `json:"contains"`
|
|
}{
|
|
NormalizedLineCount: signature.NormalizedLineCount,
|
|
Headings: signature.Headings,
|
|
TableHeaders: signature.TableHeaders,
|
|
Contains: entries,
|
|
}
|
|
bytes, _ := json.Marshal(payload)
|
|
return string(bytes)
|
|
}
|
|
|
|
// markdownPricingSHA256Hex returns the SHA-256 digest of raw as a lowercase
// hexadecimal string.
func markdownPricingSHA256Hex(raw string) string {
	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error.
	hasher.Write([]byte(raw))
	return hex.EncodeToString(hasher.Sum(nil))
}
|
|
|
|
// splitMarkdownSnapshotTableRow breaks a markdown table row into its cells.
//
// Surrounding whitespace and at most one leading and one trailing "|" are
// removed, the remainder is split on "|", and each cell is whitespace-trimmed.
// It returns nil when nothing remains after the outer pipes are stripped.
func splitMarkdownSnapshotTableRow(line string) []string {
	inner := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSpace(line), "|"), "|")
	if len(inner) == 0 {
		return nil
	}

	cells := strings.Split(inner, "|")
	for idx := range cells {
		cells[idx] = strings.TrimSpace(cells[idx])
	}
	return cells
}
|
|
|
|
// isMarkdownSnapshotTableSeparator reports whether parts looks like a markdown
// table separator row: there is at least one cell, and every cell, once
// whitespace-trimmed, is non-empty and made up solely of '-' and ':'.
func isMarkdownSnapshotTableSeparator(parts []string) bool {
	if len(parts) == 0 {
		return false
	}
	for _, cell := range parts {
		marker := strings.TrimSpace(cell)
		if marker == "" {
			return false
		}
		// Anything left after stripping '-' and ':' from both ends means the
		// cell contains some other character.
		if strings.Trim(marker, "-:") != "" {
			return false
		}
	}
	return true
}
|
|
|
|
// copyFileCommon copies the file at src to dst, creating dst's parent
// directories if they do not exist. The whole file is read into memory and
// then written with mode 0o644, replacing any existing dst. Errors from the
// read, directory creation, or write are returned unwrapped.
func copyFileCommon(src string, dst string) error {
	// Read first so that a missing source does not leave an empty directory
	// tree behind.
	contents, readErr := os.ReadFile(src)
	if readErr != nil {
		return readErr
	}

	parent := filepath.Dir(dst)
	if mkdirErr := os.MkdirAll(parent, 0o755); mkdirErr != nil {
		return mkdirErr
	}

	return os.WriteFile(dst, contents, 0o644)
}
|