Files
llm-intelligence/scripts/collector_stats_window_audit.sh
phamnazage-jpg d5d18e987e feat(pipeline): wire collectors into real pipeline gates
Wire the new subscription and official pricing collectors into the daily, real, and intel pipeline entrypoints.

This commit also upgrades Phase 6 verification with recent-window collector classification so gate failures distinguish preconditions from true runtime or provider issues.
2026-05-15 22:37:06 +08:00

165 lines
5.0 KiB
Bash

#!/usr/bin/env bash
#
# Summarises the most recent collector_stats rows (read from PostgreSQL or
# from a TSV file), classifies every failure, and can optionally assert a
# minimum success rate. Run with --help for the command-line synopsis.

# Abort on command failure, on unset variables, and on failures in pipelines.
set -o errexit -o nounset -o pipefail

# Defaults; the command-line flags parsed further down override them.
FIELD_SEP=$'\x1f'            # ASCII unit separator placed between columns
LIMIT=7                      # number of rows that make up the audit window
DB_URL="${DATABASE_URL:-}"   # connection string, seeded from the environment
INPUT_PATH=""                # TSV file to read instead of the database
THRESHOLD=""                 # minimum success rate in percent; empty = no check
# Prints the command-line synopsis (in Chinese: usage for the --db and
# --input modes, then the expected TSV column order) to stdout.
usage() {
  printf '%s\n' \
    '用法:' \
    'bash scripts/collector_stats_window_audit.sh --db <DATABASE_URL> [--limit N] [--assert-success-rate PCT]' \
    'bash scripts/collector_stats_window_audit.sh --input <tsv-file> [--limit N] [--assert-success-rate PCT]' \
    '输入 TSV 列顺序:' \
    'source<TAB>success<TAB>error_message<TAB>created_at'
}
#######################################
# Maps a collector error message to a failure category.
# Matching is case-insensitive and substring-based; precondition markers are
# checked before provider markers, and anything unmatched (including a blank
# message) is reported as a collector runtime failure.
# Arguments: $1 - raw error message (may be empty or omitted)
# Outputs:   precondition_missing | external_provider_failure |
#            collector_runtime_failure, followed by a newline
#######################################
classify_failure() {
  local lowered marker
  local -a precondition_markers=(
    "api key"
    "strict real mode"
    "database_url"
    "password authentication failed"
    "permission denied"
    "role does not exist"
    "relation does not exist"
    "must provide"
  )
  local -a provider_markers=(
    "429"
    "rate limit"
    "too many requests"
    "timeout"
    "temporarily unavailable"
    "transport closed"
    "connection reset"
    "connection refused"
    "eof"
    "tls handshake timeout"
    "no such host"
    "i/o timeout"
  )

  # Fold to lower case so the marker comparison ignores case.
  lowered="$(tr '[:upper:]' '[:lower:]' <<< "${1:-}")"

  # Nothing but spaces (or nothing at all): no hint to classify on.
  if [[ -z "${lowered// /}" ]]; then
    printf '%s\n' "collector_runtime_failure"
    return
  fi

  for marker in "${precondition_markers[@]}"; do
    if [[ "$lowered" == *"$marker"* ]]; then
      printf '%s\n' "precondition_missing"
      return
    fi
  done

  for marker in "${provider_markers[@]}"; do
    if [[ "$lowered" == *"$marker"* ]]; then
      printf '%s\n' "external_provider_failure"
      return
    fi
  done

  printf '%s\n' "collector_runtime_failure"
}
#######################################
# Prints the newest collector_stats rows, one row per line, with the columns
# source, success ('t'/'f'), error_message and created_at joined by FIELD_SEP.
# Globals:   DB_URL, LIMIT, FIELD_SEP (read)
# Outputs:   rows on stdout; diagnostics on stderr
# Returns:   1 when DB_URL is empty or LIMIT is not a non-negative integer,
#            otherwise the exit status of psql
#######################################
fetch_rows_from_db() {
  if [[ -z "${DB_URL:-}" ]]; then
    echo "missing --db / DATABASE_URL" >&2
    return 1
  fi

  # LIMIT is spliced into the SQL text below (psql -c does no variable
  # interpolation), so accept plain digits only to rule out SQL injection.
  if [[ ! "${LIMIT:-}" =~ ^[0-9]+$ ]]; then
    printf 'invalid --limit: %s (expected a non-negative integer)\n' "${LIMIT:-}" >&2
    return 1
  fi

  # Unaligned psql output prints column values verbatim. A newline, carriage
  # return, tab or unit separator inside error_message would therefore split
  # one row into several lines/fields, so those characters become spaces.
  psql "$DB_URL" -F "$FIELD_SEP" -Atqc "
    SELECT
      COALESCE(source, ''),
      CASE WHEN success THEN 't' ELSE 'f' END,
      TRANSLATE(
        COALESCE(error_message, ''),
        CHR(10) || CHR(13) || CHR(9) || CHR(31),
        '    '
      ),
      TO_CHAR(created_at, 'YYYY-MM-DD HH24:MI:SS')
    FROM collector_stats
    ORDER BY created_at DESC
    LIMIT ${LIMIT};
  "
}
#######################################
# Prints the first LIMIT lines of the TSV file named by INPUT_PATH.
# Globals:   INPUT_PATH, LIMIT (read)
# Outputs:   file lines on stdout; diagnostics on stderr
# Returns:   1 when INPUT_PATH is empty, otherwise the exit status of head
#######################################
fetch_rows_from_file() {
  [[ -n "${INPUT_PATH:-}" ]] || {
    echo "missing --input" >&2
    return 1
  }
  head -n "$LIMIT" "$INPUT_PATH"
}
#######################################
# Parses the command-line flags into the script's global settings.
# Every value-taking flag must be followed by a value; --limit must be a
# non-negative integer and --assert-success-rate a non-negative decimal, so a
# malformed or empty value fails loudly instead of silently disabling the
# assertion or reaching SQL/awk unchecked.
# Globals:   DB_URL, INPUT_PATH, LIMIT, THRESHOLD (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text on --help (stdout) or on errors (stderr)
# Returns:   exits 0 after --help, exits 1 on any invalid argument
#######################################
parse_args() {
  local flag
  local -r number_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'

  while [[ $# -gt 0 ]]; do
    flag="$1"
    case "$flag" in
      --db|--input|--limit|--assert-success-rate)
        # Without this check "$2" would abort with "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "missing value for ${flag}" >&2
          usage >&2
          exit 1
        fi
        case "$flag" in
          --db)
            DB_URL="$2"
            ;;
          --input)
            INPUT_PATH="$2"
            ;;
          --limit)
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
              echo "invalid --limit: $2 (expected a non-negative integer)" >&2
              exit 1
            fi
            LIMIT="$2"
            ;;
          --assert-success-rate)
            if [[ ! "$2" =~ $number_re ]]; then
              echo "invalid --assert-success-rate: $2 (expected a number such as 85 or 85.5)" >&2
              exit 1
            fi
            THRESHOLD="$2"
            ;;
        esac
        shift 2
        ;;
      --help|-h)
        usage
        exit 0
        ;;
      *)
        echo "unknown arg: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Load the audit window into ROWS: the database is queried only when no
# input file was given (an input file takes precedence).
if [[ -z "$INPUT_PATH" ]]; then
  ROWS="$(fetch_rows_from_db)"
else
  ROWS="$(fetch_rows_from_file)"
fi
# Window tallies; filled in by tally_rows below.
SUCCESS_COUNT=0
FAILURE_COUNT=0
PRECONDITION_COUNT=0
EXTERNAL_COUNT=0
RUNTIME_COUNT=0
UNKNOWN_COUNT=0
ROW_COUNT=0
DETAIL_LINES=""

#######################################
# Tallies audit rows read from stdin, one row per line. Columns (source,
# success, error_message, created_at) may be separated by tabs or FIELD_SEP.
# Empty lines are skipped and a trailing carriage return is dropped, so
# CRLF-terminated TSV files are parsed the same as LF-terminated ones.
# A row counts as a success when its success column is "t" or "true"; any
# other row is a failure and is categorised via classify_failure.
# Globals:   FIELD_SEP (read); ROW_COUNT, SUCCESS_COUNT, FAILURE_COUNT,
#            PRECONDITION_COUNT, EXTERNAL_COUNT, RUNTIME_COUNT, UNKNOWN_COUNT,
#            DETAIL_LINES (updated)
#######################################
tally_rows() {
  local raw_line normalized_line
  local source success error_message created_at
  local category rendered_error outcome

  while IFS= read -r raw_line; do
    # Strip the CR of a CRLF line ending; otherwise it would stick to the
    # last column (and turn a short "src<TAB>t" row into a failure).
    raw_line="${raw_line%$'\r'}"
    if [[ -z "$raw_line" ]]; then
      continue
    fi

    # Accept tab-separated input by mapping tabs onto FIELD_SEP first.
    normalized_line="${raw_line//$'\t'/$FIELD_SEP}"
    IFS="$FIELD_SEP" read -r source success error_message created_at <<< "$normalized_line"
    ROW_COUNT=$((ROW_COUNT + 1))

    if [[ "$success" == "t" || "$success" == "true" ]]; then
      SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
      outcome="success"
      category="success"
      rendered_error="-"
    else
      FAILURE_COUNT=$((FAILURE_COUNT + 1))
      outcome="failure"
      category="$(classify_failure "$error_message")"
      rendered_error="${error_message:-unknown}"
      case "$category" in
        precondition_missing)
          PRECONDITION_COUNT=$((PRECONDITION_COUNT + 1))
          ;;
        external_provider_failure)
          EXTERNAL_COUNT=$((EXTERNAL_COUNT + 1))
          ;;
        collector_runtime_failure)
          RUNTIME_COUNT=$((RUNTIME_COUNT + 1))
          ;;
        *)
          # Defensive: a category this script does not know about.
          UNKNOWN_COUNT=$((UNKNOWN_COUNT + 1))
          ;;
      esac
    fi

    DETAIL_LINES+="sample_${ROW_COUNT} created_at=${created_at:-unknown} source=${source:-unknown} outcome=${outcome} category=${category} error=${rendered_error}"$'\n'
  done
}

tally_rows <<< "$ROWS"
# A threshold that is not a plain non-negative decimal (e.g. "90%") would make
# awk compare the two values as strings, so that "100.00" fails against "90%"
# while "95.00" passes. Reject such values before reporting anything.
if [[ -n "$THRESHOLD" ]]; then
  threshold_re='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
  if [[ ! "$THRESHOLD" =~ $threshold_re ]]; then
    printf 'invalid --assert-success-rate: %s (expected a number such as 85 or 85.5)\n' "$THRESHOLD" >&2
    exit 1
  fi
fi

# Empty window: report zeros; an asserted threshold cannot be met.
if [[ "$ROW_COUNT" -eq 0 ]]; then
  printf '%s\n' "window_size=0 success_count=0 failure_count=0 success_rate=0.00 threshold=${THRESHOLD:-n/a} precondition_missing=0 external_provider_failure=0 collector_runtime_failure=0 unknown_failure=0"
  printf '%s\n' "sample_window=empty"
  if [[ -n "$THRESHOLD" ]]; then
    exit 1
  fi
  exit 0
fi

# Success rate as a percentage with two decimals.
SUCCESS_RATE="$(awk -v success="$SUCCESS_COUNT" -v total="$ROW_COUNT" 'BEGIN { printf "%.2f", (success * 100) / total }')"

# One summary line, then one detail line per sampled row.
printf '%s\n' "window_size=${ROW_COUNT} success_count=${SUCCESS_COUNT} failure_count=${FAILURE_COUNT} success_rate=${SUCCESS_RATE} threshold=${THRESHOLD:-n/a} precondition_missing=${PRECONDITION_COUNT} external_provider_failure=${EXTERNAL_COUNT} collector_runtime_failure=${RUNTIME_COUNT} unknown_failure=${UNKNOWN_COUNT}"
printf '%s' "$DETAIL_LINES"

# Optional gate: exit 0 when the rate meets the threshold, 1 otherwise.
if [[ -n "$THRESHOLD" ]]; then
  # "+ 0" forces awk to compare the operands as numbers.
  if awk -v actual="$SUCCESS_RATE" -v threshold="$THRESHOLD" 'BEGIN { exit !(actual + 0 >= threshold + 0) }'; then
    exit 0
  fi
  exit 1
fi