Files
llm-intelligence/scripts/verify_phase6.sh
2026-05-29 18:48:48 +08:00

362 lines
14 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Phase 6 acceptance gate.
#
# Runs a sequence of checks (shell checks, importer smoke gate, live pipeline
# gate, collector window gate, API server probe) and prints machine-readable
# summary lines (ROOT_CAUSE / RELEASE_SEMANTICS / BLOCKER_SWITCH).
#
# Environment:
#   DATABASE_URL    optional; overrides the default local PostgreSQL DSN
#   PHASE6_PORT     optional; fixed port for the temporary API server
#   API_AUTH_TOKEN  optional; bearer token used when probing the API
#
# Fail fast: abort on errors, unset variables and failing pipeline stages.
set -euo pipefail
# Resolve the directory of this script so sibling libraries can be sourced
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# NOTE(review): check_shell, pass, fail, warn and finish_phase are not defined
# in this file; presumably they come from verify_common.sh - confirm.
. "$SCRIPT_DIR/verify_common.sh"
. "$SCRIPT_DIR/secret_gate_lib.sh"
# Connection string handed to the collector audit, healthcheck and API server.
DB_URL="${DATABASE_URL:-host=/var/run/postgresql dbname=llm_intelligence user=long sslmode=disable}"
# Temporary API server binary (removed by cleanup) and its combined log.
SERVER_BIN="/tmp/llm_phase6_server"
SERVER_LOG="/tmp/llm_phase6_server.log"
# Empty means "pick a free port later" (see reserve_server_port).
SERVER_PORT="${PHASE6_PORT:-}"
# PID of the background API server; set by start_server, read by cleanup.
SERVER_PID=""
API_AUTH_TOKEN="${API_AUTH_TOKEN:-phase6-local-token}"
# First recorded root cause of a failing run; "none" means nothing recorded.
# Written only through set_root_cause_once (first writer wins).
ROOT_CAUSE_CLASS="none"
ROOT_CAUSE_SOURCE="none"
ROOT_CAUSE_SUMMARY="none"
# Release verdict; overwritten through set_release_semantics.
RELEASE_SEMANTICS_CLASS="release-ready"
RELEASE_SEMANTICS_GATE="phase6"
RELEASE_SEMANTICS_POLICY="release-allowed"
# Records a shift of the blocking gate from one gate to another; written only
# through set_blocker_switch_once (first writer wins).
BLOCKER_SWITCH_CLASS="none"
BLOCKER_SWITCH_OLD="none"
BLOCKER_SWITCH_NEW="none"
#######################################
# EXIT handler: stop the background API server if it is still running and
# delete the temporary server binary.
# Globals: SERVER_PID (read), SERVER_BIN (read)
#######################################
cleanup() {
  local server_pid="${SERVER_PID:-}"
  if [ -n "$server_pid" ]; then
    if kill -0 "$server_pid" >/dev/null 2>&1; then
      # Best effort: the process may exit between the probe and the signal.
      kill "$server_pid" >/dev/null 2>&1 || true
      wait "$server_pid" >/dev/null 2>&1 || true
    fi
  fi
  rm -f "$SERVER_BIN"
}
trap cleanup EXIT
#######################################
# Probe whether something accepts TCP connections on 127.0.0.1:<port>.
# Arguments: $1 - TCP port number
# Returns:   0 if the connection attempt succeeded, non-zero otherwise
#######################################
port_in_use() {
  local probe_port="$1"
  # Opening Bash's /dev/tcp pseudo-path attempts a connect; the subshell keeps
  # the descriptor from staying open in the calling shell.
  if (echo >"/dev/tcp/127.0.0.1/${probe_port}") >/dev/null 2>&1; then
    return 0
  fi
  return 1
}
#######################################
# Choose the port for the temporary API server.
# Keeps a preset SERVER_PORT untouched; otherwise scans 18080..18120 and
# stores the first port that port_in_use reports as free.
# Globals: SERVER_PORT (read, written)
# Returns: 0 if SERVER_PORT is set on exit, 1 if every candidate is in use
#######################################
reserve_server_port() {
  # Keep the loop variable out of the global namespace.
  local candidate
  if [ -n "${SERVER_PORT:-}" ]; then
    return 0
  fi
  for ((candidate = 18080; candidate <= 18120; candidate++)); do
    if ! port_in_use "$candidate"; then
      SERVER_PORT="$candidate"
      return 0
    fi
  done
  return 1
}
#######################################
# Launch the API server binary in the background and wait for /health.
# Polls up to 20 times, 0.5s apart, and gives up early if the process dies.
# Globals: DB_URL, SERVER_PORT, API_AUTH_TOKEN, SERVER_BIN, SERVER_LOG (read)
#          SERVER_PID (written)
# Returns: 0 once /health answers with "status":"ok", 1 otherwise
#######################################
start_server() {
  local health_url="http://127.0.0.1:${SERVER_PORT}/health"
  local attempt

  DATABASE_URL="$DB_URL" PORT="$SERVER_PORT" API_AUTH_TOKEN="$API_AUTH_TOKEN" \
    "$SERVER_BIN" >"$SERVER_LOG" 2>&1 &
  SERVER_PID=$!

  for ((attempt = 1; attempt <= 20; attempt++)); do
    # A dead process can never become healthy; stop polling immediately.
    kill -0 "$SERVER_PID" >/dev/null 2>&1 || return 1
    if curl -fsS "$health_url" >/tmp/llm_phase6_health.out 2>/tmp/llm_phase6_health.err; then
      if grep -q '"status":"ok"' /tmp/llm_phase6_health.out; then
        return 0
      fi
    fi
    sleep 0.5
  done
  return 1
}
# Filter: print the last line of stdin that has at least one field.
# Blank and whitespace-only lines are skipped; prints an empty line if none.
last_nonempty_line() {
  awk '
    !NF { next }
    { last = $0 }
    END { print last }
  '
}
# Filter: print the last non-blank line of stdin, ignoring lines that consist
# solely of "exit status <number>". Prints an empty line if nothing qualifies.
last_meaningful_failure_line() {
  awk '
    /^exit status [0-9]+$/ { next }
    NF { kept = $0 }
    END { print kept }
  '
}
#######################################
# Extract the value of the first whitespace-separated `key=value` token whose
# key equals the requested name.
# Arguments: $1 - metric name
#            $2 - text to search (may span several lines)
# Outputs:   the value on stdout; nothing if the metric is absent
#######################################
extract_window_metric() {
  local metric_key="$1"
  local report_text="$2"
  printf '%s\n' "$report_text" | awk -v key="$metric_key" '
    # Skip lines that cannot contain the metric at all.
    $0 !~ (key "=") { next }
    {
      for (idx = 1; idx <= NF; idx++) {
        split($idx, pair, "=")
        if (pair[1] != key) {
          continue
        }
        print pair[2]
        exit
      }
    }
  '
}
#######################################
# Classify a failing collector window from its per-category failure counters.
# Counters are read with extract_window_metric; a missing counter counts as 0.
# Arguments: $1 - audit output containing `name=count` tokens
# Outputs:   precondition_missing_only | external_provider_failure_only | mixed
#######################################
classify_window_failure() {
  local payload="$1"
  local n_precondition n_provider n_runtime n_unknown
  n_precondition="$(extract_window_metric precondition_missing "$payload")"
  n_provider="$(extract_window_metric external_provider_failure "$payload")"
  n_runtime="$(extract_window_metric collector_runtime_failure "$payload")"
  n_unknown="$(extract_window_metric unknown_failure "$payload")"
  : "${n_precondition:=0}" "${n_provider:=0}" "${n_runtime:=0}" "${n_unknown:=0}"

  # Both "only" verdicts require that no runtime or unknown failure occurred.
  if [ "$n_runtime" -eq 0 ] && [ "$n_unknown" -eq 0 ]; then
    if [ "$n_precondition" -gt 0 ] && [ "$n_provider" -eq 0 ]; then
      echo "precondition_missing_only"
      return
    fi
    if [ "$n_provider" -gt 0 ] && [ "$n_precondition" -eq 0 ]; then
      echo "external_provider_failure_only"
      return
    fi
  fi
  echo "mixed"
}
#######################################
# Record the root cause of the run; only the first call takes effect.
# Globals:   ROOT_CAUSE_CLASS, ROOT_CAUSE_SOURCE, ROOT_CAUSE_SUMMARY (written)
# Arguments: $1 - class, $2 - source gate, $3 - human-readable summary
#######################################
set_root_cause_once() {
  local cause_class="$1" cause_source="$2" cause_summary="$3"
  # "none" is the sentinel for "nothing recorded yet".
  if [ "$ROOT_CAUSE_CLASS" = "none" ]; then
    ROOT_CAUSE_CLASS="$cause_class"
    ROOT_CAUSE_SOURCE="$cause_source"
    ROOT_CAUSE_SUMMARY="$cause_summary"
  fi
}
#######################################
# Overwrite the release verdict (every call replaces the previous one).
# Globals:   RELEASE_SEMANTICS_CLASS, RELEASE_SEMANTICS_GATE,
#            RELEASE_SEMANTICS_POLICY (written)
# Arguments: $1 - class, $2 - gate that decided it, $3 - policy
#######################################
set_release_semantics() {
  local semantics_class="$1" deciding_gate="$2" release_policy="$3"
  RELEASE_SEMANTICS_POLICY="$release_policy"
  RELEASE_SEMANTICS_GATE="$deciding_gate"
  RELEASE_SEMANTICS_CLASS="$semantics_class"
}
#######################################
# Record a blocker switch; only the first call takes effect.
# Globals:   BLOCKER_SWITCH_CLASS, BLOCKER_SWITCH_OLD, BLOCKER_SWITCH_NEW
#            (written)
# Arguments: $1 - class, $2 - old blocker, $3 - new blocker
#######################################
set_blocker_switch_once() {
  local switch_class="$1" old_blocker="$2" new_blocker="$3"
  # "none" is the sentinel for "nothing recorded yet".
  if [ "$BLOCKER_SWITCH_CLASS" = "none" ]; then
    BLOCKER_SWITCH_CLASS="$switch_class"
    BLOCKER_SWITCH_OLD="$old_blocker"
    BLOCKER_SWITCH_NEW="$new_blocker"
  fi
}
#######################################
# Map the last failure line of the live pipeline to a failure class.
# Matching is case-insensitive (ASCII) and substring based; precondition
# patterns are tested before external-provider patterns.
# Arguments: $1 - failure line (may be empty)
# Outputs:   precondition_missing | external_provider_failure |
#            primary_pipeline_failure
#######################################
classify_live_run_failure() {
  local live_tail="$1"
  local normalized
  normalized="$(printf '%s' "$live_tail" | tr '[:upper:]' '[:lower:]')"
  case "$normalized" in
    # PostgreSQL quotes the object name (`relation "x" does not exist`,
    # `role "x" does not exist`), so the quoted variants are matched alongside
    # the original unquoted phrases.
    *"api key"* | \
    *"database_url"* | \
    *"must provide"* | \
    *"未设置"* | \
    *"permission denied"* | \
    *"role does not exist"* | \
    *'role "'*'" does not exist'* | \
    *"relation does not exist"* | \
    *'relation "'*'" does not exist'*)
      printf '%s\n' "precondition_missing"
      ;;
    *"signature_guard"* | \
    *"unexpected status 403"* | \
    *"unexpected status 502"* | \
    *"unexpected status 503"* | \
    *"unexpected status 504"* | \
    *"no pricing cards found"* | \
    *"no model rows parsed"* | \
    *"no model overview cards parsed"* | \
    *"context deadline exceeded"* | \
    *"client.timeout"* | \
    *"i/o timeout"* | \
    *"tls handshake timeout"* | \
    *"transport closed"* | \
    *"connection reset"* | \
    *"connection refused"* | \
    *"no such host"*)
      printf '%s\n' "external_provider_failure"
      ;;
    *)
      printf '%s\n' "primary_pipeline_failure"
      ;;
  esac
}
#######################################
# Guess which pricing importer a live-run failure line refers to.
# Matching is case-insensitive (ASCII) and substring based; the first matching
# provider in the order below wins.
# Arguments: $1 - failure line (may be empty)
# Outputs:   vertex_pricing | cloudflare_pricing | perplexity_pricing |
#            xfyun_pricing | unknown_external_provider
#######################################
classify_live_run_provider() {
  local lowered provider
  lowered="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
  # NOTE(review): vertex only matches with the "import_" prefix, unlike the
  # other providers - confirm this asymmetry is intended.
  if [[ "$lowered" == *"import_vertex_pricing"* ]]; then
    provider='vertex_pricing'
  elif [[ "$lowered" == *"import_cloudflare_pricing"* || "$lowered" == *"cloudflare_pricing"* ]]; then
    provider='cloudflare_pricing'
  elif [[ "$lowered" == *"import_perplexity_pricing"* || "$lowered" == *"perplexity_pricing"* ]]; then
    provider='perplexity_pricing'
  elif [[ "$lowered" == *"import_xfyun_pricing"* || "$lowered" == *"xfyun_pricing"* ]]; then
    provider='xfyun_pricing'
  else
    provider='unknown_external_provider'
  fi
  printf '%s\n' "$provider"
}
#######################################
# Run the real collection pipeline and report it as the live_run gate.
# The combined output is saved to /tmp/llm_phase6_live_pipeline.out. On
# failure the last meaningful output line is classified and recorded as the
# root cause (first writer wins).
# Globals: ROOT_CAUSE_* (written via set_root_cause_once)
# Outputs: whatever pass/fail emit; the pipeline output goes to the file only
#######################################
run_live_pipeline_gate() {
  # All working variables are local so nothing leaks into the global scope.
  local live_output live_rc live_tail live_failure_class live_provider
  # The exit status is inspected by hand, so suspend errexit around the call.
  set +e
  live_output="$(bash scripts/run_real_pipeline.sh 2>&1)"
  live_rc=$?
  set -e
  printf '%s\n' "$live_output" >/tmp/llm_phase6_live_pipeline.out
  live_tail="$(printf '%s\n' "$live_output" | last_meaningful_failure_line)"
  if [ "$live_rc" -eq 0 ]; then
    pass "live_run_result=PASS 主链路真实采集并输出今日日报"
  else
    live_failure_class="$(classify_live_run_failure "${live_tail:-}")"
    case "$live_failure_class" in
      precondition_missing)
        set_root_cause_once "precondition_missing" "live_run" "主链路因前置条件缺失未执行"
        fail "live_run_result=FAIL 主链路因前置条件缺失未执行 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
        ;;
      external_provider_failure)
        # Name the failing importer in the root-cause source when recognisable.
        live_provider="$(classify_live_run_provider "${live_tail:-}")"
        set_root_cause_once "external_provider_failure" "live_run:${live_provider}" "外部文档站/价格页异常阻断主链路"
        fail "live_run_result=FAIL 外部文档站/价格页异常阻断主链路 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
        ;;
      *)
        set_root_cause_once "primary_pipeline_failure" "live_run" "主链路真实采集失败"
        fail "live_run_result=FAIL 主链路真实采集失败 (${live_tail:-see /tmp/llm_phase6_live_pipeline.out})"
        ;;
    esac
  fi
}
#######################################
# Run the importer smoke script and report it as the importer_smoke_gate.
# The combined output is echoed to stdout and saved to
# /tmp/llm_phase6_importer_smoke.out.
# Globals: ROOT_CAUSE_* (written via set_root_cause_once on failure)
# Returns: 0 if the smoke script succeeded, 1 otherwise
#######################################
run_importer_smoke_gate() {
  local captured rc failure_line
  # The exit status is inspected by hand, so suspend errexit around the call.
  set +e
  captured="$(bash scripts/verify_importer_smoke.sh 2>&1)"
  rc=$?
  set -e
  printf '%s\n' "$captured"
  printf '%s\n' "$captured" >/tmp/llm_phase6_importer_smoke.out

  if [ "$rc" -ne 0 ]; then
    failure_line="$(printf '%s\n' "$captured" | last_meaningful_failure_line)"
    set_root_cause_once "importer_smoke_gate_failure" "importer_smoke_gate" "新增导入器 smoke gate 未通过"
    fail "importer_smoke_gate_result=FAIL 新增导入器 smoke gate 未通过 (${failure_line:-see /tmp/llm_phase6_importer_smoke.out})"
    return 1
  fi

  pass "importer_smoke_gate_result=PASS 新增导入器 smoke gate 通过"
  return 0
}
#######################################
# Audit the last 7 collector runs against a 95% success-rate assertion and
# translate the outcome into release semantics.
#   audit passes                    -> release-ready, PASS
#   precondition_missing_only       -> precondition-degraded, PASS with warning
#   external_provider_failure_only  -> degraded-external-provider, WARN
#   anything else                   -> release-blocked, FAIL
# Globals: DB_URL (read); RELEASE_SEMANTICS_* and ROOT_CAUSE_* (written via
#          set_release_semantics / set_root_cause_once)
# Outputs: the audit output on stdout, followed by the pass/warn/fail report
#######################################
run_window_gate() {
  local audit_output audit_rc failure_kind
  # The exit status is inspected by hand, so suspend errexit around the call.
  set +e
  audit_output="$(bash scripts/collector_stats_window_audit.sh --db "$DB_URL" --limit 7 --assert-success-rate 95 2>&1)"
  audit_rc=$?
  set -e
  echo "$audit_output"

  if [ "$audit_rc" -eq 0 ]; then
    set_release_semantics "release-ready" "window_gate" "release-allowed"
    pass "window_gate_result=PASS 最近 7 次采集成功率达到 95%已输出分类摘要stability_label=stable-window"
    return
  fi

  failure_kind="$(classify_window_failure "$audit_output")"
  # NOTE(review): the precondition-only PASS and the FAIL message below both
  # say the 95% target was reached although the audit assertion failed -
  # confirm whether that wording is a check title or a copy/paste slip.
  case "$failure_kind" in
    precondition_missing_only)
      set_release_semantics "precondition-degraded" "window_gate" "release-allowed-with-warning"
      pass "window_gate_result=PASS 最近 7 次采集成功率达到 95%环境纪律问题precondition_missing_only调度环境缺 OPENROUTER_API_KEY非系统缺陷stability_label=precondition-only-window"
      ;;
    external_provider_failure_only)
      set_release_semantics "degraded-external-provider" "window_gate" "release-allowed-with-warning"
      set_root_cause_once "external_provider_failure_only" "window_gate" "最近 7 次采集窗口仅被外部依赖失败拖低"
      warn "window_gate_result=WARN 最近 7 次采集成功率未达 95%仅外部文档站失败external_provider_failure_only需要 release 语义降级而非误判为 collector bugstability_label=recovered-external-incident"
      ;;
    *)
      set_release_semantics "release-blocked" "window_gate" "release-blocked"
      set_root_cause_once "mixed_window_failure" "window_gate" "最近 7 次采集窗口存在混合失败"
      fail "window_gate_result=FAIL 最近 7 次采集成功率达到 95%window_failure_class=${failure_kind}stability_label=unstable-window"
      ;;
  esac
}
# ---------------------------------------------------------------------------
# Gate sequence, part 1: prerequisite checks, importer smoke gate, live
# pipeline gate, static checks and the collector window gate.
# NOTE(review): check_shell is not defined in this file; it is called with a
# label and a command string - presumably it runs the command and reports the
# label as pass/fail. Confirm against verify_common.sh.
# ---------------------------------------------------------------------------
echo "=== Phase 6 综合验收检查 ==="
check_shell "Phase 1~5 总门禁通过" "bash scripts/verify_pre_phase6.sh"
check_shell "全仓 Go 测试通过" "go test ./..."
check_shell "脚本级采集器单测通过" "bash scripts/test.sh"
# The live pipeline only runs when the importer smoke gate passed; otherwise
# it is reported as skipped and the blocker switch is recorded.
if run_importer_smoke_gate; then
run_live_pipeline_gate
else
set_blocker_switch_once "global-blocker-shift" "importer_smoke_gate" "live_run"
warn "live_run_result=SKIPPED 因 importer_smoke_gate_result=FAIL"
fi
# If a root cause was recorded but no blocker switch yet, record a switch from
# the smoke gate to the root-cause source - but only when the saved smoke
# output contains the PASS marker.
# NOTE(review): the file holds the output of verify_importer_smoke.sh, while
# the "importer_smoke_gate_result=PASS" line is emitted here via pass();
# confirm the smoke script itself prints that marker, otherwise this branch
# can never fire.
if [ "$BLOCKER_SWITCH_CLASS" = "none" ] && [ "$ROOT_CAUSE_CLASS" != "none" ] && grep -q 'importer_smoke_gate_result=PASS' /tmp/llm_phase6_importer_smoke.out 2>/dev/null; then
set_blocker_switch_once "global-blocker-shift" "importer_smoke_gate" "$ROOT_CAUSE_SOURCE"
fi
# Build-only check; the binary is discarded here and rebuilt later for the
# runtime probe.
check_shell "API Server 可构建" "go build -o /dev/null ./cmd/server"
check_shell "健康检查脚本通过" "DATABASE_URL='$DB_URL' bash healthcheck.sh"
check_shell "源码与环境文件未包含明显硬编码密钥" "source scripts/secret_gate_lib.sh && secret_scan_paths . cmd internal frontend/src scripts .github/workflows && secret_env_files .dockerignore"
run_window_gate
# ---------------------------------------------------------------------------
# Gate sequence, part 2: build the API server into $SERVER_BIN, start it on a
# free local port and probe /health plus two authenticated endpoints.
# ---------------------------------------------------------------------------
if go build -o "$SERVER_BIN" ./cmd/server >/tmp/llm_phase6_server_build.out 2>/tmp/llm_phase6_server_build.err; then
  if reserve_server_port && start_server; then
    pass "API /health 可用"

    # --- /api/v1/models: status code, latency and payload shape ---
    # curl's exit status is inspected by hand, so suspend errexit around it.
    set +e
    api_metrics="$(curl -sS -H "Authorization: Bearer ${API_AUTH_TOKEN}" -o /tmp/llm_phase6_models.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/models")"
    api_rc=$?
    set -e
    if [ "$api_rc" -eq 0 ]; then
      # -w prints "<http_code> <time_total>"; split the two fields.
      api_code="$(printf '%s' "$api_metrics" | awk '{print $1}')"
      api_time="$(printf '%s' "$api_metrics" | awk '{print $2}')"
      if [ "$api_code" = "200" ]; then
        pass "API /api/v1/models 返回 200"
      else
        fail "API /api/v1/models 返回异常状态 (HTTP ${api_code:-unknown})"
      fi
      # Floating-point comparison is delegated to awk; an empty api_time makes
      # the awk program invalid, which lands in the failure branch.
      if awk "BEGIN { exit !($api_time < 0.5) }"; then
        pass "API 响应 < 500ms (当前: ${api_time}s)"
      else
        fail "API 响应 >= 500ms (当前: ${api_time}s)"
      fi
      if grep -q '"data"' /tmp/llm_phase6_models.json; then
        pass "API 返回模型数据载荷"
      else
        fail "API 返回体缺少 data 字段"
      fi
    else
      fail "API /api/v1/models 请求失败"
    fi

    # --- /api/v1/subscription-plans: status code and payload shape ---
    set +e
    plan_metrics="$(curl -sS -H "Authorization: Bearer ${API_AUTH_TOKEN}" -o /tmp/llm_phase6_subscription_plans.json -w '%{http_code} %{time_total}' "http://127.0.0.1:${SERVER_PORT}/api/v1/subscription-plans")"
    plan_rc=$?
    set -e
    if [ "$plan_rc" -eq 0 ]; then
      plan_code="$(printf '%s' "$plan_metrics" | awk '{print $1}')"
      if [ "$plan_code" = "200" ]; then
        pass "API /api/v1/subscription-plans 返回 200"
      else
        fail "API /api/v1/subscription-plans 返回异常状态 (HTTP ${plan_code:-unknown})"
      fi
      if grep -q '"data"' /tmp/llm_phase6_subscription_plans.json; then
        pass "API 返回套餐数据载荷"
      else
        fail "套餐 API 返回体缺少 data 字段"
      fi
    else
      fail "API /api/v1/subscription-plans 请求失败"
    fi
  else
    # Collapse the server log into one line for the failure message.
    details="$(tr '\n' ' ' <"$SERVER_LOG" | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')"
    set_root_cause_once "api_server_start_failure" "api_server" "API Server 启动失败"
    fail "API Server 启动失败 (${details:-no server log})"
  fi
else
  # Collapse the build errors into one line for the failure message.
  details="$(tr '\n' ' ' </tmp/llm_phase6_server_build.err | sed 's/[[:space:]]\+/ /g' | sed 's/ $//')"
  set_root_cause_once "api_server_build_failure" "api_server" "API Server 构建失败"
  fail "API Server 构建失败 (${details:-unknown build error})"
fi
# The release-semantics and blocker-switch summaries do not depend on the API
# server, so they are printed on every run, including runs where the server
# failed to build or start.
printf 'RELEASE_SEMANTICS class=%s gate=%s policy=%s\n' "$RELEASE_SEMANTICS_CLASS" "$RELEASE_SEMANTICS_GATE" "$RELEASE_SEMANTICS_POLICY"
printf 'BLOCKER_SWITCH class=%s old=%s new=%s\n' "$BLOCKER_SWITCH_CLASS" "$BLOCKER_SWITCH_OLD" "$BLOCKER_SWITCH_NEW"
# ---------------------------------------------------------------------------
# Gate sequence, part 3: documentation, frontend and secret-gate checks, then
# the root-cause summary line.
# ---------------------------------------------------------------------------
check_shell "Phase 6 性能文档存在" "test -f docs/PERFORMANCE_TEST.md"
# Frontend test output is captured in /tmp files rather than printed.
check_shell "前端已具备测试入口" "cd frontend && npm run test -- --run >/tmp/llm_phase6_frontend_test.out 2>/tmp/llm_phase6_frontend_test.err"
check_shell "secret gate 独立测试通过" "bash scripts/secret_gate_test.sh"
# Always printed; the values stay "none" when no gate recorded a root cause.
printf 'ROOT_CAUSE class=%s source=%s summary=%s\n' "$ROOT_CAUSE_CLASS" "$ROOT_CAUSE_SOURCE" "$ROOT_CAUSE_SUMMARY"
# NOTE(review): finish_phase is not defined in this file; presumably it prints
# the overall tally and sets the exit status - confirm in verify_common.sh.
finish_phase