feat: close v3 slo gates and lifecycle metrics
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
# Prometheus Alerting Rules for sub2api-cn-relay-manager
|
||||
# Place this file in your Prometheus rules directory
|
||||
# Aligned with current vNext.3 metrics semantics (2026-06-08)
|
||||
|
||||
groups:
|
||||
- name: sub2api-relay-manager-alerts
|
||||
interval: 30s
|
||||
rules:
|
||||
# 服务可用性告警
|
||||
- alert: ServiceDown
|
||||
expr: up{job="sub2api-relay-manager"} == 0
|
||||
for: 1m
|
||||
@@ -16,53 +15,110 @@ groups:
|
||||
summary: "sub2api-relay-manager service is down"
|
||||
description: "The sub2api-relay-manager service has been down for more than 1 minute."
|
||||
|
||||
# HTTP错误率告警
|
||||
- alert: HighErrorRate
|
||||
expr: |
|
||||
(
|
||||
sum(rate(http_requests_total{status=~"5..|4.."}[5m]))
|
||||
/
|
||||
sum(rate(http_requests_total[5m]))
|
||||
sum(rate(http_requests_total{status=~"4..|5.."}[5m]))
|
||||
/
|
||||
clamp_min(sum(rate(http_requests_total[5m])), 0.001)
|
||||
) > 0.05
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "High error rate detected"
|
||||
description: "Error rate is above 5% for more than 2 minutes. Current value: {{ $value | humanizePercentage }}"
|
||||
summary: "High HTTP error rate detected"
|
||||
description: "HTTP 4xx/5xx error rate is above 5% for more than 2 minutes."
|
||||
|
||||
# 请求延迟告警
|
||||
- alert: HighLatency
|
||||
expr: |
|
||||
histogram_quantile(0.95,
|
||||
sum(rate(http_request_duration_seconds_bucket[5m])) by (le)
|
||||
) > 1.0
|
||||
for: 3m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "High request latency"
|
||||
description: "95th percentile latency is above 1 second for more than 3 minutes."
|
||||
|
||||
# 路由故障转移告警
|
||||
- alert: RouteFailoverSpike
|
||||
- alert: UserKeyChatSuccessRateLow
|
||||
expr: |
|
||||
(
|
||||
rate(route_failovers_total[5m])
|
||||
>
|
||||
2 * avg_over_time(rate(route_failovers_total[1h])[1h:5m])
|
||||
)
|
||||
for: 1m
|
||||
sum(rate(user_key_chat_requests_total{result="ok"}[10m]))
|
||||
/
|
||||
clamp_min(sum(rate(user_key_chat_requests_total[10m])), 0.001)
|
||||
) < 0.95
|
||||
and sum(rate(user_key_chat_requests_total[10m])) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "User-key chat success rate below SLO"
|
||||
description: "Recent user-key chat success rate is below 95% for 10 minutes."
|
||||
|
||||
- alert: UserKeyChatP95LatencyHigh
|
||||
expr: |
|
||||
histogram_quantile(0.95,
|
||||
sum(rate(http_request_duration_seconds_bucket{path="/v1/chat/completions"}[10m])) by (le)
|
||||
) > 5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "Route failover spike detected"
|
||||
description: "Route failovers have spiked above normal levels. Current rate: {{ $value }}"
|
||||
summary: "User-key chat P95 latency is high"
|
||||
description: "P95 latency for /v1/chat/completions exceeds 5 seconds for 10 minutes."
|
||||
|
||||
- alert: UserKeyCreateFailures
|
||||
expr: |
|
||||
sum(rate(user_key_operations_total{operation="create",result!~"success|rate_limited"}[10m])) > 0.02
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "User-key create failures detected"
|
||||
description: "Non-rate-limit create failures are occurring on the self-service path."
|
||||
|
||||
- alert: UserKeyResetFailures
|
||||
expr: |
|
||||
sum(rate(user_key_operations_total{operation="reset",result!~"success|rate_limited"}[10m])) > 0.02
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "User-key reset failures detected"
|
||||
description: "Non-rate-limit reset failures are occurring on the self-service path."
|
||||
|
||||
- alert: UserKeyQuotaExhaustedSpike
|
||||
expr: |
|
||||
sum(rate(user_key_chat_requests_total{result="quota_exhausted"}[10m])) > 0.05
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "Quota exhausted events are rising"
|
||||
description: "quota_exhausted responses are rising on the public user-key gateway path."
|
||||
|
||||
- alert: UserKeyAuthFailuresSpike
|
||||
expr: |
|
||||
sum(rate(user_key_chat_requests_total{result=~"unauthorized|invalid_api_key"}[10m])) > 0.05
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
team: security
|
||||
annotations:
|
||||
summary: "User-key auth failures are rising"
|
||||
description: "unauthorized/invalid_api_key outcomes are rising on the public gateway path."
|
||||
|
||||
- alert: RouteFailoverShareHigh
|
||||
expr: |
|
||||
(
|
||||
sum(rate(route_decisions_total{status="failover"}[10m]))
|
||||
/
|
||||
clamp_min(sum(rate(route_decisions_total[10m])), 0.001)
|
||||
) > 0.20
|
||||
and sum(rate(route_decisions_total[10m])) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "Route failover share is high"
|
||||
description: "More than 20% of recent route decisions are failovers."
|
||||
|
||||
# 活跃Provider数量告警
|
||||
- alert: NoActiveProviders
|
||||
expr: active_providers == 0
|
||||
for: 1m
|
||||
@@ -73,17 +129,6 @@ groups:
|
||||
summary: "No active providers"
|
||||
description: "There are no active providers configured. The system cannot route requests."
|
||||
|
||||
- alert: LowActiveProviders
|
||||
expr: active_providers < 2
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "Low number of active providers"
|
||||
description: "Only {{ $value }} active provider(s) detected. Consider adding more for redundancy."
|
||||
|
||||
# 活跃Host告警
|
||||
- alert: NoActiveHosts
|
||||
expr: active_hosts == 0
|
||||
for: 1m
|
||||
@@ -94,31 +139,6 @@ groups:
|
||||
summary: "No active hosts"
|
||||
description: "There are no active hosts. The system cannot import providers."
|
||||
|
||||
# 数据库连接告警
|
||||
- alert: HighDBConnections
|
||||
expr: db_connections_active > 50
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "High database connection count"
|
||||
description: "Active DB connections: {{ $value }}. Consider connection pool tuning."
|
||||
|
||||
# 数据库操作错误告警
|
||||
- alert: DBOperationErrors
|
||||
expr: |
|
||||
rate(db_operations_total{operation=~"INSERT|UPDATE|DELETE"}[5m])
|
||||
> 100
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "High database write rate"
|
||||
description: "DB write operations are above threshold: {{ $value }} ops/sec"
|
||||
|
||||
# 日志系统告警
|
||||
- alert: LogFlushErrors
|
||||
expr: rate(log_flush_errors_total[5m]) > 0
|
||||
for: 1m
|
||||
@@ -130,48 +150,17 @@ groups:
|
||||
description: "Log flush errors have been detected. Check log storage/backend."
|
||||
|
||||
- alert: LogDroppedEvents
|
||||
expr: |
|
||||
rate(log_dropped_events_total[5m]) > 10
|
||||
expr: rate(log_dropped_events_total[5m]) > 10
|
||||
for: 1m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "Log events being dropped"
|
||||
description: "Log events are being dropped at {{ $value }} events/sec. Check log buffer capacity."
|
||||
description: "Log events are being dropped. Check log buffer capacity."
|
||||
|
||||
# 批处理导入告警
|
||||
- alert: BatchImportFailures
|
||||
expr: |
|
||||
(
|
||||
rate(route_decisions_total{status="failed"}[5m])
|
||||
/
|
||||
rate(route_decisions_total[5m])
|
||||
) > 0.1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
team: ops
|
||||
annotations:
|
||||
summary: "High batch import failure rate"
|
||||
description: "Batch import failure rate is above 10%. Check provider configurations."
|
||||
|
||||
# API认证失败告警
|
||||
- alert: AuthFailures
|
||||
expr: |
|
||||
rate(http_requests_total{status="401"}[5m]) > 10
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
team: security
|
||||
annotations:
|
||||
summary: "High authentication failure rate"
|
||||
description: "Auth failures detected. Possible credential issues or attacks."
|
||||
|
||||
# 健康检查告警
|
||||
- alert: HealthCheckFailing
|
||||
expr: |
|
||||
http_requests_total{path="/healthz",status!="200"} > 0
|
||||
expr: http_requests_total{path="/healthz",status!="200"} > 0
|
||||
for: 30s
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# SLO and Observability
|
||||
|
||||
日期:2026-06-04
|
||||
状态:待审核
|
||||
状态:已落地(V3-2)
|
||||
适用版本:vNext.3
|
||||
|
||||
## 目的
|
||||
@@ -107,3 +107,50 @@ vNext.3 引入正式发布门禁前,至少要求:
|
||||
|
||||
本文件属于 vNext.3 设计文档。
|
||||
当前 vNext.1 不进入实现,但必须在规划阶段明确其后续必备性,避免将来“功能可用但不可运营”。
|
||||
|
||||
## 2026-06-08 当前落地口径
|
||||
|
||||
### 已落地指标
|
||||
|
||||
- `http_requests_total{method,path,status}`
|
||||
- `status` 使用数值字符串(如 `200/403/500`)
|
||||
- `path` 优先使用 `r.Pattern`,避免动态 path 高基数
|
||||
- `http_request_duration_seconds{method,path}`
|
||||
- `route_decisions_total{logical_group,status}`
|
||||
- `status in (sticky_hit, bind, fallback, failover)`
|
||||
- `route_failovers_total`
|
||||
- `user_key_operations_total{operation,result}`
|
||||
- `operation in (create, reset, pause, resume, delete)`
|
||||
- `result` 已覆盖 `success / unauthorized / bad_request / rate_limited / open_store_error / get_key_error / not_found / rate_limit_store_error / resolve_host_error / resolve_shadow_group_error / ensure_access_error / pause_access_error / resume_access_error / db_tx_error`
|
||||
- `user_key_chat_requests_total{result}`
|
||||
- `result in (ok, unauthorized, invalid_api_key, key_paused, key_retired, quota_exhausted, bad_request, db_error, proxy_error)`
|
||||
|
||||
### 已落地告警规则
|
||||
|
||||
文件:`deploy/monitoring/prometheus-rules.yml`
|
||||
|
||||
- `HighErrorRate`
|
||||
- `UserKeyChatSuccessRateLow`
|
||||
- `UserKeyChatP95LatencyHigh`
|
||||
- `UserKeyCreateFailures`
|
||||
- `UserKeyResetFailures`
|
||||
- `UserKeyQuotaExhaustedSpike`
|
||||
- `UserKeyAuthFailuresSpike`
|
||||
- `RouteFailoverShareHigh`
|
||||
- `NoActiveProviders`
|
||||
- `NoActiveHosts`
|
||||
- `LogFlushErrors`
|
||||
- `LogDroppedEvents`
|
||||
- `HealthCheckFailing`
|
||||
|
||||
### 已落地发布门禁
|
||||
|
||||
脚本:`scripts/test/verify_vnext_slo_release_gate.sh`
|
||||
|
||||
门禁要求:
|
||||
|
||||
1. 核心 source-of-truth 文档存在
|
||||
2. metrics 接线与代码口径一致
|
||||
3. Prometheus 规则引用真实存在的指标与标签
|
||||
4. live governance artifact 满足 create `201` → chat `200` → pause `200` → chat-paused `403 key_paused` → resume `200` → chat `200` → delete `200`
|
||||
5. `docs/EXECUTION_BOARD.md` 已同步 V3-2 完成态
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
## 一、先说结论
|
||||
|
||||
当前状态:条件完成(全量 vNext)
|
||||
当前状态:完成(全量 vNext)
|
||||
|
||||
说明:
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
- vNext.2 已完成 V2-4 + V2-5:key self-service API、portal key 管理 UI、用户 portal reset 后首次调用 200 真实线上闭环。
|
||||
- vNext.3 已完成 V3-1:key/account governance 的公网 create→chat→pause→chat-paused→resume→chat-resumed 真验闭环。
|
||||
- vNext.3 / V3-2 已启动首批 SLO/观测最小闭环:HTTP metrics route pattern 归一化、route resolve/failover 指标接线、user-key lifecycle/chat outcome 指标接线与回归测试已完成。
|
||||
- 仍未完成的是更宽泛的后续治理/SLO 扩展范围;因此按“当前 CRM 网关路线”口径已完成,按“全量 vNext 后续扩展全部做完”口径仍是条件完成。
|
||||
- V3-2 的失败路径指标细化、Prometheus 告警规则、V3-2 release gate 已全部补齐;当前 CRM 网关路线与全量 vNext 后续扩展口径现已统一为完成。
|
||||
|
||||
## 二、5 个核心问题 Checklist(全量 vNext 目标)
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
| 2. 同模型多供应商池化 | 模型池抽象 + 映射 + 真实池化验收 | vNext.1 已闭环 | `model_pool.go`、pool 测试、真实验收脚本已存在 |
|
||||
| 3. 插件前端承接用户弱能力 | Portal 能承接用户信息、模型、示例、key 信息 | V2-5 已完成 | `PORTAL_KEY_EXPERIENCE.md`、`deploy/tksea-portal/index.html`、`artifacts/portal-ui-v25/20260606_1009/99-summary.json` |
|
||||
| 4. 插件生成/申请 key 并交付 base URL/model/curl 示例 | key self-service API + 首次调用 200 闭环 | V2-4/V2-5 已完成 | `KEY_SELF_SERVICE_API.md`、`verify_user_key_self_service.sh`、`artifacts/user-key-self-service/20260605_195408/99-summary.json`、`artifacts/portal-ui-v25/20260606_1009/99-summary.json` |
|
||||
| 5. key / 账号暂停、恢复、限额治理 | 三态模型 + 管理页动作 + 真实治理验收 | V3-1 已闭环 / V3-2 进行中 | `KEY_ACCOUNT_GOVERNANCE.md`、`artifacts/v3-governance-live/20260608_102323/99-summary.json`、`internal/metrics/metrics.go`、`internal/app/public_chat_metrics_test.go` |
|
||||
| 5. key / 账号暂停、恢复、限额治理 | 三态模型 + 管理页动作 + 真实治理验收 | V3-1/V3-2 已闭环 | `KEY_ACCOUNT_GOVERNANCE.md`、`artifacts/v3-governance-live/20260608_102323/99-summary.json`、`internal/metrics/metrics.go`、`internal/app/public_chat_metrics_test.go`、`scripts/test/verify_vnext_slo_release_gate.sh` |
|
||||
|
||||
## 三、vNext.1 发布范围 Checklist
|
||||
|
||||
@@ -96,19 +96,22 @@
|
||||
- Task 4.1 状态模型与治理语义:已实现并接线到 CRM 网关 `POST /v1/chat/completions`
|
||||
- Task 4.2 管理页治理动作:已实现(pause / resume 同步宿主 managed user `allowed_groups`)
|
||||
- Task 4.3 真实治理验收:已完成,见 `artifacts/v3-governance-live/20260608_102323/99-summary.json`
|
||||
- Task 4.4 SLO / 观测最小闭环(第一批):已完成首批接线
|
||||
- `internal/metrics/metrics.go` 新增 `user_key_operations_total`、`user_key_chat_requests_total`
|
||||
- Task 4.4 SLO / 观测闭环:已完成
|
||||
- `internal/metrics/metrics.go` 新增 `user_key_operations_total`、`user_key_chat_requests_total`,并把 HTTP status label 收口为数值字符串
|
||||
- `http_requests_total` 优先使用 `r.Pattern`,避免动态 path 高基数
|
||||
- route resolve / failover、user-key self-service、public chat outcome 已接指标并补回归测试
|
||||
- user-key lifecycle 失败路径指标已补齐到 create/reset/pause/resume/delete
|
||||
- `deploy/monitoring/prometheus-rules.yml` 已按当前指标口径重写
|
||||
- `scripts/test/verify_vnext_slo_release_gate.sh` 已落地并纳入总质量门
|
||||
|
||||
状态:V3-1 已闭环;V3-2 首批 SLO/观测接线已完成,剩余治理/SLO 扩展项继续推进
|
||||
状态:V3-1 / V3-2 已闭环
|
||||
|
||||
### Phase 5
|
||||
|
||||
- Task 5.1 默认链路准入规则:vNext.1 已闭环
|
||||
- Task 5.2 多层验证:vNext.1 + V2-4 当前均已有真实 artifact
|
||||
|
||||
状态:部分完成(整体 vNext 仍未完成)
|
||||
状态:完成(全量 vNext 已完成)
|
||||
|
||||
## 五、当前缺失文件 / 脚本 / 测试(按真实存在性校对)
|
||||
|
||||
@@ -120,6 +123,9 @@
|
||||
- `scripts/acceptance/verify_host_protocol_matrix.sh` — 已存在
|
||||
- `scripts/acceptance/verify_user_key_self_service.sh` — 已从 skeleton 升级为真实验收脚本
|
||||
- `internal/app/key_self_service_test.go` — 已存在
|
||||
- `internal/app/user_key_operation_metrics_test.go` — 已存在
|
||||
- `scripts/test/verify_vnext_slo_release_gate.sh` — 已存在
|
||||
- `deploy/monitoring/prometheus-rules.yml` — 已按 V3-2 指标口径重写
|
||||
|
||||
### vNext.2 尚缺
|
||||
|
||||
@@ -145,16 +151,15 @@
|
||||
|
||||
### 立即执行:收尾与同步
|
||||
|
||||
1. 已完成 V3-1 公网真验闭环:create 201 → chat 200 → pause 200 → chat-paused 403 → resume 200 → chat 200
|
||||
2. 已确认 2026-06-06 的“pause 后仍 200”并非宿主 cache TTL,而是公网 `/v1/chat/completions` 当时仍走宿主、且 CRM `hosts.auth_token` 已过期
|
||||
3. 已补 remote43 nginx 精确路由与 host bearer 刷新;仓库同步更新部署脚本/示例 nginx
|
||||
4. 下一步仅剩文档、commit、push 与后续 SLO 范围推进
|
||||
1. 运行完整质量门并确认 worktree clean
|
||||
2. commit + push 本轮 V3-2 收尾变更
|
||||
3. 更新任务真相源为 completed
|
||||
|
||||
## 八、当前判定(唯一有效口径)
|
||||
|
||||
- 按 vNext.1 发布范围:**完成**
|
||||
- 按 vNext.2 当前执行项:**完成**(V2-4 + V2-5 已真实闭环)
|
||||
- 按全量 vNext 规划:**条件完成**(V3-1 核心代码+测试+线上真验已闭环;剩余仅是后续治理/SLO 扩展项,不再阻塞当前 CRM 网关路线)
|
||||
- 按全量 vNext 规划:**完成**(V2-4 / V2-5 / V3-1 / V3-2 均已完成代码、门禁、文档与真实证据闭环)
|
||||
- 当前结论:
|
||||
- V2-4 / V2-5 / V3-1 已真实闭环,可提交/推送
|
||||
- 若要宣告“全量 vNext 所有后续扩展都完成”,还需单独定义并交付 V3-2/SLO 范围
|
||||
- 全量 vNext 后续扩展已完成
|
||||
- 当前仅剩本轮变更的 commit / push 收尾动作
|
||||
|
||||
@@ -129,18 +129,20 @@
|
||||
|
||||
- 目标:先把现有 CRM 网关与 user-key 自助链路接成可观测真相源,而不是停留在“有 /metrics 端点但关键路径不产生日志/指标”。
|
||||
- 本轮代码接线:
|
||||
- `internal/metrics/metrics.go`:新增 `user_key_operations_total`、`user_key_chat_requests_total`;HTTP metrics 优先使用 `r.Pattern`,避免动态 path 高基数
|
||||
- `internal/app/route_resolve_api.go`:resolve / failover 接入 route metrics
|
||||
- `internal/app/key_self_service_svc.go`:create/reset/pause/resume/delete success metrics 接线
|
||||
- `internal/metrics/metrics.go`:`user_key_operations_total`、`user_key_chat_requests_total` 已接线;HTTP status label 改为数值字符串,HTTP path 优先使用 `r.Pattern`,避免动态 path 高基数
|
||||
- `internal/app/route_resolve_api.go`:route decision 语义收口为 `sticky_hit / bind / fallback / failover`,failover 不再和 fallback 混成单一状态
|
||||
- `internal/app/key_self_service_svc.go`:create/reset/pause/resume/delete 不只记录 success,还补齐 `open_store_error / get_key_error / not_found / rate_limit_store_error / resolve_host_error / resolve_shadow_group_error / ensure_access_error / pause_access_error / resume_access_error / db_tx_error` 等失败路径指标
|
||||
- `internal/app/http_api.go`:`/v1/chat/completions` 接入 `unauthorized / invalid_api_key / key_paused / key_retired / quota_exhausted / bad_request / db_error / proxy_error / ok` outcome metrics
|
||||
- `internal/app/public_chat_metrics_test.go`:新增 quota_exhausted 与 route pattern 回归测试
|
||||
- `deploy/monitoring/prometheus-rules.yml`:已按当前真实指标口径重写为 `UserKeyChatSuccessRateLow / UserKeyChatP95LatencyHigh / UserKeyCreateFailures / UserKeyResetFailures / UserKeyQuotaExhaustedSpike / UserKeyAuthFailuresSpike / RouteFailoverShareHigh` 等告警规则
|
||||
- `scripts/test/verify_vnext_slo_release_gate.sh`:新增 V3-2 发布门禁脚本,并已接入 `scripts/test/verify_quality_gates.sh`
|
||||
- 本轮门禁:
|
||||
- `go test ./internal/app ./internal/metrics -count=1` → PASS
|
||||
- `go test ./tests/integration/... -count=1` → PASS
|
||||
- `go vet ./...` → PASS
|
||||
- `go test -cover ./internal/...` → PASS(核心包 `access/provision/pack` 均 ≥ 70%)
|
||||
- `bash ./scripts/test/verify_vnext_slo_release_gate.sh` → PASS(校验 metrics 接线 / 告警规则 / live governance artifact / 文档口径)
|
||||
- 当前结论:
|
||||
- `部分闭环` —— 首批 SLO/观测接线已完成并过门禁;更宽泛的治理/SLO 扩展(失败路径细化、告警/发布门禁)继续推进
|
||||
- `已闭环` —— V3-2 的失败路径细化、告警规则、发布门禁均已落地;全量 vNext 后续扩展已收口到可验证完成态
|
||||
|
||||
- portal key 管理 UI 已完成实现、部署和真实公网验收:
|
||||
- 关键代码:
|
||||
|
||||
275
internal/app/key_self_service_handlers_test.go
Normal file
275
internal/app/key_self_service_handlers_test.go
Normal file
@@ -0,0 +1,275 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGeneratePlaintextKeyAndExtractSubjectID(t *testing.T) {
|
||||
t.Parallel()
|
||||
plaintext, fingerprint := generatePlaintextKey()
|
||||
if !strings.HasPrefix(plaintext, "sk-") {
|
||||
t.Fatalf("plaintext = %q, want sk- prefix", plaintext)
|
||||
}
|
||||
if !strings.HasPrefix(fingerprint, "sha256:") {
|
||||
t.Fatalf("fingerprint = %q, want sha256 prefix", fingerprint)
|
||||
}
|
||||
|
||||
h := &UserKeyHandler{}
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/keys", nil)
|
||||
req.Header.Set("Authorization", "Bearer abcdefgh12345678")
|
||||
subjectID, httpErr := h.extractSubjectID(req)
|
||||
if httpErr != nil {
|
||||
t.Fatalf("extractSubjectID() unexpected error: %+v", httpErr)
|
||||
}
|
||||
if subjectID != "skeleton_user_abcdefgh" {
|
||||
t.Fatalf("subjectID = %q, want skeleton_user_abcdefgh", subjectID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUserKeyListNotImplemented(t *testing.T) {
|
||||
t.Parallel()
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/keys", nil)
|
||||
rr := httptest.NewRecorder()
|
||||
serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) {
|
||||
handleListUserKeys(w, r, nil)
|
||||
})
|
||||
if rr.Code != http.StatusNotImplemented {
|
||||
t.Fatalf("status = %d, want 501 body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUserKeyListSuccess(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := &UserKeyHandler{
|
||||
listFn: func(ctx context.Context, subjectID string) ([]UserKeyMeta, error) {
|
||||
if subjectID != "portal-user:1" {
|
||||
t.Fatalf("subjectID = %q, want portal-user:1", subjectID)
|
||||
}
|
||||
return []UserKeyMeta{{KeyID: "key_1", AdminStatus: "active"}}, nil
|
||||
},
|
||||
}
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/keys", nil)
|
||||
req.Header.Set("X-Portal-Subject", "portal-user:1")
|
||||
rr := httptest.NewRecorder()
|
||||
serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) {
|
||||
handleListUserKeys(w, r, h)
|
||||
})
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("status = %d, want 200 body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
if !strings.Contains(rr.Body.String(), "key_1") {
|
||||
t.Fatalf("body missing key_1: %s", rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleGetUserKeyMissingKeyID(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := &UserKeyHandler{getFn: func(context.Context, string, string) (UserKeyMeta, error) {
|
||||
t.Fatal("getFn should not be called when key_id is missing")
|
||||
return UserKeyMeta{}, nil
|
||||
}}
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/keys/", nil)
|
||||
req.Header.Set("X-Portal-Subject", "portal-user:1")
|
||||
rr := httptest.NewRecorder()
|
||||
serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) {
|
||||
handleGetUserKey(w, r, h)
|
||||
})
|
||||
if rr.Code != http.StatusBadRequest {
|
||||
t.Fatalf("status = %d, want 400 body=%s", rr.Code, rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUserKeyMutationHandlers(t *testing.T) {
|
||||
t.Parallel()
|
||||
meta := UserKeyMeta{KeyID: "key_1", MaskedPreview: "sk-****1234", AdminStatus: "active"}
|
||||
cases := []struct {
|
||||
name string
|
||||
method string
|
||||
path string
|
||||
handlerFn func(http.ResponseWriter, *http.Request, *UserKeyHandler)
|
||||
userHandler *UserKeyHandler
|
||||
wantStatus int
|
||||
wantBody string
|
||||
}{
|
||||
{
|
||||
name: "get-success",
|
||||
method: http.MethodGet,
|
||||
path: "/api/keys/key_1",
|
||||
handlerFn: handleGetUserKey,
|
||||
userHandler: &UserKeyHandler{getFn: func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) {
|
||||
if keyID != "key_1" || subjectID != "portal-user:1" {
|
||||
t.Fatalf("getFn args = (%q,%q)", keyID, subjectID)
|
||||
}
|
||||
return meta, nil
|
||||
}},
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "key_1",
|
||||
},
|
||||
{
|
||||
name: "reset-success",
|
||||
method: http.MethodPost,
|
||||
path: "/api/keys/key_1/reset",
|
||||
handlerFn: handleResetUserKey,
|
||||
userHandler: &UserKeyHandler{resetFn: func(ctx context.Context, keyID, subjectID string) (ResetUserKeyResponse, error) {
|
||||
if keyID != "key_1" || subjectID != "portal-user:1" {
|
||||
t.Fatalf("resetFn args = (%q,%q)", keyID, subjectID)
|
||||
}
|
||||
return ResetUserKeyResponse{PlaintextKey: "sk-new", MaskedPreview: "sk-****new", AdminStatus: "active"}, nil
|
||||
}},
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "sk-new",
|
||||
},
|
||||
{
|
||||
name: "pause-success",
|
||||
method: http.MethodPost,
|
||||
path: "/api/keys/key_1/pause",
|
||||
handlerFn: handlePauseUserKey,
|
||||
userHandler: &UserKeyHandler{pauseFn: func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) {
|
||||
if keyID != "key_1" || subjectID != "portal-user:1" || reason != "" {
|
||||
t.Fatalf("pauseFn args = (%q,%q,%q)", keyID, subjectID, reason)
|
||||
}
|
||||
paused := meta
|
||||
paused.AdminStatus = "paused"
|
||||
return paused, nil
|
||||
}},
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "paused",
|
||||
},
|
||||
{
|
||||
name: "resume-success",
|
||||
method: http.MethodPost,
|
||||
path: "/api/keys/key_1/resume",
|
||||
handlerFn: handleResumeUserKey,
|
||||
userHandler: &UserKeyHandler{resumeFn: func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) {
|
||||
if keyID != "key_1" || subjectID != "portal-user:1" {
|
||||
t.Fatalf("resumeFn args = (%q,%q)", keyID, subjectID)
|
||||
}
|
||||
return meta, nil
|
||||
}},
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "active",
|
||||
},
|
||||
{
|
||||
name: "delete-success",
|
||||
method: http.MethodDelete,
|
||||
path: "/api/keys/key_1",
|
||||
handlerFn: handleDeleteUserKey,
|
||||
userHandler: &UserKeyHandler{deleteFn: func(ctx context.Context, keyID, subjectID string) error {
|
||||
if keyID != "key_1" || subjectID != "portal-user:1" {
|
||||
t.Fatalf("deleteFn args = (%q,%q)", keyID, subjectID)
|
||||
}
|
||||
return nil
|
||||
}},
|
||||
wantStatus: http.StatusOK,
|
||||
wantBody: "deleted",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(tc.method, tc.path, nil)
|
||||
req.Header.Set("X-Portal-Subject", "portal-user:1")
|
||||
req.SetPathValue("key_id", "key_1")
|
||||
rr := httptest.NewRecorder()
|
||||
serveWithMetrics(t, req, rr, func(w http.ResponseWriter, r *http.Request) {
|
||||
tc.handlerFn(w, r, tc.userHandler)
|
||||
})
|
||||
if rr.Code != tc.wantStatus {
|
||||
t.Fatalf("status = %d, want %d body=%s", rr.Code, tc.wantStatus, rr.Body.String())
|
||||
}
|
||||
if !strings.Contains(rr.Body.String(), tc.wantBody) {
|
||||
t.Fatalf("body missing %q: %s", tc.wantBody, rr.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func serveWithMetrics(t *testing.T, req *http.Request, rr *httptest.ResponseRecorder, fn func(http.ResponseWriter, *http.Request)) {
|
||||
t.Helper()
|
||||
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
fn(w, r)
|
||||
}).ServeHTTP(rr, req)
|
||||
}
|
||||
|
||||
func TestHandleListUserKeysResponseShape(t *testing.T) {
|
||||
t.Parallel()
|
||||
h := &UserKeyHandler{listFn: func(context.Context, string) ([]UserKeyMeta, error) {
|
||||
return []UserKeyMeta{{KeyID: "key_json", AdminStatus: "active"}}, nil
|
||||
}}
|
||||
req := httptest.NewRequest(http.MethodGet, "/api/keys", nil)
|
||||
req.Header.Set("X-Portal-Subject", "portal-user:json")
|
||||
rr := httptest.NewRecorder()
|
||||
handleListUserKeys(rr, req, h)
|
||||
var payload struct {
|
||||
Keys []UserKeyMeta `json:"keys"`
|
||||
}
|
||||
if err := json.Unmarshal(rr.Body.Bytes(), &payload); err != nil {
|
||||
t.Fatalf("decode response: %v", err)
|
||||
}
|
||||
if len(payload.Keys) != 1 || payload.Keys[0].KeyID != "key_json" {
|
||||
t.Fatalf("payload = %+v, want one key_json entry", payload)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHandleUserKeyMutationHandlersErrorPaths(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
handlerFn func(http.ResponseWriter, *http.Request, *UserKeyHandler)
|
||||
userHandler *UserKeyHandler
|
||||
wantStatus int
|
||||
}{
|
||||
{
|
||||
name: "reset-not-found",
|
||||
handlerFn: handleResetUserKey,
|
||||
userHandler: &UserKeyHandler{resetFn: func(context.Context, string, string) (ResetUserKeyResponse, error) {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("key %q not found", "key_1")
|
||||
}},
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
{
|
||||
name: "pause-not-found",
|
||||
handlerFn: handlePauseUserKey,
|
||||
userHandler: &UserKeyHandler{pauseFn: func(context.Context, string, string, string) (UserKeyMeta, error) {
|
||||
return UserKeyMeta{}, fmt.Errorf("key %q not found", "key_1")
|
||||
}},
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
{
|
||||
name: "resume-not-found",
|
||||
handlerFn: handleResumeUserKey,
|
||||
userHandler: &UserKeyHandler{resumeFn: func(context.Context, string, string) (UserKeyMeta, error) {
|
||||
return UserKeyMeta{}, fmt.Errorf("key %q not found", "key_1")
|
||||
}},
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
{
|
||||
name: "delete-not-found",
|
||||
handlerFn: handleDeleteUserKey,
|
||||
userHandler: &UserKeyHandler{deleteFn: func(context.Context, string, string) error {
|
||||
return fmt.Errorf("key %q not found", "key_1")
|
||||
}},
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/keys/key_1", nil)
|
||||
req.Header.Set("X-Portal-Subject", "portal-user:1")
|
||||
req.SetPathValue("key_id", "key_1")
|
||||
rr := httptest.NewRecorder()
|
||||
tc.handlerFn(rr, req, tc.userHandler)
|
||||
if rr.Code != tc.wantStatus {
|
||||
t.Fatalf("status = %d, want %d body=%s", rr.Code, tc.wantStatus, rr.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -104,6 +104,11 @@ func ensureSubjectHasAccess(ctx context.Context, client *sub2api.Client, subject
|
||||
return apiKey, nil
|
||||
}
|
||||
|
||||
func recordUserKeyFailure(operation, result string, err error) error {
|
||||
metrics.RecordUserKeyOperation(operation, result)
|
||||
return err
|
||||
}
|
||||
|
||||
func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
return &UserKeyHandler{
|
||||
createFn: func(ctx context.Context, req CreateUserKeyRequest) (CreateUserKeyResponse, error) {
|
||||
@@ -117,14 +122,14 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
}
|
||||
store, err := sqlite.Open(ctx, sqliteDSN)
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, fmt.Errorf("open store: %w", err)
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "open_store_error", fmt.Errorf("open store: %w", err))
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
windowStart := time.Now().UTC().Format("2006-01-02T15:00:00Z")
|
||||
count, err := store.SubjectRateLimits().IncrementWindow(ctx, req.SubjectID, "create", windowStart)
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, fmt.Errorf("increment create rate limit: %w", err)
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "rate_limit_store_error", fmt.Errorf("increment create rate limit: %w", err))
|
||||
}
|
||||
if count > defaultKeyRateLimitPerHour {
|
||||
metrics.RecordUserKeyOperation("create", "rate_limited")
|
||||
@@ -134,15 +139,15 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
// Resolve logical group → host → group ID → ensure subscription access
|
||||
_, route, hostRow, client, err := resolveLogicalGroupHost(ctx, store, req.LogicalGroupID)
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, fmt.Errorf("resolve host for %q: %w", req.LogicalGroupID, err)
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "resolve_host_error", fmt.Errorf("resolve host for %q: %w", req.LogicalGroupID, err))
|
||||
}
|
||||
hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err)
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err))
|
||||
}
|
||||
apiKey, err := ensureSubjectHasAccess(ctx, client, req.SubjectID, hostGroupID)
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, fmt.Errorf("ensure access for %q: %w", req.LogicalGroupID, err)
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "ensure_access_error", fmt.Errorf("ensure access for %q: %w", req.LogicalGroupID, err))
|
||||
}
|
||||
|
||||
fingerprint := "sha256:" + sha256Hex(apiKey)
|
||||
@@ -177,7 +182,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return CreateUserKeyResponse{}, err
|
||||
return CreateUserKeyResponse{}, recordUserKeyFailure("create", "db_tx_error", err)
|
||||
}
|
||||
|
||||
metrics.RecordUserKeyOperation("create", "success")
|
||||
@@ -253,21 +258,21 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
resetFn: func(ctx context.Context, keyID, subjectID string) (ResetUserKeyResponse, error) {
|
||||
store, err := sqlite.Open(ctx, sqliteDSN)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("open store: %w", err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "open_store_error", fmt.Errorf("open store: %w", err))
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
rec, err := store.UserKeys().GetByID(ctx, keyID)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("get key: %w", err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "get_key_error", fmt.Errorf("get key: %w", err))
|
||||
}
|
||||
if rec.OwnerSubjectID != subjectID && subjectID != "admin" {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("key %q not found", keyID)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "not_found", fmt.Errorf("key %q not found", keyID))
|
||||
}
|
||||
windowStart := time.Now().UTC().Format("2006-01-02T00:00:00Z")
|
||||
count, err := store.SubjectRateLimits().IncrementWindow(ctx, subjectID, "reset", windowStart)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("increment reset rate limit: %w", err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "rate_limit_store_error", fmt.Errorf("increment reset rate limit: %w", err))
|
||||
}
|
||||
if count > defaultKeyResetPerDay {
|
||||
metrics.RecordUserKeyOperation("reset", "rate_limited")
|
||||
@@ -277,15 +282,15 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
// Re-resolve host access to get a fresh key
|
||||
_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("resolve host for %q: %w", rec.LogicalGroupID, err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "resolve_host_error", fmt.Errorf("resolve host for %q: %w", rec.LogicalGroupID, err))
|
||||
}
|
||||
hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for %q: %w", route.ShadowGroupID, err))
|
||||
}
|
||||
newPlaintext, err := ensureSubjectHasAccess(ctx, client, rec.OwnerSubjectID, hostGroupID)
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, fmt.Errorf("ensure access on reset for %q: %w", rec.LogicalGroupID, err)
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "ensure_access_error", fmt.Errorf("ensure access on reset for %q: %w", rec.LogicalGroupID, err))
|
||||
}
|
||||
|
||||
hostFingerprint := "sha256:" + sha256Hex(newPlaintext)
|
||||
@@ -309,7 +314,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return ResetUserKeyResponse{}, err
|
||||
return ResetUserKeyResponse{}, recordUserKeyFailure("reset", "db_tx_error", err)
|
||||
}
|
||||
metrics.RecordUserKeyOperation("reset", "success")
|
||||
return ResetUserKeyResponse{PlaintextKey: newPlaintext, MaskedPreview: masked, AdminStatus: "active"}, nil
|
||||
@@ -317,27 +322,27 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
pauseFn: func(ctx context.Context, keyID, subjectID, reason string) (UserKeyMeta, error) {
|
||||
store, err := sqlite.Open(ctx, sqliteDSN)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("open store: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "open_store_error", fmt.Errorf("open store: %w", err))
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
rec, err := store.UserKeys().GetByID(ctx, keyID)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("get key: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "get_key_error", fmt.Errorf("get key: %w", err))
|
||||
}
|
||||
if rec.OwnerSubjectID != subjectID && subjectID != "admin" {
|
||||
return UserKeyMeta{}, fmt.Errorf("key %q not found", keyID)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "not_found", fmt.Errorf("key %q not found", keyID))
|
||||
}
|
||||
_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("resolve host for pause %q: %w", rec.LogicalGroupID, err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "resolve_host_error", fmt.Errorf("resolve host for pause %q: %w", rec.LogicalGroupID, err))
|
||||
}
|
||||
hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("resolve shadow group id for pause %q: %w", route.ShadowGroupID, err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for pause %q: %w", route.ShadowGroupID, err))
|
||||
}
|
||||
if err := client.PauseManagedSubscriptionAccess(ctx, rec.OwnerSubjectID, hostGroupID); err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("pause managed subscription access: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "pause_access_error", fmt.Errorf("pause managed subscription access: %w", err))
|
||||
}
|
||||
err = store.WithTx(ctx, func(q *sqlite.Queries) error {
|
||||
if err := q.UserKeys.UpdateStatus(ctx, keyID, "paused"); err != nil {
|
||||
@@ -352,7 +357,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, err
|
||||
return UserKeyMeta{}, recordUserKeyFailure("pause", "db_tx_error", err)
|
||||
}
|
||||
metrics.RecordUserKeyOperation("pause", "success")
|
||||
return UserKeyMeta{KeyID: keyID, MaskedPreview: rec.MaskedPreview, AdminStatus: "paused"}, nil
|
||||
@@ -360,27 +365,27 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
resumeFn: func(ctx context.Context, keyID, subjectID string) (UserKeyMeta, error) {
|
||||
store, err := sqlite.Open(ctx, sqliteDSN)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("open store: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "open_store_error", fmt.Errorf("open store: %w", err))
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
rec, err := store.UserKeys().GetByID(ctx, keyID)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("get key: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "get_key_error", fmt.Errorf("get key: %w", err))
|
||||
}
|
||||
if rec.OwnerSubjectID != subjectID && subjectID != "admin" {
|
||||
return UserKeyMeta{}, fmt.Errorf("key %q not found", keyID)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "not_found", fmt.Errorf("key %q not found", keyID))
|
||||
}
|
||||
_, route, _, client, err := resolveLogicalGroupHost(ctx, store, rec.LogicalGroupID)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("resolve host for resume %q: %w", rec.LogicalGroupID, err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "resolve_host_error", fmt.Errorf("resolve host for resume %q: %w", rec.LogicalGroupID, err))
|
||||
}
|
||||
hostGroupID, err := resolveShadowHostGroupID(ctx, client, route)
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("resolve shadow group id for resume %q: %w", route.ShadowGroupID, err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "resolve_shadow_group_error", fmt.Errorf("resolve shadow group id for resume %q: %w", route.ShadowGroupID, err))
|
||||
}
|
||||
if err := client.ResumeManagedSubscriptionAccess(ctx, rec.OwnerSubjectID, hostGroupID); err != nil {
|
||||
return UserKeyMeta{}, fmt.Errorf("resume managed subscription access: %w", err)
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "resume_access_error", fmt.Errorf("resume managed subscription access: %w", err))
|
||||
}
|
||||
err = store.WithTx(ctx, func(q *sqlite.Queries) error {
|
||||
if err := q.UserKeys.UpdateStatus(ctx, keyID, "active"); err != nil {
|
||||
@@ -395,7 +400,7 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return UserKeyMeta{}, err
|
||||
return UserKeyMeta{}, recordUserKeyFailure("resume", "db_tx_error", err)
|
||||
}
|
||||
metrics.RecordUserKeyOperation("resume", "success")
|
||||
return UserKeyMeta{KeyID: keyID, MaskedPreview: rec.MaskedPreview, AdminStatus: "active"}, nil
|
||||
@@ -403,16 +408,16 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
deleteFn: func(ctx context.Context, keyID, subjectID string) error {
|
||||
store, err := sqlite.Open(ctx, sqliteDSN)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open store: %w", err)
|
||||
return recordUserKeyFailure("delete", "open_store_error", fmt.Errorf("open store: %w", err))
|
||||
}
|
||||
defer store.Close()
|
||||
|
||||
rec, err := store.UserKeys().GetByID(ctx, keyID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get key: %w", err)
|
||||
return recordUserKeyFailure("delete", "get_key_error", fmt.Errorf("get key: %w", err))
|
||||
}
|
||||
if rec.OwnerSubjectID != subjectID && subjectID != "admin" {
|
||||
return fmt.Errorf("key %q not found", keyID)
|
||||
return recordUserKeyFailure("delete", "not_found", fmt.Errorf("key %q not found", keyID))
|
||||
}
|
||||
err = store.WithTx(ctx, func(q *sqlite.Queries) error {
|
||||
if err := q.UserKeys.UpdateStatus(ctx, keyID, "retired"); err != nil {
|
||||
@@ -431,8 +436,9 @@ func buildUserKeyHandler(sqliteDSN string) *UserKeyHandler {
|
||||
})
|
||||
if err == nil {
|
||||
metrics.RecordUserKeyOperation("delete", "success")
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
return recordUserKeyFailure("delete", "db_tx_error", err)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -322,3 +322,151 @@ func TestUserKeyAPIMetricsMiddlewareAndCreateMetric(t *testing.T) {
|
||||
t.Fatal("expected metrics endpoint to expose user_key_operations_total after create validation failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserKeyPauseResumeDeleteLifecycleUpdatesHostAndStore(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
store := openAppTestStore(t)
|
||||
defer closeAppTestStore(t, store)
|
||||
|
||||
const logicalGroupID = "gpt-shared"
|
||||
const hostGroupID = "999"
|
||||
const subjectID = "portal-user:lifecycle"
|
||||
const keyID = "key_lifecycle"
|
||||
|
||||
var allowedGroupsUpdates [][]int64
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case r.Method == http.MethodGet && strings.HasPrefix(r.URL.RequestURI(), "/api/v1/admin/users?"):
|
||||
w.Write([]byte(`{"data":{"items":[{"id":84,"email":"` + expectedManagedIdentity(subjectID, hostGroupID).Email + `"}]}}`))
|
||||
case r.Method == http.MethodPut && r.URL.Path == "/api/v1/admin/users/84":
|
||||
var payload struct {
|
||||
AllowedGroups []int64 `json:"allowed_groups"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
t.Fatalf("decode update payload: %v", err)
|
||||
}
|
||||
allowedGroupsUpdates = append(allowedGroupsUpdates, append([]int64(nil), payload.AllowedGroups...))
|
||||
w.Write([]byte(`{"data":{"id":84}}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
_, _ = store.Hosts().Create(context.Background(), sqlite.Host{
|
||||
HostID: "test-host",
|
||||
BaseURL: server.URL,
|
||||
HostVersion: "0.0.1",
|
||||
CapabilityProbeJSON: "{}",
|
||||
AuthType: "apikey",
|
||||
AuthToken: "test-token",
|
||||
})
|
||||
_, _ = store.LogicalGroups().Create(context.Background(), sqlite.LogicalGroup{
|
||||
LogicalGroupID: logicalGroupID,
|
||||
DisplayName: "GPT Shared",
|
||||
Status: "active",
|
||||
})
|
||||
_, _ = store.LogicalGroupRoutes().Create(context.Background(), sqlite.LogicalGroupRoute{
|
||||
RouteID: "test-route",
|
||||
LogicalGroupID: logicalGroupID,
|
||||
Name: "Test Route",
|
||||
Status: "active",
|
||||
ShadowHostID: "test-host",
|
||||
ShadowGroupID: hostGroupID,
|
||||
})
|
||||
if _, err := store.UserKeys().Create(context.Background(), sqlite.UserKeyRecord{
|
||||
KeyID: keyID,
|
||||
OwnerSubjectID: subjectID,
|
||||
KeyFingerprint: "sha256:test",
|
||||
MaskedPreview: "sk-****test",
|
||||
DisplayName: "lifecycle key",
|
||||
LogicalGroupID: logicalGroupID,
|
||||
AllowedModels: []string{"gpt-5.4"},
|
||||
AdminStatus: "active",
|
||||
QuotaStatus: "ok",
|
||||
}); err != nil {
|
||||
t.Fatalf("UserKeys().Create() error = %v", err)
|
||||
}
|
||||
|
||||
handler := buildUserKeyHandler(appTestDSN(t, store))
|
||||
paused, err := handler.pauseFn(context.Background(), keyID, subjectID, "")
|
||||
if err != nil {
|
||||
t.Fatalf("pauseFn() error = %v", err)
|
||||
}
|
||||
if paused.AdminStatus != "paused" {
|
||||
t.Fatalf("pauseFn() admin_status = %q, want paused", paused.AdminStatus)
|
||||
}
|
||||
row, err := store.UserKeys().GetByID(context.Background(), keyID)
|
||||
if err != nil {
|
||||
t.Fatalf("UserKeys().GetByID() after pause error = %v", err)
|
||||
}
|
||||
if row.AdminStatus != "paused" {
|
||||
t.Fatalf("stored admin_status after pause = %q, want paused", row.AdminStatus)
|
||||
}
|
||||
|
||||
resumed, err := handler.resumeFn(context.Background(), keyID, subjectID)
|
||||
if err != nil {
|
||||
t.Fatalf("resumeFn() error = %v", err)
|
||||
}
|
||||
if resumed.AdminStatus != "active" {
|
||||
t.Fatalf("resumeFn() admin_status = %q, want active", resumed.AdminStatus)
|
||||
}
|
||||
row, err = store.UserKeys().GetByID(context.Background(), keyID)
|
||||
if err != nil {
|
||||
t.Fatalf("UserKeys().GetByID() after resume error = %v", err)
|
||||
}
|
||||
if row.AdminStatus != "active" {
|
||||
t.Fatalf("stored admin_status after resume = %q, want active", row.AdminStatus)
|
||||
}
|
||||
|
||||
if err := handler.deleteFn(context.Background(), keyID, subjectID); err != nil {
|
||||
t.Fatalf("deleteFn() error = %v", err)
|
||||
}
|
||||
row, err = store.UserKeys().GetByID(context.Background(), keyID)
|
||||
if err != nil {
|
||||
t.Fatalf("UserKeys().GetByID() after delete error = %v", err)
|
||||
}
|
||||
if row.AdminStatus != "retired" {
|
||||
t.Fatalf("stored admin_status after delete = %q, want retired", row.AdminStatus)
|
||||
}
|
||||
|
||||
if len(allowedGroupsUpdates) != 2 {
|
||||
t.Fatalf("allowedGroupsUpdates len = %d, want 2", len(allowedGroupsUpdates))
|
||||
}
|
||||
if len(allowedGroupsUpdates[0]) != 0 {
|
||||
t.Fatalf("pause allowed_groups = %#v, want empty", allowedGroupsUpdates[0])
|
||||
}
|
||||
if len(allowedGroupsUpdates[1]) != 1 || allowedGroupsUpdates[1][0] != 999 {
|
||||
t.Fatalf("resume allowed_groups = %#v, want [999]", allowedGroupsUpdates[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveShadowHostGroupIDByName(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch r.URL.Path {
|
||||
case "/api/v1/admin/groups":
|
||||
w.Write([]byte(`{"data":{"items":[{"id":321,"name":"group-by-name"}]}}`))
|
||||
case "/api/v1/admin/channels", "/api/v1/admin/payment/plans", "/api/v1/admin/accounts":
|
||||
w.Write([]byte(`{"data":{"items":[]}}`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
client, err := newSub2APIClient(server.URL, CreateHostAuth{Type: "apikey", Token: "test-token"})
|
||||
if err != nil {
|
||||
t.Fatalf("newSub2APIClient() error = %v", err)
|
||||
}
|
||||
|
||||
groupID, err := resolveShadowHostGroupID(context.Background(), client, sqlite.LogicalGroupRoute{ShadowGroupID: "group-by-name"})
|
||||
if err != nil {
|
||||
t.Fatalf("resolveShadowHostGroupID() error = %v", err)
|
||||
}
|
||||
if groupID != "321" {
|
||||
t.Fatalf("groupID = %q, want 321", groupID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -227,7 +227,9 @@ func buildResolveRouteAction(sqliteDSN string, stickyRuntime stickyStoreRuntime,
|
||||
}
|
||||
}
|
||||
decisionStatus := "bind"
|
||||
if selection.fallbackUsed {
|
||||
if selection.failoverFrom != nil {
|
||||
decisionStatus = "failover"
|
||||
} else if selection.fallbackUsed {
|
||||
decisionStatus = "fallback"
|
||||
}
|
||||
metrics.RecordRouteDecision(req.LogicalGroupID, decisionStatus)
|
||||
|
||||
@@ -222,6 +222,9 @@ func TestNewActionSetResolveRouteFlow(t *testing.T) {
|
||||
if !strings.Contains(body, "route_failovers_total") {
|
||||
t.Fatal("metrics missing route_failovers_total after fallback flow")
|
||||
}
|
||||
if !strings.Contains(body, `route_decisions_total{logical_group="gpt-shared",status="failover"}`) {
|
||||
t.Fatalf("metrics missing failover decision status after resolve flow: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveRouteHelpers(t *testing.T) {
|
||||
|
||||
55
internal/app/user_key_operation_metrics_test.go
Normal file
55
internal/app/user_key_operation_metrics_test.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"sub2api-cn-relay-manager/internal/metrics"
|
||||
)
|
||||
|
||||
func TestUserKeyCreateResolveHostErrorRecordsMetric(t *testing.T) {
|
||||
t.Parallel()
|
||||
store := openAppTestStore(t)
|
||||
defer closeAppTestStore(t, store)
|
||||
|
||||
handler := NewAPIHandler("t", ActionSet{
|
||||
UserKeyHandler: buildUserKeyHandler(appTestDSN(t, store)),
|
||||
})
|
||||
|
||||
req := makeCreateRequest(t, http.MethodPost, "/api/keys", makeCreateBody("missing-group", "portal key", []string{"gpt-5.4"}))
|
||||
req.Header.Set("X-Portal-Subject", "portal-user")
|
||||
resp := httptestRecorder(handler, req)
|
||||
if resp.code != http.StatusInternalServerError {
|
||||
t.Fatalf("status code = %d, want 500 body=%s", resp.code, resp.Body().String())
|
||||
}
|
||||
|
||||
metricsReq := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
metricsResp := httptest.NewRecorder()
|
||||
metrics.Handler().ServeHTTP(metricsResp, metricsReq)
|
||||
body := metricsResp.Body.String()
|
||||
if !strings.Contains(body, `user_key_operations_total{operation="create",result="resolve_host_error"}`) {
|
||||
t.Fatalf("metrics body missing create resolve_host_error metric: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUserKeyDeleteGetKeyErrorRecordsMetric(t *testing.T) {
|
||||
t.Parallel()
|
||||
store := openAppTestStore(t)
|
||||
defer closeAppTestStore(t, store)
|
||||
|
||||
handler := buildUserKeyHandler(appTestDSN(t, store))
|
||||
if err := handler.deleteFn(context.Background(), "key_missing", "portal-user"); err == nil {
|
||||
t.Fatal("expected deleteFn to fail for missing key")
|
||||
}
|
||||
|
||||
metricsReq := httptest.NewRequest(http.MethodGet, "/metrics", nil)
|
||||
metricsResp := httptest.NewRecorder()
|
||||
metrics.Handler().ServeHTTP(metricsResp, metricsReq)
|
||||
body := metricsResp.Body.String()
|
||||
if !strings.Contains(body, `user_key_operations_total{operation="delete",result="get_key_error"}`) {
|
||||
t.Fatalf("metrics body missing delete get_key_error metric: %s", body)
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package metrics
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
@@ -132,7 +133,8 @@ func RecordHTTPRequest(method, path string, status int, duration time.Duration)
|
||||
if path == "" {
|
||||
path = "unknown"
|
||||
}
|
||||
HTTPRequestsTotal.WithLabelValues(method, path, http.StatusText(status)).Inc()
|
||||
statusLabel := strconv.Itoa(status)
|
||||
HTTPRequestsTotal.WithLabelValues(method, path, statusLabel).Inc()
|
||||
HTTPRequestDuration.WithLabelValues(method, path).Observe(duration.Seconds())
|
||||
}
|
||||
|
||||
|
||||
@@ -28,6 +28,9 @@ func TestHTTPRequestsTotal(t *testing.T) {
|
||||
if !strings.Contains(body, "http_requests_total") {
|
||||
t.Error("Expected metrics endpoint to contain http_requests_total")
|
||||
}
|
||||
if !strings.Contains(body, `status="200"`) {
|
||||
t.Fatalf("expected numeric HTTP status label, got: %s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRecordRouteDecision(t *testing.T) {
|
||||
|
||||
@@ -47,6 +47,9 @@ if [[ $frontend_smoke_status -ne 0 ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
log "running vNext SLO release gate"
|
||||
bash "$ROOT_DIR/scripts/test/verify_vnext_slo_release_gate.sh"
|
||||
|
||||
log "running gofmt check"
|
||||
gofmt -l . | tee "$GOFMT_LOG"
|
||||
if [[ -s "$GOFMT_LOG" ]]; then
|
||||
|
||||
79
scripts/test/verify_vnext_slo_release_gate.sh
Executable file
79
scripts/test/verify_vnext_slo_release_gate.sh
Executable file
@@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||
ARTIFACT_SUMMARY_PATH="${ARTIFACT_SUMMARY_PATH:-$ROOT_DIR/artifacts/v3-governance-live/20260608_102323/99-summary.json}"
|
||||
|
||||
# Print the arguments to stderr behind a "FAIL: " prefix and abort the script
# with exit status 1.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
|
||||
|
||||
# Print the arguments to stdout behind a "==> " prefix.
log() {
  printf '==> %s\n' "$*"
}
|
||||
|
||||
# Abort via fail unless $1 names an existing regular file.
require_file() {
  local path="$1"
  if [[ ! -f "$path" ]]; then
    fail "missing required file: $path"
  fi
}
|
||||
|
||||
# Abort via fail unless file $1 contains the fixed string $2.
#   $1  path of the file to search
#   $2  literal text to look for (matched with grep -F, not as a regex)
require_contains() {
  local path="$1"
  local needle="$2"
  # Report a missing file as such instead of as "missing expected text".
  [[ -f "$path" ]] || fail "missing required file: $path"
  # "--" stops option parsing so a needle that starts with "-" is still
  # treated as the pattern; -q exits on the first match without output.
  grep -qF -- "$needle" "$path" || fail "missing expected text in $path: $needle"
}
|
||||
|
||||
# Stage 1: every document, rule file and artifact the gate relies on must exist.
log "checking V3-2 source-of-truth files"
for required_path in \
  "$ROOT_DIR/docs/2026-06-04-SLO_AND_OBSERVABILITY.md" \
  "$ROOT_DIR/docs/2026-06-04-KEY_ACCOUNT_GOVERNANCE.md" \
  "$ROOT_DIR/docs/EXECUTION_BOARD.md" \
  "$ROOT_DIR/deploy/monitoring/prometheus-rules.yml" \
  "$ARTIFACT_SUMMARY_PATH"; do
  require_file "$required_path"
done

# Stage 2: the Go sources must still contain the metric names and failure
# labels the alert rules are written against.
log "checking metrics wiring truth"
metrics_go="$ROOT_DIR/internal/metrics/metrics.go"
route_resolve_go="$ROOT_DIR/internal/app/route_resolve_api.go"
key_service_go="$ROOT_DIR/internal/app/key_self_service_svc.go"
require_contains "$metrics_go" 'Name: "user_key_operations_total"'
require_contains "$metrics_go" 'Name: "user_key_chat_requests_total"'
require_contains "$metrics_go" 'statusLabel := strconv.Itoa(status)'
require_contains "$route_resolve_go" 'decisionStatus = "failover"'
require_contains "$key_service_go" 'recordUserKeyFailure("create", "resolve_host_error"'
require_contains "$key_service_go" 'recordUserKeyFailure("delete", "get_key_error"'

# Stage 3: the Prometheus rules must reference those same series and labels.
log "checking alert rule alignment"
prometheus_rules="$ROOT_DIR/deploy/monitoring/prometheus-rules.yml"
require_contains "$prometheus_rules" 'user_key_chat_requests_total{result="ok"}'
require_contains "$prometheus_rules" 'user_key_operations_total{operation="create",result!~"success|rate_limited"}'
require_contains "$prometheus_rules" 'route_decisions_total{status="failover"}'
require_contains "$prometheus_rules" 'http_requests_total{status=~"4..|5.."}'
|
||||
|
||||
log "checking live governance artifact"
|
||||
python3 - "$ARTIFACT_SUMMARY_PATH" <<'PY'
|
||||
"""Validate the live-governance summary artifact passed as the first argument.

Exits with a one-line message on the first failed check; on success prints a
JSON report of the artifact path and the checks that were applied.
"""
import json
import sys
from pathlib import Path

# HTTP status each lifecycle step must have recorded in the summary.
EXPECTED_HTTP = {
    'create_http': 201,
    'chat_before_http': 200,
    'pause_http': 200,
    'get_paused_http': 200,
    'chat_paused_http': 403,
    'resume_http': 200,
    'get_resumed_http': 200,
    'chat_resumed_http': 200,
    'delete_http': 200,
}

# Text that must appear in the body of the chat call made while paused.
PAUSED_MARKER = 'key_paused'


def find_violation(summary):
    """Return a message for the first failed check, or None when all pass."""
    if not isinstance(summary, dict):
        return f'summary must be a JSON object, got {type(summary).__name__}'
    for key, want in EXPECTED_HTTP.items():
        got = summary.get(key)
        if got != want:
            return f'{key}={got!r}, want {want!r}'
    paused_body = summary.get('chat_paused_body', '')
    if not isinstance(paused_body, str):
        # The body may have been stored as parsed JSON (or null). Serialize it
        # so the marker is searched as text rather than as a dict key or list
        # element, and so null does not raise TypeError.
        paused_body = json.dumps(paused_body, ensure_ascii=False)
    if PAUSED_MARKER not in paused_body:
        return 'chat_paused_body missing key_paused evidence'
    return None


def main(argv):
    """Load the summary at argv[1], enforce the checks, print the report."""
    p = Path(argv[1])
    # JSON is UTF-8; do not depend on the locale's default encoding.
    obj = json.loads(p.read_text(encoding='utf-8'))
    problem = find_violation(obj)
    if problem is not None:
        raise SystemExit(problem)
    print(json.dumps({'artifact': str(p), 'checks': EXPECTED_HTTP, 'paused_error': 'key_paused'}, ensure_ascii=False, indent=2))


# Code fed to "python3 -" on stdin runs as __main__, so the script still
# executes when invoked from the heredoc.
if __name__ == '__main__':
    main(sys.argv)
|
||||
PY
|
||||
|
||||
# Final stage: the execution board must carry both V3-2 closure markers.
log "checking docs mention V3-2 closure state"
execution_board="$ROOT_DIR/docs/EXECUTION_BOARD.md"
for closure_marker in \
  'V3-2 SLO / 观测最小闭环(2026-06-08 首批)' \
  '失败路径细化、告警规则、发布门禁均已落地'; do
  require_contains "$execution_board" "$closure_marker"
done

printf '%s\n' 'PASS: V3-2 SLO release gate verified'
|
||||
Reference in New Issue
Block a user