整理内容: - 删除 60+ 临时测试输出文件 (*.txt) - 移动二进制文件到 bin/ 目录 - 移动 Shell 脚本到 scripts/ 目录 - scripts/dev/: check_gitea.sh, check_sub2api.sh, run_tests.sh - scripts/deploy/: deploy_*.sh, simple_deploy.sh - scripts/ops/: fix_nginx.sh, fix_ssl.sh, install_docker.sh - scripts/test/: test_*.sh, test_*.bat - 移动批处理文件到 scripts/ - 移动 Python 脚本到 tools/ - 清理临时日志文件 保留根目录必要文件: - go.mod, go.sum, go.work - Makefile, docker-compose.yml - .env.example, .gitignore - README.md, AGENTS.md, DEPLOY_GUIDE.md 验证: go build ./... && go test ./... 通过
135 lines
4.3 KiB
YAML
135 lines
4.3 KiB
YAML
# Global defaults applied to every receiver unless a receiver overrides them.
global:
  # Time after which an alert that stops being refreshed is treated as resolved.
  resolve_timeout: 5m
  # Default HTTP client settings for HTTP-based notifiers (the Feishu webhooks).
  # Only redirect handling is configured here; no request timeout is set.
  http_config:
    follow_redirects: true
|
||
|
||
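# NOTE:
# This file is a template. In production, the `${ALERTMANAGER_*}` and
# `${FEISHU_*}` variables must be injected and rendered first, and only the
# rendered result may be handed to Alertmanager.
# Feishu webhook URLs are injected from ${FEISHU_WEBHOOK_URL_CRITICAL},
# ${FEISHU_WEBHOOK_URL_WARNING} and ${FEISHU_WEBHOOK_URL_INFO}.
# The PagerDuty integration key is injected from ${PAGERDUTY_INTEGRATION_KEY}.
# NOTE(review): no receiver in this template references the PagerDuty key;
# confirm whether it is still needed.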
|
||
|
||
# Alert routing tree: alerts are grouped, then dispatched by severity.
# Anything that matches no child route falls back to the 'default' receiver.
route:
  group_by: ['alertname', 'service', 'severity']
  group_wait: 30s
  group_interval: 5m
  # Re-send interval for alerts that are still firing. Shortened from 12h to
  # 4h because 12h was considered too long between reminders.
  repeat_interval: 4h
  receiver: 'default'

  routes:
    # P0: critical — notify immediately via Feishu and email (on-call path).
    - matchers:
        - 'severity="critical"'
      receiver: 'critical-oncall'
      group_wait: 10s
      repeat_interval: 30m  # re-alert if a critical alert has not recovered after 30 min
      continue: false  # critical alerts are not evaluated against later routes

    # P1: warning — Feishu channel only, no email.
    - matchers:
        - 'severity="warning"'
      receiver: 'warning-feishu'
      group_wait: 1m
      repeat_interval: 2h
      continue: false

    # P2: info — recorded in Feishu only.
    - matchers:
        - 'severity="info"'
      receiver: 'info-feishu'
      group_wait: 5m
      repeat_interval: 24h
      continue: false
|
||
|
||
# Alert receivers. Every `${...}` value is a template placeholder that must be
# rendered before Alertmanager loads this file.
receivers:
  # Default receiver (email fallback for alerts no child route matches).
  - name: 'default'
    email_configs:
      - to: '${ALERTMANAGER_DEFAULT_TO}'
        from: '${ALERTMANAGER_FROM}'
        smarthost: '${ALERTMANAGER_SMARTHOST}'
        auth_username: '${ALERTMANAGER_AUTH_USERNAME}'
        auth_password: '${ALERTMANAGER_AUTH_PASSWORD}'
        send_resolved: true
        headers:
          Subject: '[{{ .Status | toUpper }}][UMS] {{ .GroupLabels.alertname }}'
        # One HTML section per alert in the notification group.
        html: |
          {{ range .Alerts }}
          <b>告警名称:</b> {{ .Labels.alertname }}<br>
          <b>严重级别:</b> {{ .Labels.severity }}<br>
          <b>摘要:</b> {{ .Annotations.summary }}<br>
          <b>详情:</b> {{ .Annotations.description }}<br>
          <b>时间:</b> {{ .StartsAt.Format "2006-01-02 15:04:05" }}<br>
          <hr>
          {{ end }}

  # CRIT-04 fix: critical on-call receiver (two channels: Feishu + email).
  - name: 'critical-oncall'
    # Feishu bot webhook (core of the CRIT-04 fix: the previous values were
    # all placeholders; this is now a real, usable format).
    # NOTE(review): Alertmanager posts its own JSON payload; presumably the
    # URL points at an endpoint that accepts it and this bearer secret — confirm.
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_CRITICAL}'
        send_resolved: true
        # A receiver-level http_config replaces the global one for this webhook.
        # `authorization.credentials` supersedes the deprecated `bearer_token`
        # and sends the same "Authorization: Bearer <secret>" header.
        http_config:
          authorization:
            type: Bearer
            credentials: '${FEISHU_WEBHOOK_SECRET}'
        # Upper bound on alerts included in one webhook message.
        max_alerts: 10
    # Email fallback.
    email_configs:
      - to: '${ALERTMANAGER_CRITICAL_TO}'
        from: '${ALERTMANAGER_FROM}'
        smarthost: '${ALERTMANAGER_SMARTHOST}'
        auth_username: '${ALERTMANAGER_AUTH_USERNAME}'
        auth_password: '${ALERTMANAGER_AUTH_PASSWORD}'
        send_resolved: true
        headers:
          Subject: '[CRITICAL][UMS] {{ .GroupLabels.alertname }} — 立即处理'
        # Red banner followed by one section per alert, including its runbook link.
        html: |
          <h2 style="color:red">⚠️ CRITICAL 告警</h2>
          {{ range .Alerts }}
          <b>告警:</b> {{ .Labels.alertname }}<br>
          <b>摘要:</b> {{ .Annotations.summary }}<br>
          <b>详情:</b> {{ .Annotations.description }}<br>
          <b>Runbook:</b> {{ .Annotations.runbook_url }}<br>
          <b>触发时间:</b> {{ .StartsAt.Format "2006-01-02 15:04:05" }}<br>
          <hr>
          {{ end }}

  # Warning receiver (Feishu channel).
  - name: 'warning-feishu'
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_WARNING}'
        send_resolved: true
        max_alerts: 20

  # Info receiver (Feishu log channel).
  - name: 'info-feishu'
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_INFO}'
        send_resolved: false  # no notification when an info-level alert resolves
        max_alerts: 50
|
||
|
||
# Inhibition rules: while a source alert is firing, matching target alerts
# are muted. A target is muted only if it carries the same values as the
# source for every label listed in `equal`; per the Alertmanager docs, a
# missing label and an empty label count as the same value.
inhibit_rules:
  # A firing critical alert mutes warning alerts that share both the same
  # alertname and the same service.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'service']

  # A firing critical alert mutes info alerts of the same service.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="info"'
    equal: ['service']

  # A firing warning alert mutes info alerts of the same service.
  - source_matchers:
      - 'severity="warning"'
    target_matchers:
      - 'severity="info"'
    equal: ['service']
|