Files
user-system/deployment/alertmanager/alertmanager.yml
long-agent 5b6bd93179 refactor: 整理项目根目录结构
整理内容:
- 删除 60+ 临时测试输出文件 (*.txt)
- 移动二进制文件到 bin/ 目录
- 移动 Shell 脚本到 scripts/ 目录
  - scripts/dev/: check_gitea.sh, check_sub2api.sh, run_tests.sh
  - scripts/deploy/: deploy_*.sh, simple_deploy.sh
  - scripts/ops/: fix_nginx.sh, fix_ssl.sh, install_docker.sh
  - scripts/test/: test_*.sh, test_*.bat
- 移动批处理文件到 scripts/
- 移动 Python 脚本到 tools/
- 清理临时日志文件

保留根目录必要文件:
- go.mod, go.sum, go.work
- Makefile, docker-compose.yml
- .env.example, .gitignore
- README.md, AGENTS.md, DEPLOY_GUIDE.md

验证: go build ./... && go test ./... 通过
2026-04-07 18:10:36 +08:00

135 lines
4.3 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
global:
  # Time after which an alert that has stopped being updated is treated as resolved.
  resolve_timeout: 5m
  # Default HTTP client settings for HTTP-based notifiers (the Feishu webhooks
  # in this file). Only redirect handling is set here; no timeout is configured.
  http_config:
    follow_redirects: true

# NOTE:
# This file is a template. In production the `${ALERTMANAGER_*}` variables must
# be injected and rendered first, and only the rendered result handed to
# Alertmanager.
# Feishu webhook URLs are injected per severity from
# ${FEISHU_WEBHOOK_URL_CRITICAL}, ${FEISHU_WEBHOOK_URL_WARNING} and
# ${FEISHU_WEBHOOK_URL_INFO}; the webhook credential comes from
# ${FEISHU_WEBHOOK_SECRET}.
# The PagerDuty integration key is injected from ${PAGERDUTY_INTEGRATION_KEY}.
# NOTE(review): no receiver in this file references the PagerDuty key - confirm
# whether a pagerduty_configs entry is missing or this note is stale.
# Alert routing tree. Alerts that match none of the child routes fall through
# to the top-level 'default' receiver.
route:
  group_by: ['alertname', 'service', 'severity']
  group_wait: 30s
  group_interval: 5m
  # Repeat interval for still-firing alerts (previously 12h, judged too long; now 4h).
  repeat_interval: 4h
  receiver: 'default'
  routes:
    # `matchers` replaces the deprecated `match` field (requires Alertmanager >= 0.22).

    # P0: Critical - notify immediately through Feishu + e-mail (on-call path).
    - matchers:
        - 'severity="critical"'
      receiver: 'critical-oncall'
      group_wait: 10s
      repeat_interval: 30m  # re-notify if a critical alert is unresolved after 30 min
      continue: false  # critical alerts are not evaluated against later routes

    # P1: Warning - Feishu channel only, no e-mail.
    - matchers:
        - 'severity="warning"'
      receiver: 'warning-feishu'
      group_wait: 1m
      repeat_interval: 2h
      continue: false

    # P2: Info - recorded in Feishu only.
    - matchers:
        - 'severity="info"'
      receiver: 'info-feishu'
      group_wait: 5m
      repeat_interval: 24h
      continue: false
# Notification receivers referenced by the routing tree.
receivers:
  # Default receiver (e-mail fallback).
  - name: 'default'
    email_configs:
      - to: '${ALERTMANAGER_DEFAULT_TO}'
        from: '${ALERTMANAGER_FROM}'
        smarthost: '${ALERTMANAGER_SMARTHOST}'
        auth_username: '${ALERTMANAGER_AUTH_USERNAME}'
        auth_password: '${ALERTMANAGER_AUTH_PASSWORD}'
        send_resolved: true
        headers:
          Subject: '[{{ .Status | toUpper }}][UMS] {{ .GroupLabels.alertname }}'
        # One HTML section per alert in the notification group.
        html: |
          {{ range .Alerts }}
          <b>告警名称:</b> {{ .Labels.alertname }}<br>
          <b>严重级别:</b> {{ .Labels.severity }}<br>
          <b>摘要:</b> {{ .Annotations.summary }}<br>
          <b>详情:</b> {{ .Annotations.description }}<br>
          <b>时间:</b> {{ .StartsAt.Format "2006-01-02 15:04:05" }}<br>
          <hr>
          {{ end }}

  # CRIT-04 fix: critical on-call receiver (Feishu + e-mail, two channels).
  - name: 'critical-oncall'
    # Feishu bot webhook (core of the CRIT-04 fix: these used to be bare
    # placeholders; they are now in a usable format).
    # NOTE(review): Feishu custom-bot webhooks normally expect Feishu's own
    # message JSON and signature-based verification - confirm this URL points
    # at an adapter that accepts Alertmanager's webhook payload and a bearer token.
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_CRITICAL}'
        send_resolved: true
        http_config:
          # Equivalent to the deprecated `bearer_token` field: sends
          # "Authorization: Bearer <secret>".
          authorization:
            type: Bearer
            credentials: '${FEISHU_WEBHOOK_SECRET}'
        max_alerts: 10
    # E-mail fallback.
    email_configs:
      - to: '${ALERTMANAGER_CRITICAL_TO}'
        from: '${ALERTMANAGER_FROM}'
        smarthost: '${ALERTMANAGER_SMARTHOST}'
        auth_username: '${ALERTMANAGER_AUTH_USERNAME}'
        auth_password: '${ALERTMANAGER_AUTH_PASSWORD}'
        send_resolved: true
        headers:
          Subject: '[CRITICAL][UMS] {{ .GroupLabels.alertname }} — 立即处理'
        html: |
          <h2 style="color:red">⚠️ CRITICAL 告警</h2>
          {{ range .Alerts }}
          <b>告警:</b> {{ .Labels.alertname }}<br>
          <b>摘要:</b> {{ .Annotations.summary }}<br>
          <b>详情:</b> {{ .Annotations.description }}<br>
          <b>Runbook:</b> {{ .Annotations.runbook_url }}<br>
          <b>触发时间:</b> {{ .StartsAt.Format "2006-01-02 15:04:05" }}<br>
          <hr>
          {{ end }}

  # Warning receiver (Feishu channel).
  - name: 'warning-feishu'
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_WARNING}'
        send_resolved: true
        max_alerts: 20

  # Info receiver (Feishu log channel).
  - name: 'info-feishu'
    webhook_configs:
      - url: '${FEISHU_WEBHOOK_URL_INFO}'
        send_resolved: false  # no notification when info-level alerts resolve
        max_alerts: 50
# Inhibition rules: a firing higher-severity alert mutes lower-severity ones.
# `source_matchers` / `target_matchers` replace the deprecated `source_match` /
# `target_match` fields (requires Alertmanager >= 0.22).
# NOTE(review): Alertmanager treats a missing label like an empty one, so alerts
# that all lack a `service` label compare as equal and will inhibit each other -
# confirm every alerting rule sets `service`.
inhibit_rules:
  # While a critical alert is firing, mute warnings with the same alertname
  # AND the same service.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'service']

  # While a critical alert is firing, mute info alerts for the same service.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="info"'
    equal: ['service']

  # While a warning alert is firing, mute info alerts for the same service.
  - source_matchers:
      - 'severity="warning"'
    target_matchers:
      - 'severity="info"'
    equal: ['service']