---
# Prometheus configuration for the Sub2API single-node deployment
# Optimization targets: memory < 128MB, storage < 2GB, 15-day retention

# Server-wide defaults; individual scrape jobs may override the intervals.
global:
  scrape_interval: 30s  # relaxed scrape interval for a single-node setup
  evaluation_interval: 30s
  # Labels attached to this server's data when it talks to external systems.
  external_labels:
    cluster: 'sub2api-single'
    replica: 'single'

# Alerting rules
rule_files:
  - 'rules/sub2api-alerts-light.yml'

# Scrape configuration
scrape_configs:
  # Sub2API application metrics
  - job_name: 'sub2api-app'
    static_configs:
      - targets: ['host.docker.internal:8080']
        labels:
          service: 'sub2api'
          tier: 'backend'
    metrics_path: '/metrics'
    scrape_interval: 30s
    scrape_timeout: 10s
    # Scrape only the key metrics to reduce data volume.
    # NOTE(review): these are sent as URL query parameters
    # (collect[]=http&collect[]=runtime&collect[]=database); confirm that the
    # application's /metrics handler actually filters on them.
    params:
      collect[]:
        - 'http'
        - 'runtime'
        - 'database'

  # Node Exporter - system metrics
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
        labels:
          instance: 'sub2api-server'
    scrape_interval: 30s
    scrape_timeout: 10s

  # Prometheus self-monitoring metrics
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: 'prometheus'
    scrape_interval: 30s

  # Blackbox Exporter - TLS certificate checks & endpoint availability probes
  # Requires a blackbox-exporter container to be added to
  # docker-compose.single.yml.
  # See the blackbox-exporter service in
  # deploy/monitoring/docker-compose.single.yml.
  - job_name: 'blackbox-https'
    metrics_path: '/probe'
    params:
      module: [http_2xx]
    static_configs:
      - targets:
          # TODO: replace with the real domain
          - 'https://sub2api.example.com/health'
    relabel_configs:
      # Pass the listed target to the exporter as the ?target= parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed URL as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape request to the blackbox exporter.
      - target_label: __address__
        replacement: 'blackbox-exporter:9115'

  # Dedicated TLS certificate expiry check (TCP mode, certificate only)
  - job_name: 'blackbox-tls-cert'
    metrics_path: '/probe'
    params:
      # NOTE(review): tcp_tls does not appear to be one of the exporter's
      # stock modules; confirm it is defined in the blackbox exporter config.
      module: [tcp_tls]
    static_configs:
      - targets:
          # TODO: replace with the real domain:port
          - 'sub2api.example.com:443'
    relabel_configs:
      # Pass the listed target to the exporter as the ?target= parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed host:port as the instance label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape request to the blackbox exporter.
      - target_label: __address__
        replacement: 'blackbox-exporter:9115'
    # Probe infrequently to save resources. Kept at 2 minutes rather than
    # 5 minutes: Prometheus treats a series as stale when no sample exists in
    # the last 5 minutes (default lookback), so a 300s interval leaves no
    # margin for a late or failed probe and makes alert expressions flap.
    scrape_interval: 120s

# Alertmanager integration. The block is active but has no targets, so no
# alerts are forwarded until a target is filled in below.
# Phase 1: keep using the existing ops alerting system for now.
# Phase 2: enable Alertmanager, and write events back to ops_alert_events
#          through the ops-bridge webhook.
alerting:
  alertmanagers:
    - static_configs:
        - targets: []  # Phase 2: replace with ['alertmanager:9093']
      # For production, configuring TLS is recommended:
      # tls_config:
      #   ca_file: /etc/prometheus/certs/ca.crt