Files
user-system/internal/monitoring/slo.go
long-agent 582ad7a069 test: add comprehensive test coverage and improve code quality
- Add new test files for auth, service, and handler modules
- Improve test organization and coverage
- Refactor code for better maintainability
- Add captcha, settings, stats, and theme handler tests
- Add auth module tests (CAS, OAuth, password, SSO, state)
- Add service layer tests for auth, export, permissions, roles
- All Go tests pass (exit code 0)
- All frontend tests pass (325 tests in 59 files)
2026-04-17 20:43:50 +08:00

178 lines
5.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package monitoring
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
// SLOMetrics groups the Prometheus collectors related to service level
// objectives (SLOs). These metrics are the basis of SLO measurement and are
// used to compute error budget burn rates.
type SLOMetrics struct {
// Cache hit statistics (referenced by alerts.yml but previously undefined).
// NewSLOMetrics creates both vectors with the labels "level" and "operation".
CacheHitsTotal *prometheus.CounterVec
CacheOperationsTotal *prometheus.CounterVec
// Database connection pool state (referenced by alerts.yml but previously undefined).
DBConnectionsActive prometheus.Gauge
DBConnectionsMax prometheus.Gauge
// Token operations; NewSLOMetrics creates this vector with the label "status".
TokenRefreshTotal *prometheus.CounterVec
// Account security events; AnomalyDetectedTotal is created with the label "type".
AccountLockTotal prometheus.Counter
AnomalyDetectedTotal *prometheus.CounterVec
// Error budget burn rate, created with the label "slo" (optional, intended
// for custom dashboards).
ErrorBudgetBurnRate *prometheus.GaugeVec
// registry is the private registry that NewSLOMetrics creates and registers
// every collector above with; GetRegistry returns it.
registry *prometheus.Registry
// NOTE(review): once is not referenced by any code visible in this file —
// confirm whether it is still needed before relying on or removing it.
once sync.Once
}
// Process-wide singleton state used by GetGlobalSLOMetrics.
var (
// globalSLOMetrics holds the shared instance; it is assigned inside
// globalSLOMetricsOnce.Do and is nil until GetGlobalSLOMetrics is first called.
globalSLOMetrics *SLOMetrics
// globalSLOMetricsOnce ensures the shared instance is built only once.
globalSLOMetricsOnce sync.Once
)
// NewSLOMetrics creates an SLOMetrics instance backed by its own private
// registry. Using an independent registry (rather than the default one) avoids
// duplicate-registration conflicts when tests create several instances.
//
// Every collector is registered with the private registry via MustRegister,
// which panics if registration fails.
func NewSLOMetrics() *SLOMetrics {
	metrics := &SLOMetrics{
		registry: prometheus.NewRegistry(),

		// Cache metrics. level: l1/l2, operation: get/set.
		CacheHitsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "cache_hits_total",
			Help: "Total number of cache hits",
		}, []string{"level", "operation"}),
		CacheOperationsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "cache_operations_total",
			Help: "Total number of cache operations",
		}, []string{"level", "operation"}),

		// Database connection pool gauges.
		DBConnectionsActive: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "db_connections_active",
			Help: "Number of active database connections",
		}),
		DBConnectionsMax: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "db_connections_max",
			Help: "Maximum number of database connections configured",
		}),

		// Token refresh attempts. status: success/failure/rate_limited.
		TokenRefreshTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "token_refresh_total",
			Help: "Total number of token refresh attempts",
		}, []string{"status"}),

		// Account security events.
		// type: geo_anomaly/device_anomaly/brute_force/suspicious_ip.
		AccountLockTotal: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "account_lock_total",
			Help: "Total number of account lockout events due to failed login attempts",
		}),
		AnomalyDetectedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{
			Name: "anomaly_detected_total",
			Help: "Total number of anomaly login detections",
		}, []string{"type"}),

		// Burn rate per SLO. slo: api-availability/api-latency/login-success-rate.
		ErrorBudgetBurnRate: prometheus.NewGaugeVec(prometheus.GaugeOpts{
			Name: "error_budget_burn_rate",
			Help: "Current error budget burn rate multiplier (1.0 = nominal consumption)",
		}, []string{"slo"}),
	}

	metrics.registry.MustRegister(
		metrics.CacheHitsTotal,
		metrics.CacheOperationsTotal,
		metrics.DBConnectionsActive,
		metrics.DBConnectionsMax,
		metrics.TokenRefreshTotal,
		metrics.AccountLockTotal,
		metrics.AnomalyDetectedTotal,
		metrics.ErrorBudgetBurnRate,
	)

	return metrics
}
// GetGlobalSLOMetrics returns the process-wide SLOMetrics singleton (intended
// for production use). The instance is built on the first call; later calls
// return the same pointer.
//
// In addition to the private registry set up by NewSLOMetrics, each collector
// is also offered to prometheus.DefaultRegisterer so that the /metrics
// endpoint can expose it.
func GetGlobalSLOMetrics() *SLOMetrics {
	globalSLOMetricsOnce.Do(func() {
		instance := NewSLOMetrics()

		collectors := []prometheus.Collector{
			instance.CacheHitsTotal,
			instance.CacheOperationsTotal,
			instance.DBConnectionsActive,
			instance.DBConnectionsMax,
			instance.TokenRefreshTotal,
			instance.AccountLockTotal,
			instance.AnomalyDetectedTotal,
			instance.ErrorBudgetBurnRate,
		}
		for _, collector := range collectors {
			// Best effort: a registration error is deliberately discarded and
			// the remaining collectors are still attempted.
			_ = prometheus.DefaultRegisterer.Register(collector)
		}

		globalSLOMetrics = instance
	})
	return globalSLOMetrics
}
// GetRegistry returns the private registry held by m (intended for tests).
func (m *SLOMetrics) GetRegistry() *prometheus.Registry {
	reg := m.registry
	return reg
}
// RecordCacheHit records a cache hit for the given level and operation. A hit
// increments both the hit counter and the overall operations counter.
func (m *SLOMetrics) RecordCacheHit(level, operation string) {
	hits := m.CacheHitsTotal.WithLabelValues(level, operation)
	hits.Inc()

	// Every hit is also an operation.
	ops := m.CacheOperationsTotal.WithLabelValues(level, operation)
	ops.Inc()
}
// RecordCacheMiss records a cache miss for the given level and operation. Only
// the overall operations counter is incremented; the hit counter is untouched.
func (m *SLOMetrics) RecordCacheMiss(level, operation string) {
	ops := m.CacheOperationsTotal.WithLabelValues(level, operation)
	ops.Inc()
}
// RecordTokenRefresh records one token refresh attempt under the given status
// label value.
func (m *SLOMetrics) RecordTokenRefresh(status string) {
	refreshes := m.TokenRefreshTotal.WithLabelValues(status)
	refreshes.Inc()
}
// RecordAccountLock records one account lockout event.
func (m *SLOMetrics) RecordAccountLock() {
	locks := m.AccountLockTotal
	locks.Inc()
}
// RecordAnomaly records one anomaly detection event; anomalyType is used as
// the value of the "type" label.
func (m *SLOMetrics) RecordAnomaly(anomalyType string) {
	detections := m.AnomalyDetectedTotal.WithLabelValues(anomalyType)
	detections.Inc()
}
// SetDBConnections updates the database connection pool gauges: active is
// written to DBConnectionsActive and max to DBConnectionsMax.
func (m *SLOMetrics) SetDBConnections(active, max float64) {
	activeGauge, maxGauge := m.DBConnectionsActive, m.DBConnectionsMax
	activeGauge.Set(active)
	maxGauge.Set(max)
}
// SetErrorBudgetBurnRate sets the error budget burn rate gauge for the SLO
// identified by slo to burnRate.
func (m *SLOMetrics) SetErrorBudgetBurnRate(slo string, burnRate float64) {
	gauge := m.ErrorBudgetBurnRate.WithLabelValues(slo)
	gauge.Set(burnRate)
}