mirror of
https://github.com/woodchen-ink/proxy-go.git
synced 2025-07-18 16:41:54 +08:00
feat(config): update alert configurations and error rate thresholds
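- Raised the ErrorRate alert threshold from 0.5 to 0.8, so error-rate alerts now trigger only at a higher error rate.
- Added an AlertInterval setting to the config (default "24h") for a customizable interval between alert notifications.
- Updated the maximum-latency thresholds for small, medium, and large files (3s -> 5s, 8s -> 10s, 30s -> 50s) to improve performance monitoring.
- Enhanced metrics handling to incorporate the new alert configuration: the monitor skips a notification when one for the same alert level was sent within the notification interval, and the metrics handler falls back to zero when the average latency value is missing.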
This commit is contained in:
parent
68c27b544b
commit
3962799980
@ -45,7 +45,8 @@
|
|||||||
"WindowInterval": "5m",
|
"WindowInterval": "5m",
|
||||||
"DedupeWindow": "15m",
|
"DedupeWindow": "15m",
|
||||||
"MinRequests": 10,
|
"MinRequests": 10,
|
||||||
"ErrorRate": 0.5
|
"ErrorRate": 0.8,
|
||||||
|
"AlertInterval": "24h"
|
||||||
},
|
},
|
||||||
"Latency": {
|
"Latency": {
|
||||||
"SmallFileSize": 1048576,
|
"SmallFileSize": 1048576,
|
||||||
|
@ -47,6 +47,7 @@ type MetricsConfig struct {
|
|||||||
DedupeWindow time.Duration `json:"DedupeWindow"` // 告警去重时间窗口
|
DedupeWindow time.Duration `json:"DedupeWindow"` // 告警去重时间窗口
|
||||||
MinRequests int64 `json:"MinRequests"` // 触发告警的最小请求数
|
MinRequests int64 `json:"MinRequests"` // 触发告警的最小请求数
|
||||||
ErrorRate float64 `json:"ErrorRate"` // 错误率告警阈值
|
ErrorRate float64 `json:"ErrorRate"` // 错误率告警阈值
|
||||||
|
AlertInterval time.Duration `json:"AlertInterval"` // 告警间隔时间
|
||||||
} `json:"Alert"`
|
} `json:"Alert"`
|
||||||
// 延迟告警配置
|
// 延迟告警配置
|
||||||
Latency struct {
|
Latency struct {
|
||||||
|
@ -24,17 +24,18 @@ var (
|
|||||||
AlertWindowSize = 12 // 监控窗口数量
|
AlertWindowSize = 12 // 监控窗口数量
|
||||||
AlertWindowInterval = 5 * time.Minute // 每个窗口时间长度
|
AlertWindowInterval = 5 * time.Minute // 每个窗口时间长度
|
||||||
AlertDedupeWindow = 15 * time.Minute // 告警去重时间窗口
|
AlertDedupeWindow = 15 * time.Minute // 告警去重时间窗口
|
||||||
|
AlertNotifyInterval = 24 * time.Hour // 告警通知间隔
|
||||||
MinRequestsForAlert int64 = 10 // 触发告警的最小请求数
|
MinRequestsForAlert int64 = 10 // 触发告警的最小请求数
|
||||||
ErrorRateThreshold = 0.5 // 错误率告警阈值 (50%)
|
ErrorRateThreshold = 0.8 // 错误率告警阈值
|
||||||
|
|
||||||
// 延迟告警阈值
|
// 延迟告警阈值
|
||||||
SmallFileSize int64 = 1 * MB // 小文件阈值
|
SmallFileSize int64 = 1 * MB // 小文件阈值
|
||||||
MediumFileSize int64 = 10 * MB // 中等文件阈值
|
MediumFileSize int64 = 10 * MB // 中等文件阈值
|
||||||
LargeFileSize int64 = 100 * MB // 大文件阈值
|
LargeFileSize int64 = 100 * MB // 大文件阈值
|
||||||
|
|
||||||
SmallFileLatency = 3 * time.Second // 小文件最大延迟
|
SmallFileLatency = 5 * time.Second // 小文件最大延迟
|
||||||
MediumFileLatency = 8 * time.Second // 中等文件最大延迟
|
MediumFileLatency = 10 * time.Second // 中等文件最大延迟
|
||||||
LargeFileLatency = 30 * time.Second // 大文件最大延迟
|
LargeFileLatency = 50 * time.Second // 大文件最大延迟
|
||||||
HugeFileLatency = 300 * time.Second // 超大文件最大延迟 (5分钟)
|
HugeFileLatency = 300 * time.Second // 超大文件最大延迟 (5分钟)
|
||||||
|
|
||||||
// 单位常量
|
// 单位常量
|
||||||
@ -60,6 +61,9 @@ func UpdateFromConfig(cfg *config.Config) {
|
|||||||
if cfg.Metrics.Alert.ErrorRate > 0 {
|
if cfg.Metrics.Alert.ErrorRate > 0 {
|
||||||
ErrorRateThreshold = cfg.Metrics.Alert.ErrorRate
|
ErrorRateThreshold = cfg.Metrics.Alert.ErrorRate
|
||||||
}
|
}
|
||||||
|
if cfg.Metrics.Alert.AlertInterval > 0 {
|
||||||
|
AlertNotifyInterval = cfg.Metrics.Alert.AlertInterval
|
||||||
|
}
|
||||||
|
|
||||||
// 延迟告警配置
|
// 延迟告警配置
|
||||||
if cfg.Metrics.Latency.SmallFileSize > 0 {
|
if cfg.Metrics.Latency.SmallFileSize > 0 {
|
||||||
|
@ -47,6 +47,11 @@ func (h *ProxyHandler) MetricsHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var avgLatency int64
|
||||||
|
if latency, ok := stats["avg_latency"]; ok && latency != nil {
|
||||||
|
avgLatency = latency.(int64)
|
||||||
|
}
|
||||||
|
|
||||||
metrics := Metrics{
|
metrics := Metrics{
|
||||||
Uptime: uptime.String(),
|
Uptime: uptime.String(),
|
||||||
ActiveRequests: stats["active_requests"].(int64),
|
ActiveRequests: stats["active_requests"].(int64),
|
||||||
@ -55,7 +60,7 @@ func (h *ProxyHandler) MetricsHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
ErrorRate: float64(stats["total_errors"].(int64)) / float64(stats["total_requests"].(int64)),
|
ErrorRate: float64(stats["total_errors"].(int64)) / float64(stats["total_requests"].(int64)),
|
||||||
NumGoroutine: stats["num_goroutine"].(int),
|
NumGoroutine: stats["num_goroutine"].(int),
|
||||||
MemoryUsage: stats["memory_usage"].(string),
|
MemoryUsage: stats["memory_usage"].(string),
|
||||||
AverageResponseTime: metrics.FormatDuration(time.Duration(stats["avg_latency"].(int64))),
|
AverageResponseTime: metrics.FormatDuration(time.Duration(avgLatency)),
|
||||||
TotalBytes: stats["total_bytes"].(int64),
|
TotalBytes: stats["total_bytes"].(int64),
|
||||||
BytesPerSecond: float64(stats["total_bytes"].(int64)) / metrics.Max(uptime.Seconds(), 1),
|
BytesPerSecond: float64(stats["total_bytes"].(int64)) / metrics.Max(uptime.Seconds(), 1),
|
||||||
RequestsPerSecond: float64(stats["total_requests"].(int64)) / metrics.Max(uptime.Seconds(), 1),
|
RequestsPerSecond: float64(stats["total_requests"].(int64)) / metrics.Max(uptime.Seconds(), 1),
|
||||||
|
@ -48,9 +48,10 @@ type Monitor struct {
|
|||||||
alerts chan Alert
|
alerts chan Alert
|
||||||
handlers []AlertHandler
|
handlers []AlertHandler
|
||||||
dedup sync.Map
|
dedup sync.Map
|
||||||
errorWindow [12]ErrorStats // 5分钟一个窗口,保存最近1小时
|
lastNotify sync.Map
|
||||||
|
errorWindow [12]ErrorStats
|
||||||
currentWindow atomic.Int32
|
currentWindow atomic.Int32
|
||||||
transferWindow [12]TransferStats // 5分钟一个窗口,保存最近1小时
|
transferWindow [12]TransferStats
|
||||||
currentTWindow atomic.Int32
|
currentTWindow atomic.Int32
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,6 +91,15 @@ func (m *Monitor) processAlerts() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 检查是否在通知间隔内
|
||||||
|
notifyKey := fmt.Sprintf("notify:%s", alert.Level)
|
||||||
|
if lastTime, ok := m.lastNotify.Load(notifyKey); ok {
|
||||||
|
if time.Since(lastTime.(time.Time)) < constants.AlertNotifyInterval {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m.lastNotify.Store(notifyKey, time.Now())
|
||||||
|
|
||||||
for _, handler := range m.handlers {
|
for _, handler := range m.handlers {
|
||||||
handler.HandleAlert(alert)
|
handler.HandleAlert(alert)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user