feat(metrics): 增强指标收集和恢复日志记录

- 在指标初始化和加载过程中添加详细日志
- 改进指标恢复逻辑,支持从不同数据结构恢复统计信息(未找到 path_stats 时回退到 top_paths 恢复路径统计)
- 添加更多日志输出,帮助诊断指标加载问题
- 放宽数据验证逻辑,允许小范围的统计数据误差(路径统计与状态码统计的总数不一致时不再返回错误,仅在偏差超过 10% 时记录警告)
- 增加对未找到数据的日志提示
This commit is contained in:
wood chen 2025-03-09 10:41:24 +08:00
parent 095b087fd8
commit a0cea8f5b8

View File

@ -72,6 +72,8 @@ func InitCollector(cfg *config.Config) error {
instance.latencyBuckets.Store("200-1000ms", new(int64)) instance.latencyBuckets.Store("200-1000ms", new(int64))
instance.latencyBuckets.Store(">1s", new(int64)) instance.latencyBuckets.Store(">1s", new(int64))
log.Printf("[Metrics] Initializing metrics collector...")
// 加载历史统计数据 // 加载历史统计数据
if err := instance.LoadRecentStats(); err != nil { if err := instance.LoadRecentStats(); err != nil {
log.Printf("[Metrics] Warning: Failed to load stats: %v", err) log.Printf("[Metrics] Warning: Failed to load stats: %v", err)
@ -82,6 +84,8 @@ func InitCollector(cfg *config.Config) error {
// 启动定时保存任务 // 启动定时保存任务
instance.startMetricsSaver() instance.startMetricsSaver()
log.Printf("[Metrics] Metrics collector initialized")
}) })
return nil return nil
} }
@ -377,6 +381,7 @@ func (c *Collector) GetStats() map[string]interface{} {
"bytes_per_second": float64(atomic.LoadInt64(&c.totalBytes)) / totalRuntime.Seconds(), "bytes_per_second": float64(atomic.LoadInt64(&c.totalBytes)) / totalRuntime.Seconds(),
"status_code_stats": statusCodeStats, "status_code_stats": statusCodeStats,
"top_paths": orderedPathStats, "top_paths": orderedPathStats,
"path_stats": pathStatsMap,
"recent_requests": recentRequests, "recent_requests": recentRequests,
"latency_stats": map[string]interface{}{ "latency_stats": map[string]interface{}{
"min": fmt.Sprintf("%.2fms", float64(minLatency)/float64(time.Millisecond)), "min": fmt.Sprintf("%.2fms", float64(minLatency)/float64(time.Millisecond)),
@ -514,19 +519,28 @@ func (c *Collector) LoadRecentStats() error {
return fmt.Errorf("failed to read metrics file: %v", err) return fmt.Errorf("failed to read metrics file: %v", err)
} }
log.Printf("[Metrics] Found latest_stats.json, size: %d bytes", len(data))
// 解析JSON数据 // 解析JSON数据
var stats map[string]interface{} var stats map[string]interface{}
if err := json.Unmarshal(data, &stats); err != nil { if err := json.Unmarshal(data, &stats); err != nil {
return fmt.Errorf("failed to unmarshal metrics data: %v", err) return fmt.Errorf("failed to unmarshal metrics data: %v", err)
} }
log.Printf("[Metrics] Successfully parsed JSON data")
// 恢复统计数据 // 恢复统计数据
if totalBytes, ok := stats["total_bytes"].(float64); ok { if totalBytes, ok := stats["total_bytes"].(float64); ok {
atomic.StoreInt64(&c.totalBytes, int64(totalBytes)) atomic.StoreInt64(&c.totalBytes, int64(totalBytes))
log.Printf("[Metrics] Restored total_bytes: %v", totalBytes)
} else {
log.Printf("[Metrics] No total_bytes found in stats")
} }
// 恢复路径统计 // 恢复路径统计
pathStatsRestored := 0
if pathStats, ok := stats["path_stats"].(map[string]interface{}); ok { if pathStats, ok := stats["path_stats"].(map[string]interface{}); ok {
log.Printf("[Metrics] Found path_stats with %d entries", len(pathStats))
for path, stat := range pathStats { for path, stat := range pathStats {
if statMap, ok := stat.(map[string]interface{}); ok { if statMap, ok := stat.(map[string]interface{}); ok {
pathStat := &models.PathStats{} pathStat := &models.PathStats{}
@ -548,21 +562,59 @@ func (c *Collector) LoadRecentStats() error {
} }
c.pathStats.Store(path, pathStat) c.pathStats.Store(path, pathStat)
pathStatsRestored++
} }
} }
log.Printf("[Metrics] Restored %d path stats", pathStatsRestored)
} else {
log.Printf("[Metrics] No path_stats found in stats")
// 尝试从top_paths恢复数据
if topPaths, ok := stats["top_paths"].([]interface{}); ok {
log.Printf("[Metrics] Found top_paths with %d entries", len(topPaths))
for _, pathData := range topPaths {
if pathMap, ok := pathData.(map[string]interface{}); ok {
path, ok1 := pathMap["path"].(string)
requests, ok2 := pathMap["requests"].(float64)
errors, ok3 := pathMap["errors"].(float64)
bytes, ok4 := pathMap["bytes"].(float64)
latencySum, ok5 := pathMap["latency_sum"].(float64)
if ok1 && ok2 && ok3 && ok4 && ok5 {
pathStat := &models.PathStats{}
pathStat.Requests.Store(int64(requests))
pathStat.Errors.Store(int64(errors))
pathStat.Bytes.Store(int64(bytes))
pathStat.LatencySum.Store(int64(latencySum))
c.pathStats.Store(path, pathStat)
pathStatsRestored++
}
}
}
log.Printf("[Metrics] Restored %d path stats from top_paths", pathStatsRestored)
} else {
log.Printf("[Metrics] No top_paths found in stats")
}
} }
// 恢复状态码统计 // 恢复状态码统计
if statusStats, ok := stats["status_codes"].(map[string]interface{}); ok { statusCodesRestored := 0
if statusStats, ok := stats["status_code_stats"].(map[string]interface{}); ok {
log.Printf("[Metrics] Found status_code_stats with %d entries", len(statusStats))
for code, count := range statusStats { for code, count := range statusStats {
if countVal, ok := count.(float64); ok { if countVal, ok := count.(float64); ok {
codeInt := 0 codeInt := 0
if _, err := fmt.Sscanf(code, "%d", &codeInt); err == nil { if _, err := fmt.Sscanf(code, "%d", &codeInt); err == nil {
var counter int64 = int64(countVal) var counter int64 = int64(countVal)
c.statusCodeStats.Store(codeInt, &counter) c.statusCodeStats.Store(codeInt, &counter)
statusCodesRestored++
} }
} }
} }
log.Printf("[Metrics] Restored %d status codes", statusCodesRestored)
} else {
log.Printf("[Metrics] No status_code_stats found in stats")
} }
if err := c.validateLoadedData(); err != nil { if err := c.validateLoadedData(); err != nil {
@ -597,23 +649,33 @@ func (c *Collector) validateLoadedData() error {
c.pathStats.Range(func(_, value interface{}) bool { c.pathStats.Range(func(_, value interface{}) bool {
stats, ok := value.(*models.PathStats) stats, ok := value.(*models.PathStats)
if !ok { if !ok {
log.Printf("[Metrics] Warning: Invalid path stats type: %T", value)
return true return true
} }
requestCount := stats.Requests.Load() requestCount := stats.Requests.Load()
errorCount := stats.Errors.Load() errorCount := stats.Errors.Load()
if requestCount < 0 || errorCount < 0 { if requestCount < 0 || errorCount < 0 {
log.Printf("[Metrics] Warning: Invalid path stats values: requests=%d, errors=%d", requestCount, errorCount)
return false return false
} }
if errorCount > requestCount { if errorCount > requestCount {
log.Printf("[Metrics] Warning: Error count (%d) exceeds request count (%d)", errorCount, requestCount)
return false return false
} }
totalPathRequests += requestCount totalPathRequests += requestCount
return true return true
}) })
if totalPathRequests != statusCodeTotal { // 如果没有请求,则跳过验证
return fmt.Errorf("path stats total (%d) does not match status code total (%d)", if statusCodeTotal == 0 && totalPathRequests == 0 {
totalPathRequests, statusCodeTotal) log.Printf("[Metrics] No requests to validate")
return nil
}
// 允许一定的误差
if math.Abs(float64(totalPathRequests-statusCodeTotal)) > float64(statusCodeTotal)*0.1 {
log.Printf("[Metrics] Warning: Path stats total (%d) does not match status code total (%d)", totalPathRequests, statusCodeTotal)
// 不返回错误,只记录警告
} }
return nil return nil