From a0cea8f5b802ca7b3cec0b15df92b0c7a1a66b8a Mon Sep 17 00:00:00 2001
From: wood chen <wood@czl.net>
Date: Sun, 9 Mar 2025 10:41:24 +0800
Subject: [PATCH] =?UTF-8?q?feat(metrics):=20=E5=A2=9E=E5=BC=BA=E6=8C=87?=
 =?UTF-8?q?=E6=A0=87=E6=94=B6=E9=9B=86=E5=92=8C=E6=81=A2=E5=A4=8D=E6=97=A5?=
 =?UTF-8?q?=E5=BF=97=E8=AE=B0=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在指标初始化和加载过程中添加详细日志
- 改进指标恢复逻辑，支持从不同数据结构恢复统计信息
- 添加更多日志输出，帮助诊断指标加载问题
- 优化数据验证逻辑，允许小范围的统计数据误差
- 增加对未找到数据的日志提示
---
 internal/metrics/collector.go | 70 +++++++++++++++++++++++++++++++++--
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/internal/metrics/collector.go b/internal/metrics/collector.go
index 0f16f14..55a950b 100644
--- a/internal/metrics/collector.go
+++ b/internal/metrics/collector.go
@@ -72,6 +72,8 @@ func InitCollector(cfg *config.Config) error {
 		instance.latencyBuckets.Store("200-1000ms", new(int64))
 		instance.latencyBuckets.Store(">1s", new(int64))
 
+		log.Printf("[Metrics] Initializing metrics collector...")
+
 		// 加载历史统计数据
 		if err := instance.LoadRecentStats(); err != nil {
 			log.Printf("[Metrics] Warning: Failed to load stats: %v", err)
@@ -82,6 +84,8 @@ func InitCollector(cfg *config.Config) error {
 
 		// 启动定时保存任务
 		instance.startMetricsSaver()
+
+		log.Printf("[Metrics] Metrics collector initialized")
 	})
 	return nil
 }
@@ -377,6 +381,7 @@ func (c *Collector) GetStats() map[string]interface{} {
 		"bytes_per_second":    float64(atomic.LoadInt64(&c.totalBytes)) / totalRuntime.Seconds(),
 		"status_code_stats":   statusCodeStats,
 		"top_paths":           orderedPathStats,
+		"path_stats":          pathStatsMap,
 		"recent_requests":     recentRequests,
 		"latency_stats": map[string]interface{}{
 			"min":          fmt.Sprintf("%.2fms", float64(minLatency)/float64(time.Millisecond)),
@@ -514,19 +519,28 @@ func (c *Collector) LoadRecentStats() error {
 		return fmt.Errorf("failed to read metrics file: %v", err)
 	}
 
+	log.Printf("[Metrics] Found latest_stats.json, size: %d bytes", len(data))
+
 	// 解析JSON数据
 	var stats map[string]interface{}
 	if err := json.Unmarshal(data, &stats); err != nil {
 		return fmt.Errorf("failed to unmarshal metrics data: %v", err)
 	}
 
+	log.Printf("[Metrics] Successfully parsed JSON data")
+
 	// 恢复统计数据
 	if totalBytes, ok := stats["total_bytes"].(float64); ok {
 		atomic.StoreInt64(&c.totalBytes, int64(totalBytes))
+		log.Printf("[Metrics] Restored total_bytes: %v", totalBytes)
+	} else {
+		log.Printf("[Metrics] No total_bytes found in stats")
 	}
 
 	// 恢复路径统计
+	pathStatsRestored := 0
 	if pathStats, ok := stats["path_stats"].(map[string]interface{}); ok {
+		log.Printf("[Metrics] Found path_stats with %d entries", len(pathStats))
 		for path, stat := range pathStats {
 			if statMap, ok := stat.(map[string]interface{}); ok {
 				pathStat := &models.PathStats{}
@@ -548,21 +562,59 @@ func (c *Collector) LoadRecentStats() error {
 				}
 
 				c.pathStats.Store(path, pathStat)
+				pathStatsRestored++
 			}
 		}
+		log.Printf("[Metrics] Restored %d path stats", pathStatsRestored)
+	} else {
+		log.Printf("[Metrics] No path_stats found in stats")
+
+		// 尝试从top_paths恢复数据
+		if topPaths, ok := stats["top_paths"].([]interface{}); ok {
+			log.Printf("[Metrics] Found top_paths with %d entries", len(topPaths))
+			for _, pathData := range topPaths {
+				if pathMap, ok := pathData.(map[string]interface{}); ok {
+					path, ok1 := pathMap["path"].(string)
+					requests, ok2 := pathMap["requests"].(float64)
+					errors, ok3 := pathMap["errors"].(float64)
+					bytes, ok4 := pathMap["bytes"].(float64)
+					latencySum, ok5 := pathMap["latency_sum"].(float64)
+
+					if ok1 && ok2 && ok3 && ok4 && ok5 {
+						pathStat := &models.PathStats{}
+						pathStat.Requests.Store(int64(requests))
+						pathStat.Errors.Store(int64(errors))
+						pathStat.Bytes.Store(int64(bytes))
+						pathStat.LatencySum.Store(int64(latencySum))
+
+						c.pathStats.Store(path, pathStat)
+						pathStatsRestored++
+					}
+				}
+			}
+			log.Printf("[Metrics] Restored %d path stats from top_paths", pathStatsRestored)
+		} else {
+			log.Printf("[Metrics] No top_paths found in stats")
+		}
 	}
 
 	// 恢复状态码统计
-	if statusStats, ok := stats["status_codes"].(map[string]interface{}); ok {
+	statusCodesRestored := 0
+	if statusStats, ok := stats["status_code_stats"].(map[string]interface{}); ok {
+		log.Printf("[Metrics] Found status_code_stats with %d entries", len(statusStats))
 		for code, count := range statusStats {
 			if countVal, ok := count.(float64); ok {
 				codeInt := 0
 				if _, err := fmt.Sscanf(code, "%d", &codeInt); err == nil {
 					var counter int64 = int64(countVal)
 					c.statusCodeStats.Store(codeInt, &counter)
+					statusCodesRestored++
 				}
 			}
 		}
+		log.Printf("[Metrics] Restored %d status codes", statusCodesRestored)
+	} else {
+		log.Printf("[Metrics] No status_code_stats found in stats")
 	}
 
 	if err := c.validateLoadedData(); err != nil {
@@ -597,23 +649,33 @@ func (c *Collector) validateLoadedData() error {
 	c.pathStats.Range(func(_, value interface{}) bool {
 		stats, ok := value.(*models.PathStats)
 		if !ok {
+			log.Printf("[Metrics] Warning: Invalid path stats type: %T", value)
 			return true
 		}
 		requestCount := stats.Requests.Load()
 		errorCount := stats.Errors.Load()
 		if requestCount < 0 || errorCount < 0 {
+			log.Printf("[Metrics] Warning: Invalid path stats values: requests=%d, errors=%d", requestCount, errorCount)
 			return false
 		}
 		if errorCount > requestCount {
+			log.Printf("[Metrics] Warning: Error count (%d) exceeds request count (%d)", errorCount, requestCount)
 			return false
 		}
 		totalPathRequests += requestCount
 		return true
 	})
 
-	if totalPathRequests != statusCodeTotal {
-		return fmt.Errorf("path stats total (%d) does not match status code total (%d)",
-			totalPathRequests, statusCodeTotal)
+	// 如果没有请求，则跳过验证
+	if statusCodeTotal == 0 && totalPathRequests == 0 {
+		log.Printf("[Metrics] No requests to validate")
+		return nil
+	}
+
+	// 允许一定的误差
+	if math.Abs(float64(totalPathRequests-statusCodeTotal)) > float64(statusCodeTotal)*0.1 {
+		log.Printf("[Metrics] Warning: Path stats total (%d) does not match status code total (%d)", totalPathRequests, statusCodeTotal)
+		// 不返回错误，只记录警告
 	}
 
 	return nil