feat(metrics): 统一延迟分布桶命名并优化日志记录

- 将延迟分布桶键名从 "<10ms" 和 ">1s" 改为 "lt10ms" 和 "gt1s"
- 在指标处理和收集过程中添加详细日志,帮助诊断数据处理流程
- 前端页面适配新的桶键名,并提供友好的显示转换
- 增强延迟分布数据处理的健壮性和可读性
This commit is contained in:
wood chen 2025-03-09 11:43:26 +08:00
parent 2cb88a4f5e
commit f2e1b8cbf5
3 changed files with 70 additions and 28 deletions

View File

@ -161,30 +161,54 @@ func (h *ProxyHandler) MetricsHandler(w http.ResponseWriter, r *http.Request) {
metrics.LatencyStats.Max = utils.SafeString(latencyStats["max"], "0ms") metrics.LatencyStats.Max = utils.SafeString(latencyStats["max"], "0ms")
// 处理分布数据 // 处理分布数据
log.Printf("[MetricsHandler] 处理延迟分布数据: stats[latency_stats]=%v", stats["latency_stats"])
if stats["latency_stats"] != nil { if stats["latency_stats"] != nil {
if distribution, ok := stats["latency_stats"].(map[string]interface{})["distribution"]; ok && distribution != nil { latencyStatsMap, ok := stats["latency_stats"].(map[string]interface{})
if distributionMap, ok := distribution.(map[string]interface{}); ok { if ok {
metrics.LatencyStats.Distribution = make(map[string]int64) log.Printf("[MetricsHandler] latencyStatsMap=%v", latencyStatsMap)
for k, v := range distributionMap { distribution, ok := latencyStatsMap["distribution"]
if intValue, ok := v.(float64); ok { if ok && distribution != nil {
metrics.LatencyStats.Distribution[k] = int64(intValue) log.Printf("[MetricsHandler] distribution=%v", distribution)
} else if intValue, ok := v.(int64); ok { distributionMap, ok := distribution.(map[string]interface{})
metrics.LatencyStats.Distribution[k] = intValue if ok {
log.Printf("[MetricsHandler] distributionMap=%v", distributionMap)
metrics.LatencyStats.Distribution = make(map[string]int64)
for k, v := range distributionMap {
log.Printf("[MetricsHandler] 处理延迟分布项: %s=%v (type=%T)", k, v, v)
if intValue, ok := v.(float64); ok {
metrics.LatencyStats.Distribution[k] = int64(intValue)
log.Printf("[MetricsHandler] 转换为int64: %s=%d", k, int64(intValue))
} else if intValue, ok := v.(int64); ok {
metrics.LatencyStats.Distribution[k] = intValue
log.Printf("[MetricsHandler] 已经是int64: %s=%d", k, intValue)
} else {
log.Printf("[MetricsHandler] 无法转换: %s=%v (type=%T)", k, v, v)
}
} }
} else {
log.Printf("[MetricsHandler] distribution不是map: %v (type=%T)", distribution, distribution)
} }
} else {
log.Printf("[MetricsHandler] 没有distribution字段或为nil: ok=%v, distribution=%v", ok, distribution)
} }
} else {
log.Printf("[MetricsHandler] latency_stats不是map: %v (type=%T)", stats["latency_stats"], stats["latency_stats"])
} }
} else {
log.Printf("[MetricsHandler] latency_stats为nil")
} }
// 如果分布数据为空,初始化一个空的分布 // 如果分布数据为空,初始化一个空的分布
if metrics.LatencyStats.Distribution == nil { if metrics.LatencyStats.Distribution == nil {
log.Printf("[MetricsHandler] 初始化空的延迟分布")
metrics.LatencyStats.Distribution = make(map[string]int64) metrics.LatencyStats.Distribution = make(map[string]int64)
// 添加默认的延迟桶 // 添加默认的延迟桶
metrics.LatencyStats.Distribution["<10ms"] = 0 metrics.LatencyStats.Distribution["lt10ms"] = 0
metrics.LatencyStats.Distribution["10-50ms"] = 0 metrics.LatencyStats.Distribution["10-50ms"] = 0
metrics.LatencyStats.Distribution["50-200ms"] = 0 metrics.LatencyStats.Distribution["50-200ms"] = 0
metrics.LatencyStats.Distribution["200-1000ms"] = 0 metrics.LatencyStats.Distribution["200-1000ms"] = 0
metrics.LatencyStats.Distribution[">1s"] = 0 metrics.LatencyStats.Distribution["gt1s"] = 0
} }
// 填充错误统计数据 // 填充错误统计数据

View File

@ -61,7 +61,7 @@ func InitCollector(cfg *config.Config) error {
instance.bandwidthStats.history = make(map[string]int64) instance.bandwidthStats.history = make(map[string]int64)
// 初始化延迟分布桶 // 初始化延迟分布桶
buckets := []string{"<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"} buckets := []string{"lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"}
for _, bucket := range buckets { for _, bucket := range buckets {
counter := new(int64) counter := new(int64)
*counter = 0 *counter = 0
@ -132,7 +132,7 @@ func (c *Collector) RecordRequest(path string, status int, latency time.Duration
var bucketKey string var bucketKey string
switch { switch {
case latencyMs < 10: case latencyMs < 10:
bucketKey = "<10ms" bucketKey = "lt10ms"
case latencyMs < 50: case latencyMs < 50:
bucketKey = "10-50ms" bucketKey = "10-50ms"
case latencyMs < 200: case latencyMs < 200:
@ -140,14 +140,20 @@ func (c *Collector) RecordRequest(path string, status int, latency time.Duration
case latencyMs < 1000: case latencyMs < 1000:
bucketKey = "200-1000ms" bucketKey = "200-1000ms"
default: default:
bucketKey = ">1s" bucketKey = "gt1s"
} }
log.Printf("[Metrics] 更新延迟分布: 路径=%s, 延迟=%dms, 桶=%s", path, latencyMs, bucketKey)
if counter, ok := c.latencyBuckets.Load(bucketKey); ok { if counter, ok := c.latencyBuckets.Load(bucketKey); ok {
atomic.AddInt64(counter.(*int64), 1) oldValue := atomic.LoadInt64(counter.(*int64))
newValue := atomic.AddInt64(counter.(*int64), 1)
log.Printf("[Metrics] 延迟分布桶 %s: %d -> %d", bucketKey, oldValue, newValue)
} else { } else {
counter := new(int64) counter := new(int64)
*counter = 1 *counter = 1
c.latencyBuckets.Store(bucketKey, counter) c.latencyBuckets.Store(bucketKey, counter)
log.Printf("[Metrics] 新建延迟分布桶: %s = 1", bucketKey)
} }
// 更新路径统计 // 更新路径统计
@ -347,19 +353,25 @@ func (c *Collector) GetStats() map[string]interface{} {
latencyDistribution := make(map[string]int64) latencyDistribution := make(map[string]int64)
// 确保所有桶都存在即使计数为0 // 确保所有桶都存在即使计数为0
buckets := []string{"<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"} buckets := []string{"lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"}
for _, bucket := range buckets { for _, bucket := range buckets {
if counter, ok := c.latencyBuckets.Load(bucket); ok { if counter, ok := c.latencyBuckets.Load(bucket); ok {
if counter != nil { if counter != nil {
latencyDistribution[bucket] = atomic.LoadInt64(counter.(*int64)) value := atomic.LoadInt64(counter.(*int64))
latencyDistribution[bucket] = value
log.Printf("[Metrics] 延迟分布桶 %s = %d", bucket, value)
} else { } else {
latencyDistribution[bucket] = 0 latencyDistribution[bucket] = 0
log.Printf("[Metrics] 延迟分布桶 %s = 0 (counter is nil)", bucket)
} }
} else { } else {
latencyDistribution[bucket] = 0 latencyDistribution[bucket] = 0
log.Printf("[Metrics] 延迟分布桶 %s = 0 (bucket not found)", bucket)
} }
} }
log.Printf("[Metrics] 延迟分布: %v", latencyDistribution)
// 获取最近请求记录(使用读锁) // 获取最近请求记录(使用读锁)
recentRequests := c.recentRequests.GetAll() recentRequests := c.recentRequests.GetAll()

View File

@ -266,21 +266,27 @@ export default function DashboardPage() {
Object.entries(metrics.latency_stats.distribution) Object.entries(metrics.latency_stats.distribution)
.sort((a, b) => { .sort((a, b) => {
// 按照延迟范围排序 // 按照延迟范围排序
const order = ["<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"]; const order = ["lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"];
return order.indexOf(a[0]) - order.indexOf(b[0]); return order.indexOf(a[0]) - order.indexOf(b[0]);
}) })
.map(([range, count]) => ( .map(([range, count]) => {
<div key={range} className="p-3 rounded-lg border bg-card text-card-foreground shadow-sm"> // 转换桶键为更友好的显示
<div className="text-sm font-medium text-gray-500">{range}</div> let displayRange = range;
<div className="text-lg font-semibold">{count}</div> if (range === "lt10ms") displayRange = "<10ms";
<div className="text-xs text-gray-500 mt-1"> if (range === "gt1s") displayRange = ">1s";
{Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0) > 0
? ((count / Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0)) * 100).toFixed(1) return (
: 0}% <div key={range} className="p-3 rounded-lg border bg-card text-card-foreground shadow-sm">
<div className="text-sm font-medium text-gray-500">{displayRange}</div>
<div className="text-lg font-semibold">{count}</div>
<div className="text-xs text-gray-500 mt-1">
{Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0) > 0
? ((count / Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0)) * 100).toFixed(1)
: 0}%
</div>
</div> </div>
</div> );
)) })}
}
</div> </div>
</div> </div>
</div> </div>