feat(metrics): 统一延迟分布桶命名并优化日志记录

- 将延迟分布桶键名从 "<10ms" 和 ">1s" 改为 "lt10ms" 和 "gt1s"
- 在指标处理和收集过程中添加详细日志，帮助诊断数据处理流程
- 前端页面适配新的桶键名，并提供友好的显示转换
- 增强延迟分布数据处理的健壮性和可读性
This commit is contained in:
wood chen 2025-03-09 11:43:26 +08:00
parent 2cb88a4f5e
commit f2e1b8cbf5
3 changed files with 70 additions and 28 deletions

View File

@ -161,30 +161,54 @@ func (h *ProxyHandler) MetricsHandler(w http.ResponseWriter, r *http.Request) {
metrics.LatencyStats.Max = utils.SafeString(latencyStats["max"], "0ms")
// 处理分布数据
log.Printf("[MetricsHandler] 处理延迟分布数据: stats[latency_stats]=%v", stats["latency_stats"])
if stats["latency_stats"] != nil {
if distribution, ok := stats["latency_stats"].(map[string]interface{})["distribution"]; ok && distribution != nil {
if distributionMap, ok := distribution.(map[string]interface{}); ok {
metrics.LatencyStats.Distribution = make(map[string]int64)
for k, v := range distributionMap {
if intValue, ok := v.(float64); ok {
metrics.LatencyStats.Distribution[k] = int64(intValue)
} else if intValue, ok := v.(int64); ok {
metrics.LatencyStats.Distribution[k] = intValue
latencyStatsMap, ok := stats["latency_stats"].(map[string]interface{})
if ok {
log.Printf("[MetricsHandler] latencyStatsMap=%v", latencyStatsMap)
distribution, ok := latencyStatsMap["distribution"]
if ok && distribution != nil {
log.Printf("[MetricsHandler] distribution=%v", distribution)
distributionMap, ok := distribution.(map[string]interface{})
if ok {
log.Printf("[MetricsHandler] distributionMap=%v", distributionMap)
metrics.LatencyStats.Distribution = make(map[string]int64)
for k, v := range distributionMap {
log.Printf("[MetricsHandler] 处理延迟分布项: %s=%v (type=%T)", k, v, v)
if intValue, ok := v.(float64); ok {
metrics.LatencyStats.Distribution[k] = int64(intValue)
log.Printf("[MetricsHandler] 转换为int64: %s=%d", k, int64(intValue))
} else if intValue, ok := v.(int64); ok {
metrics.LatencyStats.Distribution[k] = intValue
log.Printf("[MetricsHandler] 已经是int64: %s=%d", k, intValue)
} else {
log.Printf("[MetricsHandler] 无法转换: %s=%v (type=%T)", k, v, v)
}
}
} else {
log.Printf("[MetricsHandler] distribution不是map: %v (type=%T)", distribution, distribution)
}
} else {
log.Printf("[MetricsHandler] 没有distribution字段或为nil: ok=%v, distribution=%v", ok, distribution)
}
} else {
log.Printf("[MetricsHandler] latency_stats不是map: %v (type=%T)", stats["latency_stats"], stats["latency_stats"])
}
} else {
log.Printf("[MetricsHandler] latency_stats为nil")
}
// 如果分布数据为空,初始化一个空的分布
if metrics.LatencyStats.Distribution == nil {
log.Printf("[MetricsHandler] 初始化空的延迟分布")
metrics.LatencyStats.Distribution = make(map[string]int64)
// 添加默认的延迟桶
metrics.LatencyStats.Distribution["<10ms"] = 0
metrics.LatencyStats.Distribution["lt10ms"] = 0
metrics.LatencyStats.Distribution["10-50ms"] = 0
metrics.LatencyStats.Distribution["50-200ms"] = 0
metrics.LatencyStats.Distribution["200-1000ms"] = 0
metrics.LatencyStats.Distribution[">1s"] = 0
metrics.LatencyStats.Distribution["gt1s"] = 0
}
// 填充错误统计数据

View File

@ -61,7 +61,7 @@ func InitCollector(cfg *config.Config) error {
instance.bandwidthStats.history = make(map[string]int64)
// 初始化延迟分布桶
buckets := []string{"<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"}
buckets := []string{"lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"}
for _, bucket := range buckets {
counter := new(int64)
*counter = 0
@ -132,7 +132,7 @@ func (c *Collector) RecordRequest(path string, status int, latency time.Duration
var bucketKey string
switch {
case latencyMs < 10:
bucketKey = "<10ms"
bucketKey = "lt10ms"
case latencyMs < 50:
bucketKey = "10-50ms"
case latencyMs < 200:
@ -140,14 +140,20 @@ func (c *Collector) RecordRequest(path string, status int, latency time.Duration
case latencyMs < 1000:
bucketKey = "200-1000ms"
default:
bucketKey = ">1s"
bucketKey = "gt1s"
}
log.Printf("[Metrics] 更新延迟分布: 路径=%s, 延迟=%dms, 桶=%s", path, latencyMs, bucketKey)
if counter, ok := c.latencyBuckets.Load(bucketKey); ok {
atomic.AddInt64(counter.(*int64), 1)
oldValue := atomic.LoadInt64(counter.(*int64))
newValue := atomic.AddInt64(counter.(*int64), 1)
log.Printf("[Metrics] 延迟分布桶 %s: %d -> %d", bucketKey, oldValue, newValue)
} else {
counter := new(int64)
*counter = 1
c.latencyBuckets.Store(bucketKey, counter)
log.Printf("[Metrics] 新建延迟分布桶: %s = 1", bucketKey)
}
// 更新路径统计
@ -347,19 +353,25 @@ func (c *Collector) GetStats() map[string]interface{} {
latencyDistribution := make(map[string]int64)
// 确保所有桶都存在即使计数为0
buckets := []string{"<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"}
buckets := []string{"lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"}
for _, bucket := range buckets {
if counter, ok := c.latencyBuckets.Load(bucket); ok {
if counter != nil {
latencyDistribution[bucket] = atomic.LoadInt64(counter.(*int64))
value := atomic.LoadInt64(counter.(*int64))
latencyDistribution[bucket] = value
log.Printf("[Metrics] 延迟分布桶 %s = %d", bucket, value)
} else {
latencyDistribution[bucket] = 0
log.Printf("[Metrics] 延迟分布桶 %s = 0 (counter is nil)", bucket)
}
} else {
latencyDistribution[bucket] = 0
log.Printf("[Metrics] 延迟分布桶 %s = 0 (bucket not found)", bucket)
}
}
log.Printf("[Metrics] 延迟分布: %v", latencyDistribution)
// 获取最近请求记录(使用读锁)
recentRequests := c.recentRequests.GetAll()

View File

@ -266,21 +266,27 @@ export default function DashboardPage() {
Object.entries(metrics.latency_stats.distribution)
.sort((a, b) => {
// 按照延迟范围排序
const order = ["<10ms", "10-50ms", "50-200ms", "200-1000ms", ">1s"];
const order = ["lt10ms", "10-50ms", "50-200ms", "200-1000ms", "gt1s"];
return order.indexOf(a[0]) - order.indexOf(b[0]);
})
.map(([range, count]) => (
<div key={range} className="p-3 rounded-lg border bg-card text-card-foreground shadow-sm">
<div className="text-sm font-medium text-gray-500">{range}</div>
<div className="text-lg font-semibold">{count}</div>
<div className="text-xs text-gray-500 mt-1">
{Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0) > 0
? ((count / Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0)) * 100).toFixed(1)
: 0}%
.map(([range, count]) => {
// 转换桶键为更友好的显示
let displayRange = range;
if (range === "lt10ms") displayRange = "<10ms";
if (range === "gt1s") displayRange = ">1s";
return (
<div key={range} className="p-3 rounded-lg border bg-card text-card-foreground shadow-sm">
<div className="text-sm font-medium text-gray-500">{displayRange}</div>
<div className="text-lg font-semibold">{count}</div>
<div className="text-xs text-gray-500 mt-1">
{Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0) > 0
? ((count / Object.values(metrics.latency_stats?.distribution || {}).reduce((sum, val) => sum + val, 0)) * 100).toFixed(1)
: 0}%
</div>
</div>
</div>
))
}
);
})}
</div>
</div>
</div>