Skip to content

Commit 5ca614b

Browse files
committed
fix metrics collection
1 parent 86eafa3 commit 5ca614b

File tree

2 files changed

+71
-36
lines changed

2 files changed

+71
-36
lines changed

internal/metrics/collector.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,7 +1453,6 @@ func collectSystemSummary() (*SystemSummary, error) {
14531453

14541454
// Disk - aggregate all mounts (filter pseudo filesystems)
14551455
if partitions, err := disk.Partitions(false); err == nil {
1456-
var maxUsage float64
14571456
for _, p := range partitions {
14581457
// Skip pseudo filesystems that report 100% or have no real storage
14591458
if shouldSkipPartition(p) {
@@ -1463,12 +1462,12 @@ func collectSystemSummary() (*SystemSummary, error) {
14631462
s.DiskTotal += usage.Total
14641463
s.DiskUsed += usage.Used
14651464
s.DiskFree += usage.Free
1466-
if usage.UsedPercent > maxUsage {
1467-
maxUsage = usage.UsedPercent
1468-
}
14691465
}
14701466
}
1471-
s.DiskUsage = maxUsage
1467+
// Calculate percentage from aggregated values (consistent with Total/Used sums)
1468+
if s.DiskTotal > 0 {
1469+
s.DiskUsage = float64(s.DiskUsed) / float64(s.DiskTotal) * 100
1470+
}
14721471
}
14731472

14741473
// Disk IOPS

internal/metrics/cpu.go

Lines changed: 67 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"math"
55
"runtime"
66
"sync"
7+
"time"
78

89
"github.com/shirou/gopsutil/v3/cpu"
910
)
@@ -22,9 +23,12 @@ type CPUMetrics struct {
2223
var (
2324
lastCpuTimes cpu.TimesStat
2425
lastPerCoreCpuTimes []cpu.TimesStat
26+
lastCpuMetrics CPUMetrics
2527
cpuCacheMu sync.RWMutex
2628
cpuCacheInitialized bool
2729
perCoreCacheInit bool
30+
lastCpuSampleTime time.Time
31+
minCPUSampleInterval = 100 * time.Millisecond // Minimum time between samples for accurate delta
2832
)
2933

3034
// init initializes CPU monitoring by storing initial CPU times
@@ -33,6 +37,7 @@ func init() {
3337
if times, err := cpu.Times(false); err == nil && len(times) > 0 {
3438
lastCpuTimes = times[0]
3539
cpuCacheInitialized = true
40+
lastCpuSampleTime = time.Now()
3641
}
3742

3843
// Initialize per-core baseline
@@ -42,50 +47,81 @@ func init() {
4247
}
4348
}
4449

45-
// GetCPUMetrics calculates detailed CPU usage metrics using cached previous measurements.
46-
// Returns percentages for total, user, system, iowait, steal, and idle time.
47-
// This is a non-blocking call that returns instant results using delta calculation.
50+
// GetCPUMetrics calculates detailed CPU usage metrics.
51+
// Uses gopsutil's built-in cpu.Percent for accurate cross-platform CPU measurement.
52+
// On first call or when the cache is stale, performs a blocking 100ms measurement while holding cpuCacheMu, so concurrent callers wait for it.
53+
// Subsequent calls within the sample interval return cached values instantly.
4854
func GetCPUMetrics() (CPUMetrics, error) {
49-
times, err := cpu.Times(false)
50-
if err != nil || len(times) == 0 {
51-
return CPUMetrics{}, err
52-
}
53-
5455
cpuCacheMu.Lock()
5556
defer cpuCacheMu.Unlock()
5657

57-
// If not initialized yet, initialize and return zeros (first call)
58-
if !cpuCacheInitialized {
59-
lastCpuTimes = times[0]
60-
cpuCacheInitialized = true
61-
return CPUMetrics{}, nil
58+
// Check if we have a recent enough measurement WITH valid data
59+
// Requiring lastCpuMetrics.Total > 0 keeps us from returning the zero-valued cache left by init(); as a side effect, a genuine 0% reading is never served from cache.
60+
if cpuCacheInitialized && lastCpuMetrics.Total > 0 && time.Since(lastCpuSampleTime) < minCPUSampleInterval {
61+
// Return last calculated metrics (avoid too frequent measurements)
62+
return lastCpuMetrics, nil
6263
}
6364

64-
t1 := lastCpuTimes
65-
t2 := times[0]
65+
// Use gopsutil's built-in Percent function which handles all platform differences
66+
// This is the most accurate way to measure CPU on macOS/Linux/Windows
67+
percentages, err := cpu.Percent(100*time.Millisecond, false)
68+
if err != nil || len(percentages) == 0 {
69+
return CPUMetrics{}, err
70+
}
6671

67-
// Calculate deltas
68-
t1All, _ := getAllBusy(t1)
69-
t2All, _ := getAllBusy(t2)
72+
totalCPU := percentages[0]
7073

71-
totalDelta := t2All - t1All
72-
if totalDelta <= 0 {
73-
// No time passed or clock issue, return zeros
74-
return CPUMetrics{}, nil
74+
// Get CPU times for breakdown (user/system/idle/iowait)
75+
times, err := cpu.Times(false)
76+
if err != nil || len(times) == 0 {
77+
// If cpu.Times fails, still return the total CPU percentage (with idle derived from it)
78+
metrics := CPUMetrics{
79+
Total: totalCPU,
80+
Idle: 100 - totalCPU,
81+
}
82+
lastCpuMetrics = metrics
83+
lastCpuSampleTime = time.Now()
84+
cpuCacheInitialized = true
85+
return metrics, nil
7586
}
7687

77-
// Calculate percentages for each metric
78-
metrics := CPUMetrics{
79-
Total: calculateBusy(t1, t2),
80-
User: clampPercent((t2.User - t1.User) / totalDelta * 100),
81-
System: clampPercent((t2.System - t1.System) / totalDelta * 100),
82-
Iowait: clampPercent((t2.Iowait - t1.Iowait) / totalDelta * 100),
83-
Steal: clampPercent((t2.Steal - t1.Steal) / totalDelta * 100), // Critical for AWS/GCP!
84-
Idle: clampPercent((t2.Idle - t1.Idle) / totalDelta * 100),
88+
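// Calculate the breakdown if we have a previous sample (deltas span the time since lastCpuTimes was stored, which may differ from the 100ms window behind Total)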
89+
var metrics CPUMetrics
90+
if cpuCacheInitialized {
91+
t1 := lastCpuTimes
92+
t2 := times[0]
93+
94+
t1All, _ := getAllBusy(t1)
95+
t2All, _ := getAllBusy(t2)
96+
totalDelta := t2All - t1All
97+
98+
if totalDelta > 0 {
99+
metrics = CPUMetrics{
100+
Total: totalCPU, // Use gopsutil's accurate total
101+
User: clampPercent((t2.User - t1.User) / totalDelta * 100),
102+
System: clampPercent((t2.System - t1.System) / totalDelta * 100),
103+
Iowait: clampPercent((t2.Iowait - t1.Iowait) / totalDelta * 100),
104+
Steal: clampPercent((t2.Steal - t1.Steal) / totalDelta * 100),
105+
Idle: clampPercent((t2.Idle - t1.Idle) / totalDelta * 100),
106+
}
107+
} else {
108+
metrics = CPUMetrics{
109+
Total: totalCPU,
110+
Idle: 100 - totalCPU,
111+
}
112+
}
113+
} else {
114+
metrics = CPUMetrics{
115+
Total: totalCPU,
116+
Idle: 100 - totalCPU,
117+
}
85118
}
86119

87-
// Update cache for next call
120+
// Update cache
88121
lastCpuTimes = times[0]
122+
lastCpuMetrics = metrics
123+
lastCpuSampleTime = time.Now()
124+
cpuCacheInitialized = true
89125

90126
return metrics, nil
91127
}

0 commit comments

Comments
 (0)