@@ -54,7 +54,10 @@ type Handler struct {
54
54
rootFs string
55
55
pid int
56
56
includedMetrics container.MetricSet
57
+ // pidMetricsCache holds CPU scheduler stats for existing processes (map key is PID) between calls to schedulerStatsFromProcs.
57
58
pidMetricsCache map [int ]* info.CpuSchedstat
59
+ // pidMetricsSaved holds accumulated CPU scheduler stats for processes that no longer exist.
60
+ pidMetricsSaved info.CpuSchedstat
58
61
cycles uint64
59
62
}
60
63
@@ -93,14 +96,9 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
93
96
stats := newContainerStats (libcontainerStats , h .includedMetrics )
94
97
95
98
if h .includedMetrics .Has (container .ProcessSchedulerMetrics ) {
96
- pids , err : = h .cgroupManager . GetAllPids ()
99
+ stats . Cpu . Schedstat , err = h .schedulerStatsFromProcs ()
97
100
if err != nil {
98
- klog .V (4 ).Infof ("Could not get PIDs for container %d: %v" , h .pid , err )
99
- } else {
100
- stats .Cpu .Schedstat , err = schedulerStatsFromProcs (h .rootFs , pids , h .pidMetricsCache )
101
- if err != nil {
102
- klog .V (4 ).Infof ("Unable to get Process Scheduler Stats: %v" , err )
103
- }
101
+ klog .V (4 ).Infof ("Unable to get Process Scheduler Stats: %v" , err )
104
102
}
105
103
}
106
104
@@ -314,9 +312,14 @@ func processStatsFromProcs(rootFs string, cgroupPath string, rootPid int) (info.
314
312
return processStats , nil
315
313
}
316
314
317
- func schedulerStatsFromProcs (rootFs string , pids []int , pidMetricsCache map [int ]* info.CpuSchedstat ) (info.CpuSchedstat , error ) {
315
+ func (h * Handler ) schedulerStatsFromProcs () (info.CpuSchedstat , error ) {
316
+ pids , err := h .cgroupManager .GetAllPids ()
317
+ if err != nil {
318
+ return info.CpuSchedstat {}, fmt .Errorf ("Could not get PIDs for container %d: %w" , h .pid , err )
319
+ }
320
+ alivePids := make (map [int ]struct {}, len (pids ))
318
321
for _ , pid := range pids {
319
- f , err := os .Open (path .Join (rootFs , "proc" , strconv .Itoa (pid ), "schedstat" ))
322
+ f , err := os .Open (path .Join (h . rootFs , "proc" , strconv .Itoa (pid ), "schedstat" ))
320
323
if err != nil {
321
324
return info.CpuSchedstat {}, fmt .Errorf ("couldn't open scheduler statistics for process %d: %v" , pid , err )
322
325
}
@@ -325,14 +328,15 @@ func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]
325
328
if err != nil {
326
329
return info.CpuSchedstat {}, fmt .Errorf ("couldn't read scheduler statistics for process %d: %v" , pid , err )
327
330
}
331
+ alivePids [pid ] = struct {}{}
328
332
rawMetrics := bytes .Split (bytes .TrimRight (contents , "\n " ), []byte (" " ))
329
333
if len (rawMetrics ) != 3 {
330
334
return info.CpuSchedstat {}, fmt .Errorf ("unexpected number of metrics in schedstat file for process %d" , pid )
331
335
}
332
- cacheEntry , ok := pidMetricsCache [pid ]
336
+ cacheEntry , ok := h . pidMetricsCache [pid ]
333
337
if ! ok {
334
338
cacheEntry = & info.CpuSchedstat {}
335
- pidMetricsCache [pid ] = cacheEntry
339
+ h . pidMetricsCache [pid ] = cacheEntry
336
340
}
337
341
for i , rawMetric := range rawMetrics {
338
342
metric , err := strconv .ParseUint (string (rawMetric ), 10 , 64 )
@@ -349,11 +353,20 @@ func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]
349
353
}
350
354
}
351
355
}
352
- schedstats := info. CpuSchedstat {}
353
- for _ , v := range pidMetricsCache {
356
+ schedstats := h . pidMetricsSaved // copy
357
+ for p , v := range h . pidMetricsCache {
354
358
schedstats .RunPeriods += v .RunPeriods
355
359
schedstats .RunqueueTime += v .RunqueueTime
356
360
schedstats .RunTime += v .RunTime
361
+ if _ , alive := alivePids [p ]; ! alive {
362
+ // PID p is gone: accumulate its stats ...
363
+ h .pidMetricsSaved .RunPeriods += v .RunPeriods
364
+ h .pidMetricsSaved .RunqueueTime += v .RunqueueTime
365
+ h .pidMetricsSaved .RunTime += v .RunTime
366
+ // ... and remove its cache entry, to prevent
367
+ // pidMetricsCache from growing.
368
+ delete (h .pidMetricsCache , p )
369
+ }
357
370
}
358
371
return schedstats , nil
359
372
}
0 commit comments