Skip to content

Commit d3fefb9

Browse files
dqminh authored and Felix Ehrenpfort committed
Expose PSI stats in libcontainer handler
This adds two new sets of metrics:
- `psi_total`: the total number of seconds a resource has been under pressure
- `psi_avg`: the ratio of time a resource is under pressure over a sliding time window

For more details about these definitions, see:
- https://www.kernel.org/doc/html/latest/accounting/psi.html
- https://facebookmicrosites.github.io/psi/docs/overview

Signed-off-by: Daniel Dao <[email protected]>
1 parent b621e78 commit d3fefb9

File tree

5 files changed

+72
-0
lines changed

5 files changed

+72
-0
lines changed

cmd/cadvisor_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ func TestToIncludedMetrics(t *testing.T) {
112112
container.ResctrlMetrics: struct{}{},
113113
container.CPUSetMetrics: struct{}{},
114114
container.OOMMetrics: struct{}{},
115+
container.PSITotalMetrics: struct{}{},
116+
container.PSIAvgMetrics: struct{}{},
115117
},
116118
container.AllMetrics,
117119
{},

container/factory.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ const (
6666
ResctrlMetrics MetricKind = "resctrl"
6767
CPUSetMetrics MetricKind = "cpuset"
6868
OOMMetrics MetricKind = "oom_event"
69+
PSITotalMetrics MetricKind = "psi_total"
70+
PSIAvgMetrics MetricKind = "psi_avg"
6971
)
7072

7173
// AllMetrics represents all kinds of metrics that cAdvisor supported.
@@ -91,6 +93,8 @@ var AllMetrics = MetricSet{
9193
ResctrlMetrics: struct{}{},
9294
CPUSetMetrics: struct{}{},
9395
OOMMetrics: struct{}{},
96+
PSITotalMetrics: struct{}{},
97+
PSIAvgMetrics: struct{}{},
9498
}
9599

96100
// AllNetworkMetrics represents all network metrics that cAdvisor supports.

container/libcontainer/handler.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,20 @@ func (h *Handler) GetProcesses() ([]int, error) {
763763
return pids, nil
764764
}
765765

766+
// convertPSIData copies one pressure stall (PSI) record from libcontainer's
// cgroups.PSIData into cAdvisor's info.PSIData, field by field: the three
// averages (Avg10, Avg60, Avg300) and the Total counter.
//
// The values are assigned as-is; no scaling or unit conversion happens here.
// Both pointers are dereferenced without a nil check, so callers must pass
// non-nil values.
767+
func convertPSIData(from *cgroups.PSIData, to *info.PSIData) {
768+
to.Avg10 = from.Avg10
769+
to.Avg60 = from.Avg60
770+
to.Avg300 = from.Avg300
771+
to.Total = from.Total
772+
}
773+
774+
// convertPSI copies both halves of a PSI report, Some and Full, from
// libcontainer's cgroups.PSIStats into cAdvisor's info.PSIStats by handing
// each half to convertPSIData.
//
// from and to must be non-nil: taking the address of their fields
// dereferences them.
775+
func convertPSI(from *cgroups.PSIStats, to *info.PSIStats) {
776+
convertPSIData(&from.Some, &to.Some)
777+
convertPSIData(&from.Full, &to.Full)
778+
}
779+
766780
// Convert libcontainer stats to info.ContainerStats.
767781
func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
768782
ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode
@@ -772,6 +786,8 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
772786
ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
773787
ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime
774788

789+
convertPSI(&s.CpuStats.PSI, &ret.Cpu.PSI)
790+
775791
if !withPerCPU {
776792
return
777793
}
@@ -792,6 +808,8 @@ func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) {
792808
ret.DiskIo.IoWaitTime = diskStatsCopy(s.BlkioStats.IoWaitTimeRecursive)
793809
ret.DiskIo.IoMerged = diskStatsCopy(s.BlkioStats.IoMergedRecursive)
794810
ret.DiskIo.IoTime = diskStatsCopy(s.BlkioStats.IoTimeRecursive)
811+
812+
convertPSI(&s.BlkioStats.PSI, &ret.DiskIo.PSI)
795813
}
796814

797815
func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
@@ -800,6 +818,8 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
800818
ret.Memory.Failcnt = s.MemoryStats.Usage.Failcnt
801819
ret.Memory.KernelUsage = s.MemoryStats.KernelUsage.Usage
802820

821+
convertPSI(&s.MemoryStats.PSI, &ret.Memory.PSI)
822+
803823
if cgroups.IsCgroup2UnifiedMode() {
804824
ret.Memory.Cache = s.MemoryStats.Stats["file"]
805825
ret.Memory.RSS = s.MemoryStats.Stats["anon"]

container/libcontainer/handler_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,20 @@ func TestSetCPUStats(t *testing.T) {
110110
UsageInKernelmode: 734746 * nanosecondsInSeconds / clockTicks,
111111
UsageInUsermode: 2767637 * nanosecondsInSeconds / clockTicks,
112112
},
113+
PSI: cgroups.PSIStats{
114+
Some: cgroups.PSIData{
115+
Avg10: 0.1,
116+
Avg60: 0.2,
117+
Avg300: 0.3,
118+
Total: 100,
119+
},
120+
Full: cgroups.PSIData{
121+
Avg10: 0.4,
122+
Avg60: 0.5,
123+
Avg300: 0.6,
124+
Total: 200,
125+
},
126+
},
113127
},
114128
}
115129
var ret info.ContainerStats
@@ -123,6 +137,20 @@ func TestSetCPUStats(t *testing.T) {
123137
System: s.CpuStats.CpuUsage.UsageInKernelmode,
124138
Total: 33802947350272,
125139
},
140+
PSI: info.PSIStats{
141+
Some: info.PSIData{
142+
Avg10: 0.1,
143+
Avg60: 0.2,
144+
Avg300: 0.3,
145+
Total: 100,
146+
},
147+
Full: info.PSIData{
148+
Avg10: 0.4,
149+
Avg60: 0.5,
150+
Avg300: 0.6,
151+
Total: 200,
152+
},
153+
},
126154
},
127155
}
128156

info/v1/container.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,18 @@ func (ci *ContainerInfo) StatsEndTime() time.Time {
261261
return ret
262262
}
263263

264+
// PSIData holds one pressure stall information (PSI) record: three averages
// and a running total.
// NOTE(review): the window lengths and units described on the fields below are
// taken from the field names and the kernel PSI documentation, not from code
// in this file -- confirm before relying on them.
type PSIData struct {
265+
// Avg10 is serialized as "avg10"; presumably the stall ratio over the last
// 10 seconds.
Avg10 float64 `json:"avg10"`
266+
// Avg60 is serialized as "avg60"; presumably the stall ratio over the last
// 60 seconds.
Avg60 float64 `json:"avg60"`
267+
// Avg300 is serialized as "avg300"; presumably the stall ratio over the last
// 300 seconds.
Avg300 float64 `json:"avg300"`
268+
// Total is serialized as "total"; a cumulative stall-time counter.
// NOTE(review): the unit is not established here (the kernel reports
// microseconds) -- confirm.
Total uint64 `json:"total"`
269+
}
270+
271+
// PSIStats groups the two PSI records, Some and Full, kept for a resource.
//
// NOTE(review): with encoding/json, omitempty never omits a non-pointer struct
// value, so "some" and "full" are always emitted even when every field is
// zero. If omission is intended, the fields would need to be pointers (or use
// omitzero on Go 1.24+); that would change the serialized output, so it is
// only flagged here.
type PSIStats struct {
272+
// Some is serialized as "some"; presumably pressure where at least some
// tasks are stalled -- confirm against the kernel PSI documentation.
Some PSIData `json:"some,omitempty"`
273+
// Full is serialized as "full"; presumably pressure where all non-idle tasks
// are stalled at once -- confirm against the kernel PSI documentation.
Full PSIData `json:"full,omitempty"`
274+
}
275+
264276
// This mirrors kernel internal structure.
265277
type LoadStats struct {
266278
// Number of sleeping tasks.
@@ -335,6 +347,8 @@ type CpuStats struct {
335347
LoadAverage int32 `json:"load_average"`
336348
// from LoadStats.NrUninterruptible
337349
LoadDAverage int32 `json:"load_d_average"`
350+
351+
PSI PSIStats `json:"psi,omitempty"`
338352
}
339353

340354
type PerDiskStats struct {
@@ -353,6 +367,8 @@ type DiskIoStats struct {
353367
IoWaitTime []PerDiskStats `json:"io_wait_time,omitempty"`
354368
IoMerged []PerDiskStats `json:"io_merged,omitempty"`
355369
IoTime []PerDiskStats `json:"io_time,omitempty"`
370+
371+
PSI PSIStats `json:"psi,omitempty"`
356372
}
357373

358374
type HugetlbStats struct {
@@ -411,6 +427,8 @@ type MemoryStats struct {
411427

412428
ContainerData MemoryStatsMemoryData `json:"container_data,omitempty"`
413429
HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"`
430+
431+
PSI PSIStats `json:"psi,omitempty"`
414432
}
415433

416434
type CPUSetStats struct {

0 commit comments

Comments
 (0)