Skip to content

Commit 5fcbccc

Browse files
committed
<fix> stats: CPU perusage use the wrong systemUsage
Signed-off-by: zzzzzzzzzy9 <[email protected]>
1 parent 694c405 commit 5fcbccc

File tree

5 files changed

+105
-11
lines changed

5 files changed

+105
-11
lines changed

pkg/cmd/container/stats.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,17 @@ func collect(ctx context.Context, globalOptions types.GlobalCommandOptions, s *s
379379
continue
380380
}
381381

382+
// Sample system CPU usage close to container usage to avoid
383+
// noise in metric calculations.
384+
systemUsage, onlineCPUs, err := getSystemCPUUsage()
385+
if err != nil {
386+
u <- err
387+
continue
388+
}
389+
systemInfo := statsutil.SystemInfo{
390+
OnlineCPUs: onlineCPUs,
391+
SystemUsage: systemUsage,
392+
}
382393
metric, err := task.Metrics(ctx)
383394
if err != nil {
384395
u <- err
@@ -397,7 +408,7 @@ func collect(ctx context.Context, globalOptions types.GlobalCommandOptions, s *s
397408
}
398409

399410
// when (firstSet == true), we only set container stats without rendering stat entry
400-
statsEntry, err := setContainerStatsAndRenderStatsEntry(previousStats, firstSet, anydata, int(task.Pid()), netNS.Interfaces)
411+
statsEntry, err := setContainerStatsAndRenderStatsEntry(previousStats, firstSet, anydata, int(task.Pid()), netNS.Interfaces, systemInfo)
401412
if err != nil {
402413
u <- err
403414
continue

pkg/cmd/container/stats_linux.go

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@
1717
package container
1818

1919
import (
20+
"bufio"
2021
"errors"
2122
"fmt"
23+
"io"
2224
"net"
25+
"os"
26+
"strconv"
2327
"strings"
2428
"time"
2529

@@ -33,8 +37,17 @@ import (
3337
"github.com/containerd/nerdctl/v2/pkg/statsutil"
3438
)
3539

40+
const (
41+
// clockTicksPerSecond is the number of clock ticks per second used to
42+
// scale /proc/stat counters. The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
43+
// on Linux it's a constant which is safe to be hard coded,
44+
// so we can avoid using cgo here. For details, see:
// https://github.com/containerd/cgroups/pull/12
45+
clockTicksPerSecond = 100
46+
// nanoSecondsPerSecond is the number of nanoseconds in one second; together
// with clockTicksPerSecond it converts tick counts into nanoseconds.
nanoSecondsPerSecond = 1e9
47+
)
48+
3649
//nolint:nakedret
37-
func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStats, firstSet bool, anydata interface{}, pid int, interfaces []native.NetInterface) (statsEntry statsutil.StatsEntry, err error) {
50+
func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStats, firstSet bool, anydata interface{}, pid int, interfaces []native.NetInterface, systemInfo statsutil.SystemInfo) (statsEntry statsutil.StatsEntry, err error) {
3851

3952
var (
4053
data *v1.Metrics
@@ -96,10 +109,10 @@ func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStat
96109

97110
if data != nil {
98111
if !firstSet {
99-
statsEntry, err = statsutil.SetCgroupStatsFields(previousStats, data, nlinks)
112+
statsEntry, err = statsutil.SetCgroupStatsFields(previousStats, data, nlinks, systemInfo)
100113
}
101114
previousStats.CgroupCPU = data.CPU.Usage.Total
102-
previousStats.CgroupSystem = data.CPU.Usage.Kernel
115+
previousStats.CgroupSystem = systemInfo.SystemUsage
103116
if err != nil {
104117
return
105118
}
@@ -117,3 +130,59 @@ func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStat
117130

118131
return
119132
}
133+
134+
// getSystemCPUUsage reads the system's CPU usage from /proc/stat and returns
135+
// the total CPU usage in nanoseconds and the number of CPUs.
//
// If /proc/stat cannot be opened, the error from os.Open is returned
// unchanged together with zero values. Parsing is delegated to
// readSystemCPUUsage, whose results are returned as-is.
136+
func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, _ error) {
137+
f, err := os.Open("/proc/stat")
138+
if err != nil {
139+
return 0, 0, err
140+
}
141+
// The file is only read, so the Close error is not inspected.
defer f.Close()
142+
143+
return readSystemCPUUsage(f)
144+
}
145+
146+
// readSystemCPUUsage parses CPU usage information from a reader providing
147+
// /proc/stat format data. It returns the total CPU usage in nanoseconds
148+
// and the number of CPUs. More:
149+
// https://github.com/moby/moby/blob/26db31fdab628a2345ed8f179e575099384166a9/daemon/stats_unix.go#L327-L368
150+
func readSystemCPUUsage(r io.Reader) (cpuUsage uint64, cpuNum uint32, _ error) {
151+
rdr := bufio.NewReaderSize(r, 1024)
152+
153+
for {
154+
data, isPartial, err := rdr.ReadLine()
155+
156+
if err != nil {
157+
return 0, 0, fmt.Errorf("error scanning /proc/stat file: %w", err)
158+
}
159+
// Assume all cpu* records are at the start of the file, like glibc:
160+
// https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
161+
if isPartial || len(data) < 4 {
162+
break
163+
}
164+
line := string(data)
165+
if line[:3] != "cpu" {
166+
break
167+
}
168+
if line[3] == ' ' {
169+
parts := strings.Fields(line)
170+
if len(parts) < 8 {
171+
return 0, 0, fmt.Errorf("invalid number of cpu fields")
172+
}
173+
var totalClockTicks uint64
174+
for _, i := range parts[1:8] {
175+
v, err := strconv.ParseUint(i, 10, 64)
176+
if err != nil {
177+
return 0, 0, fmt.Errorf("unable to convert value %s to int: %w", i, err)
178+
}
179+
totalClockTicks += v
180+
}
181+
cpuUsage = (totalClockTicks * nanoSecondsPerSecond) / clockTicksPerSecond
182+
}
183+
if '0' <= line[3] && line[3] <= '9' {
184+
cpuNum++
185+
}
186+
}
187+
return cpuUsage, cpuNum, nil
188+
}

pkg/cmd/container/stats_nolinux.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ import (
2323
"github.com/containerd/nerdctl/v2/pkg/statsutil"
2424
)
2525

26-
func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStats, firstSet bool, anydata interface{}, pid int, interfaces []native.NetInterface) (statsutil.StatsEntry, error) {
26+
// setContainerStatsAndRenderStatsEntry is the stats_nolinux.go stub. It
// ignores every argument, including systemInfo, and always returns an empty
// statsutil.StatsEntry with a nil error.
func setContainerStatsAndRenderStatsEntry(previousStats *statsutil.ContainerStats, firstSet bool, anydata interface{}, pid int, interfaces []native.NetInterface, systemInfo statsutil.SystemInfo) (statsutil.StatsEntry, error) {
2727
return statsutil.StatsEntry{}, nil
2828
}
29+
30+
// getSystemCPUUsage is the stats_nolinux.go stub. It reads nothing and
31+
// always returns zero CPU usage, zero CPUs and a nil error.
32+
func getSystemCPUUsage() (uint64, uint32, error) {
33+
return 0, 0, nil
34+
}

pkg/statsutil/stats.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ import (
2626
units "github.com/docker/go-units"
2727
)
2828

29+
// SystemInfo carries the host-wide CPU figures that are sampled next to a
// container's metrics and used when computing its CPU percentage.
type SystemInfo struct {
30+
// OnlineCPUs is the number of CPUs reported by getSystemCPUUsage. A zero
// value makes calculateCgroupCPUPercent fall back to the per-CPU usage count.
OnlineCPUs uint32
31+
// SystemUsage is the total system CPU usage reported by
// getSystemCPUUsage (documented there as nanoseconds).
SystemUsage uint64
32+
}
33+
2934
// StatsEntry represents the statistics data collected from a container
3035
type StatsEntry struct {
3136
Name string

pkg/statsutil/stats_linux.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ func calculateMemPercent(limit float64, usedNo float64) float64 {
3838
return 0
3939
}
4040

41-
func SetCgroupStatsFields(previousStats *ContainerStats, data *v1.Metrics, links []netlink.Link) (StatsEntry, error) {
42-
cpuPercent := calculateCgroupCPUPercent(previousStats, data)
41+
func SetCgroupStatsFields(previousStats *ContainerStats, data *v1.Metrics, links []netlink.Link, systemInfo SystemInfo) (StatsEntry, error) {
42+
cpuPercent := calculateCgroupCPUPercent(previousStats, data, systemInfo)
4343
blkRead, blkWrite := calculateCgroupBlockIO(data)
4444
mem := calculateCgroupMemUsage(data)
4545
memLimit := getCgroupMemLimit(float64(data.Memory.Usage.Limit))
@@ -114,18 +114,21 @@ func getHostMemLimit() float64 {
114114
return float64(^uint64(0))
115115
}
116116

117-
func calculateCgroupCPUPercent(previousStats *ContainerStats, metrics *v1.Metrics) float64 {
117+
// calculateCgroupCPUPercent returns the container's CPU usage as a percentage:
// the container's CPU delta divided by the system CPU delta since the previous
// reading, multiplied by the number of online CPUs and by 100. It returns 0
// when either delta is not positive.
func calculateCgroupCPUPercent(previousStats *ContainerStats, metrics *v1.Metrics, systemInfo SystemInfo) float64 {
118118
var (
119119
cpuPercent = 0.0
120120
// calculate the change for the cpu usage of the container in between readings
121121
cpuDelta = float64(metrics.CPU.Usage.Total) - float64(previousStats.CgroupCPU)
122122
// calculate the change for the entire system between readings
123-
systemDelta = float64(metrics.CPU.Usage.Kernel) - float64(previousStats.CgroupSystem)
124-
onlineCPUs = float64(len(metrics.CPU.Usage.PerCPU))
123+
systemDelta = float64(systemInfo.SystemUsage) - float64(previousStats.CgroupSystem)
124+
onlineCPUs = systemInfo.OnlineCPUs
125125
)
126126

127+
// No CPU count supplied by the caller: fall back to the number of per-CPU
// usage entries in the cgroup metrics.
if onlineCPUs == 0 {
128+
onlineCPUs = uint32(len(metrics.CPU.Usage.PerCPU))
129+
}
127130
// Only compute a percentage when both deltas are positive; otherwise keep 0.
if systemDelta > 0.0 && cpuDelta > 0.0 {
128-
cpuPercent = (cpuDelta / systemDelta) * onlineCPUs * 100.0
131+
cpuPercent = (cpuDelta / systemDelta) * float64(onlineCPUs) * 100.0
129132
}
130133
return cpuPercent
131134
}

0 commit comments

Comments
 (0)