Skip to content

Commit b602afb

Browse files
committed
shim: add support for containerd v2 metrics
Add support for containerd v2 metrics in the shim. The v2 metrics are only used when runsc is run with --systemd-cgroup=true. Containerd requires v2 metrics when the host is run with CGroupsV2. This issue was noticed when attempting to gather metrics on AL2023, which defaults to CGroupsV2. Fixes: #11472 Signed-off-by: Champ-Goblem <cameron@northflank.com>
1 parent dd8ea25 commit b602afb

File tree

2 files changed

+53
-2
lines changed

2 files changed

+53
-2
lines changed

pkg/shim/runsc/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ go_library(
3131
"@com_github_containerd_cgroups//:go_default_library",
3232
"@com_github_containerd_cgroups//stats/v1:go_default_library",
3333
"@com_github_containerd_cgroups//v2:go_default_library",
34+
"@com_github_containerd_cgroups//v2/stats:go_default_library",
3435
"@com_github_containerd_console//:go_default_library",
3536
"@com_github_containerd_containerd//api/events:go_default_library",
3637
"@com_github_containerd_containerd//api/types/task:go_default_library",
@@ -47,6 +48,7 @@ go_library(
4748
"@com_github_containerd_errdefs//:go_default_library",
4849
"@com_github_containerd_fifo//:go_default_library",
4950
"@com_github_containerd_log//:go_default_library",
51+
"@com_github_containerd_go_runc//:go_default_library",
5052
"@com_github_containerd_typeurl//:go_default_library",
5153
"@com_github_gogo_protobuf//types:go_default_library",
5254
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",

pkg/shim/runsc/service.go

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/containerd/cgroups"
3030
cgroupsstats "github.com/containerd/cgroups/stats/v1"
3131
cgroupsv2 "github.com/containerd/cgroups/v2"
32+
cgroupsv2stats "github.com/containerd/cgroups/v2/stats"
3233
"github.com/containerd/console"
3334
"github.com/containerd/containerd/api/events"
3435
"github.com/containerd/containerd/api/types/task"
@@ -42,6 +43,7 @@ import (
4243
taskAPI "github.com/containerd/containerd/runtime/v2/task"
4344
"github.com/containerd/containerd/sys/reaper"
4445
"github.com/containerd/errdefs"
46+
"github.com/containerd/go-runc"
4547
"github.com/containerd/log"
4648
"github.com/containerd/typeurl"
4749
"github.com/gogo/protobuf/types"
@@ -660,6 +662,18 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
660662
// as runc.
661663
//
662664
// [0]: https://github.com/google/gvisor/blob/277a0d5a1fbe8272d4729c01ee4c6e374d047ebc/runsc/boot/events.go#L61-L81
665+
return s.getStats(stats, r)
666+
}
667+
668+
func (s *runscService) getStats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
669+
if s.opts.RunscConfig["systemd-cgroup"] == "true" {
670+
return s.getV2Stats(stats, r)
671+
} else {
672+
return s.getV1Stats(stats, r)
673+
}
674+
}
675+
676+
func (s *runscService) getV1Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
663677
metrics := &cgroupsstats.Metrics{
664678
CPU: &cgroupsstats.CPUStat{
665679
Usage: &cgroupsstats.CPUUsage{
@@ -708,10 +722,45 @@ func (s *runscService) Stats(ctx context.Context, r *taskAPI.StatsRequest) (*tas
708722
}
709723
data, err := typeurl.MarshalAny(metrics)
710724
if err != nil {
711-
log.L.Debugf("Stats error, id: %s: %v", r.ID, err)
725+
log.L.Debugf("Stats error v1, id: %s: %v", r.ID, err)
726+
return nil, err
727+
}
728+
log.L.Debugf("Stats success v1, id: %s: %+v", r.ID, data)
729+
return &taskAPI.StatsResponse{
730+
Stats: data,
731+
}, nil
732+
}
733+
734+
func (s *runscService) getV2Stats(stats *runc.Stats, r *taskAPI.StatsRequest) (*taskAPI.StatsResponse, error) {
735+
metrics := &cgroupsv2stats.Metrics{
736+
// The CGroup V2 stats are in microseconds instead of nanoseconds so divide by 1000
737+
CPU: &cgroupsv2stats.CPUStat{
738+
UsageUsec: stats.Cpu.Usage.Total / 1000,
739+
UserUsec: stats.Cpu.Usage.User / 1000,
740+
SystemUsec: stats.Cpu.Usage.Kernel / 1000,
741+
NrPeriods: stats.Cpu.Throttling.Periods,
742+
NrThrottled: stats.Cpu.Throttling.ThrottledPeriods,
743+
ThrottledUsec: stats.Cpu.Throttling.ThrottledTime / 1000,
744+
},
745+
Memory: &cgroupsv2stats.MemoryStat{
746+
Usage: stats.Memory.Usage.Usage,
747+
UsageLimit: stats.Memory.Usage.Limit,
748+
SwapUsage: stats.Memory.Swap.Usage,
749+
SwapLimit: stats.Memory.Swap.Limit,
750+
Slab: stats.Memory.Kernel.Usage,
751+
File: stats.Memory.Cache,
752+
},
753+
Pids: &cgroupsv2stats.PidsStat{
754+
Current: stats.Pids.Current,
755+
Limit: stats.Pids.Limit,
756+
},
757+
}
758+
data, err := typeurl.MarshalAny(metrics)
759+
if err != nil {
760+
log.L.Debugf("Stats error v2, id: %s: %v", r.ID, err)
712761
return nil, err
713762
}
714-
log.L.Debugf("Stats success, id: %s: %+v", r.ID, data)
763+
log.L.Debugf("Stats success v2, id: %s: %+v", r.ID, data)
715764
return &taskAPI.StatsResponse{
716765
Stats: data,
717766
}, nil

0 commit comments

Comments
 (0)