Commit 1039822

Merge pull request kubernetes#130491 from swatisehgal/cpu-allocation-numa-spread-metric
Add metrics to capture CPU allocation spread across NUMA nodes
2 parents 076c7b0 + b6a3ac4 commit 1039822
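
Once this change lands, the new gauge surfaces on the kubelet /metrics endpoint next to the existing CPU manager metrics. As a rough illustration only, a scrape from a hypothetical two-NUMA-node host might look like the following; the numa_node label name and the values shown are assumptions for the example, not output captured from this commit:

# HELP kubelet_cpu_manager_allocation_per_numa Number of CPUs allocated per NUMA node
# TYPE kubelet_cpu_manager_allocation_per_numa gauge
kubelet_cpu_manager_allocation_per_numa{numa_node="0"} 6
kubelet_cpu_manager_allocation_per_numa{numa_node="1"} 6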

File tree

4 files changed: +184 −11 lines

pkg/kubelet/cm/cpumanager/policy_static.go
pkg/kubelet/metrics/metrics.go
test/e2e_node/cpu_manager_metrics_test.go
test/e2e_node/resource_metrics_test.go

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 40 additions & 11 deletions
@@ -18,6 +18,7 @@ package cpumanager

 import (
     "fmt"
+    "strconv"

     v1 "k8s.io/api/core/v1"
     utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -389,7 +390,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai

     s.SetCPUSet(string(pod.UID), container.Name, cpuAllocation.CPUs)
     p.updateCPUsToReuse(pod, container, cpuAllocation.CPUs)
-    p.updateMetricsOnAllocate(cpuAllocation)
+    p.updateMetricsOnAllocate(s, cpuAllocation)

     klog.V(4).InfoS("Allocated exclusive CPUs", "pod", klog.KObj(pod), "containerName", container.Name, "cpuset", cpuAllocation.CPUs.String())
     return nil
@@ -416,7 +417,8 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
         // Mutate the shared pool, adding released cpus.
         toRelease = toRelease.Difference(cpusInUse)
         s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
-        p.updateMetricsOnRelease(toRelease)
+        p.updateMetricsOnRelease(s, toRelease)
+
     }
     return nil
 }
@@ -755,33 +757,60 @@ func (p *staticPolicy) getAlignedCPUs(numaAffinity bitmask.BitMask, allocatableC

 func (p *staticPolicy) initializeMetrics(s state.State) {
     metrics.CPUManagerSharedPoolSizeMilliCores.Set(float64(p.GetAvailableCPUs(s).Size() * 1000))
-    metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(countExclusiveCPUs(s)))
     metrics.ContainerAlignedComputeResourcesFailure.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0) // ensure the value exists
     metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedPhysicalCPU).Add(0)        // ensure the value exists
     metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Add(0)        // ensure the value exists
+    totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
+    metrics.CPUManagerExclusiveCPUsAllocationCount.Set(float64(totalAssignedCPUs.Size()))
+    updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
 }

-func (p *staticPolicy) updateMetricsOnAllocate(cpuAlloc topology.Allocation) {
+func (p *staticPolicy) updateMetricsOnAllocate(s state.State, cpuAlloc topology.Allocation) {
     ncpus := cpuAlloc.CPUs.Size()
     metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(ncpus))
     metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(-ncpus * 1000))
     if cpuAlloc.Aligned.UncoreCache {
         metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignScopeContainer, metrics.AlignedUncoreCache).Inc()
     }
+    totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
+    updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs)
 }

-func (p *staticPolicy) updateMetricsOnRelease(cset cpuset.CPUSet) {
+func (p *staticPolicy) updateMetricsOnRelease(s state.State, cset cpuset.CPUSet) {
     ncpus := cset.Size()
     metrics.CPUManagerExclusiveCPUsAllocationCount.Add(float64(-ncpus))
     metrics.CPUManagerSharedPoolSizeMilliCores.Add(float64(ncpus * 1000))
+    totalAssignedCPUs := getTotalAssignedExclusiveCPUs(s)
+    updateAllocationPerNUMAMetric(p.topology, totalAssignedCPUs.Difference(cset))
+}
+
+func getTotalAssignedExclusiveCPUs(s state.State) cpuset.CPUSet {
+    totalAssignedCPUs := cpuset.New()
+    for _, assignment := range s.GetCPUAssignments() {
+        for _, cset := range assignment {
+            totalAssignedCPUs = totalAssignedCPUs.Union(cset)
+        }
+    }
+    return totalAssignedCPUs
 }

-func countExclusiveCPUs(s state.State) int {
-    exclusiveCPUs := 0
-    for _, cpuAssign := range s.GetCPUAssignments() {
-        for _, cset := range cpuAssign {
-            exclusiveCPUs += cset.Size()
+func updateAllocationPerNUMAMetric(topo *topology.CPUTopology, allocatedCPUs cpuset.CPUSet) {
+    numaCount := make(map[int]int)
+
+    // Count CPUs allocated per NUMA node
+    for _, cpuID := range allocatedCPUs.UnsortedList() {
+        numaNode, err := topo.CPUNUMANodeID(cpuID)
+        if err != nil {
+            // NOTE: We are logging the error but it is highly unlikely to happen as the CPUSet
+            // is already computed and evaluated, and there is no room for user tampering.
+            klog.ErrorS(err, "Unable to determine NUMA node", "cpuID", cpuID)
         }
+        numaCount[numaNode]++
+    }
+
+    // Update metric
+    for numaNode, count := range numaCount {
+        metrics.CPUManagerAllocationPerNUMA.WithLabelValues(strconv.Itoa(numaNode)).Set(float64(count))
     }
-    return exclusiveCPUs
 }
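
Note that the per-NUMA gauge is not adjusted incrementally: on every allocation and release the policy recomputes the full set of exclusively assigned CPUs and rewrites one sample per NUMA node. The standalone sketch below illustrates just that counting step; the cpuToNUMA map and the countPerNUMA helper are illustrative stand-ins for *topology.CPUTopology and its CPUNUMANodeID lookup, not kubelet code.

package main

import (
    "fmt"
    "strconv"
)

// cpuToNUMA is a toy stand-in for topology.CPUTopology: CPUs 0-3 sit on NUMA
// node 0 and CPUs 4-7 on NUMA node 1.
var cpuToNUMA = map[int]int{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1, 7: 1}

// countPerNUMA mirrors the counting loop in updateAllocationPerNUMAMetric:
// walk every exclusively allocated CPU, resolve its NUMA node, and bucket it
// under the stringified node ID that becomes the metric label value.
func countPerNUMA(allocatedCPUs []int) map[string]int {
    numaCount := make(map[string]int)
    for _, cpuID := range allocatedCPUs {
        numaNode, ok := cpuToNUMA[cpuID]
        if !ok {
            // The real code only logs here; this sketch skips unknown CPU IDs,
            // which should not occur for an already-validated cpuset.
            continue
        }
        numaCount[strconv.Itoa(numaNode)]++
    }
    return numaCount
}

func main() {
    // Four exclusive CPUs spread across both NUMA nodes.
    fmt.Println(countPerNUMA([]int{1, 2, 5, 6})) // map[0:2 1:2]
}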

pkg/kubelet/metrics/metrics.go

Lines changed: 13 additions & 0 deletions
@@ -113,6 +113,7 @@ const (
     CPUManagerPinningErrorsTotalKey           = "cpu_manager_pinning_errors_total"
     CPUManagerSharedPoolSizeMilliCoresKey     = "cpu_manager_shared_pool_size_millicores"
     CPUManagerExclusiveCPUsAllocationCountKey = "cpu_manager_exclusive_cpu_allocation_count"
+    CPUManagerAllocationPerNUMAKey            = "cpu_manager_allocation_per_numa"

     // Metrics to track the Memory manager behavior
     MemoryManagerPinningRequestsTotalKey = "memory_manager_pinning_requests_total"
@@ -815,6 +816,17 @@ var (
         },
     )

+    // CPUManagerAllocationPerNUMA tracks the count of CPUs allocated per NUMA node
+    CPUManagerAllocationPerNUMA = metrics.NewGaugeVec(
+        &metrics.GaugeOpts{
+            Subsystem:      KubeletSubsystem,
+            Name:           CPUManagerAllocationPerNUMAKey,
+            Help:           "Number of CPUs allocated per NUMA node",
+            StabilityLevel: metrics.ALPHA,
+        },
+        []string{AlignedNUMANode},
+    )
+
     // ContainerAlignedComputeResources reports the count of resources allocation which granted aligned resources, per alignment boundary
     ContainerAlignedComputeResources = metrics.NewCounterVec(
         &metrics.CounterOpts{
@@ -1126,6 +1138,7 @@ func Register(collectors ...metrics.StableCollector) {
         legacyregistry.MustRegister(CPUManagerPinningErrorsTotal)
         legacyregistry.MustRegister(CPUManagerSharedPoolSizeMilliCores)
         legacyregistry.MustRegister(CPUManagerExclusiveCPUsAllocationCount)
+        legacyregistry.MustRegister(CPUManagerAllocationPerNUMA)
         legacyregistry.MustRegister(ContainerAlignedComputeResources)
         legacyregistry.MustRegister(ContainerAlignedComputeResourcesFailure)
         legacyregistry.MustRegister(MemoryManagerPinningRequestTotal)
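
The new gauge follows the same pattern as the existing CPU manager metrics: a component-base GaugeVec keyed by one label and registered through the legacy registry. Below is a minimal self-contained sketch of that pattern; the literal "kubelet" subsystem and "numa_node" label are stand-ins for the KubeletSubsystem and AlignedNUMANode constants used in the real file, and the values set are made up.

package main

import (
    "k8s.io/component-base/metrics"
    "k8s.io/component-base/metrics/legacyregistry"
)

// allocationPerNUMA mimics the shape of the new kubelet gauge: one time
// series per NUMA node, addressed by a single label value.
var allocationPerNUMA = metrics.NewGaugeVec(
    &metrics.GaugeOpts{
        Subsystem:      "kubelet",
        Name:           "cpu_manager_allocation_per_numa",
        Help:           "Number of CPUs allocated per NUMA node",
        StabilityLevel: metrics.ALPHA,
    },
    []string{"numa_node"},
)

func main() {
    // Register first: component-base metric vectors are lazily instantiated
    // and only start recording once registered.
    legacyregistry.MustRegister(allocationPerNUMA)

    // Set (rather than Add) one value per node, matching how the static
    // policy rewrites the gauge from the full assignment state.
    allocationPerNUMA.WithLabelValues("0").Set(6)
    allocationPerNUMA.WithLabelValues("1").Set(6)
}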

test/e2e_node/cpu_manager_metrics_test.go

Lines changed: 124 additions & 0 deletions
@@ -34,6 +34,7 @@ import (
     kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
     "k8s.io/kubernetes/pkg/kubelet/apis/podresources"
     "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
+    "k8s.io/kubernetes/pkg/kubelet/metrics"
     "k8s.io/kubernetes/pkg/kubelet/util"
     "k8s.io/kubernetes/test/e2e/feature"
     "k8s.io/kubernetes/test/e2e/framework"
@@ -389,6 +390,129 @@ var _ = SIGDescribe("CPU Manager Metrics", framework.WithSerial(), feature.CPUMa
         ginkgo.By("Ensuring the metrics match the expectations about alignment metrics a few more times")
         gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchAlignmentMetrics)
     })
+
+    ginkgo.It("should report zero counters for allocation per NUMA after a fresh restart", func(ctx context.Context) {
+
+        cpuPolicyOptions := map[string]string{
+            cpumanager.DistributeCPUsAcrossNUMAOption: "true",
+            cpumanager.FullPCPUsOnlyOption:            "true",
+        }
+        newCfg := configureCPUManagerInKubelet(oldCfg,
+            &cpuManagerKubeletArguments{
+                policyName:              string(cpumanager.PolicyStatic),
+                reservedSystemCPUs:      cpuset.New(0),
+                enableCPUManagerOptions: true,
+                options:                 cpuPolicyOptions,
+            },
+        )
+
+        updateKubeletConfig(ctx, f, newCfg, true)
+
+        ginkgo.By("Checking the cpumanager allocation per NUMA metric right after the kubelet restart, with no pods running")
+        numaNodes, _, _ := hostCheck()
+
+        framework.Logf("numaNodes on the system %d", numaNodes)
+
+        keys := make(map[interface{}]types.GomegaMatcher)
+        idFn := makeCustomLabelID(metrics.AlignedNUMANode)
+
+        for i := 0; i < numaNodes; i++ {
+            keys["kubelet_cpu_manager_allocation_per_numa"] = gstruct.MatchAllElements(idFn, gstruct.Elements{
+                fmt.Sprintf("%d", i): timelessSample(0),
+            })
+
+        }
+
+        matchSpreadMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, keys)
+
+        ginkgo.By("Giving the Kubelet time to start up and produce metrics")
+        gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchSpreadMetrics)
+        ginkgo.By("Ensuring the metrics match the expectations a few more times")
+        gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchSpreadMetrics)
+
+    })
+
+    ginkgo.It("should report allocation per NUMA metric when handling guaranteed pods", func(ctx context.Context) {
+        var cpusNumPerNUMA, coresNumPerNUMA, numaNodes, threadsPerCore int
+        cpuPolicyOptions := map[string]string{
+            cpumanager.DistributeCPUsAcrossNUMAOption: "true",
+            cpumanager.FullPCPUsOnlyOption:            "true",
+        }
+        newCfg := configureCPUManagerInKubelet(oldCfg,
+            &cpuManagerKubeletArguments{
+                policyName:              string(cpumanager.PolicyStatic),
+                reservedSystemCPUs:      cpuset.New(0),
+                enableCPUManagerOptions: true,
+                options:                 cpuPolicyOptions,
+            },
+        )
+
+        updateKubeletConfig(ctx, f, newCfg, true)
+
+        numaNodes, coresNumPerNUMA, threadsPerCore = hostCheck()
+        cpusNumPerNUMA = coresNumPerNUMA * threadsPerCore
+
+        framework.Logf("numaNodes on the system %d", numaNodes)
+        framework.Logf("Cores per NUMA on the system %d", coresNumPerNUMA)
+        framework.Logf("Threads per Core on the system %d", threadsPerCore)
+        framework.Logf("CPUs per NUMA on the system %d", cpusNumPerNUMA)
+
+        smtLevel = getSMTLevel()
+        framework.Logf("SMT Level on the system %d", smtLevel)
+
+        ginkgo.By("Querying the podresources endpoint to get the baseline")
+        endpoint, err := util.LocalEndpoint(defaultPodResourcesPath, podresources.Socket)
+        framework.ExpectNoError(err, "LocalEndpoint() failed err: %v", err)
+
+        cli, conn, err := podresources.GetV1Client(endpoint, defaultPodResourcesTimeout, defaultPodResourcesMaxSize)
+        framework.ExpectNoError(err, "GetV1Client() failed err: %v", err)
+        defer func() {
+            framework.ExpectNoError(conn.Close())
+        }()
+
+        ginkgo.By("Checking the pool allocatable resources from the kubelet")
+        resp, err := cli.GetAllocatableResources(ctx, &kubeletpodresourcesv1.AllocatableResourcesRequest{})
+        framework.ExpectNoError(err, "failed to get the kubelet allocatable resources")
+        allocatableCPUs, _ := demuxCPUsAndDevicesFromGetAllocatableResources(resp)
+
+        // 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed
+        // across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation.
+        // So, we want to ensure that the CPU Request exceeds the number of CPUs that can fit within a single
+        // NUMA node. We have to pick cpuRequest such that:
+        // 1. CPURequest > cpusNumPerNUMA
+        // 2. It does not occupy all the CPUs on the node and leaves room for the reserved CPU
+        // 3. CPURequest is a multiple of the number of NUMA nodes to allow equal CPU distribution across NUMA nodes
+        //
+        // In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNUMA * numaNodes) - reservedCPUsCount)
+        // Considering all these constraints we select: CPURequest = (cpusNumPerNUMA-smtLevel)*numaNodes
+        cpuRequest := (cpusNumPerNUMA - smtLevel) * numaNodes
+        if cpuRequest > allocatableCPUs.Size() {
+            e2eskipper.Skipf("Pod requesting %d CPUs which is more than allocatable CPUs:%d", cpuRequest, allocatableCPUs.Size())
+        }
+
+        ginkgo.By("Creating the test pod")
+        testPod = e2epod.NewPodClient(f).Create(ctx, makeGuaranteedCPUExclusiveSleeperPod("test-pod-allocation-per-numa", cpuRequest))
+
+        ginkgo.By("Checking the cpumanager metrics after pod creation")
+
+        keys := make(map[interface{}]types.GomegaMatcher)
+        idFn := makeCustomLabelID(metrics.AlignedNUMANode)
+
+        // On a clean environment with no other pods running, with the distribute-across-numa policy option enabled
+        for i := 0; i < numaNodes; i++ {
+            keys["kubelet_cpu_manager_allocation_per_numa"] = gstruct.MatchAllElements(idFn, gstruct.Elements{
+                fmt.Sprintf("%d", i): timelessSample(2),
+            })
+
+        }
+
+        matchSpreadMetrics := gstruct.MatchKeys(gstruct.IgnoreExtras, keys)
+
+        ginkgo.By("Giving the Kubelet time to start up and produce metrics")
+        gomega.Eventually(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchSpreadMetrics)
+        ginkgo.By("Ensuring the metrics match the expectations a few more times")
+        gomega.Consistently(ctx, getKubeletMetrics, 1*time.Minute, 15*time.Second).Should(matchSpreadMetrics)
+    })
+
     })
 })
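
The cpuRequest sizing explained in the comment above is what forces the allocation to span NUMA nodes, so a quick worked example may help; the topology numbers are assumptions for illustration, not values required by the test.

package main

import "fmt"

func main() {
    // Assumed host: 2 NUMA nodes, 8 cores per NUMA node, 2 threads per core.
    numaNodes, coresNumPerNUMA, threadsPerCore := 2, 8, 2
    smtLevel := threadsPerCore // 2 on this assumed host

    cpusNumPerNUMA := coresNumPerNUMA * threadsPerCore // 16
    cpuRequest := (cpusNumPerNUMA - smtLevel) * numaNodes

    // 28 CPUs: larger than one NUMA node can hold (16), a multiple of the
    // NUMA node count, and below the 31 CPUs left after reserving CPU 0.
    fmt.Println(cpuRequest) // 28
}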

test/e2e_node/resource_metrics_test.go

Lines changed: 7 additions & 0 deletions
@@ -187,6 +187,13 @@ func makeCustomPairID(pri, sec string) func(interface{}) string {
     }
 }

+func makeCustomLabelID(label string) func(interface{}) string {
+    return func(element interface{}) string {
+        el := element.(*model.Sample)
+        return string(el.Metric[model.LabelName(label)])
+    }
+}
+
 func boundedSample(lower, upper interface{}) types.GomegaMatcher {
     return gstruct.PointTo(gstruct.MatchAllFields(gstruct.Fields{
         // We already check Metric when matching the Id
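
makeCustomLabelID complements the existing makeCustomPairID helper: it keys each scraped sample by the value of a single label, which is what lets gstruct.MatchAllElements address the per-NUMA-node series individually in the test above. The small self-contained sketch below shows how the returned ID function behaves; the sample literal is hand-built for illustration, and the "numa_node" label name is an assumption standing in for metrics.AlignedNUMANode.

package main

import (
    "fmt"

    "github.com/prometheus/common/model"
)

// makeCustomLabelID is the helper added in the diff above: it returns an ID
// function that identifies a metric sample by the value of one label.
func makeCustomLabelID(label string) func(interface{}) string {
    return func(element interface{}) string {
        el := element.(*model.Sample)
        return string(el.Metric[model.LabelName(label)])
    }
}

func main() {
    idFn := makeCustomLabelID("numa_node")

    // A hand-built sample standing in for one series scraped from the kubelet.
    sample := &model.Sample{
        Metric: model.Metric{
            "__name__":  "kubelet_cpu_manager_allocation_per_numa",
            "numa_node": "1",
        },
        Value: 6,
    }

    fmt.Println(idFn(sample)) // prints: 1
}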
