Skip to content

Commit 4bd929d

Browse files
authored
Merge pull request #1979 from AllenXu93/bugfix/fix-topology-updater-cpu
fix topology-updater cpu report
2 parents eb787fa + 913b34b commit 4bd929d

File tree

3 files changed

+188
-49
lines changed

3 files changed

+188
-49
lines changed

pkg/resourcemonitor/podresourcesscanner.go

Lines changed: 37 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ import (
2222
"strconv"
2323

2424
corev1 "k8s.io/api/core/v1"
25-
"k8s.io/apimachinery/pkg/api/resource"
2625
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2726
client "k8s.io/client-go/kubernetes"
2827
"k8s.io/klog/v2"
2928
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
29+
"k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
3030

3131
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
3232
"github.com/k8stopologyawareschedwg/podfingerprint"
@@ -57,58 +57,49 @@ func NewPodResourcesScanner(namespace string, podResourceClient podresourcesapi.
5757
}
5858

5959
// isWatchable tells if the given namespace should be watched.
60-
func (resMon *PodResourcesScanner) isWatchable(podNamespace string, podName string, hasDevice bool) (bool, bool, error) {
61-
pod, err := resMon.k8sClient.CoreV1().Pods(podNamespace).Get(context.TODO(), podName, metav1.GetOptions{})
60+
// In Scan(), if watchable is false, this pod's scan will be skipped,
61+
// so we can return directly if pod's namespace is not watchable
62+
func (resMon *PodResourcesScanner) isWatchable(podResource *podresourcesapi.PodResources) (bool, bool, error) {
63+
if resMon.namespace != "*" && resMon.namespace != podResource.GetNamespace() {
64+
return false, false, nil
65+
}
66+
67+
pod, err := resMon.k8sClient.CoreV1().Pods(podResource.GetNamespace()).Get(context.TODO(), podResource.Name, metav1.GetOptions{})
6268
if err != nil {
6369
return false, false, err
6470
}
6571

66-
isIntegralGuaranteed := hasExclusiveCPUs(pod)
72+
isPodGuaranteed := qos.GetPodQOS(pod) == corev1.PodQOSGuaranteed
73+
podHasExclusiveCPUs := isPodGuaranteed && checkPodExclusiveCPUs(pod)
6774

68-
if resMon.namespace == "*" && (isIntegralGuaranteed || hasDevice) {
69-
return true, isIntegralGuaranteed, nil
70-
}
71-
// TODO: add an explicit check for guaranteed pods and pods with devices
72-
return resMon.namespace == podNamespace && (isIntegralGuaranteed || hasDevice), isIntegralGuaranteed, nil
75+
return isPodGuaranteed || hasDevice(podResource), podHasExclusiveCPUs, nil
7376
}
7477

75-
// hasExclusiveCPUs returns true if a guaranteed pod is allocated exclusive CPUs else returns false.
78+
// checkPodExclusiveCPUs returns true if a guaranteed pod is allocated exclusive CPUs else returns false.
7679
// In isWatchable() function we check for the pod QoS and proceed if it is guaranteed (i.e. request == limit)
7780
// and hence we only check for request in the function below.
78-
func hasExclusiveCPUs(pod *corev1.Pod) bool {
79-
var totalCPU int64
80-
var cpuQuantity resource.Quantity
81+
func checkPodExclusiveCPUs(pod *corev1.Pod) bool {
8182
for _, container := range pod.Spec.InitContainers {
82-
83-
var ok bool
84-
if cpuQuantity, ok = container.Resources.Requests[corev1.ResourceCPU]; !ok {
85-
continue
86-
}
87-
totalCPU += cpuQuantity.Value()
88-
isInitContainerGuaranteed := hasIntegralCPUs(pod, &container)
89-
if !isInitContainerGuaranteed {
90-
return false
83+
if hasIntegralCPUs(&container) {
84+
return true
9185
}
9286
}
9387
for _, container := range pod.Spec.Containers {
94-
var ok bool
95-
if cpuQuantity, ok = container.Resources.Requests[corev1.ResourceCPU]; !ok {
96-
continue
97-
}
98-
totalCPU += cpuQuantity.Value()
99-
isAppContainerGuaranteed := hasIntegralCPUs(pod, &container)
100-
if !isAppContainerGuaranteed {
101-
return false
88+
if hasIntegralCPUs(&container) {
89+
return true
10290
}
10391
}
10492

105-
//No CPUs requested in all the containers in the pod
106-
return totalCPU != 0
93+
// No integral CPUs requested in any of the containers of the pod
94+
return false
10795
}
10896

10997
// hasIntegralCPUs returns true if a container in pod is requesting integral CPUs else returns false
110-
func hasIntegralCPUs(pod *corev1.Pod, container *corev1.Container) bool {
111-
cpuQuantity := container.Resources.Requests[corev1.ResourceCPU]
98+
func hasIntegralCPUs(container *corev1.Container) bool {
99+
cpuQuantity, ok := container.Resources.Requests[corev1.ResourceCPU]
100+
if !ok {
101+
return false
102+
}
112103
return cpuQuantity.Value()*1000 == cpuQuantity.MilliValue()
113104
}
114105

@@ -146,8 +137,7 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
146137

147138
for _, podResource := range respPodResources {
148139
klog.V(1).InfoS("scanning pod", "podName", podResource.GetName())
149-
hasDevice := hasDevice(podResource)
150-
isWatchable, isIntegralGuaranteed, err := resMon.isWatchable(podResource.GetNamespace(), podResource.GetName(), hasDevice)
140+
isWatchable, isExclusiveCPUs, err := resMon.isWatchable(podResource)
151141
if err != nil {
152142
return ScanResponse{}, fmt.Errorf("checking if pod in a namespace is watchable, namespace:%v, pod name %v: %w", podResource.GetNamespace(), podResource.GetName(), err)
153143
}
@@ -165,19 +155,17 @@ func (resMon *PodResourcesScanner) Scan() (ScanResponse, error) {
165155
Name: container.Name,
166156
}
167157

168-
if isIntegralGuaranteed {
169-
cpuIDs := container.GetCpuIds()
170-
if len(cpuIDs) > 0 {
171-
var resCPUs []string
172-
for _, cpuID := range container.GetCpuIds() {
173-
resCPUs = append(resCPUs, strconv.FormatInt(cpuID, 10))
174-
}
175-
contRes.Resources = []ResourceInfo{
176-
{
177-
Name: corev1.ResourceCPU,
178-
Data: resCPUs,
179-
},
180-
}
158+
cpuIDs := container.GetCpuIds()
159+
if len(cpuIDs) > 0 && isExclusiveCPUs {
160+
var resCPUs []string
161+
for _, cpuID := range cpuIDs {
162+
resCPUs = append(resCPUs, strconv.FormatInt(cpuID, 10))
163+
}
164+
contRes.Resources = []ResourceInfo{
165+
{
166+
Name: corev1.ResourceCPU,
167+
Data: resCPUs,
168+
},
181169
}
182170
}
183171

pkg/resourcemonitor/podresourcesscanner_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ func TestPodScanner(t *testing.T) {
165165
},
166166
},
167167
},
168+
Status: corev1.PodStatus{
169+
QOSClass: corev1.PodQOSGuaranteed,
170+
},
168171
}
169172

170173
fakeCli := fakeclient.NewSimpleClientset(pod)
@@ -280,6 +283,9 @@ func TestPodScanner(t *testing.T) {
280283
},
281284
},
282285
},
286+
Status: corev1.PodStatus{
287+
QOSClass: corev1.PodQOSGuaranteed,
288+
},
283289
}
284290

285291
fakeCli = fakeclient.NewSimpleClientset(pod)
@@ -368,6 +374,9 @@ func TestPodScanner(t *testing.T) {
368374
},
369375
},
370376
},
377+
Status: corev1.PodStatus{
378+
QOSClass: corev1.PodQOSGuaranteed,
379+
},
371380
}
372381
fakeCli = fakeclient.NewSimpleClientset(pod)
373382
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -458,6 +467,9 @@ func TestPodScanner(t *testing.T) {
458467
},
459468
},
460469
},
470+
Status: corev1.PodStatus{
471+
QOSClass: corev1.PodQOSGuaranteed,
472+
},
461473
}
462474
fakeCli = fakeclient.NewSimpleClientset(pod)
463475
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -628,6 +640,9 @@ func TestPodScanner(t *testing.T) {
628640
},
629641
},
630642
},
643+
Status: corev1.PodStatus{
644+
QOSClass: corev1.PodQOSGuaranteed,
645+
},
631646
}
632647
fakeCli = fakeclient.NewSimpleClientset(pod)
633648
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -825,6 +840,9 @@ func TestPodScanner(t *testing.T) {
825840
},
826841
},
827842
},
843+
Status: corev1.PodStatus{
844+
QOSClass: corev1.PodQOSGuaranteed,
845+
},
828846
}
829847
fakeCli = fakeclient.NewSimpleClientset(pod)
830848
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -1029,6 +1047,9 @@ func TestPodScanner(t *testing.T) {
10291047
},
10301048
},
10311049
},
1050+
Status: corev1.PodStatus{
1051+
QOSClass: corev1.PodQOSGuaranteed,
1052+
},
10321053
}
10331054
fakeCli = fakeclient.NewSimpleClientset(pod)
10341055
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -1113,6 +1134,120 @@ func TestPodScanner(t *testing.T) {
11131134
},
11141135
},
11151136
},
1137+
Status: corev1.PodStatus{
1138+
QOSClass: corev1.PodQOSGuaranteed,
1139+
},
1140+
}
1141+
fakeCli = fakeclient.NewSimpleClientset(pod)
1142+
resScan.(*PodResourcesScanner).k8sClient = fakeCli
1143+
res, err := resScan.Scan()
1144+
1145+
Convey("Error is nil", func() {
1146+
So(err, ShouldBeNil)
1147+
})
1148+
Convey("Return PodResources should have values", func() {
1149+
So(len(res.PodResources), ShouldBeGreaterThan, 0)
1150+
})
1151+
1152+
expected := []PodResources{
1153+
{
1154+
Name: "test-pod-0",
1155+
Namespace: "pod-res-test",
1156+
Containers: []ContainerResources{
1157+
{
1158+
Name: "test-cnt-0",
1159+
Resources: []ResourceInfo{
1160+
{
1161+
Name: "fake.io/resource",
1162+
Data: []string{"devA"},
1163+
},
1164+
},
1165+
},
1166+
},
1167+
},
1168+
}
1169+
So(reflect.DeepEqual(res.PodResources, expected), ShouldBeTrue)
1170+
})
1171+
1172+
Convey("When I successfully get valid response for guaranteed pods with not cpu pin containers", func() {
1173+
resp := &v1.ListPodResourcesResponse{
1174+
PodResources: []*v1.PodResources{
1175+
{
1176+
Name: "test-pod-0",
1177+
Namespace: "pod-res-test",
1178+
Containers: []*v1.ContainerResources{
1179+
{
1180+
Name: "test-cnt-0",
1181+
CpuIds: []int64{0, 1},
1182+
Devices: []*v1.ContainerDevices{
1183+
{
1184+
ResourceName: "fake.io/resource",
1185+
DeviceIds: []string{"devA"},
1186+
},
1187+
},
1188+
},
1189+
{
1190+
Name: "test-cnt-1",
1191+
Devices: []*v1.ContainerDevices{
1192+
{
1193+
ResourceName: "fake.io/resource",
1194+
DeviceIds: []string{"devA"},
1195+
},
1196+
},
1197+
},
1198+
},
1199+
},
1200+
},
1201+
}
1202+
mockPodResClient.On("List", mock.AnythingOfType("*context.timerCtx"), mock.AnythingOfType("*v1.ListPodResourcesRequest")).Return(resp, nil)
1203+
pod := &corev1.Pod{
1204+
TypeMeta: metav1.TypeMeta{
1205+
Kind: "Pod",
1206+
APIVersion: "v1",
1207+
},
1208+
ObjectMeta: metav1.ObjectMeta{
1209+
Name: "test-pod-0",
1210+
Namespace: "pod-res-test",
1211+
},
1212+
Spec: corev1.PodSpec{
1213+
Containers: []corev1.Container{
1214+
{
1215+
Name: "test-cnt-0",
1216+
Resources: corev1.ResourceRequirements{
1217+
Requests: corev1.ResourceList{
1218+
1219+
corev1.ResourceName("fake.io/resource"): *resource.NewQuantity(1, resource.DecimalSI),
1220+
corev1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI),
1221+
corev1.ResourceCPU: resource.MustParse("2"),
1222+
},
1223+
Limits: corev1.ResourceList{
1224+
corev1.ResourceName("fake.io/resource"): *resource.NewQuantity(1, resource.DecimalSI),
1225+
corev1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI),
1226+
corev1.ResourceCPU: resource.MustParse("2"),
1227+
},
1228+
},
1229+
},
1230+
{
1231+
Name: "test-cnt-1",
1232+
Resources: corev1.ResourceRequirements{
1233+
Requests: corev1.ResourceList{
1234+
1235+
corev1.ResourceName("fake.io/resource"): *resource.NewQuantity(1, resource.DecimalSI),
1236+
corev1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI),
1237+
corev1.ResourceCPU: resource.MustParse("1500m"),
1238+
},
1239+
Limits: corev1.ResourceList{
1240+
corev1.ResourceName("fake.io/resource"): *resource.NewQuantity(1, resource.DecimalSI),
1241+
corev1.ResourceMemory: *resource.NewQuantity(100, resource.DecimalSI),
1242+
corev1.ResourceCPU: resource.MustParse("1500m"),
1243+
},
1244+
},
1245+
},
1246+
},
1247+
},
1248+
Status: corev1.PodStatus{
1249+
QOSClass: corev1.PodQOSGuaranteed,
1250+
},
11161251
}
11171252
fakeCli = fakeclient.NewSimpleClientset(pod)
11181253
resScan.(*PodResourcesScanner).k8sClient = fakeCli
@@ -1132,6 +1267,19 @@ func TestPodScanner(t *testing.T) {
11321267
Containers: []ContainerResources{
11331268
{
11341269
Name: "test-cnt-0",
1270+
Resources: []ResourceInfo{
1271+
{
1272+
Name: corev1.ResourceCPU,
1273+
Data: []string{"0", "1"},
1274+
},
1275+
{
1276+
Name: "fake.io/resource",
1277+
Data: []string{"devA"},
1278+
},
1279+
},
1280+
},
1281+
{
1282+
Name: "test-cnt-1",
11351283
Resources: []ResourceInfo{
11361284
{
11371285
Name: "fake.io/resource",

test/e2e/utils/pod/pod.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ func GuaranteedSleeper(opts ...func(pod *corev1.Pod)) *corev1.Pod {
6161
},
6262
},
6363
},
64+
Status: corev1.PodStatus{
65+
QOSClass: corev1.PodQOSGuaranteed,
66+
},
6467
}
6568
for _, o := range opts {
6669
o(p)

0 commit comments

Comments
 (0)