Skip to content

Commit 9dc116e

Browse files
committed
Ensure CPUManager TopologyHints are regenerated after kubelet restart
This patch also includes tests to make sure the newly added logic works as expected.
1 parent a338c8f commit 9dc116e

File tree

3 files changed

+118
-1
lines changed

3 files changed

+118
-1
lines changed

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,23 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod v1.Pod, container v1.
320320
return nil
321321
}
322322

323+
// Short circuit to regenerate the same hints if there are already
324+
// guaranteed CPUs allocated to the Container. This might happen after a
325+
// kubelet restart, for example.
326+
containerID, _ := findContainerIDByName(&pod.Status, container.Name)
327+
if allocated, exists := s.GetCPUSet(containerID); exists {
328+
if allocated.Size() != requested {
329+
klog.Errorf("[cpumanager] CPUs already allocated to (pod %v, container %v) with different number than request: requested: %d, allocated: %d", string(pod.UID), container.Name, requested, allocated.Size())
330+
return map[string][]topologymanager.TopologyHint{
331+
string(v1.ResourceCPU): {},
332+
}
333+
}
334+
klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
335+
return map[string][]topologymanager.TopologyHint{
336+
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
337+
}
338+
}
339+
323340
// Get a list of available CPUs.
324341
available := p.assignableCPUs(s)
325342

pkg/kubelet/cm/cpumanager/policy_static_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,20 @@ func TestStaticPolicyAdd(t *testing.T) {
213213
expCPUAlloc: true,
214214
expCSet: cpuset.NewCPUSet(1, 5),
215215
},
216+
{
217+
description: "GuPodMultipleCores, SingleSocketHT, ExpectSameAllocation",
218+
topo: topoSingleSocketHT,
219+
numReservedCPUs: 1,
220+
containerID: "fakeID3",
221+
stAssignments: state.ContainerCPUAssignments{
222+
"fakeID3": cpuset.NewCPUSet(2, 3, 6, 7),
223+
},
224+
stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
225+
pod: makePod("4000m", "4000m"),
226+
expErr: nil,
227+
expCPUAlloc: true,
228+
expCSet: cpuset.NewCPUSet(2, 3, 6, 7),
229+
},
216230
{
217231
description: "GuPodMultipleCores, DualSocketHT, ExpectAllocOneSocket",
218232
topo: topoDualSocketHT,

pkg/kubelet/cm/cpumanager/topology_hints_test.go

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
cadvisorapi "github.com/google/cadvisor/info/v1"
2525
v1 "k8s.io/api/core/v1"
26+
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
2627
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
2728
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
2829
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -72,6 +73,7 @@ func TestGetTopologyHints(t *testing.T) {
7273
name string
7374
pod v1.Pod
7475
container v1.Container
76+
assignments state.ContainerCPUAssignments
7577
defaultCPUSet cpuset.CPUSet
7678
expectedHints []topologymanager.TopologyHint
7779
}{
@@ -142,6 +144,86 @@ func TestGetTopologyHints(t *testing.T) {
142144
},
143145
},
144146
},
147+
{
148+
name: "Request more CPUs than available",
149+
pod: *testPod2,
150+
container: *testContainer2,
151+
defaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3),
152+
expectedHints: nil,
153+
},
154+
{
155+
name: "Regenerate Single-Node NUMA Hints if already allocated 1/2",
156+
pod: *testPod1,
157+
container: *testContainer1,
158+
assignments: state.ContainerCPUAssignments{
159+
"": cpuset.NewCPUSet(0, 6),
160+
},
161+
defaultCPUSet: cpuset.NewCPUSet(),
162+
expectedHints: []topologymanager.TopologyHint{
163+
{
164+
NUMANodeAffinity: firstSocketMask,
165+
Preferred: true,
166+
},
167+
{
168+
NUMANodeAffinity: crossSocketMask,
169+
Preferred: false,
170+
},
171+
},
172+
},
173+
{
174+
name: "Regenerate Single-Node NUMA Hints if already allocated 1/2",
175+
pod: *testPod1,
176+
container: *testContainer1,
177+
assignments: state.ContainerCPUAssignments{
178+
"": cpuset.NewCPUSet(3, 9),
179+
},
180+
defaultCPUSet: cpuset.NewCPUSet(),
181+
expectedHints: []topologymanager.TopologyHint{
182+
{
183+
NUMANodeAffinity: secondSocketMask,
184+
Preferred: true,
185+
},
186+
{
187+
NUMANodeAffinity: crossSocketMask,
188+
Preferred: false,
189+
},
190+
},
191+
},
192+
{
193+
name: "Regenerate Cross-NUMA Hints if already allocated",
194+
pod: *testPod4,
195+
container: *testContainer4,
196+
assignments: state.ContainerCPUAssignments{
197+
"": cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
198+
},
199+
defaultCPUSet: cpuset.NewCPUSet(),
200+
expectedHints: []topologymanager.TopologyHint{
201+
{
202+
NUMANodeAffinity: crossSocketMask,
203+
Preferred: true,
204+
},
205+
},
206+
},
207+
{
208+
name: "Requested less than already allocated",
209+
pod: *testPod1,
210+
container: *testContainer1,
211+
assignments: state.ContainerCPUAssignments{
212+
"": cpuset.NewCPUSet(0, 6, 3, 9),
213+
},
214+
defaultCPUSet: cpuset.NewCPUSet(),
215+
expectedHints: []topologymanager.TopologyHint{},
216+
},
217+
{
218+
name: "Requested more than already allocated",
219+
pod: *testPod4,
220+
container: *testContainer4,
221+
assignments: state.ContainerCPUAssignments{
222+
"": cpuset.NewCPUSet(0, 6, 3, 9),
223+
},
224+
defaultCPUSet: cpuset.NewCPUSet(),
225+
expectedHints: []topologymanager.TopologyHint{},
226+
},
145227
}
146228
for _, tc := range tcases {
147229
topology, _ := topology.Discover(&machineInfo, numaNodeInfo)
@@ -151,9 +233,13 @@ func TestGetTopologyHints(t *testing.T) {
151233
topology: topology,
152234
},
153235
state: &mockState{
236+
assignments: tc.assignments,
154237
defaultCPUSet: tc.defaultCPUSet,
155238
},
156-
topology: topology,
239+
topology: topology,
240+
activePods: func() []*v1.Pod { return nil },
241+
podStatusProvider: mockPodStatusProvider{},
242+
sourcesReady: &sourcesReadyStub{},
157243
}
158244

159245
hints := m.GetTopologyHints(tc.pod, tc.container)[string(v1.ResourceCPU)]

0 commit comments

Comments
 (0)