Skip to content

Commit 00df26a

Browse files
committed
Fix a bug whereby reusable CPUs and devices were not being honored
Previously, it was possible for reusable CPUs and reusable devices (i.e. those previously consumed by init containers) to not be reused by subsequent init containers or app containers if the TopologyManager was enabled. This would happen because hint generation for the TopologyManager was not considering the reusable devices when it made its hint calculation. As such, it would sometimes: 1) generate a hint for a different NUMA node, causing the CPUs and devices to be allocated from that node instead of the one where the reusable devices live; or 2) end up thinking there were not enough CPUs or devices to allocate and throw a TopologyAffinity admission error. This patch fixes this by ensuring that reusable CPUs and devices are considered as part of TopologyHint generation. This functionality is difficult to unit test since it spans multiple components, but an e2e test will be added in a subsequent patch to test this functionality.
1 parent 74fe936 commit 00df26a

File tree

2 files changed

+39
-15
lines changed

2 files changed

+39
-15
lines changed

pkg/kubelet/cm/cpumanager/policy_static.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -338,15 +338,16 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
338338
}
339339
klog.Infof("[cpumanager] Regenerating TopologyHints for CPUs already allocated to (pod %v, container %v)", string(pod.UID), container.Name)
340340
return map[string][]topologymanager.TopologyHint{
341-
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, requested),
341+
string(v1.ResourceCPU): p.generateCPUTopologyHints(allocated, cpuset.CPUSet{}, requested),
342342
}
343343
}
344344

345345
// Get a list of available CPUs.
346346
available := p.assignableCPUs(s)
347+
reusable := p.cpusToReuse[string(pod.UID)]
347348

348349
// Generate hints.
349-
cpuHints := p.generateCPUTopologyHints(available, requested)
350+
cpuHints := p.generateCPUTopologyHints(available, reusable, requested)
350351
klog.Infof("[cpumanager] TopologyHints generated for pod '%v', container '%v': %v", pod.Name, container.Name, cpuHints)
351352

352353
return map[string][]topologymanager.TopologyHint{
@@ -360,7 +361,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
360361
// It follows the convention of marking all hints that have the same number of
361362
// bits set as the narrowest matching NUMANodeAffinity with 'Preferred: true', and
362363
// marking all others with 'Preferred: false'.
363-
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
364+
func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reusableCPUs cpuset.CPUSet, request int) []topologymanager.TopologyHint {
364365
// Initialize minAffinitySize to include all NUMA Nodes.
365366
minAffinitySize := p.topology.CPUDetails.NUMANodes().Size()
366367
// Initialize minSocketsOnMinAffinity to include all Sockets.
@@ -380,16 +381,25 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, req
380381
}
381382
}
382383

383-
// Then check to see if we have enough CPUs available on the current
384-
// socket bitmask to satisfy the CPU request.
384+
// Then check to see if all of the reusable CPUs are part of the bitmask.
385385
numMatching := 0
386+
for _, c := range reusableCPUs.ToSlice() {
387+
// Disregard this mask if its NUMANode isn't part of it.
388+
if !mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
389+
return
390+
}
391+
numMatching++
392+
}
393+
394+
// Finally, check to see if enough available CPUs remain on the current
395+
// NUMA node combination to satisfy the CPU request.
386396
for _, c := range availableCPUs.ToSlice() {
387397
if mask.IsSet(p.topology.CPUDetails[c].NUMANodeID) {
388398
numMatching++
389399
}
390400
}
391401

392-
// If we don't, then move onto the next combination.
402+
// If they don't, then move onto the next combination.
393403
if numMatching < request {
394404
return
395405
}

pkg/kubelet/cm/devicemanager/topology_hints.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,21 +58,22 @@ func (m *ManagerImpl) GetTopologyHints(pod *v1.Pod, container *v1.Container) map
5858
continue
5959
}
6060
klog.Infof("[devicemanager] Regenerating TopologyHints for resource '%v' already allocated to (pod %v, container %v)", resource, string(pod.UID), container.Name)
61-
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, requested)
61+
deviceHints[resource] = m.generateDeviceTopologyHints(resource, allocated, sets.String{}, requested)
6262
continue
6363
}
6464

6565
// Get the list of available devices, for which TopologyHints should be generated.
6666
available := m.getAvailableDevices(resource)
67-
if available.Len() < requested {
68-
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Len())
67+
reusable := m.devicesToReuse[string(pod.UID)][resource]
68+
if available.Union(reusable).Len() < requested {
69+
klog.Errorf("[devicemanager] Unable to generate topology hints: requested number of devices unavailable for '%s': requested: %d, available: %d", resource, requested, available.Union(reusable).Len())
6970
deviceHints[resource] = []topologymanager.TopologyHint{}
7071
continue
7172
}
7273

7374
// Generate TopologyHints for this resource given the current
7475
// request size and the list of available devices.
75-
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, requested)
76+
deviceHints[resource] = m.generateDeviceTopologyHints(resource, available, reusable, requested)
7677
}
7778
}
7879

@@ -94,7 +95,7 @@ func (m *ManagerImpl) getAvailableDevices(resource string) sets.String {
9495
return m.healthyDevices[resource].Difference(m.allocatedDevices[resource])
9596
}
9697

97-
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.String, request int) []topologymanager.TopologyHint {
98+
func (m *ManagerImpl) generateDeviceTopologyHints(resource string, available sets.String, reusable sets.String, request int) []topologymanager.TopologyHint {
9899
// Initialize minAffinitySize to include all NUMA Nodes
99100
minAffinitySize := len(m.numaNodes)
100101

@@ -112,16 +113,29 @@ func (m *ManagerImpl) generateDeviceTopologyHints(resource string, devices sets.
112113
minAffinitySize = mask.Count()
113114
}
114115

115-
// Then check to see if we have enough devices available on the current
116-
// NUMA Node combination to satisfy the device request.
116+
// Then check to see if all of the reusable devices are part of the bitmask.
117117
numMatching := 0
118-
for d := range devices {
118+
for d := range reusable {
119+
// Skip the device if it doesn't specify any topology info.
120+
if m.allDevices[resource][d].Topology == nil {
121+
continue
122+
}
123+
// Otherwise disregard this mask if its NUMANode isn't part of it.
124+
if !mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
125+
return
126+
}
127+
numMatching++
128+
}
129+
130+
// Finally, check to see if enough available devices remain on the
131+
// current NUMA node combination to satisfy the device request.
132+
for d := range available {
119133
if mask.AnySet(m.getNUMANodeIds(m.allDevices[resource][d].Topology)) {
120134
numMatching++
121135
}
122136
}
123137

124-
// If we don't, then move onto the next combination.
138+
// If they don't, then move onto the next combination.
125139
if numMatching < request {
126140
return
127141
}

0 commit comments

Comments
 (0)