Skip to content

Commit a780ccf

Browse files
committed
Updates logic in devicesToAllocate() to call GetPreferredAllocation()
1 parent bb56a09 commit a780ccf

File tree

1 file changed

+82
-26
lines changed

1 file changed

+82
-26
lines changed

pkg/kubelet/cm/devicemanager/manager.go

Lines changed: 82 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,6 @@ import (
4343
cputopology "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
4444
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
4545
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
46-
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
4746
"k8s.io/kubernetes/pkg/kubelet/config"
4847
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
4948
"k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -662,15 +661,30 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
662661
if _, ok := m.healthyDevices[resource]; !ok {
663662
return nil, fmt.Errorf("can't allocate unregistered device %s", resource)
664663
}
665-
devices = sets.NewString()
666-
// Allocates from reusableDevices list first.
667-
for device := range reusableDevices {
668-
devices.Insert(device)
669-
needed--
670-
if needed == 0 {
671-
return devices, nil
664+
665+
// Declare the list of allocated devices.
666+
// This will be populated and returned below.
667+
allocated := sets.NewString()
668+
669+
// Create a closure to help with device allocation
670+
// Returns 'true' once no more devices need to be allocated.
671+
allocateRemainingFrom := func(devices sets.String) bool {
672+
for device := range devices.Difference(allocated) {
673+
m.allocatedDevices[resource].Insert(device)
674+
allocated.Insert(device)
675+
needed--
676+
if needed == 0 {
677+
return true
678+
}
672679
}
680+
return false
681+
}
682+
683+
// Allocates from reusableDevices list first.
684+
if allocateRemainingFrom(reusableDevices) {
685+
return allocated, nil
673686
}
687+
674688
// Needs to allocate additional devices.
675689
if m.allocatedDevices[resource] == nil {
676690
m.allocatedDevices[resource] = sets.NewString()
@@ -683,25 +697,67 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
683697
if available.Len() < needed {
684698
return nil, fmt.Errorf("requested number of devices unavailable for %s. Requested: %d, Available: %d", resource, needed, available.Len())
685699
}
686-
// By default, pull devices from the unsorted list of available devices.
687-
allocated := available.UnsortedList()[:needed]
688-
// If topology alignment is desired, update allocated to the set of devices
689-
// with the best alignment.
690-
hint := m.topologyAffinityStore.GetAffinity(podUID, contName)
691-
if m.deviceHasTopologyAlignment(resource) && hint.NUMANodeAffinity != nil {
692-
allocated = m.takeByTopology(resource, available, hint.NUMANodeAffinity, needed)
700+
701+
// Filters available Devices based on NUMA affinity.
702+
aligned, unaligned, noAffinity := m.filterByAffinity(podUID, contName, resource, available)
703+
704+
// If we can allocate all remaining devices from the set of aligned ones, then
705+
// give the plugin the chance to influence which ones to allocate from that set.
706+
if needed < aligned.Len() {
707+
// First allocate from the preferred devices list (if available).
708+
preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, aligned.Union(allocated), allocated, required)
709+
if err != nil {
710+
return nil, err
711+
}
712+
if allocateRemainingFrom(preferred.Intersection(aligned.Union(allocated))) {
713+
return allocated, nil
714+
}
715+
// Then fallback to allocate from the aligned set if no preferred list
716+
// is returned (or not enough devices are returned in that list).
717+
if allocateRemainingFrom(aligned) {
718+
return allocated, nil
719+
}
720+
721+
return nil, fmt.Errorf("unexpectedly allocated less resources than required. Requested: %d, Got: %d", required, required-needed)
722+
}
723+
724+
// If we can't allocate all remaining devices from the set of aligned ones,
725+
// then start by first allocating all of the aligned devices (to ensure
726+
// that the alignment guaranteed by the TopologyManager is honored).
727+
if allocateRemainingFrom(aligned) {
728+
return allocated, nil
729+
}
730+
731+
// Then give the plugin the chance to influence the decision on any
732+
// remaining devices to allocate.
733+
preferred, err := m.callGetPreferredAllocationIfAvailable(podUID, contName, resource, available.Union(devices), devices, required)
734+
if err != nil {
735+
return nil, err
693736
}
694-
// Updates m.allocatedDevices with allocated devices to prevent them
695-
// from being allocated to other pods/containers, given that we are
696-
// not holding lock during the rpc call.
697-
for _, device := range allocated {
698-
m.allocatedDevices[resource].Insert(device)
699-
devices.Insert(device)
737+
if allocateRemainingFrom(preferred.Intersection(available.Union(allocated))) {
738+
return allocated, nil
700739
}
701-
return devices, nil
740+
741+
// Finally, if the plugin did not return a preferred allocation (or didn't
742+
// return a large enough one), then fall back to allocating the remaining
743+
// devices from the 'unaligned' and 'noAffinity' sets.
744+
if allocateRemainingFrom(unaligned) {
745+
return allocated, nil
746+
}
747+
if allocateRemainingFrom(noAffinity) {
748+
return allocated, nil
749+
}
750+
751+
return nil, fmt.Errorf("unexpectedly allocated less resources than required. Requested: %d, Got: %d", required, required-needed)
702752
}
703753

704-
func (m *ManagerImpl) takeByTopology(resource string, available sets.String, affinity bitmask.BitMask, request int) []string {
754+
func (m *ManagerImpl) filterByAffinity(podUID, contName, resource string, available sets.String) (sets.String, sets.String, sets.String) {
755+
// If alignment information is not available, just pass the available list back.
756+
hint := m.topologyAffinityStore.GetAffinity(podUID, contName)
757+
if !m.deviceHasTopologyAlignment(resource) || hint.NUMANodeAffinity == nil {
758+
return sets.NewString(), sets.NewString(), available
759+
}
760+
705761
// Build a map of NUMA Nodes to the devices associated with them. A
706762
// device may be associated to multiple NUMA nodes at the same time. If an
707763
// available device does not have any NUMA Nodes associated with it, add it
@@ -755,7 +811,7 @@ func (m *ManagerImpl) takeByTopology(resource string, available sets.String, aff
755811
if perNodeDevices[n].Has(d) {
756812
if n == nodeWithoutTopology {
757813
withoutTopology = append(withoutTopology, d)
758-
} else if affinity.IsSet(n) {
814+
} else if hint.NUMANodeAffinity.IsSet(n) {
759815
fromAffinity = append(fromAffinity, d)
760816
} else {
761817
notFromAffinity = append(notFromAffinity, d)
@@ -765,8 +821,8 @@ func (m *ManagerImpl) takeByTopology(resource string, available sets.String, aff
765821
}
766822
}
767823

768-
// Concatenate the lists above return the first 'request' devices from it..
769-
return append(append(fromAffinity, notFromAffinity...), withoutTopology...)[:request]
824+
// Return all three lists containing the full set of devices across them.
825+
return sets.NewString(fromAffinity...), sets.NewString(notFromAffinity...), sets.NewString(withoutTopology...)
770826
}
771827

772828
// allocateContainerResources attempts to allocate all of required device

0 commit comments

Comments
 (0)