@@ -43,7 +43,6 @@ import (
43
43
cputopology "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
44
44
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
45
45
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
46
- "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
47
46
"k8s.io/kubernetes/pkg/kubelet/config"
48
47
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
49
48
"k8s.io/kubernetes/pkg/kubelet/metrics"
@@ -662,15 +661,30 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
662
661
if _ , ok := m .healthyDevices [resource ]; ! ok {
663
662
return nil , fmt .Errorf ("can't allocate unregistered device %s" , resource )
664
663
}
665
- devices = sets .NewString ()
666
- // Allocates from reusableDevices list first.
667
- for device := range reusableDevices {
668
- devices .Insert (device )
669
- needed --
670
- if needed == 0 {
671
- return devices , nil
664
+
665
+ // Declare the list of allocated devices.
666
+ // This will be populated and returned below.
667
+ allocated := sets .NewString ()
668
+
669
+ // Create a closure to help with device allocation
670
+ // Returns 'true' once no more devices need to be allocated.
671
+ allocateRemainingFrom := func (devices sets.String ) bool {
672
+ for device := range devices .Difference (allocated ) {
673
+ m .allocatedDevices [resource ].Insert (device )
674
+ allocated .Insert (device )
675
+ needed --
676
+ if needed == 0 {
677
+ return true
678
+ }
672
679
}
680
+ return false
681
+ }
682
+
683
+ // Allocates from reusableDevices list first.
684
+ if allocateRemainingFrom (reusableDevices ) {
685
+ return allocated , nil
673
686
}
687
+
674
688
// Needs to allocate additional devices.
675
689
if m .allocatedDevices [resource ] == nil {
676
690
m .allocatedDevices [resource ] = sets .NewString ()
@@ -683,25 +697,67 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
683
697
if available .Len () < needed {
684
698
return nil , fmt .Errorf ("requested number of devices unavailable for %s. Requested: %d, Available: %d" , resource , needed , available .Len ())
685
699
}
686
- // By default, pull devices from the unsorted list of available devices.
687
- allocated := available .UnsortedList ()[:needed ]
688
- // If topology alignment is desired, update allocated to the set of devices
689
- // with the best alignment.
690
- hint := m .topologyAffinityStore .GetAffinity (podUID , contName )
691
- if m .deviceHasTopologyAlignment (resource ) && hint .NUMANodeAffinity != nil {
692
- allocated = m .takeByTopology (resource , available , hint .NUMANodeAffinity , needed )
700
+
701
+ // Filters available Devices based on NUMA affinity.
702
+ aligned , unaligned , noAffinity := m .filterByAffinity (podUID , contName , resource , available )
703
+
704
+ // If we can allocate all remaining devices from the set of aligned ones, then
705
+ // give the plugin the chance to influence which ones to allocate from that set.
706
+ if needed < aligned .Len () {
707
+ // First allocate from the preferred devices list (if available).
708
+ preferred , err := m .callGetPreferredAllocationIfAvailable (podUID , contName , resource , aligned .Union (allocated ), allocated , required )
709
+ if err != nil {
710
+ return nil , err
711
+ }
712
+ if allocateRemainingFrom (preferred .Intersection (aligned .Union (allocated ))) {
713
+ return allocated , nil
714
+ }
715
+ // Then fallback to allocate from the aligned set if no preferred list
716
+ // is returned (or not enough devices are returned in that list).
717
+ if allocateRemainingFrom (aligned ) {
718
+ return allocated , nil
719
+ }
720
+
721
+ return nil , fmt .Errorf ("unexpectedly allocated less resources than required. Requested: %d, Got: %d" , required , required - needed )
722
+ }
723
+
724
+ // If we can't allocate all remaining devices from the set of aligned ones,
725
+ // then start by first allocating all of the aligned devices (to ensure
726
+ // that the alignment guaranteed by the TopologyManager is honored).
727
+ if allocateRemainingFrom (aligned ) {
728
+ return allocated , nil
729
+ }
730
+
731
+ // Then give the plugin the chance to influence the decision on any
732
+ // remaining devices to allocate.
733
+ preferred , err := m .callGetPreferredAllocationIfAvailable (podUID , contName , resource , available .Union (devices ), devices , required )
734
+ if err != nil {
735
+ return nil , err
693
736
}
694
- // Updates m.allocatedDevices with allocated devices to prevent them
695
- // from being allocated to other pods/containers, given that we are
696
- // not holding lock during the rpc call.
697
- for _ , device := range allocated {
698
- m .allocatedDevices [resource ].Insert (device )
699
- devices .Insert (device )
737
+ if allocateRemainingFrom (preferred .Intersection (available .Union (allocated ))) {
738
+ return allocated , nil
700
739
}
701
- return devices , nil
740
+
741
+ // Finally, if the plugin did not return a preferred allocation (or didn't
742
+ // return a large enough one), then fall back to allocating the remaining
743
+ // devices from the 'unaligned' and 'noAffinity' sets.
744
+ if allocateRemainingFrom (unaligned ) {
745
+ return allocated , nil
746
+ }
747
+ if allocateRemainingFrom (noAffinity ) {
748
+ return allocated , nil
749
+ }
750
+
751
+ return nil , fmt .Errorf ("unexpectedly allocated less resources than required. Requested: %d, Got: %d" , required , required - needed )
702
752
}
703
753
704
- func (m * ManagerImpl ) takeByTopology (resource string , available sets.String , affinity bitmask.BitMask , request int ) []string {
754
+ func (m * ManagerImpl ) filterByAffinity (podUID , contName , resource string , available sets.String ) (sets.String , sets.String , sets.String ) {
755
+ // If alignment information is not available, just pass the available list back.
756
+ hint := m .topologyAffinityStore .GetAffinity (podUID , contName )
757
+ if ! m .deviceHasTopologyAlignment (resource ) || hint .NUMANodeAffinity == nil {
758
+ return sets .NewString (), sets .NewString (), available
759
+ }
760
+
705
761
// Build a map of NUMA Nodes to the devices associated with them. A
706
762
// device may be associated to multiple NUMA nodes at the same time. If an
707
763
// available device does not have any NUMA Nodes associated with it, add it
@@ -755,7 +811,7 @@ func (m *ManagerImpl) takeByTopology(resource string, available sets.String, aff
755
811
if perNodeDevices [n ].Has (d ) {
756
812
if n == nodeWithoutTopology {
757
813
withoutTopology = append (withoutTopology , d )
758
- } else if affinity .IsSet (n ) {
814
+ } else if hint . NUMANodeAffinity .IsSet (n ) {
759
815
fromAffinity = append (fromAffinity , d )
760
816
} else {
761
817
notFromAffinity = append (notFromAffinity , d )
@@ -765,8 +821,8 @@ func (m *ManagerImpl) takeByTopology(resource string, available sets.String, aff
765
821
}
766
822
}
767
823
768
- // Concatenate the lists above return the first 'request' devices from it. .
769
- return append ( append ( fromAffinity , notFromAffinity ... ), withoutTopology ... )[: request ]
824
+ // Return all three lists containing the full set of devices across them .
825
+ return sets . NewString ( fromAffinity ... ), sets . NewString ( notFromAffinity ... ), sets . NewString ( withoutTopology ... )
770
826
}
771
827
772
828
// allocateContainerResources attempts to allocate all of required device
0 commit comments