Skip to content

Commit 33fe0eb

Browse files
authored
Merge pull request #500 from PiotrProkop/add-numa-distance
Make LeastNUMANodes scoringStrategy consider distance between zones
2 parents f8a1d65 + d89891a commit 33fe0eb

File tree

8 files changed

+1411
-186
lines changed

8 files changed

+1411
-186
lines changed

pkg/noderesourcetopology/least_numa.go

Lines changed: 133 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,37 @@ import (
3131
"sigs.k8s.io/scheduler-plugins/pkg/util"
3232
)
3333

34+
const (
35+
// 255 is the maximum distance value defined by the ACPI SLIT (System Locality Information Table); it is treated here as unknown/undefined
36+
maxDistanceValue = 255
37+
)
38+
3439
func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
3540
nodes := createNUMANodeList(zones)
3641
qos := v1qos.GetPodQOS(pod)
3742

3843
maxNUMANodesCount := 0
44+
allContainersMinAvgDistance := true
3945
// the order how TopologyManager asks for hint is important so doing it in the same order
4046
// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/cm/topologymanager/scope_container.go#L52
4147
for _, container := range append(pod.Spec.InitContainers, pod.Spec.Containers...) {
48+
// if a container requests only non NUMA just continue
49+
if onlyNonNUMAResources(nodes, container.Resources.Requests) {
50+
continue
51+
}
4252
identifier := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name)
43-
numaNodes := numaNodesRequired(identifier, qos, nodes, container.Resources.Requests)
53+
numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, container.Resources.Requests)
4454
// container's resources can't fit onto node, return MinNodeScore for whole pod
4555
if numaNodes == nil {
56+
// score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
57+
klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier)
4658
return framework.MinNodeScore, nil
4759
}
4860

61+
if !isMinAvgDistance {
62+
allContainersMinAvgDistance = false
63+
}
64+
4965
if numaNodes.Count() > maxNUMANodesCount {
5066
maxNUMANodesCount = numaNodes.Count()
5167
}
@@ -55,7 +71,11 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList)
5571
subtractFromNUMAs(container.Resources.Requests, nodes, numaNodes.GetBits()...)
5672
}
5773

58-
return normalizeScore(maxNUMANodesCount), nil
74+
if maxNUMANodesCount == 0 {
75+
return framework.MaxNodeScore, nil
76+
}
77+
78+
return normalizeScore(maxNUMANodesCount, allContainersMinAvgDistance), nil
5979
}
6080

6181
func leastNUMAPodScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
@@ -65,77 +85,69 @@ func leastNUMAPodScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64
6585
identifier := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
6686

6787
resources := util.GetPodEffectiveRequest(pod)
88+
// if a pod requests only non NUMA resources return max score
89+
if onlyNonNUMAResources(nodes, resources) {
90+
return framework.MaxNodeScore, nil
91+
}
6892

69-
numaNodes := numaNodesRequired(identifier, qos, nodes, resources)
93+
numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, resources)
7094
// pod's resources can't fit onto node, return MinNodeScore
7195
if numaNodes == nil {
96+
// score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
97+
klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier)
7298
return framework.MinNodeScore, nil
7399
}
74100

75-
return normalizeScore(numaNodes.Count()), nil
101+
return normalizeScore(numaNodes.Count(), isMinAvgDistance), nil
76102
}
77103

78-
func normalizeScore(numaNodes int) int64 {
79-
numaScore := framework.MaxNodeScore / highestNUMAID
80-
return framework.MaxNodeScore - int64(numaNodes)*numaScore
104+
func normalizeScore(numaNodesCount int, isMinAvgDistance bool) int64 {
105+
numaNodeScore := framework.MaxNodeScore / highestNUMAID
106+
score := framework.MaxNodeScore - int64(numaNodesCount)*numaNodeScore
107+
if isMinAvgDistance {
108+
// if distance between NUMA domains is optimal add half of numaNodeScore to make this node more favorable
109+
return score + numaNodeScore/2
110+
}
111+
112+
return score
81113
}
82114

83-
// numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources
84-
// or nil when resources can't be fitted onto the node
85-
func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) bitmask.BitMask {
86-
combinationBitmask := bitmask.NewEmptyBitMask()
87-
// we will generate combination of numa nodes from len = 1 to the number of numa nodes present on the machine
88-
for i := 1; i <= len(numaNodes); i++ {
89-
// generate combinations of len i
90-
numaNodesCombination := combin.Combinations(len(numaNodes), i)
91-
// iterate over combinations for given i
92-
for _, combination := range numaNodesCombination {
93-
// accumulate resources for given combination
94-
combinationResources := combineResources(numaNodes, combination)
95-
96-
resourcesFit := true
97-
onlyNonNUMAResources := true
98-
for resource, quantity := range resources {
99-
if quantity.IsZero() {
100-
// why bother? everything's fine from the perspective of this resource
101-
klog.V(4).InfoS("ignoring zero-qty resource request", "identifier", identifier, "resource", resource)
102-
continue
103-
}
104-
105-
combinationQuantity, ok := combinationResources[resource]
106-
if !ok {
107-
// non NUMA resource continue
108-
continue
109-
}
110-
111-
// there can be a situation where container/pod requests only non NUMA resources
112-
onlyNonNUMAResources = false
113-
114-
if !isResourceSetSuitable(qos, resource, quantity, combinationQuantity) {
115-
resourcesFit = false
116-
break
117-
}
115+
func minAvgDistanceInCombinations(numaNodes NUMANodeList, numaNodesCombination [][]int) float32 {
116+
// max distance for NUMA node
117+
var minDistance float32 = maxDistanceValue
118118

119+
for _, combination := range numaNodesCombination {
120+
avgDistance := nodesAvgDistance(numaNodes, combination...)
121+
if avgDistance < minDistance {
122+
minDistance = avgDistance
123+
}
124+
}
125+
126+
return minDistance
127+
}
128+
129+
func nodesAvgDistance(numaNodes NUMANodeList, nodes ...int) float32 {
130+
if len(nodes) == 0 {
131+
return maxDistanceValue
132+
}
133+
134+
var (
135+
accu int
136+
)
137+
138+
for _, node1 := range nodes {
139+
for _, node2 := range nodes {
140+
cost, ok := numaNodes[node1].Costs[node2]
141+
// we couldn't read Costs assign maxDistanceValue
142+
if !ok {
143+
klog.Warningf("cannot retrieve Costs information for node : %s", node1)
144+
cost = maxDistanceValue
119145
}
120-
// if resources can be fit on given combination, just return the number of numa nodes requires to fit them
121-
// according to TopologyManager if both masks are the same size pick the one that has less bits set
122-
// https://github.com/kubernetes/kubernetes/blob/3e26e104bdf9d0dc3c4046d6350b93557c67f3f4/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go#L146
123-
// combin.Combinations is generating combinations in an order from the smallest to highest value
124-
if resourcesFit {
125-
// if a container/pod requests only non NUMA resources return empty bitmask and score of 0
126-
if onlyNonNUMAResources {
127-
return combinationBitmask
128-
}
129-
130-
combinationBitmask.Add(combination...)
131-
return combinationBitmask
132-
}
146+
accu += cost
133147
}
134148
}
135149

136-
// score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
137-
klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier)
138-
return nil
150+
return float32(accu) / float32(len(nodes)*len(nodes))
139151
}
140152

141153
func combineResources(numaNodes NUMANodeList, combination []int) v1.ResourceList {
@@ -153,3 +165,66 @@ func combineResources(numaNodes NUMANodeList, combination []int) v1.ResourceList
153165

154166
return resources
155167
}
168+
169+
// numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources
170+
// or nil when resources can't be fitted onto the worker node
171+
// second value returned is a boolean indicating if bitmask is optimal from distance perspective
172+
func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) (bitmask.BitMask, bool) {
173+
for bitmaskLen := 1; bitmaskLen <= len(numaNodes); bitmaskLen++ {
174+
numaNodesCombination := combin.Combinations(len(numaNodes), bitmaskLen)
175+
suitableCombination, isMinDistance := findSuitableCombination(identifier, qos, numaNodes, resources, numaNodesCombination)
176+
// we have found suitable combination for given bitmaskLen
177+
if suitableCombination != nil {
178+
bm := bitmask.NewEmptyBitMask()
179+
bm.Add(suitableCombination...)
180+
return bm, isMinDistance
181+
}
182+
}
183+
184+
return nil, false
185+
}
186+
187+
// findSuitableCombination returns combination from numaNodesCombination that can fit resources, otherwise return nil
188+
// second value returned is a boolean indicating if returned combination is optimal from distance perspective
189+
// this function will always return combination that provides minimal average distance between nodes in combination
190+
func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList, numaNodesCombination [][]int) ([]int, bool) {
191+
minAvgDistance := minAvgDistanceInCombinations(numaNodes, numaNodesCombination)
192+
var (
193+
minDistanceCombination []int
194+
// init as max distance
195+
minDistance float32 = 256
196+
)
197+
for _, combination := range numaNodesCombination {
198+
combinationResources := combineResources(numaNodes, combination)
199+
resourcesFit := checkResourcesFit(identifier, qos, resources, combinationResources)
200+
201+
if resourcesFit {
202+
distance := nodesAvgDistance(numaNodes, combination...)
203+
if distance == minAvgDistance {
204+
// return early if we can fit resources into combination and provide minDistance
205+
return combination, true
206+
}
207+
// we don't have to check which combination bitmask has lower value since we are generating them from lowest value
208+
if distance < minDistance {
209+
minDistance = distance
210+
minDistanceCombination = combination
211+
}
212+
}
213+
}
214+
215+
return minDistanceCombination, false
216+
}
217+
218+
func checkResourcesFit(identifier string, qos v1.PodQOSClass, resources v1.ResourceList, combinationResources v1.ResourceList) bool {
219+
for resource, quantity := range resources {
220+
if quantity.IsZero() {
221+
klog.V(4).InfoS("ignoring zero-qty resource request", "identifier", identifier, "resource", resource)
222+
continue
223+
}
224+
if combinationQuantity := combinationResources[resource]; !isResourceSetSuitable(qos, resource, quantity, combinationQuantity) {
225+
return false
226+
}
227+
}
228+
229+
return true
230+
}

0 commit comments

Comments
 (0)