@@ -31,21 +31,37 @@ import (
31
31
"sigs.k8s.io/scheduler-plugins/pkg/util"
32
32
)
33
33
34
// maxDistanceValue is the largest distance expressible in the ACPI SLIT
// (System Locality Information Table); it stands for an unknown/undefined distance.
const maxDistanceValue = 255
38
+
34
39
func leastNUMAContainerScopeScore (pod * v1.Pod , zones topologyv1alpha2.ZoneList ) (int64 , * framework.Status ) {
35
40
nodes := createNUMANodeList (zones )
36
41
qos := v1qos .GetPodQOS (pod )
37
42
38
43
maxNUMANodesCount := 0
44
+ allContainersMinAvgDistance := true
39
45
// the order how TopologyManager asks for hint is important so doing it in the same order
40
46
// https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/cm/topologymanager/scope_container.go#L52
41
47
for _ , container := range append (pod .Spec .InitContainers , pod .Spec .Containers ... ) {
48
+ // if a container requests only non NUMA just continue
49
+ if onlyNonNUMAResources (nodes , container .Resources .Requests ) {
50
+ continue
51
+ }
42
52
identifier := fmt .Sprintf ("%s/%s/%s" , pod .Namespace , pod .Name , container .Name )
43
- numaNodes := numaNodesRequired (identifier , qos , nodes , container .Resources .Requests )
53
+ numaNodes , isMinAvgDistance := numaNodesRequired (identifier , qos , nodes , container .Resources .Requests )
44
54
// container's resources can't fit onto node, return MinNodeScore for whole pod
45
55
if numaNodes == nil {
56
+ // score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
57
+ klog .Warningf ("cannot calculate how many NUMA nodes are required for: %s" , identifier )
46
58
return framework .MinNodeScore , nil
47
59
}
48
60
61
+ if ! isMinAvgDistance {
62
+ allContainersMinAvgDistance = false
63
+ }
64
+
49
65
if numaNodes .Count () > maxNUMANodesCount {
50
66
maxNUMANodesCount = numaNodes .Count ()
51
67
}
@@ -55,7 +71,11 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList)
55
71
subtractFromNUMAs (container .Resources .Requests , nodes , numaNodes .GetBits ()... )
56
72
}
57
73
58
- return normalizeScore (maxNUMANodesCount ), nil
74
+ if maxNUMANodesCount == 0 {
75
+ return framework .MaxNodeScore , nil
76
+ }
77
+
78
+ return normalizeScore (maxNUMANodesCount , allContainersMinAvgDistance ), nil
59
79
}
60
80
61
81
func leastNUMAPodScopeScore (pod * v1.Pod , zones topologyv1alpha2.ZoneList ) (int64 , * framework.Status ) {
@@ -65,77 +85,69 @@ func leastNUMAPodScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64
65
85
identifier := fmt .Sprintf ("%s/%s" , pod .Namespace , pod .Name )
66
86
67
87
resources := util .GetPodEffectiveRequest (pod )
88
+ // if a pod requests only non NUMA resources return max score
89
+ if onlyNonNUMAResources (nodes , resources ) {
90
+ return framework .MaxNodeScore , nil
91
+ }
68
92
69
- numaNodes := numaNodesRequired (identifier , qos , nodes , resources )
93
+ numaNodes , isMinAvgDistance := numaNodesRequired (identifier , qos , nodes , resources )
70
94
// pod's resources can't fit onto node, return MinNodeScore
71
95
if numaNodes == nil {
96
+ // score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
97
+ klog .Warningf ("cannot calculate how many NUMA nodes are required for: %s" , identifier )
72
98
return framework .MinNodeScore , nil
73
99
}
74
100
75
- return normalizeScore (numaNodes .Count ()), nil
101
+ return normalizeScore (numaNodes .Count (), isMinAvgDistance ), nil
76
102
}
77
103
78
- func normalizeScore (numaNodes int ) int64 {
79
- numaScore := framework .MaxNodeScore / highestNUMAID
80
- return framework .MaxNodeScore - int64 (numaNodes )* numaScore
104
+ func normalizeScore (numaNodesCount int , isMinAvgDistance bool ) int64 {
105
+ numaNodeScore := framework .MaxNodeScore / highestNUMAID
106
+ score := framework .MaxNodeScore - int64 (numaNodesCount )* numaNodeScore
107
+ if isMinAvgDistance {
108
+ // if distance between NUMA domains is optimal add half of numaNodeScore to make this node more favorable
109
+ return score + numaNodeScore / 2
110
+ }
111
+
112
+ return score
81
113
}
82
114
83
- // numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources
84
- // or nil when resources can't be fitted onto the node
85
- func numaNodesRequired (identifier string , qos v1.PodQOSClass , numaNodes NUMANodeList , resources v1.ResourceList ) bitmask.BitMask {
86
- combinationBitmask := bitmask .NewEmptyBitMask ()
87
- // we will generate combination of numa nodes from len = 1 to the number of numa nodes present on the machine
88
- for i := 1 ; i <= len (numaNodes ); i ++ {
89
- // generate combinations of len i
90
- numaNodesCombination := combin .Combinations (len (numaNodes ), i )
91
- // iterate over combinations for given i
92
- for _ , combination := range numaNodesCombination {
93
- // accumulate resources for given combination
94
- combinationResources := combineResources (numaNodes , combination )
95
-
96
- resourcesFit := true
97
- onlyNonNUMAResources := true
98
- for resource , quantity := range resources {
99
- if quantity .IsZero () {
100
- // why bother? everything's fine from the perspective of this resource
101
- klog .V (4 ).InfoS ("ignoring zero-qty resource request" , "identifier" , identifier , "resource" , resource )
102
- continue
103
- }
104
-
105
- combinationQuantity , ok := combinationResources [resource ]
106
- if ! ok {
107
- // non NUMA resource continue
108
- continue
109
- }
110
-
111
- // there can be a situation where container/pod requests only non NUMA resources
112
- onlyNonNUMAResources = false
113
-
114
- if ! isResourceSetSuitable (qos , resource , quantity , combinationQuantity ) {
115
- resourcesFit = false
116
- break
117
- }
115
+ func minAvgDistanceInCombinations (numaNodes NUMANodeList , numaNodesCombination [][]int ) float32 {
116
+ // max distance for NUMA node
117
+ var minDistance float32 = maxDistanceValue
118
118
119
+ for _ , combination := range numaNodesCombination {
120
+ avgDistance := nodesAvgDistance (numaNodes , combination ... )
121
+ if avgDistance < minDistance {
122
+ minDistance = avgDistance
123
+ }
124
+ }
125
+
126
+ return minDistance
127
+ }
128
+
129
+ func nodesAvgDistance (numaNodes NUMANodeList , nodes ... int ) float32 {
130
+ if len (nodes ) == 0 {
131
+ return maxDistanceValue
132
+ }
133
+
134
+ var (
135
+ accu int
136
+ )
137
+
138
+ for _ , node1 := range nodes {
139
+ for _ , node2 := range nodes {
140
+ cost , ok := numaNodes [node1 ].Costs [node2 ]
141
+ // we couldn't read Costs assign maxDistanceValue
142
+ if ! ok {
143
+ klog .Warningf ("cannot retrieve Costs information for node : %s" , node1 )
144
+ cost = maxDistanceValue
119
145
}
120
- // if resources can be fit on given combination, just return the number of numa nodes requires to fit them
121
- // according to TopologyManager if both masks are the same size pick the one that has less bits set
122
- // https://github.com/kubernetes/kubernetes/blob/3e26e104bdf9d0dc3c4046d6350b93557c67f3f4/pkg/kubelet/cm/topologymanager/bitmask/bitmask.go#L146
123
- // combin.Combinations is generating combinations in an order from the smallest to highest value
124
- if resourcesFit {
125
- // if a container/pod requests only non NUMA resources return empty bitmask and score of 0
126
- if onlyNonNUMAResources {
127
- return combinationBitmask
128
- }
129
-
130
- combinationBitmask .Add (combination ... )
131
- return combinationBitmask
132
- }
146
+ accu += cost
133
147
}
134
148
}
135
149
136
- // score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
137
- klog .Warningf ("cannot calculate how many NUMA nodes are required for: %s" , identifier )
138
- return nil
150
+ return float32 (accu ) / float32 (len (nodes )* len (nodes ))
139
151
}
140
152
141
153
func combineResources (numaNodes NUMANodeList , combination []int ) v1.ResourceList {
@@ -153,3 +165,66 @@ func combineResources(numaNodes NUMANodeList, combination []int) v1.ResourceList
153
165
154
166
return resources
155
167
}
168
+
169
+ // numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources
170
+ // or nil when resources can't be fitted onto the worker node
171
+ // second value returned is a boolean indicating if bitmask is optimal from distance perspective
172
+ func numaNodesRequired (identifier string , qos v1.PodQOSClass , numaNodes NUMANodeList , resources v1.ResourceList ) (bitmask.BitMask , bool ) {
173
+ for bitmaskLen := 1 ; bitmaskLen <= len (numaNodes ); bitmaskLen ++ {
174
+ numaNodesCombination := combin .Combinations (len (numaNodes ), bitmaskLen )
175
+ suitableCombination , isMinDistance := findSuitableCombination (identifier , qos , numaNodes , resources , numaNodesCombination )
176
+ // we have found suitable combination for given bitmaskLen
177
+ if suitableCombination != nil {
178
+ bm := bitmask .NewEmptyBitMask ()
179
+ bm .Add (suitableCombination ... )
180
+ return bm , isMinDistance
181
+ }
182
+ }
183
+
184
+ return nil , false
185
+ }
186
+
187
+ // findSuitableCombination returns combination from numaNodesCombination that can fit resources, otherwise return nil
188
+ // second value returned is a boolean indicating if returned combination is optimal from distance perspective
189
+ // this function will always return combination that provides minimal average distance between nodes in combination
190
+ func findSuitableCombination (identifier string , qos v1.PodQOSClass , numaNodes NUMANodeList , resources v1.ResourceList , numaNodesCombination [][]int ) ([]int , bool ) {
191
+ minAvgDistance := minAvgDistanceInCombinations (numaNodes , numaNodesCombination )
192
+ var (
193
+ minDistanceCombination []int
194
+ // init as max distance
195
+ minDistance float32 = 256
196
+ )
197
+ for _ , combination := range numaNodesCombination {
198
+ combinationResources := combineResources (numaNodes , combination )
199
+ resourcesFit := checkResourcesFit (identifier , qos , resources , combinationResources )
200
+
201
+ if resourcesFit {
202
+ distance := nodesAvgDistance (numaNodes , combination ... )
203
+ if distance == minAvgDistance {
204
+ // return early if we can fit resources into combination and provide minDistance
205
+ return combination , true
206
+ }
207
+ // we don't have to check which combination bitmask has lower value since we are generating them from lowest value
208
+ if distance < minDistance {
209
+ minDistance = distance
210
+ minDistanceCombination = combination
211
+ }
212
+ }
213
+ }
214
+
215
+ return minDistanceCombination , false
216
+ }
217
+
218
+ func checkResourcesFit (identifier string , qos v1.PodQOSClass , resources v1.ResourceList , combinationResources v1.ResourceList ) bool {
219
+ for resource , quantity := range resources {
220
+ if quantity .IsZero () {
221
+ klog .V (4 ).InfoS ("ignoring zero-qty resource request" , "identifier" , identifier , "resource" , resource )
222
+ continue
223
+ }
224
+ if combinationQuantity := combinationResources [resource ]; ! isResourceSetSuitable (qos , resource , quantity , combinationQuantity ) {
225
+ return false
226
+ }
227
+ }
228
+
229
+ return true
230
+ }
0 commit comments