Commit 31eb0a1

Merge pull request #8181 from MenD32/fix/topology-spread-binpacking
fix: binpacking simulator scale up optimization on pods with topology…
2 parents 8014ae2 + 0002157 commit 31eb0a1

3 files changed: +74 −7 lines changed
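
In short, the change teaches the binpacking estimator a cheaper path for pods that spread themselves across nodes: when a pod whose topology spread constraint uses the kubernetes.io/hostname key fails to schedule on the most recently added node with podtopologyspread's ErrReasonConstraintsNotMatch, the estimator now retries the pod on the nodes already in the snapshot (via SchedulePodOnAnyNodeMatching, excluding the node that just failed) before falling back to adding yet another node. For orientation, a pod that hits this path carries a constraint roughly like the sketch below; the concrete values (maxSkew, labels, DoNotSchedule) are illustrative assumptions, not taken from the commit.

package main

import (
    apiv1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// hostnameSpreadPod builds a pod with a hostname-keyed topology spread
// constraint, the workload shape targeted by the new estimator check.
// All concrete values here are illustrative, not from the commit.
func hostnameSpreadPod() *apiv1.Pod {
    return &apiv1.Pod{
        ObjectMeta: metav1.ObjectMeta{Name: "example", Labels: map[string]string{"app": "example"}},
        Spec: apiv1.PodSpec{
            TopologySpreadConstraints: []apiv1.TopologySpreadConstraint{{
                MaxSkew:           1,
                TopologyKey:       apiv1.LabelHostname, // "kubernetes.io/hostname"
                WhenUnsatisfiable: apiv1.DoNotSchedule,
                LabelSelector: &metav1.LabelSelector{
                    MatchLabels: map[string]string{"app": "example"},
                },
            }},
        },
    }
}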


cluster-autoscaler/estimator/binpacking_estimator.go

Lines changed: 50 additions & 1 deletion
@@ -20,13 +20,16 @@ import (
     "fmt"
     "strconv"
 
+    "slices"
+
     apiv1 "k8s.io/api/core/v1"
     "k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
     "k8s.io/autoscaler/cluster-autoscaler/metrics"
     core_utils "k8s.io/autoscaler/cluster-autoscaler/simulator"
     "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
     "k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
     "k8s.io/klog/v2"
+    "k8s.io/kubernetes/pkg/scheduler/framework/plugins/podtopologyspread"
 )
 
 // BinpackingNodeEstimator estimates the number of needed nodes to handle the given amount of pods.
@@ -171,7 +174,8 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
 
     if estimationState.lastNodeName != "" {
         // Try to schedule the pod on only newly created node.
-        if err := e.clusterSnapshot.SchedulePod(pod, estimationState.lastNodeName); err == nil {
+        err := e.clusterSnapshot.SchedulePod(pod, estimationState.lastNodeName)
+        if err == nil {
             // The pod was scheduled on the newly created node.
             found = true
             estimationState.trackScheduledPod(pod, estimationState.lastNodeName)
@@ -180,6 +184,24 @@
             return false, err
         }
         // The pod can't be scheduled on the newly created node because of scheduling predicates.
+
+        // Check if the node failed because of topology constraints.
+        if isPodUsingHostNameTopologyKey(pod) && hasTopologyConstraintError(err) {
+            // If the pod can't be scheduled on the last node because of topology constraints, we can stop binpacking.
+            // The pod can't be scheduled on any new node either, because it has the same topology constraints.
+            nodeName, err := e.clusterSnapshot.SchedulePodOnAnyNodeMatching(pod, func(nodeInfo *framework.NodeInfo) bool {
+                return nodeInfo.Node().Name != estimationState.lastNodeName // only skip the last node that failed scheduling
+            })
+            if err != nil && err.Type() == clustersnapshot.SchedulingInternalError {
+                // Unexpected error.
+                return false, err
+            }
+            if nodeName != "" {
+                // The pod was scheduled on a different node, so we can continue binpacking.
+                found = true
+                estimationState.trackScheduledPod(pod, nodeName)
+            }
+        }
     }
 
     if !found {
@@ -240,6 +262,33 @@ func (e *BinpackingNodeEstimator) addNewNodeToSnapshot(
     return nil
 }
 
+// hasTopologyConstraintError determines if an error is related to pod topology spread constraints
+// by checking the failing predicate reasons.
+func hasTopologyConstraintError(err clustersnapshot.SchedulingError) bool {
+    if err == nil {
+        return false
+    }
+
+    // Check reasons for mentions of topology or constraints.
+    return slices.Contains(err.FailingPredicateReasons(), podtopologyspread.ErrReasonConstraintsNotMatch)
+}
+
+// isPodUsingHostNameTopologyKey returns true if the pod has any topology spread
+// constraint that uses the kubernetes.io/hostname topology key.
+func isPodUsingHostNameTopologyKey(pod *apiv1.Pod) bool {
+    if pod == nil || pod.Spec.TopologySpreadConstraints == nil {
+        return false
+    }
+
+    for _, constraint := range pod.Spec.TopologySpreadConstraints {
+        if constraint.TopologyKey == apiv1.LabelHostname {
+            return true
+        }
+    }
+
+    return false
+}
+
 func observeBinpackingHeterogeneity(podsEquivalenceGroups []PodEquivalenceGroup, nodeTemplate *framework.NodeInfo) {
     node := nodeTemplate.Node()
     var instanceType, cpuCount string
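
As a quick illustration of the new helper's behavior (not part of the commit), a package-local test along these lines could exercise isPodUsingHostNameTopologyKey; it assumes the sketch sits in the same estimator package as binpacking_estimator.go.

package estimator

import (
    "testing"

    apiv1 "k8s.io/api/core/v1"
)

// Sketch of a sanity-check test for the new helper; not part of the commit.
func TestIsPodUsingHostNameTopologyKey(t *testing.T) {
    hostnamePod := &apiv1.Pod{Spec: apiv1.PodSpec{
        TopologySpreadConstraints: []apiv1.TopologySpreadConstraint{{TopologyKey: apiv1.LabelHostname}},
    }}
    zonePod := &apiv1.Pod{Spec: apiv1.PodSpec{
        TopologySpreadConstraints: []apiv1.TopologySpreadConstraint{{TopologyKey: "topology.kubernetes.io/zone"}},
    }}

    if !isPodUsingHostNameTopologyKey(hostnamePod) {
        t.Errorf("expected true for a %s-keyed constraint", apiv1.LabelHostname)
    }
    if isPodUsingHostNameTopologyKey(zonePod) {
        t.Error("expected false for a zone-keyed constraint")
    }
    if isPodUsingHostNameTopologyKey(nil) {
        t.Error("expected false for a nil pod")
    }
}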

cluster-autoscaler/estimator/binpacking_estimator_test.go

Lines changed: 21 additions & 4 deletions
@@ -178,13 +178,13 @@ func TestBinpackingEstimate(t *testing.T) {
             podsEquivalenceGroup: []PodEquivalenceGroup{makePodEquivalenceGroup(
                 BuildTestPod(
                     "estimatee",
-                    20,
-                    100,
+                    200,
+                    200,
                     WithNamespace("universe"),
                     WithLabels(map[string]string{
                         "app": "estimatee",
                     }),
-                    WithMaxSkew(2, "kubernetes.io/hostname")), 8)},
+                    WithMaxSkew(2, "kubernetes.io/hostname", 1)), 8)},
             expectNodeCount: 4,
             expectPodCount:  8,
         },
@@ -201,10 +201,27 @@ func TestBinpackingEstimate(t *testing.T) {
                     WithLabels(map[string]string{
                         "app": "estimatee",
                     }),
-                    WithMaxSkew(2, "topology.kubernetes.io/zone")), 8)},
+                    WithMaxSkew(2, "topology.kubernetes.io/zone", 1)), 8)},
             expectNodeCount: 1,
             expectPodCount:  2,
         },
+        {
+            name:       "hostname topology spreading with maxSkew=1 with a large scaleup handles scheduling pods retroactively",
+            millicores: 1000,
+            memory:     5000,
+            podsEquivalenceGroup: []PodEquivalenceGroup{makePodEquivalenceGroup(
+                BuildTestPod(
+                    "estimatee",
+                    20,
+                    100,
+                    WithNamespace("universe"),
+                    WithLabels(map[string]string{
+                        "app": "estimatee",
+                    }),
+                    WithMaxSkew(1, "kubernetes.io/hostname", 3)), 12)},
+            expectNodeCount: 3,
+            expectPodCount:  12,
+        },
     }
     for _, tc := range testCases {
         t.Run(tc.name, func(t *testing.T) {

cluster-autoscaler/utils/test/test_utils.go

Lines changed: 3 additions & 2 deletions
@@ -160,8 +160,8 @@ func WithHostPort(hostport int32) func(*apiv1.Pod) {
     }
 }
 
-// WithMaxSkew sets a namespace to the pod.
-func WithMaxSkew(maxSkew int32, topologySpreadingKey string) func(*apiv1.Pod) {
+// WithMaxSkew sets a topology spread constraint on the pod.
+func WithMaxSkew(maxSkew int32, topologySpreadingKey string, minDomains int32) func(*apiv1.Pod) {
     return func(pod *apiv1.Pod) {
         if maxSkew > 0 {
             pod.Spec.TopologySpreadConstraints = []apiv1.TopologySpreadConstraint{
@@ -174,6 +174,7 @@ func WithMaxSkew(maxSkew int32, topologySpreadingKey string) func(*apiv1.Pod) {
                             "app": "estimatee",
                         },
                     },
+                    MinDomains: &minDomains,
                 },
             }
         }
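
Since WithMaxSkew grew a third parameter, existing callers now have to pass a minDomains value as well. A minimal usage sketch, assuming it lives alongside BuildTestPod in the same utils/test package (the concrete numbers are illustrative):

package test

import apiv1 "k8s.io/api/core/v1"

// exampleSpreadPod shows the updated WithMaxSkew signature; the third
// argument is the new minDomains value. Concrete values are illustrative.
func exampleSpreadPod() *apiv1.Pod {
    return BuildTestPod(
        "estimatee",
        20,  // CPU millicores
        100, // memory
        WithNamespace("universe"),
        WithLabels(map[string]string{"app": "estimatee"}),
        WithMaxSkew(1, apiv1.LabelHostname, 3), // maxSkew=1, hostname key, minDomains=3
    )
}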
