Skip to content

Commit 3374be7

Browse files
authored
Merge pull request #8124 from jbtk/estimator_fix
Prevent the binpacking estimator from retrying to add additional nodes
2 parents 2abc138 + c7f7cb5 commit 3374be7

File tree

1 file changed

+16
-11
lines changed

1 file changed

+16
-11
lines changed

cluster-autoscaler/estimator/binpacking_estimator.go

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ func (e *BinpackingNodeEstimator) Estimate(
108108
}()
109109

110110
estimationState := newEstimationState()
111+
newNodesAvailable := true
111112
for _, podsEquivalenceGroup := range podsEquivalenceGroups {
112113
var err error
113114
var remainingPods []*apiv1.Pod
@@ -118,10 +119,12 @@ func (e *BinpackingNodeEstimator) Estimate(
118119
return 0, nil
119120
}
120121

121-
err = e.tryToScheduleOnNewNodes(estimationState, nodeTemplate, remainingPods)
122-
if err != nil {
123-
klog.Error(err.Error())
124-
return 0, nil
122+
if newNodesAvailable {
123+
newNodesAvailable, err = e.tryToScheduleOnNewNodes(estimationState, nodeTemplate, remainingPods)
124+
if err != nil {
125+
klog.Error(err.Error())
126+
return 0, nil
127+
}
125128
}
126129
}
127130

@@ -156,11 +159,13 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnExistingNodes(
156159
return pods[index:], nil
157160
}
158161

162+
// Returns whether it is worth retrying adding new nodes and error in unexpected
163+
// situations where whole estimation should be stopped.
159164
func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
160165
estimationState *estimationState,
161166
nodeTemplate *framework.NodeInfo,
162167
pods []*apiv1.Pod,
163-
) error {
168+
) (bool, error) {
164169
for _, pod := range pods {
165170
found := false
166171

@@ -172,7 +177,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
172177
estimationState.trackScheduledPod(pod, estimationState.lastNodeName)
173178
} else if err.Type() == clustersnapshot.SchedulingInternalError {
174179
// Unexpected error.
175-
return err
180+
return false, err
176181
}
177182
// The pod can't be scheduled on the newly created node because of scheduling predicates.
178183
}
@@ -182,7 +187,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
182187
// on a new node either. There is no point adding more nodes to snapshot in such case, especially because of
183188
// performance cost each extra node adds to future FitsAnyNodeMatching calls.
184189
if estimationState.lastNodeName != "" && !estimationState.newNodesWithPods[estimationState.lastNodeName] {
185-
break
190+
return true, nil
186191
}
187192

188193
// Stop binpacking if we reach the limit of nodes we can add.
@@ -192,12 +197,12 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
192197
// each call that returns true, one node gets added. Therefore this
193198
// must be the last check right before really adding a node.
194199
if !e.limiter.PermissionToAddNode() {
195-
break
200+
return false, nil
196201
}
197202

198203
// Add new node
199204
if err := e.addNewNodeToSnapshot(estimationState, nodeTemplate); err != nil {
200-
return fmt.Errorf("Error while adding new node for template to ClusterSnapshot; %w", err)
205+
return false, fmt.Errorf("Error while adding new node for template to ClusterSnapshot; %w", err)
201206
}
202207

203208
// And try to schedule pod to it.
@@ -206,7 +211,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
206211
// adding and removing node to snapshot for each such pod.
207212
if err := e.clusterSnapshot.SchedulePod(pod, estimationState.lastNodeName); err != nil && err.Type() == clustersnapshot.SchedulingInternalError {
208213
// Unexpected error.
209-
return err
214+
return false, err
210215
} else if err != nil {
211216
// The pod can't be scheduled on the new node because of scheduling predicates.
212217
break
@@ -215,7 +220,7 @@ func (e *BinpackingNodeEstimator) tryToScheduleOnNewNodes(
215220
estimationState.trackScheduledPod(pod, estimationState.lastNodeName)
216221
}
217222
}
218-
return nil
223+
return true, nil
219224
}
220225

221226
func (e *BinpackingNodeEstimator) addNewNodeToSnapshot(

0 commit comments

Comments
 (0)