
Commit e017121

Merge pull request kubernetes#3573 from ysy2020/cluster-autoscaler-huaweicloud
simplify DeleteNode logic by removing an extra Mutex
2 parents: 5dabdc3 + a9ec9df
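
For orientation, the sketch below distills the locking pattern this commit moves to: a single clusterUpdateMutex now serializes both the pending-delete bookkeeping and the cluster update, so the separate deleteMutex can be dropped. It is a minimal, self-contained illustration, not the provider's actual code; the names nodeGroup, deleteNodes, minSize and curSize, and the size bookkeeping, are simplified stand-ins for NodeGroup.DeleteNodes, checkAndUpdate and getNodeIDsToDelete in huaweicloud_node_group.go.

package main

import (
	"fmt"
	"sync"
)

// nodeGroup is a stripped-down, hypothetical stand-in for the provider's NodeGroup
// after this commit: one mutex serializes both the pending-delete bookkeeping and
// the cluster update, so no separate deleteMutex is needed.
type nodeGroup struct {
	clusterUpdateMutex sync.Mutex
	nodesToDelete      []string // provider IDs queued for deletion
	minSize            int
	curSize            int
}

// deleteNodes mirrors the shape of the simplified DeleteNodes flow: every step,
// from queueing the nodes to issuing the update, runs under clusterUpdateMutex.
func (ng *nodeGroup) deleteNodes(ids []string) error {
	ng.clusterUpdateMutex.Lock()
	defer ng.clusterUpdateMutex.Unlock()

	// validate the target size and queue the nodes (the provider does this in
	// checkAndUpdate, under the same lock)
	if ng.curSize-len(ids) < ng.minSize {
		return fmt.Errorf("deleting %d nodes would drop the pool below its minimum size %d", len(ids), ng.minSize)
	}
	ng.nodesToDelete = append(ng.nodesToDelete, ids...)

	// drain the queue (the provider does this in getNodeIDsToDelete, which also
	// returns early when another goroutine has already emptied it)
	pending := ng.nodesToDelete
	ng.nodesToDelete = nil
	ng.curSize -= len(pending)

	fmt.Printf("deleting nodes: %v\n", pending) // stand-in for the CCE node pool update
	return nil
}

func main() {
	ng := &nodeGroup{minSize: 1, curSize: 3}
	var wg sync.WaitGroup
	for _, id := range []string{"node-a", "node-b"} {
		wg.Add(1)
		go func(id string) {
			defer wg.Done()
			if err := ng.deleteNodes([]string{id}); err != nil {
				fmt.Println(err)
			}
		}(id)
	}
	wg.Wait()
}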

3 files changed: +12 −32 lines

cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_cloud_provider.go

Lines changed: 0 additions & 2 deletions

@@ -185,7 +185,6 @@ func getAutoscaleNodePools(manager *huaweicloudCloudManager, opts config.Autosca
 	}
 
 	clusterUpdateLock := sync.Mutex{}
-	deleteMux := sync.Mutex{}
 
 	// Given our current implementation just support single node pool,
 	// please make sure there is only one node pool with Autoscaling flag turned on in CCE cluster
@@ -200,7 +199,6 @@ func getAutoscaleNodePools(manager *huaweicloudCloudManager, opts config.Autosca
 
 	nodePoolsWithAutoscalingEnabled = append(nodePoolsWithAutoscalingEnabled, NodeGroup{
 		huaweiCloudManager: manager,
-		deleteMutex:        &deleteMux,
 		clusterUpdateMutex: &clusterUpdateLock,
 		nodePoolName:       nodePool.Metadata.Name,
 		nodePoolId:         nodePool.Metadata.Uid,

cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_node_group.go

Lines changed: 12 additions & 29 deletions

@@ -46,7 +46,6 @@ type NodeGroup struct {
 	nodePoolSizeTmp     int
 	getNodePoolSizeTime time.Time
 
-	deleteMutex        *sync.Mutex
 	clusterUpdateMutex *sync.Mutex
 }
 
@@ -134,20 +133,19 @@ func (ng *NodeGroup) IncreaseSize(delta int) error {
 // DeleteNodes deletes nodes from this node group. This function
 // should wait until node group size is updated.
 func (ng *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
-	// use ng.deleteMutex to update ng.nodePoolSizeTmp, ng.getNodePoolSizeTime and ng.nodesToDelete
-	ng.deleteMutex.Lock()
+	// start the process of deleting nodes
+	ng.clusterUpdateMutex.Lock()
+	defer ng.clusterUpdateMutex.Unlock()
+
+	// check whether deleting the nodes will cause the size of the node pool below minimum size
+	// and update ng.nodesToDelete (as well as ng.nodePoolSizeTmp and ng.getNodePoolSizeTime if necessary)
 	currentSize, err := checkAndUpdate(ng, nodes)
-	ng.deleteMutex.Unlock()
 	if err != nil {
 		return err
 	}
 
-	// start the process of deleting nodes
-	ng.clusterUpdateMutex.Lock()
-	defer ng.clusterUpdateMutex.Unlock()
-
 	// get the unique ids of the nodes to be deleted
-	nodeIDs, finished, err := getNodeIDsToDelete(ng, nodes, currentSize)
+	nodeIDs, finished, err := getNodeIDsToDelete(ng)
 	if finished { // nodes have been deleted by other goroutine
 		return nil
 	}
@@ -215,26 +213,13 @@ func checkAndUpdate(ng *NodeGroup, nodes []*apiv1.Node) (int, error) {
 // getNodeIDsToDelete checks whether there're still nodes waiting for being deleted. If there're no nodes
 // to delete, it will return true, representing that the process of deleting the nodes has been finished;
 // otherwise, it will return a slice of node ids to be deleted.
-func getNodeIDsToDelete(ng *NodeGroup, nodes []*apiv1.Node, currentSize int) ([]string, bool, error) {
+func getNodeIDsToDelete(ng *NodeGroup) ([]string, bool, error) {
 	// check whether the nodes has already been deleted by other goroutine
-	ng.deleteMutex.Lock()
 	// If current goroutine is not the first one to acquire the ng.clusterUpdateMutex,
 	// it's possible that the nodes have already been deleted, which makes ng.nodesToDelete to be empty.
 	if len(ng.nodesToDelete) == 0 {
-		ng.deleteMutex.Unlock()
 		return nil, true, nil
 	}
-	ng.deleteMutex.Unlock()
-
-	// wait for more nodes to be added to ng.nodesToDelete
-	time.Sleep(ng.deleteWaitTime)
-
-	// get a copy of the nodes to be deleted and release the lock
-	ng.deleteMutex.Lock()
-	nodes = make([]*apiv1.Node, len(ng.nodesToDelete))
-	copy(nodes, ng.nodesToDelete)
-	ng.nodesToDelete = nil
-	ng.deleteMutex.Unlock()
 
 	// check whether the cluster is available for deleting nodes
 	canUpdate, status, err := ng.huaweiCloudManager.canUpdate()
@@ -245,17 +230,15 @@ func getNodeIDsToDelete(ng *NodeGroup, nodes []*apiv1.Node, currentSize int) ([]
 		return nil, false, fmt.Errorf("cluster is in %s status, cannot perform node deletion now", status)
 	}
 
-	// check again whether deleting the nodes is valid
-	if currentSize-len(nodes) < ng.MinSize() {
-		return nil, false, fmt.Errorf("the size of the node pool is not sufficient for deleting the nodes, target size:%d, minimum size:%d", currentSize-len(nodes), ng.MinSize())
-	}
-
 	nodeIDs := make([]string, 0)
-	for _, node := range nodes {
+	for _, node := range ng.nodesToDelete {
 		// the node.Spec.ProviderID is the node's uid
 		klog.Infof("Delete node with node id: %s", node.Spec.ProviderID)
 		nodeIDs = append(nodeIDs, node.Spec.ProviderID)
 	}
+
+	ng.nodesToDelete = nil
+
 	return nodeIDs, false, nil
 }
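Taken together, the removed lines show why the second mutex was redundant: previously deleteMutex guarded ng.nodesToDelete while clusterUpdateMutex guarded the cluster update, and getNodeIDsToDelete slept for ng.deleteWaitTime so that concurrent DeleteNodes callers could batch their nodes before the first lock holder copied and cleared the list. With the whole path now running under clusterUpdateMutex, callers are fully serialized: checkAndUpdate validates the minimum size and updates ng.nodesToDelete under that lock, and getNodeIDsToDelete drains the same slice under the same lock, so the extra mutex, the sleep-based batching, and the second minimum-size check all drop out.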

cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud_node_group_test.go

Lines changed: 0 additions & 1 deletion

@@ -43,7 +43,6 @@ func createTestNodeGroup(manager *huaweicloudCloudManager) *NodeGroup {
 	size := nodePoolNodeCount
 	return &NodeGroup{
 		huaweiCloudManager: manager,
-		deleteMutex:        &sync.Mutex{},
 		clusterUpdateMutex: &sync.Mutex{},
 		nodePoolName:       nodePoolName,
 		nodePoolId:         nodePoolUID,
