Skip to content

Commit 2f6c19a

Browse files
committed
CA: stop mutating the result of .TemplateNodeInfo() in SanitizedTemplateNodeInfoFromNodeGroup
SanitizedTemplateNodeInfoFromNodeGroup() calls .TemplateNodeInfo(), adds deprecated labels to the result, then sanitizes it - which includes deep-copying. This commit moves adding the deprecated labels after the sanitization process, directly to the deep-copied result. If .TemplateNodeInfo() caches its result internally, and is called from a CloudProvider-specific goroutine at the same time as SanitizedTemplateNodeInfoFromNodeGroup(), CA panics because of a concurrent map read/write. This change removes the race condition.
1 parent e69b1a9 commit 2f6c19a

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

cluster-autoscaler/simulator/node_info_utils.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,12 @@ func SanitizedTemplateNodeInfoFromNodeGroup(nodeGroup nodeGroupTemplateNodeInfoG
4949
if err != nil {
5050
return nil, errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to obtain template NodeInfo from node group %q: ", nodeGroup.Id())
5151
}
52-
labels.UpdateDeprecatedLabels(baseNodeInfo.Node().ObjectMeta.Labels)
53-
54-
return SanitizedTemplateNodeInfoFromNodeInfo(baseNodeInfo, nodeGroup.Id(), daemonsets, true, taintConfig)
52+
sanitizedNodeInfo, aErr := SanitizedTemplateNodeInfoFromNodeInfo(baseNodeInfo, nodeGroup.Id(), daemonsets, true, taintConfig)
53+
if aErr != nil {
54+
return nil, aErr
55+
}
56+
labels.UpdateDeprecatedLabels(sanitizedNodeInfo.Node().Labels)
57+
return sanitizedNodeInfo, nil
5558
}
5659

5760
// SanitizedTemplateNodeInfoFromNodeInfo returns a template NodeInfo object based on a real example NodeInfo from the cluster. The template is sanitized, and only

cluster-autoscaler/simulator/node_info_utils_test.go

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
9393
exampleNode.Spec.Taints = []apiv1.Taint{
9494
{Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule},
9595
}
96+
exampleNode.Labels = map[string]string{
97+
"custom": "label",
98+
apiv1.LabelInstanceTypeStable: "some-instance",
99+
apiv1.LabelTopologyRegion: "some-region",
100+
}
96101

97102
for _, tc := range []struct {
98103
testName string
@@ -155,7 +160,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
155160
// Pass empty string as nameSuffix so that it's auto-determined from the sanitized templateNodeInfo, because
156161
// TemplateNodeInfoFromNodeGroupTemplate randomizes the suffix.
157162
// Pass non-empty expectedPods to verify that the set of pods is changed as expected (e.g. DS pods added, non-DS/deleted pods removed).
158-
if err := verifyNodeInfoSanitization(tc.nodeGroup.templateNodeInfoResult, templateNodeInfo, tc.wantPods, "template-node-for-"+tc.nodeGroup.id, "", nil); err != nil {
163+
if err := verifyNodeInfoSanitization(tc.nodeGroup.templateNodeInfoResult, templateNodeInfo, tc.wantPods, "template-node-for-"+tc.nodeGroup.id, "", true, nil); err != nil {
159164
t.Fatalf("TemplateNodeInfoFromExampleNodeInfo(): NodeInfo wasn't properly sanitized: %v", err)
160165
}
161166
})
@@ -167,6 +172,11 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
167172
exampleNode.Spec.Taints = []apiv1.Taint{
168173
{Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule},
169174
}
175+
exampleNode.Labels = map[string]string{
176+
"custom": "label",
177+
apiv1.LabelInstanceTypeStable: "some-instance",
178+
apiv1.LabelTopologyRegion: "some-region",
179+
}
170180

171181
testCases := []struct {
172182
name string
@@ -317,7 +327,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
317327
// Pass empty string as nameSuffix so that it's auto-determined from the sanitized templateNodeInfo, because
318328
// TemplateNodeInfoFromExampleNodeInfo randomizes the suffix.
319329
// Pass non-empty expectedPods to verify that the set of pods is changed as expected (e.g. DS pods added, non-DS/deleted pods removed).
320-
if err := verifyNodeInfoSanitization(exampleNodeInfo, templateNodeInfo, tc.wantPods, "template-node-for-"+nodeGroupId, "", nil); err != nil {
330+
if err := verifyNodeInfoSanitization(exampleNodeInfo, templateNodeInfo, tc.wantPods, "template-node-for-"+nodeGroupId, "", false, nil); err != nil {
321331
t.Fatalf("TemplateNodeInfoFromExampleNodeInfo(): NodeInfo wasn't properly sanitized: %v", err)
322332
}
323333
})
@@ -332,6 +342,12 @@ func TestSanitizedNodeInfo(t *testing.T) {
332342
{Key: taints.ToBeDeletedTaint, Value: "2312532423", Effect: apiv1.TaintEffectNoSchedule},
333343
{Key: "a", Value: "b", Effect: apiv1.TaintEffectNoSchedule},
334344
}
345+
templateNode.Labels = map[string]string{
346+
"custom": "label",
347+
apiv1.LabelInstanceTypeStable: "some-instance",
348+
apiv1.LabelTopologyRegion: "some-region",
349+
}
350+
335351
pods := []*framework.PodInfo{
336352
{Pod: BuildTestPod("p1", 80, 0, WithNodeName(nodeName))},
337353
{Pod: BuildTestPod("p2", 80, 0, WithNodeName(nodeName))},
@@ -346,7 +362,7 @@ func TestSanitizedNodeInfo(t *testing.T) {
346362
// Verify that the taints are not sanitized (they should be sanitized in the template already).
347363
// Verify that the NodeInfo is sanitized using the template Node name as base.
348364
initialTaints := templateNodeInfo.Node().Spec.Taints
349-
if err := verifyNodeInfoSanitization(templateNodeInfo, freshNodeInfo, nil, templateNodeInfo.Node().Name, suffix, initialTaints); err != nil {
365+
if err := verifyNodeInfoSanitization(templateNodeInfo, freshNodeInfo, nil, templateNodeInfo.Node().Name, suffix, false, initialTaints); err != nil {
350366
t.Fatalf("FreshNodeInfoFromTemplateNodeInfo(): NodeInfo wasn't properly sanitized: %v", err)
351367
}
352368
}
@@ -357,9 +373,11 @@ func TestCreateSanitizedNodeInfo(t *testing.T) {
357373

358374
labelsNode := basicNode.DeepCopy()
359375
labelsNode.Labels = map[string]string{
360-
apiv1.LabelHostname: oldNodeName,
361-
"a": "b",
362-
"x": "y",
376+
apiv1.LabelHostname: oldNodeName,
377+
"a": "b",
378+
"x": "y",
379+
apiv1.LabelInstanceTypeStable: "some-instance",
380+
apiv1.LabelTopologyRegion: "some-region",
363381
}
364382

365383
taintsNode := basicNode.DeepCopy()
@@ -491,7 +509,7 @@ func TestCreateSanitizedNodeInfo(t *testing.T) {
491509
if err != nil {
492510
t.Fatalf("sanitizeNodeInfo(): want nil error, got %v", err)
493511
}
494-
if err := verifyNodeInfoSanitization(tc.nodeInfo, nodeInfo, nil, newNameBase, suffix, tc.wantTaints); err != nil {
512+
if err := verifyNodeInfoSanitization(tc.nodeInfo, nodeInfo, nil, newNameBase, suffix, false, tc.wantTaints); err != nil {
495513
t.Fatalf("sanitizeNodeInfo(): NodeInfo wasn't properly sanitized: %v", err)
496514
}
497515
})
@@ -506,7 +524,7 @@ func TestCreateSanitizedNodeInfo(t *testing.T) {
506524
//
507525
// If expectedPods is nil, the set of pods is expected not to change between initialNodeInfo and sanitizedNodeInfo. If the sanitization is
508526
// expected to change the set of pods, the expected set should be passed to expectedPods.
509-
func verifyNodeInfoSanitization(initialNodeInfo, sanitizedNodeInfo *framework.NodeInfo, expectedPods []*apiv1.Pod, nameBase, nameSuffix string, wantTaints []apiv1.Taint) error {
527+
func verifyNodeInfoSanitization(initialNodeInfo, sanitizedNodeInfo *framework.NodeInfo, expectedPods []*apiv1.Pod, nameBase, nameSuffix string, wantDeprecatedLabels bool, wantTaints []apiv1.Taint) error {
510528
if nameSuffix == "" {
511529
// Determine the suffix from the provided sanitized NodeInfo - it should be the last part of a dash-separated name.
512530
nameParts := strings.Split(sanitizedNodeInfo.Node().Name, "-")
@@ -526,7 +544,7 @@ func verifyNodeInfoSanitization(initialNodeInfo, sanitizedNodeInfo *framework.No
526544

527545
// Verification below assumes the same set of pods between initialNodeInfo and sanitizedNodeInfo.
528546
wantNodeName := fmt.Sprintf("%s-%s", nameBase, nameSuffix)
529-
if err := verifySanitizedNode(initialNodeInfo.Node(), sanitizedNodeInfo.Node(), wantNodeName, wantTaints); err != nil {
547+
if err := verifySanitizedNode(initialNodeInfo.Node(), sanitizedNodeInfo.Node(), wantNodeName, wantDeprecatedLabels, wantTaints); err != nil {
530548
return err
531549
}
532550
if err := verifySanitizedNodeResourceSlices(initialNodeInfo.LocalResourceSlices, sanitizedNodeInfo.LocalResourceSlices, nameSuffix); err != nil {
@@ -539,7 +557,7 @@ func verifyNodeInfoSanitization(initialNodeInfo, sanitizedNodeInfo *framework.No
539557
return nil
540558
}
541559

542-
func verifySanitizedNode(initialNode, sanitizedNode *apiv1.Node, wantNodeName string, wantTaints []apiv1.Taint) error {
560+
func verifySanitizedNode(initialNode, sanitizedNode *apiv1.Node, wantNodeName string, wantDeprecatedLabels bool, wantTaints []apiv1.Taint) error {
543561
if gotName := sanitizedNode.Name; gotName != wantNodeName {
544562
return fmt.Errorf("want sanitized Node name %q, got %q", wantNodeName, gotName)
545563
}
@@ -552,6 +570,9 @@ func verifySanitizedNode(initialNode, sanitizedNode *apiv1.Node, wantNodeName st
552570
wantLabels[k] = v
553571
}
554572
wantLabels[apiv1.LabelHostname] = wantNodeName
573+
if wantDeprecatedLabels {
574+
labels.UpdateDeprecatedLabels(wantLabels)
575+
}
555576
if diff := cmp.Diff(wantLabels, sanitizedNode.Labels); diff != "" {
556577
return fmt.Errorf("sanitized Node labels unexpected, diff (-want +got): %s", diff)
557578
}

0 commit comments

Comments
 (0)