Skip to content

Commit 7359be9

Browse files
Merge pull request #1913 from dgrisonnet/concurrent-installer
OCPBUGS-45924: Prevent the installer controller from stomping statuses it just applied
2 parents 99ccd53 + 5101e8c commit 7359be9

File tree

1 file changed

+40
-1
lines changed

1 file changed

+40
-1
lines changed

pkg/operator/staticpod/controller/installer/installer_controller.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ type InstallerController struct {
102102
clock clock.Clock
103103
installerBackOff func(count int) time.Duration
104104
fallbackBackOff func(count int) time.Duration
105+
106+
// track StaticPodOperatorStatus apply requests to perform a live read during
107+
// the next controller sync
108+
podOperatorStatusApplied bool
109+
// resource version of the last StaticPodOperatorStatus applied
110+
lastPodOperatorAppliedRV uint64
105111
}
106112

107113
// InstallerPodMutationFunc is a function that has a chance at changing the installer pod before it is created
@@ -1106,12 +1112,42 @@ func (c InstallerController) ensureRequiredResourcesExist(ctx context.Context, r
11061112
}
11071113

11081114
func (c InstallerController) Sync(ctx context.Context, syncCtx factory.SyncContext) error {
1109-
operatorSpec, originalOperatorStatus, _, err := c.operatorClient.GetStaticPodOperatorState()
1115+
operatorSpec, originalOperatorStatus, operatorResourceVersion, err := c.operatorClient.GetStaticPodOperatorState()
11101116
if err != nil {
11111117
return err
11121118
}
11131119
operatorStatus := originalOperatorStatus.DeepCopy()
11141120

1121+
// Perform a live get to obtain the RV of the object the controller applied
1122+
// because the MOM effort does not allow reading the RV from the
1123+
// Apply response.
1124+
if c.podOperatorStatusApplied {
1125+
if c.lastPodOperatorAppliedRV == 0 {
1126+
_, _, resourceVersion, err := c.operatorClient.GetOperatorStateWithQuorum(ctx)
1127+
if err != nil {
1128+
return err
1129+
}
1130+
1131+
c.lastPodOperatorAppliedRV, err = strconv.ParseUint(resourceVersion, 10, 64)
1132+
if err != nil {
1133+
return err
1134+
}
1135+
}
1136+
1137+
operatorRV, err := strconv.ParseUint(operatorResourceVersion, 10, 64)
1138+
if err != nil {
1139+
return err
1140+
}
1141+
1142+
if operatorRV < c.lastPodOperatorAppliedRV {
1143+
klog.V(4).Info("Skipping installer controller sync, StaticPodOperator lister hasn't observed the effect of its most recent write")
1144+
return nil
1145+
}
1146+
1147+
c.podOperatorStatusApplied = false
1148+
c.lastPodOperatorAppliedRV = 0
1149+
}
1150+
11151151
if !management.IsOperatorManaged(operatorSpec.ManagementState) {
11161152
return nil
11171153
}
@@ -1141,6 +1177,9 @@ func (c InstallerController) Sync(ctx context.Context, syncCtx factory.SyncConte
11411177
status := applyoperatorv1.StaticPodOperatorStatus().
11421178
WithConditions(operatorConditionApplyConfigurations...).
11431179
WithNodeStatuses(nodeStatusApplyConfigurations...)
1180+
// track the apply request regardless of an error being returned by the
1181+
// client since the write could still succeed.
1182+
c.podOperatorStatusApplied = true
11441183
if updateErr := c.operatorClient.ApplyStaticPodOperatorStatus(ctx, c.controllerInstanceName, status); updateErr != nil {
11451184
return updateErr
11461185
} else if updatedNodeReportOnSuccessfulUpdateFn != nil {

0 commit comments

Comments
 (0)