Skip to content

Commit c0c9364

Browse files
Merge pull request #1383 from damdo/revert-1380-fix-controllers-guard-on-empty-status-authoritativeAPI
Revert "OCPCLOUD-2986,OCPBUGS-56849: fix: controllers: guard on empty .status.authoritativeAPI"
2 parents 917e2fa + 774ae69 commit c0c9364

File tree

5 files changed

+104
-200
lines changed

5 files changed

+104
-200
lines changed

pkg/controller/machine/controller.go

Lines changed: 31 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -177,60 +177,43 @@ func (r *ReconcileMachine) Reconcile(ctx context.Context, request reconcile.Requ
177177
originalConditions := conditions.DeepCopyConditions(m.Status.Conditions)
178178

179179
if r.gate.Enabled(featuregate.Feature(openshiftfeatures.FeatureGateMachineAPIMigration)) {
180-
switch m.Status.AuthoritativeAPI {
181-
case "":
182-
// An empty .status.authoritativeAPI normally means the resource has not yet been reconciled
183-
// by the migration controller, which has the responsibility for propagating .spec.authoritativeAPI to the status.
184-
// Pause the resource and take no further action but return until that field is populated.
185-
desiredCondition := conditions.TrueConditionWithReason(
186-
PausedCondition, PausedConditionReason,
187-
"The AuthoritativeAPI status is not yet set",
188-
)
189-
190-
if _, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition,
191-
fmt.Sprintf("%v: machine .status.authoritativeAPI is not yet set, ensuring machine is paused", machineName)); err != nil {
192-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
193-
}
194-
195-
klog.Infof("%v: machine is paused, taking no further action", machineName)
196-
197-
return reconcile.Result{}, nil
198-
199-
case machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating:
200-
// In cases when .status.authoritativeAPI is set to machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating
201-
// the resource should be paused and not reconciled further.
202-
desiredCondition := conditions.TrueConditionWithReason(
203-
PausedCondition, PausedConditionReason,
204-
"The AuthoritativeAPI status is set to '%s'", string(m.Status.AuthoritativeAPI),
205-
)
206-
207-
if _, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition,
208-
fmt.Sprintf("%v: machine .status.authoritativeAPI is set to '%s', ensuring machine is paused", machineName, m.Status.AuthoritativeAPI)); err != nil {
209-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
180+
// Check Status.AuthoritativeAPI
181+
// If not MachineAPI. Set the paused condition true and return early.
182+
//
183+
// Once we have a webhook, we want to remove the check that the AuthoritativeAPI
184+
// field is populated.
185+
if m.Status.AuthoritativeAPI != "" &&
186+
m.Status.AuthoritativeAPI != machinev1.MachineAuthorityMachineAPI {
187+
conditions.Set(m, conditions.TrueConditionWithReason(
188+
PausedCondition,
189+
PausedConditionReason,
190+
"The AuthoritativeAPI is set to %s", string(m.Status.AuthoritativeAPI),
191+
))
192+
if patchErr := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, originalConditions); patchErr != nil {
193+
klog.Errorf("%v: error patching status: %v", machineName, patchErr)
210194
}
211195

212196
klog.Infof("%v: machine is paused, taking no further action", machineName)
213-
214197
return reconcile.Result{}, nil
198+
}
215199

216-
case machinev1.MachineAuthorityMachineAPI:
217-
// The authority is MachineAPI and the resource should not be paused.
218-
desiredCondition := conditions.FalseCondition(
219-
PausedCondition, NotPausedConditionReason, machinev1.ConditionSeverityInfo, "%s",
220-
fmt.Sprintf("The AuthoritativeAPI status is set to '%s'", string(m.Status.AuthoritativeAPI)),
221-
)
222-
223-
if updated, err := r.ensureUpdatedPausedCondition(ctx, m, desiredCondition,
224-
fmt.Sprintf("%v: machine .status.authoritativeAPI is set to '%s', unpausing machine", machineName, m.Status.AuthoritativeAPI)); err != nil {
225-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
226-
} else if updated {
227-
klog.Infof("%v: setting machine paused condition to false", machineName)
228-
}
200+
var pausedFalseReason string
201+
if m.Status.AuthoritativeAPI != "" {
202+
pausedFalseReason = fmt.Sprintf("The AuthoritativeAPI is set to %s", string(m.Status.AuthoritativeAPI))
203+
} else {
204+
pausedFalseReason = "The AuthoritativeAPI is not set"
205+
}
229206

230-
// Fallthrough and continue reconcilation.
231-
default:
232-
klog.Errorf("%v: invalid .status.authoritativeAPI '%s'", machineName, m.Status.AuthoritativeAPI)
233-
return reconcile.Result{}, nil // Do not return an error to avoid immediate requeue.
207+
// Set the paused condition to false, continue reconciliation
208+
conditions.Set(m, conditions.FalseCondition(
209+
PausedCondition,
210+
NotPausedConditionReason,
211+
machinev1.ConditionSeverityInfo,
212+
"%s",
213+
pausedFalseReason,
214+
))
215+
if patchErr := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, originalConditions); patchErr != nil {
216+
klog.Errorf("%v: error patching status: %v", machineName, patchErr)
234217
}
235218
}
236219

@@ -450,23 +433,6 @@ func (r *ReconcileMachine) deleteNode(ctx context.Context, name string) error {
450433
return r.Client.Delete(ctx, &node)
451434
}
452435

453-
// ensureUpdatedPausedCondition updates the paused condition if needed.
454-
func (r *ReconcileMachine) ensureUpdatedPausedCondition(ctx context.Context, m *machinev1.Machine, desiredCondition *machinev1.Condition, logMessage string) (bool, error) {
455-
oldM := m.DeepCopy()
456-
if !conditions.IsEquivalentTo(conditions.Get(m, PausedCondition), desiredCondition) {
457-
klog.Info(logMessage)
458-
conditions.Set(m, desiredCondition)
459-
if err := r.updateStatus(ctx, m, ptr.Deref(m.Status.Phase, ""), nil, oldM.Status.Conditions); err != nil {
460-
klog.Errorf("%v: error updating status: %v", oldM.Name, err)
461-
return false, fmt.Errorf("error updating status for machine %s: %w", oldM.Name, err)
462-
}
463-
464-
return true, nil
465-
}
466-
467-
return false, nil
468-
}
469-
470436
func delayIfRequeueAfterError(err error) (reconcile.Result, error) {
471437
var requeueAfterError *RequeueAfterError
472438
if errors.As(err, &requeueAfterError) {

pkg/controller/machine/machine_controller_test.go

Lines changed: 12 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -135,20 +135,19 @@ var _ = Describe("Machine Reconciler", func() {
135135

136136
It("Should set the Paused condition appropriately", func() {
137137

138-
By("Creating the Machine (empty status.authoritativeAPI)")
138+
By("Creating the Machine")
139139
Expect(k8sClient.Create(ctx, instance)).To(Succeed())
140140

141141
By("Setting the AuthoritativeAPI to ClusterAPI")
142142
Eventually(k.UpdateStatus(instance, func() {
143143
instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityClusterAPI
144144
})).Should(Succeed())
145145

146-
By("Verifying the paused condition is appropriately set to true")
146+
By("Verifying the paused condition is approproately set to true")
147147
Eventually(k.Object(instance), timeout).Should(SatisfyAll(
148148
HaveField("Status.Conditions", ContainElement(SatisfyAll(
149149
HaveField("Type", Equal(PausedCondition)),
150150
HaveField("Status", Equal(corev1.ConditionTrue)),
151-
HaveField("Message", Equal("The AuthoritativeAPI status is set to 'ClusterAPI'")),
152151
))),
153152
HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityClusterAPI)),
154153
))
@@ -190,50 +189,36 @@ var _ = Describe("Machine Reconciler", func() {
190189
})
191190
}()
192191

193-
By("Changing status.authoritativeAPI from ClusterAPI to Migrating")
192+
By("Transitioning the AuthoritativeAPI though 'Migrating' to MachineAPI")
194193
Eventually(k.UpdateStatus(instance, func() {
195194
instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMigrating
196195
})).Should(Succeed())
197196

198-
By("Verifying the paused condition is appropriately set to true")
199-
Eventually(k.Object(instance), timeout).Should(SatisfyAll(
200-
HaveField("Status.Conditions", ContainElement(SatisfyAll(
201-
HaveField("Type", Equal(PausedCondition)),
202-
HaveField("Status", Equal(corev1.ConditionTrue)),
203-
HaveField("Message", Equal("The AuthoritativeAPI status is set to 'Migrating'")),
204-
))),
205-
HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMigrating)),
206-
))
207-
208-
By("Changing status.authoritativeAPI from Migrating to MachineAPI")
197+
By("Updating the AuthoritativeAPI from Migrating to MachineAPI")
209198
Eventually(k.UpdateStatus(instance, func() {
210199
instance.Status.AuthoritativeAPI = machinev1.MachineAuthorityMachineAPI
211200
})).Should(Succeed())
212201

213-
By("Verifying the paused condition is appropriately set to false")
202+
By("Verifying the paused condition is approproately set to false")
214203
Eventually(k.Object(instance), timeout).Should(SatisfyAll(
215204
HaveField("Status.Conditions", ContainElement(SatisfyAll(
216205
HaveField("Type", Equal(PausedCondition)),
217206
HaveField("Status", Equal(corev1.ConditionFalse)),
218-
HaveField("Message", Equal("The AuthoritativeAPI status is set to 'MachineAPI'")),
219207
))),
220208
HaveField("Status.AuthoritativeAPI", Equal(machinev1.MachineAuthorityMachineAPI)),
221209
))
222210

223-
By("Changing status.authoritativeAPI from MachineAPI to empty")
211+
By("Unsetting the AuthoritativeAPI field in the status")
224212
Eventually(k.UpdateStatus(instance, func() {
225213
instance.Status.AuthoritativeAPI = ""
226214
})).Should(Succeed())
227215

228-
By("Verifying the paused condition is still appropriately set to true when empty status.authoritativeAPI")
229-
Eventually(k.Object(instance), timeout).Should(SatisfyAll(
230-
HaveField("Status.Conditions", ContainElement(SatisfyAll(
231-
HaveField("Type", Equal(PausedCondition)),
232-
HaveField("Status", Equal(corev1.ConditionTrue)),
233-
HaveField("Message", Equal("The AuthoritativeAPI status is not yet set")),
234-
))),
235-
HaveField("Status.AuthoritativeAPI", BeEquivalentTo("")),
236-
))
216+
By("Verifying the paused condition is still approproately set to false")
217+
Eventually(k.Object(instance), timeout).Should(HaveField("Status.Conditions", ContainElement(SatisfyAll(
218+
HaveField("Type", Equal(PausedCondition)),
219+
HaveField("Status", Equal(corev1.ConditionFalse)),
220+
HaveField("Message", Equal("The AuthoritativeAPI is not set")),
221+
))))
237222
})
238223
})
239224
})

pkg/controller/machineset/controller.go

Lines changed: 45 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -174,60 +174,59 @@ func (r *ReconcileMachineSet) Reconcile(ctx context.Context, request reconcile.R
174174
}
175175

176176
if r.gate.Enabled(featuregate.Feature(openshiftfeatures.FeatureGateMachineAPIMigration)) {
177-
switch machineSet.Status.AuthoritativeAPI {
178-
case "":
179-
// An empty .status.authoritativeAPI normally means the resource has not yet been reconciled
180-
// by the migration controller, which has the responsibility for propagating .spec.authoritativeAPI to the status.
181-
// Pause the resource and take no further action but return until that field is populated.
182-
desiredCondition := conditions.TrueConditionWithReason(
183-
machine.PausedCondition, machine.PausedConditionReason,
184-
"The AuthoritativeAPI status is not yet set",
185-
)
186-
187-
if _, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition,
188-
fmt.Sprintf("%v: machine set .status.authoritativeAPI is not yet set, ensuring machine set is paused", machineSet.Name)); err != nil {
189-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
177+
machineSetName := machineSet.GetName()
178+
machineSetCopy := machineSet.DeepCopy()
179+
// Check Status.AuthoritativeAPI. If it's not set to MachineAPI. Set the
180+
// paused condition true and return early.
181+
//
182+
// Once we have a webhook, we want to remove the check that the AuthoritativeAPI
183+
// field is populated.
184+
if machineSet.Status.AuthoritativeAPI != "" &&
185+
machineSet.Status.AuthoritativeAPI != machinev1.MachineAuthorityMachineAPI {
186+
conditions.Set(machineSetCopy, conditions.TrueConditionWithReason(
187+
machine.PausedCondition,
188+
machine.PausedConditionReason,
189+
"The AuthoritativeAPI is set to %s", string(machineSet.Status.AuthoritativeAPI),
190+
))
191+
192+
_, err := updateMachineSetStatus(r.Client, machineSet, machineSetCopy.Status)
193+
if err != nil && !apierrors.IsNotFound(err) {
194+
klog.Errorf("%v: error updating status: %v", machineSetName, err)
195+
return reconcile.Result{}, fmt.Errorf("error updating status: %w", err)
196+
} else if apierrors.IsNotFound(err) {
197+
return reconcile.Result{}, nil
190198
}
191-
klog.Infof("%v: machine set is paused, taking no further action", machineSet.Name)
192199

200+
klog.Infof("%v: machine set is paused, taking no further action", machineSetName)
193201
return reconcile.Result{}, nil
202+
}
194203

195-
case machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating:
196-
// In case .status.authoritativeAPI is set to machinev1.MachineAuthorityClusterAPI, machinev1.MachineAuthorityMigrating
197-
// the resource should be paused and not reconciled further.
198-
desiredCondition := conditions.TrueConditionWithReason(
199-
machine.PausedCondition, machine.PausedConditionReason,
200-
"The AuthoritativeAPI status is set to '%s'", string(machineSet.Status.AuthoritativeAPI),
201-
)
202-
203-
if _, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition,
204-
fmt.Sprintf("%v: machine set .status.authoritativeAPI is set to '%s', ensuring machine set is paused", machineSet.Name, machineSet.Status.AuthoritativeAPI)); err != nil {
205-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
206-
}
204+
var pausedFalseReason string
205+
if machineSet.Status.AuthoritativeAPI != "" {
206+
pausedFalseReason = fmt.Sprintf("The AuthoritativeAPI is set to %s", string(machineSet.Status.AuthoritativeAPI))
207+
} else {
208+
pausedFalseReason = "The AuthoritativeAPI is not set"
209+
}
207210

208-
klog.Infof("%v: machine set is paused, taking no further action", machineSet.Name)
211+
// Set the paused condition to false, continue reconciliation
212+
conditions.Set(machineSetCopy, conditions.FalseCondition(
213+
machine.PausedCondition,
214+
machine.NotPausedConditionReason,
215+
machinev1.ConditionSeverityInfo,
216+
"%s",
217+
pausedFalseReason,
218+
))
209219

220+
var err error
221+
machineSet, err = updateMachineSetStatus(r.Client, machineSet, machineSetCopy.Status)
222+
if err != nil && !apierrors.IsNotFound(err) {
223+
klog.Errorf("%v: error updating status: %v", machineSetName, err)
224+
return reconcile.Result{}, fmt.Errorf("error updating status: %w", err)
225+
} else if apierrors.IsNotFound(err) {
210226
return reconcile.Result{}, nil
211-
212-
case machinev1.MachineAuthorityMachineAPI:
213-
// The authority is MachineAPI and the resource should not be paused.
214-
desiredCondition := conditions.FalseCondition(
215-
machine.PausedCondition, machine.NotPausedConditionReason, machinev1.ConditionSeverityInfo, "%s",
216-
fmt.Sprintf("The AuthoritativeAPI status is set to '%s'", string(machineSet.Status.AuthoritativeAPI)),
217-
)
218-
219-
if updated, err := r.ensureUpdatedPausedCondition(machineSet, desiredCondition,
220-
fmt.Sprintf("%v: machine set .status.authoritativeAPI is set to '%s', unpausing machine set", machineSet.Name, machineSet.Status.AuthoritativeAPI)); err != nil {
221-
return reconcile.Result{}, fmt.Errorf("failed to ensure paused condition: %w", err)
222-
} else if updated {
223-
klog.Infof("%v: setting machine set paused condition to false", machineSet.Name)
224-
}
225-
226-
// Fallthrough and continue reconciliation.
227-
default:
228-
klog.Errorf("%v: invalid .status.authoritativeAPI '%s'", machineSet.Name, machineSet.Status.AuthoritativeAPI)
229-
return reconcile.Result{}, nil // Do not return an error to avoid immediate requeue.
230227
}
228+
229+
klog.Infof("%v: setting paused to false and continuing reconcile", machineSetName)
231230
}
232231

233232
result, err := r.reconcile(ctx, machineSet)
@@ -330,23 +329,6 @@ func (r *ReconcileMachineSet) reconcile(ctx context.Context, machineSet *machine
330329
return reconcile.Result{}, nil
331330
}
332331

333-
// ensureUpdatedPausedCondition updates the paused condition if needed.
334-
func (r *ReconcileMachineSet) ensureUpdatedPausedCondition(ms *machinev1.MachineSet, desiredCondition *machinev1.Condition, logMessage string) (bool, error) {
335-
newMs := ms.DeepCopy()
336-
if !conditions.IsEquivalentTo(conditions.Get(ms, machine.PausedCondition), desiredCondition) {
337-
klog.Info(logMessage)
338-
conditions.Set(newMs, desiredCondition)
339-
if _, err := updateMachineSetStatus(r.Client, ms, newMs.Status); err != nil {
340-
klog.Errorf("%v: error updating status: %v", newMs.Name, err)
341-
return false, fmt.Errorf("error updating status for machine set %s: %w", newMs.Name, err)
342-
}
343-
344-
return true, nil
345-
}
346-
347-
return false, nil
348-
}
349-
350332
// syncReplicas essentially scales machine resources up and down.
351333
func (r *ReconcileMachineSet) syncReplicas(ms *machinev1.MachineSet, machines []*machinev1.Machine) error {
352334
if ms.Spec.Replicas == nil {

0 commit comments

Comments
 (0)