@@ -197,6 +197,33 @@ func (c *Controller) runWorker() {
197197 }
198198}
199199
200+ // clearMachineError is a convenience function to remove a error on the machine if its set.
201+ // It does not return an error as it's used around the sync handler
202+ func (c * Controller ) clearMachineError (key string ) {
203+ namespace , name , err := cache .SplitMetaNamespaceKey (key )
204+ if err != nil {
205+ utilruntime .HandleError (fmt .Errorf ("failed to split metaNamespaceKey: %v" , err ))
206+ return
207+ }
208+ listerMachine , err := c .machinesLister .Machines (namespace ).Get (name )
209+ if err != nil {
210+ utilruntime .HandleError (fmt .Errorf ("failed to get Machine from lister: %v" , err ))
211+ return
212+ }
213+ machine := listerMachine .DeepCopy ()
214+
215+ if machine .Status .ErrorMessage != nil || machine .Status .ErrorReason != nil {
216+ _ , err := c .updateMachine (machine , func (m * clusterv1alpha1.Machine ) {
217+ m .Status .ErrorMessage = nil
218+ m .Status .ErrorReason = nil
219+ })
220+ if err != nil {
221+ utilruntime .HandleError (fmt .Errorf ("failed to update machine: %v" , err ))
222+ return
223+ }
224+ }
225+ }
226+
200227func (c * Controller ) processNextWorkItem () bool {
201228 key , quit := c .workqueue .Get ()
202229 if quit {
@@ -208,6 +235,8 @@ func (c *Controller) processNextWorkItem() bool {
208235 glog .V (6 ).Infof ("Processing machine: %s" , key )
209236 err := c .syncHandler (key .(string ))
210237 if err == nil {
238+ // Every time we successfully sync a Machine, we should check whether an error is set on it and remove it if so
239+ c .clearMachineError (key .(string ))
211240 c .workqueue .Forget (key )
212241 return true
213242 }
@@ -276,16 +305,6 @@ func (c *Controller) updateMachine(machine *clusterv1alpha1.Machine, modify func
276305 return updatedMachine , err
277306}
278307
279- func (c * Controller ) clearMachineErrorIfSet (machine * clusterv1alpha1.Machine ) (* clusterv1alpha1.Machine , error ) {
280- if machine .Status .ErrorMessage != nil || machine .Status .ErrorReason != nil {
281- return c .updateMachine (machine , func (m * clusterv1alpha1.Machine ) {
282- m .Status .ErrorMessage = nil
283- m .Status .ErrorReason = nil
284- })
285- }
286- return machine , nil
287- }
288-
289308// updateMachineError updates the machine's ErrorMessage and ErrorReason regardless of whether they were set or not
290309// this essentially overwrites previous values
291310func (c * Controller ) updateMachineError (machine * clusterv1alpha1.Machine , reason common.MachineStatusError , message string ) (* clusterv1alpha1.Machine , error ) {
@@ -428,11 +447,6 @@ func (c *Controller) syncHandler(key string) error {
428447 }
429448
430449 if c .nodeIsReady (node ) {
431- // If we have an ready node, we should clear the error in case one was set.
432- // Useful when there was a network outage & a cloud-provider api outage at the same time
433- if machine , err = c .clearMachineErrorIfSet (machine ); err != nil {
434- return fmt .Errorf ("failed to clear machine error: %v" , err )
435- }
436450 // We must do this to ensure the informers in the machineSet and machineDeployment controller
437451 // get triggered as soon as a ready node exists for a machine
438452 if machine , err = c .ensureMachineHasNodeReadyCondition (machine ); err != nil {
@@ -618,10 +632,6 @@ func (c *Controller) ensureInstanceExistsForMachine(prov cloud.Provider, machine
618632
619633 // case 2.1: instance was not found and we are going to create one
620634 if err == cloudprovidererrors .ErrInstanceNotFound {
621- // remove an error message in case it was set
622- if machine , err = c .clearMachineErrorIfSet (machine ); err != nil {
623- return fmt .Errorf ("failed to update machine after removing the failed validation error: %v" , err )
624- }
625635 glog .V (4 ).Infof ("Validated machine spec of %s" , machine .Name )
626636
627637 kubeconfig , err := c .createBootstrapKubeconfig (machine .Name )
@@ -643,10 +653,6 @@ func (c *Controller) ensureInstanceExistsForMachine(prov cloud.Provider, machine
643653 return c .updateMachineErrorIfTerminalError (machine , common .CreateMachineError , message , err , "failed to create machine at cloudprover" )
644654 }
645655 c .recorder .Event (machine , corev1 .EventTypeNormal , "Created" , "Successfully created instance" )
646- // remove error message in case it was set
647- if machine , err = c .clearMachineErrorIfSet (machine ); err != nil {
648- return fmt .Errorf ("failed to update machine after removing the create machine error: %v" , err )
649- }
650656 glog .V (4 ).Infof ("Created machine %s at cloud provider" , machine .Name )
651657 return nil
652658 }
0 commit comments