@@ -25,7 +25,9 @@ import (
2525 "time"
2626
2727 "github.com/go-logr/logr"
28+ "github.com/google/go-cmp/cmp"
2829 apierrors "k8s.io/apimachinery/pkg/api/errors"
30+ "k8s.io/apimachinery/pkg/util/wait"
2931 "k8s.io/klog/v2"
3032 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3133 "sigs.k8s.io/cluster-api/util"
@@ -145,6 +147,8 @@ func (r *HCloudMachineReconciler) Reconcile(ctx context.Context, req reconcile.R
145147 return reconcile.Result {}, fmt .Errorf ("failed to create scope: %+v" , err )
146148 }
147149
150+ initialHCloudMachine := hcloudMachine .DeepCopy ()
151+ startReconcile := time .Now ()
148152 // Always close the scope when exiting this function so we can persist any HCloudMachine changes.
149153 defer func () {
150154 if reterr != nil && errors .Is (reterr , hcloudclient .ErrUnauthorized ) {
@@ -153,10 +157,66 @@ func (r *HCloudMachineReconciler) Reconcile(ctx context.Context, req reconcile.R
153157 conditions .MarkTrue (hcloudMachine , infrav1 .HCloudTokenAvailableCondition )
154158 }
155159
160+ // Close() uses the PatchHelper to persist the changes.
156161 if err := machineScope .Close (ctx ); err != nil {
157162 res = reconcile.Result {}
158163 reterr = errors .Join (reterr , err )
159164 }
165+
166+ if ! cmp .Equal (initialHCloudMachine , hcloudMachine ) {
167+ // The hcloudMachine was changed. Wait until the local cache contains the revision
168+ // that was created by the machineScope.Close() call above.
169+ // We want to read our own writes.
170+ err := wait .PollUntilContextTimeout (ctx , 100 * time .Millisecond , 5 * time .Second , true , func (ctx context.Context ) (done bool , err error ) {
171+ // Read the object from the local cache into a fresh struct.
172+ latest := & infrav1.HCloudMachine {}
173+ getErr := r .Get (ctx , client .ObjectKeyFromObject (machineScope .HCloudMachine ), latest )
174+ if apierrors .IsNotFound (getErr ) {
175+ // the object was deleted. All is fine.
176+ return true , nil
177+ }
178+ if getErr != nil {
179+ return false , getErr
180+ }
181+ // When the ResourceVersion has changed, then it is very likely that the local
182+ // cache has the new version.
183+ return latest .ResourceVersion != hcloudMachine .ResourceVersion , nil
184+ })
185+ if err != nil {
186+ log .Error (err , "cache sync failed after BootState change" )
187+ }
188+ }
189+
190+ readyReason := conditions .GetReason (machineScope .HCloudMachine , clusterv1 .ReadyCondition )
191+ readyMessage := conditions .GetMessage (machineScope .HCloudMachine , clusterv1 .ReadyCondition )
192+
193+ duration := time .Since (startReconcile )
194+
195+ if duration > 5 * time .Second {
196+ log .Info ("Reconcile took too long" ,
197+ "reconcileDuration" , duration ,
198+ "res" , res ,
199+ "reterr" , reterr ,
200+ "oldState" , initialHCloudMachine .Status .BootState ,
201+ "newState" , machineScope .HCloudMachine .Status .BootState ,
202+ "readyReason" , readyReason ,
203+ "readyMessage" , readyMessage ,
204+ )
205+ }
206+
207+ if initialHCloudMachine .Status .BootState != machineScope .HCloudMachine .Status .BootState {
208+ startBootState := initialHCloudMachine .Status .BootStateSince
209+ if startBootState .IsZero () {
210+ startBootState = initialHCloudMachine .CreationTimestamp
211+ }
212+ log .Info ("BootState changed" ,
213+ "oldState" , initialHCloudMachine .Status .BootState ,
214+ "newState" , machineScope .HCloudMachine .Status .BootState ,
215+ "durationInState" , machineScope .HCloudMachine .Status .BootStateSince .Time .Sub (startBootState .Time ),
216+ "readyReason" , readyReason ,
217+ "readyMessage" , readyMessage ,
218+ )
219+ }
160220 }()
161221
162222 // Check whether rate limit has been reached and if so, then wait.
@@ -168,6 +228,10 @@ func (r *HCloudMachineReconciler) Reconcile(ctx context.Context, req reconcile.R
168228 return r .reconcileDelete (ctx , machineScope )
169229 }
170230
231+ if hcloudMachine .Status .FailureReason != nil {
232+ // This machine will be removed.
233+ return reconcile.Result {}, nil
234+ }
171235 return r .reconcileNormal (ctx , machineScope )
172236}
173237
@@ -460,6 +524,13 @@ func IgnoreInsignificantHCloudMachineStatusUpdates(logger logr.Logger) predicate
460524 oldHCloudMachine .ResourceVersion = ""
461525 newHCloudMachine .ResourceVersion = ""
462526
527+ // The ProviderID is set by the controller. Do not react if that changes.
528+ // Otherwise the next Reconcile is likely to read outdated data, because
529+ // the Status was not updated yet. PatchHelper updates three times in this order:
530+ // Status.Conditions, Resource, Status.
531+ oldHCloudMachine .Spec .ProviderID = nil
532+ newHCloudMachine .Spec .ProviderID = nil
533+
463534 oldHCloudMachine .Status = infrav1.HCloudMachineStatus {}
464535 newHCloudMachine .Status = infrav1.HCloudMachineStatus {}
465536
0 commit comments