@@ -169,26 +169,21 @@ func resolveMachineResources(scope *scope.WithLogger, clusterResourceName string
169169 openStackMachine .Status .Resolved = resolved
170170 }
171171 // Resolve and store resources
172- changed , err := compute .ResolveMachineSpec (scope ,
172+ return compute .ResolveMachineSpec (scope ,
173173 & openStackMachine .Spec , resolved ,
174174 clusterResourceName , openStackMachine .Name ,
175175 openStackCluster , getManagedSecurityGroup (openStackCluster , machine ))
176- if err != nil {
177- return false , err
178- }
179- if changed {
180- // If the resolved machine spec changed we need to start the reconcile again to prevent inconsistency between reconciles.
181- return true , nil
182- }
176+ }
183177
178+ func adoptMachineResources (scope * scope.WithLogger , openStackMachine * infrav1.OpenStackMachine ) error {
184179 resources := openStackMachine .Status .Resources
185180 if resources == nil {
186181 resources = & infrav1.MachineResources {}
187182 openStackMachine .Status .Resources = resources
188183 }
189184
190185 // Adopt any existing resources
191- return false , compute .AdoptMachineResources (scope , resolved , resources )
186+ return compute .AdoptMachineResources (scope , openStackMachine . Status . Resolved , resources )
192187}
193188
194189func patchMachine (ctx context.Context , patchHelper * patch.Helper , openStackMachine * infrav1.OpenStackMachine , machine * clusterv1.Machine , options ... patch.Option ) error {
@@ -256,66 +251,72 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope *scope.WithLogger, cl
256251 return ctrl.Result {}, err
257252 }
258253
259- // We may have resources to adopt if the cluster is ready
260- if openStackCluster .Status .Ready && openStackCluster .Status .Network != nil {
261- if _ , err := resolveMachineResources (scope , clusterResourceName , openStackCluster , openStackMachine , machine ); err != nil {
262- return ctrl.Result {}, err
263- }
254+ // Nothing to do if the cluster is not ready because no machine resources were created.
255+ if ! openStackCluster .Status .Ready || openStackCluster .Status .Network == nil {
256+ // The finalizer should not have been added yet in this case,
257+ // but the following handles the upgrade case.
258+ controllerutil .RemoveFinalizer (openStackMachine , infrav1 .MachineFinalizer )
259+ return ctrl.Result {}, nil
264260 }
265261
266- if openStackCluster .Spec .APIServerLoadBalancer .IsEnabled () {
267- loadBalancerService , err := loadbalancer .NewService (scope )
268- if err != nil {
269- return ctrl.Result {}, err
270- }
262+ // For machines created after v0.10, or any machine which has been
263+ // reconciled at least once by v0.10 or later, status.Resolved always
264+ // exists before any resources are created. We can therefore assume
265+ // that if it does not exist, no resources were created.
266+ //
267+ // There is an upgrade edge case where a machine may have been marked
268+ // deleted before upgrade but we are completing it after upgrade. For
269+ // this use case only we make a best effort to resolve resources before
270+ // continuing, but if we get an error we log it and continue anyway.
271+ // This has the potential to leak resources, but only in this specific
272+ // edge case. The alternative is to continue retrying until it succeeds,
273+ // but that risks never deleting a machine which cannot be resolved due
274+ // to a spec error.
275+ //
276+ // This code can and should be deleted in a future release when we are
277+ // sure that all machines have been reconciled at least by a v0.10 or
278+ // later controller.
279+ if _ , err := resolveMachineResources (scope , clusterResourceName , openStackCluster , openStackMachine , machine ); err != nil {
280+ // Return the error, but allow the resource to be removed anyway.
281+ controllerutil .RemoveFinalizer (openStackMachine , infrav1 .MachineFinalizer )
282+ return ctrl.Result {}, err
283+ }
271284
272- err = loadBalancerService .DeleteLoadBalancerMember (openStackCluster , machine , openStackMachine , clusterResourceName )
273- if err != nil {
274- conditions .MarkFalse (openStackMachine , infrav1 .APIServerIngressReadyCondition , infrav1 .LoadBalancerMemberErrorReason , clusterv1 .ConditionSeverityWarning , "Machine could not be removed from load balancer: %v" , err )
275- return ctrl.Result {}, err
276- }
285+ // Check for any orphaned resources
286+ // N.B. Unlike resolveMachineResources, we must always look for orphaned resources in the delete path.
287+ if err := adoptMachineResources (scope , openStackMachine ); err != nil {
288+ return ctrl.Result {}, fmt .Errorf ("adopting machine resources: %w" , err )
277289 }
278290
279- var instanceStatus * compute.InstanceStatus
280- if openStackMachine .Status .InstanceID != nil {
281- instanceStatus , err = computeService .GetInstanceStatus (* openStackMachine .Status .InstanceID )
282- if err != nil {
283- return ctrl.Result {}, err
284- }
285- } else if instanceStatus , err = computeService .GetInstanceStatusByName (openStackMachine , openStackMachine .Name ); err != nil {
291+ instanceStatus , err := getInstanceStatus (openStackMachine , computeService )
292+ if err != nil {
286293 return ctrl.Result {}, err
287294 }
288- if ! openStackCluster .Spec .APIServerLoadBalancer .IsEnabled () && util .IsControlPlaneMachine (machine ) && openStackCluster .Spec .APIServerFloatingIP == nil {
289- if instanceStatus != nil {
290- instanceNS , err := instanceStatus .NetworkStatus ()
291- if err != nil {
292- openStackMachine .SetFailure (
293- capierrors .UpdateMachineError ,
294- fmt .Errorf ("get network status for OpenStack instance %s with ID %s: %v" , instanceStatus .Name (), instanceStatus .ID (), err ),
295- )
296- return ctrl.Result {}, nil
297- }
298295
299- addresses := instanceNS .Addresses ()
300- for _ , address := range addresses {
301- if address .Type == corev1 .NodeExternalIP {
302- if err = networkingService .DeleteFloatingIP (openStackMachine , address .Address ); err != nil {
303- conditions .MarkFalse (openStackMachine , infrav1 .APIServerIngressReadyCondition , infrav1 .FloatingIPErrorReason , clusterv1 .ConditionSeverityError , "Deleting floating IP failed: %v" , err )
304- return ctrl.Result {}, fmt .Errorf ("delete floating IP %q: %w" , address .Address , err )
305- }
306- }
307- }
296+ if util .IsControlPlaneMachine (machine ) {
297+ if err := removeAPIServerEndpoint (scope , openStackCluster , openStackMachine , instanceStatus , clusterResourceName ); err != nil {
298+ return ctrl.Result {}, err
308299 }
309300 }
310301
311- instanceSpec , err := machineToInstanceSpec (openStackCluster , machine , openStackMachine , "" )
312- if err != nil {
313- return ctrl.Result {}, err
302+ // If no instance was created we currently need to check for orphaned
303+ // volumes. This requires resolving the instance spec.
304+ // TODO: write volumes to status resources on creation so this is no longer required.
305+ if instanceStatus == nil && openStackMachine .Status .Resolved != nil {
306+ instanceSpec , err := machineToInstanceSpec (openStackCluster , machine , openStackMachine , "" )
307+ if err != nil {
308+ return ctrl.Result {}, err
309+ }
310+ if err := computeService .DeleteVolumes (instanceSpec ); err != nil {
311+ return ctrl.Result {}, fmt .Errorf ("delete volumes: %w" , err )
312+ }
314313 }
315314
316- if err := computeService .DeleteInstance (openStackMachine , instanceStatus , instanceSpec ); err != nil {
317- conditions .MarkFalse (openStackMachine , infrav1 .InstanceReadyCondition , infrav1 .InstanceDeleteFailedReason , clusterv1 .ConditionSeverityError , "Deleting instance failed: %v" , err )
318- return ctrl.Result {}, fmt .Errorf ("delete instance: %w" , err )
315+ if instanceStatus != nil {
316+ if err := computeService .DeleteInstance (openStackMachine , instanceStatus ); err != nil {
317+ conditions .MarkFalse (openStackMachine , infrav1 .InstanceReadyCondition , infrav1 .InstanceDeleteFailedReason , clusterv1 .ConditionSeverityError , "Deleting instance failed: %v" , err )
318+ return ctrl.Result {}, fmt .Errorf ("delete instance: %w" , err )
319+ }
319320 }
320321
321322 trunkSupported , err := networkingService .IsTrunkExtSupported ()
@@ -341,6 +342,62 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope *scope.WithLogger, cl
341342 return ctrl.Result {}, nil
342343}
343344
345+ func getInstanceStatus (openStackMachine * infrav1.OpenStackMachine , computeService * compute.Service ) (* compute.InstanceStatus , error ) {
346+ if openStackMachine .Status .InstanceID != nil {
347+ return computeService .GetInstanceStatus (* openStackMachine .Status .InstanceID )
348+ }
349+ return computeService .GetInstanceStatusByName (openStackMachine , openStackMachine .Name )
350+ }
351+
352+ func removeAPIServerEndpoint (scope * scope.WithLogger , openStackCluster * infrav1.OpenStackCluster , openStackMachine * infrav1.OpenStackMachine , instanceStatus * compute.InstanceStatus , clusterResourceName string ) error {
353+ if openStackCluster .Spec .APIServerLoadBalancer .IsEnabled () {
354+ loadBalancerService , err := loadbalancer .NewService (scope )
355+ if err != nil {
356+ return err
357+ }
358+
359+ err = loadBalancerService .DeleteLoadBalancerMember (openStackCluster , openStackMachine , clusterResourceName )
360+ if err != nil {
361+ conditions .MarkFalse (openStackMachine , infrav1 .APIServerIngressReadyCondition , infrav1 .LoadBalancerMemberErrorReason , clusterv1 .ConditionSeverityWarning , "Machine could not be removed from load balancer: %v" , err )
362+ return err
363+ }
364+ return nil
365+ }
366+
367+ // XXX(mdbooth): This looks wrong to me. Surely we should only ever
368+ // disassociate the floating IP here. I would expect the API server
369+ // floating IP to be created and deleted with the cluster. And if the
370+ // delete behaviour is correct, we leak it if the instance was
371+ // previously deleted.
372+ if openStackCluster .Spec .APIServerFloatingIP == nil && instanceStatus != nil {
373+ instanceNS , err := instanceStatus .NetworkStatus ()
374+ if err != nil {
375+ openStackMachine .SetFailure (
376+ capierrors .UpdateMachineError ,
377+ fmt .Errorf ("get network status for OpenStack instance %s with ID %s: %v" , instanceStatus .Name (), instanceStatus .ID (), err ),
378+ )
379+ return nil
380+ }
381+
382+ networkingService , err := networking .NewService (scope )
383+ if err != nil {
384+ return err
385+ }
386+
387+ addresses := instanceNS .Addresses ()
388+ for _ , address := range addresses {
389+ if address .Type == corev1 .NodeExternalIP {
390+ if err = networkingService .DeleteFloatingIP (openStackMachine , address .Address ); err != nil {
391+ conditions .MarkFalse (openStackMachine , infrav1 .APIServerIngressReadyCondition , infrav1 .FloatingIPErrorReason , clusterv1 .ConditionSeverityError , "Deleting floating IP failed: %v" , err )
392+ return fmt .Errorf ("delete floating IP %q: %w" , address .Address , err )
393+ }
394+ }
395+ }
396+ }
397+
398+ return nil
399+ }
400+
344401// GetPortIDs returns a list of port IDs from a list of PortStatus.
345402func GetPortIDs (ports []infrav1.PortStatus ) []string {
346403 portIDs := make ([]string , len (ports ))
@@ -489,34 +546,50 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
489546 return ctrl.Result {}, nil
490547 }
491548
492- // If the OpenStackMachine doesn't have our finalizer, add it.
493- if controllerutil .AddFinalizer (openStackMachine , infrav1 .MachineFinalizer ) {
494- // Register the finalizer immediately to avoid orphaning OpenStack resources on delete
495- return ctrl.Result {}, nil
496- }
497-
498549 if ! openStackCluster .Status .Ready {
499550 scope .Logger ().Info ("Cluster infrastructure is not ready yet, re-queuing machine" )
500551 conditions .MarkFalse (openStackMachine , infrav1 .InstanceReadyCondition , infrav1 .WaitingForClusterInfrastructureReason , clusterv1 .ConditionSeverityInfo , "" )
501552 return ctrl.Result {RequeueAfter : waitForClusterInfrastructureReadyDuration }, nil
502553 }
503554
504- if changed , err := resolveMachineResources (scope , clusterResourceName , openStackCluster , openStackMachine , machine ); changed || err != nil {
505- scope .Logger ().V (6 ).Info ("Machine resources updated, requeuing" )
506- return ctrl.Result {}, err
507- }
508-
509555 // Make sure bootstrap data is available and populated.
510556 if machine .Spec .Bootstrap .DataSecretName == nil {
511557 scope .Logger ().Info ("Bootstrap data secret reference is not yet available" )
512558 conditions .MarkFalse (openStackMachine , infrav1 .InstanceReadyCondition , infrav1 .WaitingForBootstrapDataReason , clusterv1 .ConditionSeverityInfo , "" )
513559 return ctrl.Result {}, nil
514560 }
515- userData , err := r .getBootstrapData (ctx , machine , openStackMachine )
561+
562+ changed , err := resolveMachineResources (scope , clusterResourceName , openStackCluster , openStackMachine , machine )
516563 if err != nil {
517564 return ctrl.Result {}, err
518565 }
566+
567+ // Also add the finalizer when writing resolved resources so we can start creating resources on the next reconcile.
568+ if controllerutil .AddFinalizer (openStackMachine , infrav1 .MachineFinalizer ) {
569+ changed = true
570+ }
571+
572+ // We requeue if we either added the finalizer or resolved machine
573+ // resources. This means that we never create any resources unless we
574+ // have observed that the finalizer and resolved machine resources were
575+ // successfully written in a previous transaction. This in turn means
576+ // that in the delete path we can be sure that if there are no resolved
577+ // resources then no resources were created.
578+ if changed {
579+ scope .Logger ().V (6 ).Info ("Machine resources updated, requeuing" )
580+ return ctrl.Result {}, nil
581+ }
582+
583+ // Check for orphaned resources previously created but not written to the status
584+ if err := adoptMachineResources (scope , openStackMachine ); err != nil {
585+ return ctrl.Result {}, fmt .Errorf ("adopting machine resources: %w" , err )
586+ }
587+
519588 scope .Logger ().Info ("Reconciling Machine" )
589+ userData , err := r .getBootstrapData (ctx , machine , openStackMachine )
590+ if err != nil {
591+ return ctrl.Result {}, err
592+ }
520593
521594 computeService , err := compute .NewService (scope )
522595 if err != nil {
0 commit comments