diff --git a/api/v1alpha1/metalstackcluster_types.go b/api/v1alpha1/metalstackcluster_types.go
index 31007bc..29fe547 100644
--- a/api/v1alpha1/metalstackcluster_types.go
+++ b/api/v1alpha1/metalstackcluster_types.go
@@ -31,8 +31,8 @@ const (
 	ClusterControlPlaneEndpointDefaultPort = 443
 
-	ClusterNodeNetworkEnsured          clusterv1.ConditionType = "ClusterNodeNetworkEnsured"
-	ClusterControlPlaneEndpointEnsured clusterv1.ConditionType = "ClusterControlPlaneEndpointEnsured"
+	ClusterNodeNetworkEnsured    clusterv1.ConditionType = "ClusterNodeNetworkEnsured"
+	ClusterControlPlaneIPEnsured clusterv1.ConditionType = "ClusterControlPlaneIPEnsured"
 )
 
 var (
@@ -52,14 +52,13 @@ type MetalStackClusterSpec struct {
 	ProjectID string `json:"projectID"`
 
 	// NodeNetworkID is the network ID in metal-stack in which the worker nodes and the firewall of the cluster are placed.
-	// If not provided this will automatically be acquired during reconcile. Note that this field is not patched after auto-acquisition.
-	// The ID of the auto-acquired network can be looked up in the status resource instead.
+	// If not provided, this will automatically be acquired during reconcile.
 	// +optional
 	NodeNetworkID *string `json:"nodeNetworkID,omitempty"`
 
 	// ControlPlaneIP is the ip address in metal-stack on which the control plane will be exposed.
-	// If this ip and the control plane endpoint are not provided this will automatically be acquired during reconcile. Note that this field is not patched after auto-acquisition.
-	// The address of the auto-acquired ip can be looked up in the control plane endpoint.
+	// If this ip and the control plane endpoint are not provided, an ephemeral ip will automatically be acquired during reconcile.
+	// Static ip addresses will not be deleted.
 	// +optional
 	ControlPlaneIP *string `json:"controlPlaneIP,omitempty"`
 
@@ -79,6 +78,7 @@ type APIEndpoint struct {
 // MetalStackClusterStatus defines the observed state of MetalStackCluster.
 type MetalStackClusterStatus struct {
 	// Ready denotes that the cluster is ready.
+	// +kubebuilder:default=false
 	Ready bool `json:"ready"`
 
 	// FailureReason indicates that there is a fatal problem reconciling the
@@ -95,13 +95,6 @@ type MetalStackClusterStatus struct {
 	// Conditions defines current service state of the MetalStackCluster.
 	// +optional
 	Conditions clusterv1.Conditions `json:"conditions,omitempty"`
-
-	// NodeCIDR is set as soon as the node network was created.
-	// +optional
-	NodeCIDR *string `json:"nodeCIDR,omitempty"`
-	// NodeNetworkID is set as soon as the node network was created.
-	// +optional
-	NodeNetworkID *string `json:"nodeNetworkID,omitempty"`
 }
 
 // +kubebuilder:object:root=true
diff --git a/api/v1alpha1/metalstackmachine_types.go b/api/v1alpha1/metalstackmachine_types.go
index 6c5a2c5..aeece1d 100644
--- a/api/v1alpha1/metalstackmachine_types.go
+++ b/api/v1alpha1/metalstackmachine_types.go
@@ -49,6 +49,7 @@ type MetalStackMachineSpec struct {
 // MetalStackMachineStatus defines the observed state of MetalStackMachine.
 type MetalStackMachineStatus struct {
 	// Ready denotes that the machine is ready.
+	// +kubebuilder:default=false
 	Ready bool `json:"ready"`
 
 	// FailureReason indicates that there is a fatal problem reconciling the
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 2abc344..b024946 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -146,16 +146,6 @@ func (in *MetalStackClusterStatus) DeepCopyInto(out *MetalStackClusterStatus) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
-	if in.NodeCIDR != nil {
-		in, out := &in.NodeCIDR, &out.NodeCIDR
-		*out = new(string)
-		**out = **in
-	}
-	if in.NodeNetworkID != nil {
-		in, out := &in.NodeNetworkID, &out.NodeNetworkID
-		*out = new(string)
-		**out = **in
-	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetalStackClusterStatus.
diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackclusters.yaml
index bca9867..2e3e2ac 100644
--- a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackclusters.yaml
+++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackclusters.yaml
@@ -88,14 +88,13 @@ spec:
             controlPlaneIP:
               description: |-
                 ControlPlaneIP is the ip address in metal-stack on which the control plane will be exposed.
-                If this ip and the control plane endpoint are not provided this will automatically be acquired during reconcile. Note that this field is not patched after auto-acquisition.
-                The address of the auto-acquired ip can be looked up in the control plane endpoint.
+                If this ip and the control plane endpoint are not provided, an ephemeral ip will automatically be acquired during reconcile.
+                Static ip addresses will not be deleted.
               type: string
             nodeNetworkID:
               description: |-
                 NodeNetworkID is the network ID in metal-stack in which the worker nodes and the firewall of the cluster are placed.
-                If not provided this will automatically be acquired during reconcile. Note that this field is not patched after auto-acquisition.
-                The ID of the auto-acquired network can be looked up in the status resource instead.
+                If not provided, this will automatically be acquired during reconcile.
               type: string
             partition:
               description: Partition is the data center partition in which the resources
@@ -168,14 +167,8 @@ spec:
                 state, and will be set to a token value suitable for
                 programmatic interpretation.
              type: string
-            nodeCIDR:
-              description: NodeCIDR is set as soon as the node network was created.
-              type: string
-            nodeNetworkID:
-              description: NodeNetworkID is set as soon as the node network was
-                created.
-              type: string
             ready:
+              default: false
               description: Ready denotes that the cluster is ready.
               type: boolean
             required:
diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachines.yaml
index 9e3824d..2b47453 100644
--- a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachines.yaml
+++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachines.yaml
@@ -160,6 +160,7 @@ spec:
                 programmatic interpretation.
              type: string
             ready:
+              default: false
               description: Ready denotes that the machine is ready.
              type: boolean
            required:
diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachinetemplates.yaml
index fb230c1..a214d8b 100644
--- a/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachinetemplates.yaml
+++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_metalstackmachinetemplates.yaml
@@ -175,6 +175,7 @@ spec:
                     programmatic interpretation.
                   type: string
                 ready:
+                  default: false
                   description: Ready denotes that the machine is ready.
                   type: boolean
                 required:
diff --git a/go.mod b/go.mod
index 7fdc45a..3fef427 100644
--- a/go.mod
+++ b/go.mod
@@ -10,7 +10,6 @@ require (
 	github.com/onsi/gomega v1.36.0
 	github.com/stretchr/testify v1.9.0
 	go.uber.org/zap v1.27.0
-	golang.org/x/sync v0.10.0
 	k8s.io/api v0.31.3
 	k8s.io/apimachinery v0.31.3
 	k8s.io/client-go v0.31.3
@@ -107,6 +106,7 @@ require (
 	golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect
 	golang.org/x/net v0.33.0 // indirect
 	golang.org/x/oauth2 v0.24.0 // indirect
+	golang.org/x/sync v0.10.0 // indirect
 	golang.org/x/sys v0.28.0 // indirect
 	golang.org/x/term v0.27.0 // indirect
 	golang.org/x/text v0.21.0 // indirect
diff --git a/internal/controller/metalstackcluster_controller.go b/internal/controller/metalstackcluster_controller.go
index 6e72192..4822ff6 100644
--- a/internal/controller/metalstackcluster_controller.go
+++ b/internal/controller/metalstackcluster_controller.go
@@ -21,13 +21,11 @@ import (
 	"errors"
 	"fmt"
 
-	"golang.org/x/sync/errgroup"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
 
-	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/utils/ptr"
@@ -40,7 +38,6 @@ import (
 	"github.com/go-logr/logr"
 	"github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
-	infrastructurev1alpha1 "github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
 	ipmodels "github.com/metal-stack/metal-go/api/client/ip"
 	"github.com/metal-stack/metal-go/api/client/network"
 	"github.com/metal-stack/metal-go/api/models"
@@ -109,56 +106,33 @@ func (r *MetalStackClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re
 		infraCluster: infraCluster,
 	}
 
-	defer func() {
-		statusErr := reconciler.status()
-		if statusErr != nil {
-			err = errors.Join(err, fmt.Errorf("unable to update status: %w", statusErr))
-		} else if !reconciler.infraCluster.Status.Ready {
-			err = errors.New("cluster is not yet ready, requeuing")
-		}
-	}()
-
-	if !infraCluster.DeletionTimestamp.IsZero() {
-		if !controllerutil.ContainsFinalizer(infraCluster, v1alpha1.ClusterFinalizer) {
-			return ctrl.Result{}, nil
-		}
-
-		log.Info("reconciling resource deletion flow")
-		err := reconciler.delete()
-		if err != nil {
-			return ctrl.Result{}, err
-		}
-
-		log.Info("deletion finished, removing finalizer")
-		controllerutil.RemoveFinalizer(infraCluster, v1alpha1.ClusterFinalizer)
-		if err := r.Client.Update(ctx, infraCluster); err != nil {
-			return ctrl.Result{}, fmt.Errorf("unable to remove finalizer: %w", err)
-		}
-
-		return ctrl.Result{}, nil
+	helper, err := patch.NewHelper(infraCluster, r.Client)
+	if err != nil {
+		return ctrl.Result{}, err
 	}
 
-	log.Info("reconciling cluster")
-
-	if !controllerutil.ContainsFinalizer(infraCluster, v1alpha1.ClusterFinalizer) {
+	if !infraCluster.DeletionTimestamp.IsZero() {
+		err = reconciler.delete()
+	} else if !controllerutil.ContainsFinalizer(infraCluster, v1alpha1.ClusterFinalizer) {
 		log.Info("adding finalizer")
 		controllerutil.AddFinalizer(infraCluster, v1alpha1.ClusterFinalizer)
-		if err := r.Client.Update(ctx, infraCluster); err != nil {
-			return ctrl.Result{}, fmt.Errorf("unable to add finalizer: %w", err)
-		}
-
-		return ctrl.Result{}, nil
+	} else {
+		log.Info("reconciling cluster")
+		err = reconciler.reconcile()
 	}
 
-	err = reconciler.reconcile()
+	updateErr := helper.Patch(ctx, infraCluster)
+	if updateErr != nil {
+		err = errors.Join(err, fmt.Errorf("failed to update infra cluster: %w", updateErr))
+	}
 
-	return ctrl.Result{}, err // remember to return err here and not nil because the defer func can influence this
+	return ctrl.Result{}, err
 }
 
 // SetupWithManager sets up the controller with the Manager.
 func (r *MetalStackClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&infrastructurev1alpha1.MetalStackCluster{}).
+		For(&v1alpha1.MetalStackCluster{}).
 		Named("metalstackcluster").
 		WithEventFilter(predicates.ResourceIsNotExternallyManaged(mgr.GetScheme(), mgr.GetLogger())).
 		// TODO: implement resource paused from cluster-api's predicates?
@@ -168,161 +142,101 @@ func (r *MetalStackClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
 
 func (r *clusterReconciler) reconcile() error {
 	nodeNetworkID, err := r.ensureNodeNetwork()
 	if err != nil {
+		conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
 		return fmt.Errorf("unable to ensure node network: %w", err)
 	}
+	conditions.MarkTrue(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured)
 
+	r.infraCluster.Spec.NodeNetworkID = &nodeNetworkID
 	r.log.Info("reconciled node network", "network-id", nodeNetworkID)
 
 	if r.infraCluster.Spec.ControlPlaneEndpoint.Host == "" {
 		ip, err := r.ensureControlPlaneIP()
 		if err != nil {
+			conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterControlPlaneIPEnsured, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
 			return fmt.Errorf("unable to ensure control plane ip: %w", err)
 		}
+		conditions.MarkTrue(r.infraCluster, v1alpha1.ClusterControlPlaneIPEnsured)
 
-		r.log.Info("reconciled control plane ip", "ip", *ip.Ipaddress)
+		r.log.Info("reconciled control plane ip", "ip", ip)
 
 		r.log.Info("setting control plane endpoint into cluster resource")
 
-		helper, err := patch.NewHelper(r.infraCluster, r.client)
-		if err != nil {
-			return err
-		}
-
-		r.infraCluster.Spec.ControlPlaneEndpoint = infrastructurev1alpha1.APIEndpoint{
-			Host: *ip.Ipaddress,
+		r.infraCluster.Spec.ControlPlaneEndpoint = v1alpha1.APIEndpoint{
+			Host: ip,
 			Port: v1alpha1.ClusterControlPlaneEndpointDefaultPort,
 		}
-
-		err = helper.Patch(r.ctx, r.infraCluster) // TODO:check whether patch is not executed when no changes occur
-		if err != nil {
-			return fmt.Errorf("failed to update infra cluster control plane endpoint: %w", err)
-		}
-
-		return nil
 	}
 
-	return err
+	r.infraCluster.Status.Ready = true
+
+	return nil
 }
 
 func (r *clusterReconciler) delete() error {
 	var err error
-	defer func() {
-		statusErr := r.status()
-		if statusErr != nil {
-			err = errors.Join(err, fmt.Errorf("unable to update status: %w", statusErr))
-		}
-	}()
+
+	if !controllerutil.ContainsFinalizer(r.infraCluster, v1alpha1.ClusterFinalizer) {
+		return nil
+	}
+
+	r.log.Info("reconciling resource deletion flow")
 
 	err = r.deleteControlPlaneIP()
 	if err != nil {
 		return fmt.Errorf("unable to delete control plane ip: %w", err)
 	}
+	r.infraCluster.Spec.ControlPlaneIP = nil
 
 	err = r.deleteNodeNetwork()
 	if err != nil {
 		return fmt.Errorf("unable to delete node network: %w", err)
 	}
+	r.infraCluster.Spec.NodeNetworkID = nil
+
+	r.log.Info("deletion finished, removing finalizer")
+	controllerutil.RemoveFinalizer(r.infraCluster, v1alpha1.ClusterFinalizer)
 
 	return err
 }
 
 func (r *clusterReconciler) ensureNodeNetwork() (string, error) {
-	nws, err := r.findNodeNetwork()
-	if err != nil {
-		return "", err
-	}
-
-	switch len(nws) {
-	case 0:
-		resp, err := r.metalClient.Network().AllocateNetwork(network.NewAllocateNetworkParams().WithBody(&models.V1NetworkAllocateRequest{
-			Projectid:   r.infraCluster.Spec.ProjectID,
-			Partitionid: r.infraCluster.Spec.Partition,
-			Name:        r.infraCluster.GetName(),
-			Description: fmt.Sprintf("%s/%s", r.infraCluster.GetNamespace(), r.infraCluster.GetName()),
-			Labels:      map[string]string{tag.ClusterID: string(r.infraCluster.GetUID())},
-		}).WithContext(r.ctx), nil)
-		if err != nil {
-			return "", fmt.Errorf("error creating node network: %w", err)
-		}
-
-		return *resp.Payload.ID, nil
-	case 1:
-		nw := nws[0]
-
-		if len(nw.Prefixes) == 0 {
-			return "", errors.New("node network exists but the prefix is gone")
-		}
-
-		return *nw.ID, nil
-	default:
-		return "", fmt.Errorf("more than a single node network exists for this cluster, operator investigation is required")
-	}
-}
-
-func (r *clusterReconciler) deleteNodeNetwork() error {
 	if r.infraCluster.Spec.NodeNetworkID != nil {
-		r.log.Info("skip deletion of node network")
-		return nil
+		return *r.infraCluster.Spec.NodeNetworkID, nil
 	}
 
-	nws, err := r.findNodeNetwork()
+	resp, err := r.metalClient.Network().AllocateNetwork(network.NewAllocateNetworkParams().WithBody(&models.V1NetworkAllocateRequest{
+		Projectid:   r.infraCluster.Spec.ProjectID,
+		Partitionid: r.infraCluster.Spec.Partition,
+		Name:        r.infraCluster.GetName(),
+		Description: fmt.Sprintf("%s/%s", r.infraCluster.GetNamespace(), r.infraCluster.GetName()),
+		Labels:      map[string]string{tag.ClusterID: string(r.infraCluster.GetUID())},
+	}).WithContext(r.ctx), nil)
 	if err != nil {
-		return err
+		return "", fmt.Errorf("error creating node network: %w", err)
 	}
 
-	switch len(nws) {
-	case 0:
-		return nil
-	case 1:
-		nw := nws[0]
-
-		if nw.ID == nil {
-			return fmt.Errorf("node network id not set")
-		}
-
-		_, err := r.metalClient.Network().FreeNetwork(network.NewFreeNetworkParams().WithID(*nw.ID).WithContext(r.ctx), nil)
-		if err != nil {
-			return err
-		}
-		r.log.Info("deleted node network")
-
-		return nil
-	default:
-		return errors.New("more than a single node network exists for this cluster, operator investigation is required")
-	}
+	return *resp.Payload.ID, nil
 }
 
-func (r *clusterReconciler) findNodeNetwork() ([]*models.V1NetworkResponse, error) {
-	if r.infraCluster.Spec.NodeNetworkID != nil {
-		resp, err := r.metalClient.Network().FindNetwork(network.NewFindNetworkParams().WithID(*r.infraCluster.Spec.NodeNetworkID).WithContext(r.ctx), nil)
-		if err != nil {
-			return nil, err
-		}
-
-		return []*models.V1NetworkResponse{resp.Payload}, nil
+func (r *clusterReconciler) deleteNodeNetwork() error {
+	if r.infraCluster.Spec.NodeNetworkID == nil {
+		return nil
 	}
 
-	resp, err := r.metalClient.Network().FindNetworks(network.NewFindNetworksParams().WithBody(&models.V1NetworkFindRequest{
-		Projectid:   r.infraCluster.Spec.ProjectID,
-		Partitionid: r.infraCluster.Spec.Partition,
-		Labels:      map[string]string{tag.ClusterID: string(r.infraCluster.GetUID())},
-	}).WithContext(r.ctx), nil)
+	_, err := r.metalClient.Network().FreeNetwork(network.NewFreeNetworkParams().WithID(*r.infraCluster.Spec.NodeNetworkID).WithContext(r.ctx), nil)
 	if err != nil {
-		return nil, err
+		return err
 	}
+	r.log.Info("deleted node network")
 
-	return resp.Payload, nil
+	return nil
 }
 
-func (r *clusterReconciler) ensureControlPlaneIP() (*models.V1IPResponse, error) {
-	ip, err := r.findControlPlaneIP()
-	if ip != nil {
-		return ip, nil
-	}
-	if errors.Is(err, errProviderIPTooManyFound) {
-		return nil, fmt.Errorf("more than a single control plane ip exists for this cluster, operator investigation is required")
-	}
-	if err != nil && !errors.Is(err, errProviderIPNotFound) {
-		return nil, err
+func (r *clusterReconciler) ensureControlPlaneIP() (string, error) {
+	if r.infraCluster.Spec.ControlPlaneIP != nil {
+		return *r.infraCluster.Spec.ControlPlaneIP, nil
 	}
 
 	nwResp, err := r.metalClient.Network().FindNetworks(network.NewFindNetworksParams().WithBody(&models.V1NetworkFindRequest{
@@ -331,17 +245,18 @@ func (r *clusterReconciler) ensureControlPlaneIP() (*models.V1IPResponse, error)
 		},
 	}).WithContext(r.ctx), nil)
 	if err != nil {
-		return nil, fmt.Errorf("error finding default network: %w", err)
+		return "", fmt.Errorf("error finding default network: %w", err)
 	}
 
 	if len(nwResp.Payload) != 1 {
-		return nil, fmt.Errorf("no distinct default network configured in the metal-api")
+		return "", fmt.Errorf("no distinct default network configured in the metal-api")
 	}
+	defaultNetwork := nwResp.Payload[0]
 
 	resp, err := r.metalClient.IP().AllocateIP(ipmodels.NewAllocateIPParams().WithBody(&models.V1IPAllocateRequest{
 		Description: fmt.Sprintf("%s/%s control plane ip", r.infraCluster.GetNamespace(), r.infraCluster.GetName()),
 		Name:        r.infraCluster.GetName() + "-control-plane",
-		Networkid:   nwResp.Payload[0].ID,
+		Networkid:   defaultNetwork.ID,
 		Projectid:   &r.infraCluster.Spec.ProjectID,
 		Tags: []string{
 			tag.New(tag.ClusterID, string(r.infraCluster.GetUID())),
@@ -350,17 +265,16 @@ func (r *clusterReconciler) ensureControlPlaneIP() (*models.V1IPResponse, error)
 		Type: ptr.To(models.V1IPBaseTypeEphemeral),
 	}).WithContext(r.ctx), nil)
 	if err != nil {
-		return nil, fmt.Errorf("error creating ip: %w", err)
+		return "", fmt.Errorf("error creating ip: %w", err)
+	}
+	if resp.Payload.Ipaddress == nil {
+		return "", fmt.Errorf("allocated ip does not contain an ip address")
 	}
 
-	return resp.Payload, nil
+	return *resp.Payload.Ipaddress, nil
 }
 
 func (r *clusterReconciler) deleteControlPlaneIP() error {
-	if r.infraCluster.Spec.ControlPlaneIP != nil {
-		r.log.Info("skip deletion of provided control plane ip")
-		return nil
-	}
 	ip, err := r.findControlPlaneIP()
 	if err != nil && errors.Is(err, errProviderIPNotFound) {
 		return nil
@@ -420,105 +334,3 @@ func (r *clusterReconciler) findControlPlaneIP() (*models.V1IPResponse, error) {
 		return nil, errProviderIPTooManyFound
 	}
 }
-
-func (r *clusterReconciler) status() error {
-	var (
-		g, _             = errgroup.WithContext(r.ctx)
-		conditionUpdates = make(chan func())
-
-		// TODO: probably there is a helper for this available somewhere?
-		allConditionsTrue = func() bool {
-			for _, c := range r.infraCluster.Status.Conditions {
-				if c.Status != corev1.ConditionTrue {
-					return false
-				}
-			}
-
-			return true
-		}
-	)
-
-	g.Go(func() error {
-		nws, err := r.findNodeNetwork()
-
-		conditionUpdates <- func() {
-			if err != nil {
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
-				return
-			}
-
-			switch len(nws) {
-			case 0:
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured, "NotCreated", clusterv1.ConditionSeverityError, "node network was not yet created")
-			case 1:
-				nw := nws[0]
-
-				if len(nw.Prefixes) > 0 {
-					r.infraCluster.Status.NodeCIDR = &nw.Prefixes[0]
-				}
-				r.infraCluster.Status.NodeNetworkID = nw.ID
-
-				conditions.MarkTrue(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured)
-			default:
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterNodeNetworkEnsured, "InternalError", clusterv1.ConditionSeverityError, "more than a single node network exists for this cluster, operator investigation is required")
-			}
-		}
-
-		return err
-	})
-
-	g.Go(func() error {
-		ip, err := r.findControlPlaneIP()
-
-		conditionUpdates <- func() {
-			if errors.Is(err, errProviderIPNotFound) {
-				if r.infraCluster.Spec.ControlPlaneEndpoint.Host != "" {
-					conditions.MarkTrue(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured)
-				} else {
-					conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured, "NotCreated", clusterv1.ConditionSeverityError, "control plane ip was not yet created")
-				}
-
-			}
-			if errors.Is(err, errProviderIPTooManyFound) {
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured, "InternalError", clusterv1.ConditionSeverityError, "more than a single control plane ip exists for this cluster, operator investigation is required")
-			}
-			if err != nil {
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
-				return
-			}
-
-			if r.infraCluster.Spec.ControlPlaneEndpoint.Host == *ip.Ipaddress {
-				conditions.MarkTrue(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured)
-			} else {
-				conditions.MarkFalse(r.infraCluster, v1alpha1.ClusterControlPlaneEndpointEnsured, "NotSet", clusterv1.ConditionSeverityWarning, "control plane ip was not yet patched into the cluster's spec")
-			}
-		}
-
-		return err
-	})
-
-	ready := make(chan bool)
-	defer func() {
-		close(ready)
-	}()
-
-	go func() {
-		for u := range conditionUpdates {
-			u()
-		}
-		ready <- true
-	}()
-
-	groupErr := g.Wait()
-
-	close(conditionUpdates)
-	<-ready
-
-	if groupErr == nil && allConditionsTrue() {
-		r.infraCluster.Status.Ready = true
-	}
-
-	err := r.client.Status().Update(r.ctx, r.infraCluster)
-
-	return errors.Join(groupErr, err)
-}
diff --git a/internal/controller/metalstackcluster_controller_test.go b/internal/controller/metalstackcluster_controller_test.go
index bca7145..9bb7211 100644
--- a/internal/controller/metalstackcluster_controller_test.go
+++ b/internal/controller/metalstackcluster_controller_test.go
@@ -18,7 +18,6 @@ package controller
 
 import (
 	"context"
-	"net/http"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -32,13 +31,10 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
 	"github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
-	infrastructurev1alpha1 "github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
 	metalip "github.com/metal-stack/metal-go/api/client/ip"
 	metalnetwork "github.com/metal-stack/metal-go/api/client/network"
 	"github.com/metal-stack/metal-go/api/models"
 	metalgoclient "github.com/metal-stack/metal-go/test/client"
-	"github.com/metal-stack/metal-lib/httperrors"
-	"github.com/metal-stack/metal-lib/pkg/pointer"
 	"github.com/metal-stack/metal-lib/pkg/testcommon"
 
 	clusterv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -50,13 +46,13 @@ var _ = Describe("MetalStackCluster Controller", func() {
 	var (
 		ctx                  context.Context
 		cancel               func()
-		resource             *infrastructurev1alpha1.MetalStackCluster
+		resource             *v1alpha1.MetalStackCluster
 		controllerReconciler *MetalStackClusterReconciler
 	)
 
 	BeforeEach(func() {
 		ctx, cancel = context.WithCancel(suiteCtx)
-		resource = &infrastructurev1alpha1.MetalStackCluster{
+		resource = &v1alpha1.MetalStackCluster{
 			ObjectMeta: metav1.ObjectMeta{
 				Namespace:    "default",
 				GenerateName: resourcePrefix,
@@ -108,8 +104,8 @@ var _ = Describe("MetalStackCluster Controller", func() {
 
 	Context("reconciliation with auto-acquiring dependent resources", func() {
 		BeforeEach(func() {
-			resource.Spec = infrastructurev1alpha1.MetalStackClusterSpec{
-				ControlPlaneEndpoint: infrastructurev1alpha1.APIEndpoint{},
+			resource.Spec = v1alpha1.MetalStackClusterSpec{
+				ControlPlaneEndpoint: v1alpha1.APIEndpoint{},
 				ProjectID:            "test-project",
 				NodeNetworkID:        nil,
 				ControlPlaneIP:       nil,
@@ -143,16 +139,6 @@ var _ = Describe("MetalStackCluster Controller", func() {
 
 			controllerReconciler.MetalClient, _ = metalgoclient.NewMetalMockClient(testingT, &metalgoclient.MetalMockFns{
 				IP: func(m *mock.Mock) {
-					findIPResponse := &metalip.FindIPsOK{}
-
-					m.On("FindIPs", testcommon.MatchIgnoreContext(testingT, metalip.NewFindIPsParams().WithBody(&models.V1IPFindRequest{
-						Projectid: "test-project",
-						Tags: []string{
-							"cluster.metal-stack.io/id=" + string(resource.UID),
-							"metal-stack.infrastructure.cluster.x-k8s.io/purpose=control-plane",
-						},
-					})), nil).Return(findIPResponse, nil)
-
 					m.On("AllocateIP", testcommon.MatchIgnoreContext(testingT, metalip.NewAllocateIPParams().WithBody(&models.V1IPAllocateRequest{
 						Tags: []string{
 							"cluster.metal-stack.io/id=" + string(resource.UID),
@@ -163,29 +149,13 @@ var _ = Describe("MetalStackCluster Controller", func() {
 						Networkid: ptr.To("internet"),
 						Projectid: ptr.To("test-project"),
 						Type:      ptr.To("ephemeral"),
-					})), nil).Run(func(args mock.Arguments) {
-						findIPResponse.Payload = []*models.V1IPResponse{
-							{
-								Ipaddress: ptr.To("192.168.42.1"),
-							},
-						}
-					}).Return(&metalip.AllocateIPCreated{
+					})), nil).Return(&metalip.AllocateIPCreated{
 						Payload: &models.V1IPResponse{
 							Ipaddress: ptr.To("192.168.42.1"),
 						},
 					}, nil)
 				},
 				Network: func(m *mock.Mock) {
-					findNetworkResponse := &metalnetwork.FindNetworksOK{}
-
-					m.On("FindNetworks", testcommon.MatchIgnoreContext(testingT, metalnetwork.NewFindNetworksParams().WithBody(&models.V1NetworkFindRequest{
-						Labels: map[string]string{
-							"cluster.metal-stack.io/id": string(resource.UID),
-						},
-						Partitionid: "test-partition",
-						Projectid:   "test-project",
-					})), nil).Return(findNetworkResponse, nil)
-
 					m.On("AllocateNetwork", testcommon.MatchIgnoreContext(testingT, metalnetwork.NewAllocateNetworkParams().WithBody(&models.V1NetworkAllocateRequest{
 						Name:        resource.Name,
 						Description: resource.Namespace + "/" + resource.Name,
@@ -194,17 +164,7 @@ var _ = Describe("MetalStackCluster Controller", func() {
 						},
 						Partitionid: "test-partition",
 						Projectid:   "test-project",
-					})), nil).Run(func(args mock.Arguments) {
-						findNetworkResponse.Payload = []*models.V1NetworkResponse{{
-							Labels: map[string]string{
-								"cluster.metal-stack.io/id": string(resource.UID),
-							},
-							Partitionid: "test-partition",
-							Projectid:   "test-project",
-							ID:          ptr.To("node-network-id"),
-							Prefixes:    []string{"192.168.42.0/24"},
-						}}
-					}).Return(&metalnetwork.AllocateNetworkCreated{
+					})), nil).Return(&metalnetwork.AllocateNetworkCreated{
 						Payload: &models.V1NetworkResponse{
 							Labels: map[string]string{
 								"cluster.metal-stack.io/id": string(resource.UID),
@@ -237,7 +197,7 @@ var _ = Describe("MetalStackCluster Controller", func() {
 			Expect(err).NotTo(HaveOccurred())
 
 			Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).To(Succeed())
-			Expect(resource.Finalizers).To(ContainElement(infrastructurev1alpha1.ClusterFinalizer))
+			Expect(resource.Finalizers).To(ContainElement(v1alpha1.ClusterFinalizer))
 
 			// second reconcile
 
@@ -260,10 +220,14 @@ var _ = Describe("MetalStackCluster Controller", func() {
 				"Status": Equal(corev1.ConditionTrue),
 			})))
 			Expect(resource.Status.Conditions).To(ContainElement(MatchFields(IgnoreExtras, Fields{
-				"Type":   Equal(v1alpha1.ClusterControlPlaneEndpointEnsured),
+				"Type":   Equal(v1alpha1.ClusterControlPlaneIPEnsured),
 				"Status": Equal(corev1.ConditionTrue),
 			})))
 			Expect(resource.Status.Ready).To(BeTrue())
+			Expect(resource.Spec.ControlPlaneEndpoint).To(Equal(v1alpha1.APIEndpoint{
+				Host: "192.168.42.1",
+				Port: 443,
+			}))
 		})
 	})
 
 	Context("reconciliation when external resources are provided", func() {
@@ -277,8 +241,8 @@ var _ = Describe("MetalStackCluster Controller", func() {
 			nodeNetworkID = "test-network"
 			controlPlaneIP = "192.168.42.1"
 
-			resource.Spec = infrastructurev1alpha1.MetalStackClusterSpec{
-				ControlPlaneEndpoint: infrastructurev1alpha1.APIEndpoint{},
+			resource.Spec = v1alpha1.MetalStackClusterSpec{
+				ControlPlaneEndpoint: v1alpha1.APIEndpoint{},
 				ProjectID:            "test-project",
 				NodeNetworkID:        &nodeNetworkID,
 				ControlPlaneIP:       &controlPlaneIP,
@@ -311,37 +275,7 @@ var _ = Describe("MetalStackCluster Controller", func() {
 				Namespace: "default",
 			}
 
-			controllerReconciler.MetalClient, _ = metalgoclient.NewMetalMockClient(testingT, &metalgoclient.MetalMockFns{
-				IP: func(m *mock.Mock) {
-					m.On("FindIP", testcommon.MatchIgnoreContext(testingT, metalip.NewFindIPParams().WithID(controlPlaneIP)), nil).Return(&metalip.FindIPOK{
-						Payload: &models.V1IPResponse{
-							Ipaddress: &controlPlaneIP,
-							Projectid: pointer.Pointer("test-project"),
-							Tags: []string{
-								"cluster.metal-stack.io/id=" + string(resource.UID),
-								"metal-stack.infrastructure.cluster.x-k8s.io/purpose=control-plane",
-							},
-							Type: ptr.To(models.V1IPBaseTypeStatic),
-						},
-					}, nil)
-				},
-				Network: func(m *mock.Mock) {
-					m.On("FindNetwork", testcommon.MatchIgnoreContext(testingT, metalnetwork.NewFindNetworkParams().WithID(nodeNetworkID)), nil).
-						Return(&metalnetwork.FindNetworkOK{
-							Payload: &models.V1NetworkResponse{
-								ID:          &nodeNetworkID,
-								Name:        resource.Name,
-								Description: resource.Namespace + "/" + resource.Name,
-								Labels: map[string]string{
-									"cluster.metal-stack.io/id": string(resource.UID),
-								},
-								Partitionid: "test-partition",
-								Projectid:   "test-project",
-								Prefixes:    []string{"192.168.42.0/24"},
-							},
-						}, nil)
-				},
-			})
+			controllerReconciler.MetalClient, _ = metalgoclient.NewMetalMockClient(testingT, &metalgoclient.MetalMockFns{})
 
 			Eventually(func() clusterv1beta1.Conditions {
 				_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
@@ -358,7 +292,7 @@ var _ = Describe("MetalStackCluster Controller", func() {
 					"Status": Equal(corev1.ConditionTrue),
 				}),
 				MatchFields(IgnoreExtras, Fields{
-					"Type":   Equal(v1alpha1.ClusterControlPlaneEndpointEnsured),
+					"Type":   Equal(v1alpha1.ClusterControlPlaneIPEnsured),
 					"Status": Equal(corev1.ConditionTrue),
 				}),
 			))
@@ -368,7 +302,7 @@ var _ = Describe("MetalStackCluster Controller", func() {
 	})
 
 	When("referenced resources do not exist", func() {
-		It("should fail reconciling", func() {
+		It("should succeed reconciling even if linked resources do not exist", func() {
 			Expect(k8sClient.Create(ctx, resource)).To(Succeed())
 
 			By("creating the cluster resource and setting the owner reference")
@@ -392,50 +326,27 @@ var _ = Describe("MetalStackCluster Controller", func() {
 				Namespace: "default",
 			}
 
-			controllerReconciler.MetalClient, _ = metalgoclient.NewMetalMockClient(testingT, &metalgoclient.MetalMockFns{
-				IP: func(m *mock.Mock) {
-					m.On("FindIP", testcommon.MatchIgnoreContext(testingT, metalip.NewFindIPParams().WithID(controlPlaneIP)), nil).
-						Return(nil, &metalip.FindIPDefault{
-							Payload: &httperrors.HTTPErrorResponse{
-								StatusCode: http.StatusNotFound,
-								Message:    "ip not found",
-							},
-						})
-				},
-				Network: func(m *mock.Mock) {
-					m.On("FindNetwork", testcommon.MatchIgnoreContext(testingT, metalnetwork.NewFindNetworkParams().WithID(nodeNetworkID)), nil).
-						Return(nil, &metalnetwork.FindNetworkDefault{
-							Payload: &httperrors.HTTPErrorResponse{
-								StatusCode: http.StatusNotFound,
-								Message:    "network not found",
-							},
-						})
-				},
-			})
-
-			Eventually(func() error {
+			Eventually(func() clusterv1beta1.Conditions {
 				_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
 					NamespacedName: typeNamespacedName,
 				})
-				return err
-			}).Should(MatchError(ContainSubstring("not found")))
-
-			Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).ToNot(HaveOccurred())
-
-			Expect(resource.Status.Conditions).To(ContainElement(MatchFields(IgnoreExtras, Fields{
-				"Type":    Equal(v1alpha1.ClusterNodeNetworkEnsured),
-				"Status":  Equal(corev1.ConditionFalse),
-				"Reason":  Equal("InternalError"),
-				"Message": ContainSubstring("network not found"),
-			})))
-			Expect(resource.Status.Conditions).To(ContainElement(MatchFields(IgnoreExtras, Fields{
-				"Type":    Equal(v1alpha1.ClusterControlPlaneEndpointEnsured),
-				"Status":  Equal(corev1.ConditionFalse),
-				"Reason":  Equal("InternalError"),
-				"Message": ContainSubstring("ip not found"),
-			})))
-
-			Expect(resource.Status.Ready).To(BeFalse())
+				Expect(err).ToNot(HaveOccurred())
+
+				Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).ToNot(HaveOccurred())
+
+				return resource.Status.Conditions
+			}, "20s").Should(ContainElements(
+				MatchFields(IgnoreExtras, Fields{
+					"Type":   Equal(v1alpha1.ClusterNodeNetworkEnsured),
+					"Status": Equal(corev1.ConditionTrue),
+				}),
+				MatchFields(IgnoreExtras, Fields{
+					"Type":   Equal(v1alpha1.ClusterControlPlaneIPEnsured),
+					"Status": Equal(corev1.ConditionTrue),
+				}),
+			))
+
+			Expect(resource.Status.Ready).To(BeTrue())
 		})
 	})
 })
diff --git a/internal/controller/metalstackmachine_controller.go b/internal/controller/metalstackmachine_controller.go
index 5b81283..b6f9c36 100644
--- a/internal/controller/metalstackmachine_controller.go
+++ b/internal/controller/metalstackmachine_controller.go
@@ -21,8 +21,8 @@ import (
 	"errors"
 	"fmt"
 	"strings"
+	"time"
 
-	"golang.org/x/sync/errgroup"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -38,7 +38,6 @@ import (
 	"github.com/go-logr/logr"
 	"github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
-	infrastructurev1alpha1 "github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
 	metalgo "github.com/metal-stack/metal-go"
 	ipmodels "github.com/metal-stack/metal-go/api/client/ip"
 	metalmachine "github.com/metal-stack/metal-go/api/client/machine"
@@ -46,6 +45,8 @@ import (
 	"github.com/metal-stack/metal-lib/pkg/tag"
 )
 
+const defaultProviderMachineRequeueTime = time.Second * 30
+
 var (
 	errProviderMachineNotFound     = errors.New("provider machine not found")
 	errProviderMachineTooManyFound = errors.New("multiple provider machines found")
@@ -139,109 +140,103 @@ func (r *MetalStackMachineReconciler) Reconcile(ctx context.Context, req ctrl.Re
 		infraMachine: infraMachine,
 	}
 
-	defer func() {
-		statusErr := reconciler.status()
-		if statusErr != nil {
-			err = errors.Join(err, fmt.Errorf("unable to update status: %w", statusErr))
-		} else if !reconciler.infraMachine.Status.Ready {
-			err = errors.New("machine is not yet ready, requeuing")
-		}
-	}()
-
-	if !infraMachine.DeletionTimestamp.IsZero() {
-		if !controllerutil.ContainsFinalizer(infraMachine, v1alpha1.MachineFinalizer) {
-			return ctrl.Result{}, nil
-		}
-
-		log.Info("reconciling resource deletion flow")
-		err := reconciler.delete()
-		if err != nil {
-			return ctrl.Result{}, err
-		}
-
-		log.Info("deletion finished, removing finalizer")
-		controllerutil.RemoveFinalizer(infraMachine, v1alpha1.MachineFinalizer)
-		if err := r.Client.Update(ctx, infraMachine); err != nil {
-			return ctrl.Result{}, fmt.Errorf("unable to remove finalizer: %w", err)
-		}
-
-		return ctrl.Result{}, nil
+	helper, err := patch.NewHelper(infraMachine, r.Client)
+	if err != nil {
+		return ctrl.Result{}, err
 	}
+	var result ctrl.Result
 
-	log.Info("reconciling machine")
-
-	if !controllerutil.ContainsFinalizer(infraMachine, v1alpha1.MachineFinalizer) {
+	if !infraMachine.DeletionTimestamp.IsZero() {
+		err = reconciler.delete()
+	} else if !controllerutil.ContainsFinalizer(infraMachine, v1alpha1.MachineFinalizer) {
 		log.Info("adding finalizer")
 		controllerutil.AddFinalizer(infraMachine, v1alpha1.MachineFinalizer)
-		if err := r.Client.Update(ctx, infraMachine); err != nil {
-			return ctrl.Result{}, fmt.Errorf("unable to add finalizer: %w", err)
-		}
-
-		return ctrl.Result{}, nil
-	}
-
-	if infraCluster.Status.NodeNetworkID == nil {
-		// this should not happen because before setting this id the cluster status should not become ready, but we check it anyway
-		return ctrl.Result{}, errors.New("waiting until node network id was set to cluster status")
-	}
-
-	if infraCluster.Spec.ControlPlaneEndpoint.Host == "" {
-		return ctrl.Result{}, errors.New("waiting until control plane ip was set to cluster spec")
+	} else {
+		result, err = reconciler.reconcile()
 	}
 
-	if machine.Spec.Bootstrap.DataSecretName == nil {
-		return ctrl.Result{}, errors.New("waiting until bootstrap data secret was created")
+	updateErr := helper.Patch(ctx, infraMachine)
+	if updateErr != nil {
+		err = errors.Join(err, fmt.Errorf("failed to update infra machine: %w", updateErr))
 	}
 
-	err = reconciler.reconcile()
-
-	return ctrl.Result{}, err // remember to return err here and not nil because the defer func can influence this
+	return result, err
 }
 
 // SetupWithManager sets up the controller with the Manager.
 func (r *MetalStackMachineReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&infrastructurev1alpha1.MetalStackMachine{}).
+		For(&v1alpha1.MetalStackMachine{}).
 		Named("metalstackmachine").
 		Complete(r)
 }
 
-func (r *machineReconciler) reconcile() error {
+func (r *machineReconciler) reconcile() (ctrl.Result, error) {
+	if r.infraCluster.Spec.NodeNetworkID == nil {
+		// this should not happen because the infrastructure cluster should not become ready before this id is set, but we check it anyway
+		return ctrl.Result{}, errors.New("waiting until node network id was set to infrastructure cluster spec")
+	}
+
+	if r.infraCluster.Spec.ControlPlaneEndpoint.Host == "" {
+		return ctrl.Result{}, errors.New("waiting until control plane ip was set to infrastructure cluster spec")
+	}
+
+	if r.clusterMachine.Spec.Bootstrap.DataSecretName == nil {
+		return ctrl.Result{}, errors.New("waiting until bootstrap data secret was created")
+	}
+
+	r.log.Info("reconciling machine")
+
 	m, err := r.findProviderMachine()
 	if err != nil && !errors.Is(err, errProviderMachineNotFound) {
-		return err
+		conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
+		return ctrl.Result{}, err
 	}
 
 	if errors.Is(err, errProviderMachineNotFound) {
 		m, err = r.create()
 		if err != nil {
-			return fmt.Errorf("unable to create machine at provider: %w", err)
+			conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
+			return ctrl.Result{}, fmt.Errorf("unable to create machine at provider: %w", err)
 		}
 	}
+	conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineCreated)
 
 	if m.ID == nil {
-		return errors.New("machine allocated but got no provider ID")
+		return ctrl.Result{}, errors.New("machine allocated but got no provider ID")
 	}
+	r.infraMachine.Spec.ProviderID = "metal://" + *m.ID
 
-	r.log.Info("setting provider id into machine resource")
+	result := ctrl.Result{}
 
-	helper, err := patch.NewHelper(r.infraMachine, r.client)
+	isReady, err := r.getMachineStatus(m)
 	if err != nil {
-		return err
+		conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineHealthy, "NotHealthy", clusterv1.ConditionSeverityWarning, "%s", err)
+		result.RequeueAfter = defaultProviderMachineRequeueTime
+	} else {
+		conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineHealthy)
 	}
 
-	r.infraMachine.Spec.ProviderID = "metal://" + *m.ID
-
-	err = helper.Patch(r.ctx, r.infraMachine) // TODO:check whether patch is not executed when no changes occur
-	if err != nil {
-		return fmt.Errorf("failed to update infra machine provider ID %q: %w", r.infraMachine.Spec.ProviderID, err)
+	if isReady {
+		conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineReady)
+		r.infraMachine.Status.Ready = isReady
+	} else {
+		conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineReady, "NotReady", clusterv1.ConditionSeverityWarning, "machine is not in phoned home state")
+		result.RequeueAfter = defaultProviderMachineRequeueTime
 	}
 
-	return nil
+	r.infraMachine.Status.Addresses = r.getMachineAddresses(m)
+
+	return result, nil
 }
 
 func (r *machineReconciler) delete() error {
+	if !controllerutil.ContainsFinalizer(r.infraMachine, v1alpha1.MachineFinalizer) {
+		return nil
+	}
+
+	r.log.Info("reconciling resource deletion flow")
+
 	m, err := r.findProviderMachine()
 	if errors.Is(err, errProviderMachineNotFound) {
 		// metal-stack machine already freed
@@ -258,6 +253,9 @@ func (r *machineReconciler) delete() error {
 
 	r.log.Info("freed provider machine")
 
+	r.log.Info("deletion finished, removing finalizer")
+	controllerutil.RemoveFinalizer(r.infraMachine, v1alpha1.MachineFinalizer)
+
 	return nil
 }
 
@@ -278,7 +276,7 @@ func (r *machineReconciler) create() (*models.V1MachineResponse, error) {
 		nws = []*models.V1MachineAllocationNetwork{
 			{
 				Autoacquire: ptr.To(true),
-				Networkid:   r.infraCluster.Status.NodeNetworkID,
+				Networkid:   r.infraCluster.Spec.NodeNetworkID,
 			},
 		}
 	)
@@ -319,123 +317,59 @@ func (r *machineReconciler) create() (*models.V1MachineResponse, error) {
 	return resp.Payload, nil
 }
 
-func (r *machineReconciler) status() error {
-	var (
-		g, _             = errgroup.WithContext(r.ctx)
-		conditionUpdates = make(chan func())
-
-		// TODO: probably there is a helper for this available somewhere?
-		allConditionsTrue = func() bool {
-			for _, c := range r.infraMachine.Status.Conditions {
-				if c.Status != corev1.ConditionTrue {
-					return false
-				}
-			}
+func (r *machineReconciler) getMachineStatus(mr *models.V1MachineResponse) (bool, error) {
+	var errs []error
 
-			return true
-		}
-	)
+	switch l := ptr.Deref(mr.Liveliness, ""); l {
+	case "Alive":
+	default:
+		errs = append(errs, fmt.Errorf("machine is not alive but %q", l))
+	}
 
-	defer func() {
-		close(conditionUpdates)
-	}()
-
-	g.Go(func() error {
-		m, err := r.findProviderMachine()
-
-		conditionUpdates <- func() {
-			switch {
-			case err != nil && !errors.Is(err, errProviderMachineNotFound):
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "InternalError", clusterv1.ConditionSeverityError, "%s", err.Error())
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineHealthy, "NotHealthy", clusterv1.ConditionSeverityWarning, "machine not created")
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineReady, "NotReady", clusterv1.ConditionSeverityWarning, "machine not created")
-			case err != nil && errors.Is(err, errProviderMachineNotFound):
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "NotCreated", clusterv1.ConditionSeverityError, "%s", err.Error())
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineHealthy, "NotHealthy", clusterv1.ConditionSeverityWarning, "machine not created")
-				conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineReady, "NotReady", clusterv1.ConditionSeverityWarning, "machine not created")
-			default:
-				if r.infraMachine.Spec.ProviderID == "metal://"+*m.ID {
-					conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineCreated)
-				} else {
-					conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineCreated, "NotSet", clusterv1.ConditionSeverityWarning, "provider id was not yet patched into the machine's spec")
-				}
-
-				var errs []error
-
-				switch l := ptr.Deref(m.Liveliness, ""); l {
-				case "Alive":
-				default:
-					errs = append(errs, fmt.Errorf("machine is not alive but %q", l))
-				}
-
-				if m.Events != nil {
-					if m.Events.CrashLoop != nil && *m.Events.CrashLoop {
-						errs = append(errs, errors.New("machine is in a crash loop"))
-					}
-					if m.Events.FailedMachineReclaim != nil && *m.Events.FailedMachineReclaim {
-						errs = append(errs, errors.New("machine reclaim is failing"))
-					}
-				}
-
-				if m.Events != nil && len(m.Events.Log) > 0 && ptr.Deref(m.Events.Log[0].Event, "") == "Phoned Home" {
-					conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineReady)
-				} else {
-					conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineReady, "NotReady", clusterv1.ConditionSeverityWarning, "machine is not in phoned home state")
-				}
-
-				if len(errs) == 0 {
-					conditions.MarkTrue(r.infraMachine, v1alpha1.ProviderMachineHealthy)
-				} else {
-					conditions.MarkFalse(r.infraMachine, v1alpha1.ProviderMachineHealthy, "NotHealthy", clusterv1.ConditionSeverityWarning, "%s", errors.Join(errs...).Error())
-				}
-
-				r.infraMachine.Status.Addresses = nil
-
-				if m.Allocation.Hostname != nil {
-					r.infraMachine.Status.Addresses = append(r.infraMachine.Status.Addresses, clusterv1.MachineAddress{
-						Type:    clusterv1.MachineHostName,
-						Address: *m.Allocation.Hostname,
-					})
-				}
-
-				for _, nw := range m.Allocation.Networks {
-					switch ptr.Deref(nw.Networktype, "") {
-					case "privateprimaryunshared":
-						for _, ip := range nw.Ips {
-							r.infraMachine.Status.Addresses = append(r.infraMachine.Status.Addresses, clusterv1.MachineAddress{
-								Type:    clusterv1.MachineInternalIP,
-								Address: ip,
-							})
-						}
-					case "external":
-						for _, ip := range nw.Ips {
-							r.infraMachine.Status.Addresses = append(r.infraMachine.Status.Addresses, clusterv1.MachineAddress{
-								Type:    clusterv1.MachineExternalIP,
-								Address: ip,
-							})
-						}
-					}
-				}
-			}
+	if mr.Events != nil {
+		if mr.Events.CrashLoop != nil && *mr.Events.CrashLoop {
+			errs = append(errs, errors.New("machine is in a crash loop"))
 		}
+		if mr.Events.FailedMachineReclaim != nil && *mr.Events.FailedMachineReclaim {
+			errs = append(errs, errors.New("machine reclaim is failing"))
+		}
+	}
 
-		return err
-	})
+	isReady := mr.Events != nil && len(mr.Events.Log) > 0 && ptr.Deref(mr.Events.Log[0].Event, "") == "Phoned Home"
 
-	go func() {
-		for u := range conditionUpdates {
-			u()
-		}
-	}()
+	return isReady, errors.Join(errs...)
+}
 
-	groupErr := g.Wait()
-	if groupErr == nil && allConditionsTrue() {
-		r.infraMachine.Status.Ready = true
+func (r *machineReconciler) getMachineAddresses(m *models.V1MachineResponse) clusterv1.MachineAddresses {
+	var maddrs clusterv1.MachineAddresses
+
+	if m.Allocation.Hostname != nil {
+		maddrs = append(maddrs, clusterv1.MachineAddress{
+			Type:    clusterv1.MachineHostName,
+			Address: *m.Allocation.Hostname,
+		})
 	}
 
-	err := r.client.Status().Update(r.ctx, r.infraMachine)
+	for _, nw := range m.Allocation.Networks {
+		switch ptr.Deref(nw.Networktype, "") {
+		case "privateprimaryunshared":
+			for _, ip := range nw.Ips {
+				maddrs = append(maddrs, clusterv1.MachineAddress{
+					Type:    clusterv1.MachineInternalIP,
+					Address: ip,
+				})
+			}
+		case "external":
+			for _, ip := range nw.Ips {
+				maddrs = append(maddrs, clusterv1.MachineAddress{
+					Type:    clusterv1.MachineExternalIP,
+					Address: ip,
+				})
+			}
+		}
+	}
 
-	return errors.Join(groupErr, err)
+	return maddrs
 }
 
 func (r *machineReconciler) findProviderMachine() (*models.V1MachineResponse, error) {
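
The central refactor in this diff is the same in both controllers: the deferred status() updaters are replaced by cluster-api's patch helper, which snapshots the object before any mutation, lets the reconciler mutate spec, conditions, and status freely in memory, and persists everything with a single Patch call at the end. The sketch below illustrates that flow under stated assumptions: reconcileWithPatchHelper and the doReconcile callback are hypothetical names, not code from this changeset, while patch.NewHelper, helper.Patch, and the conditions helpers are the actual cluster-api utilities used above.

package controller

import (
	"context"
	"errors"
	"fmt"

	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"

	"github.com/metal-stack/cluster-api-provider-metal-stack/api/v1alpha1"
)

// reconcileWithPatchHelper is a minimal, hypothetical sketch of the
// single-patch reconcile flow adopted by this diff.
func reconcileWithPatchHelper(ctx context.Context, c client.Client, infraCluster *v1alpha1.MetalStackCluster, doReconcile func(*v1alpha1.MetalStackCluster) error) (ctrl.Result, error) {
	// Snapshot the object before mutating it; Patch later computes the
	// difference against this snapshot.
	helper, err := patch.NewHelper(infraCluster, c)
	if err != nil {
		return ctrl.Result{}, err
	}

	if reconcileErr := doReconcile(infraCluster); reconcileErr != nil {
		// Failed work is recorded as a condition and still flushed by the
		// patch below, so the failure is visible on the resource.
		conditions.MarkFalse(infraCluster, v1alpha1.ClusterNodeNetworkEnsured,
			"InternalError", clusterv1.ConditionSeverityError, "%s", reconcileErr.Error())
		err = reconcileErr
	} else {
		conditions.MarkTrue(infraCluster, v1alpha1.ClusterNodeNetworkEnsured)
		infraCluster.Status.Ready = true
	}

	// One patch persists spec back-fills, conditions, and readiness together
	// (including the status subresource), so no intermediate state is written.
	if patchErr := helper.Patch(ctx, infraCluster); patchErr != nil {
		err = errors.Join(err, fmt.Errorf("failed to patch infra cluster: %w", patchErr))
	}

	return ctrl.Result{}, err
}

One consequence of this shape, visible in the machine controller above, is that retry timing is expressed through the returned ctrl.Result (RequeueAfter with defaultProviderMachineRequeueTime) rather than through the synthetic "not yet ready, requeuing" errors the removed defer blocks produced.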