diff --git a/Makefile b/Makefile
index 7beef49e8f46..aa5e8686ca91 100644
--- a/Makefile
+++ b/Makefile
@@ -603,6 +603,7 @@ generate-e2e-templates-v1.11: $(KUSTOMIZE)
 generate-e2e-templates-main: $(KUSTOMIZE)
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-md-remediation.yaml
+	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-mp-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-mp-remediation.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-remediation.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step1 --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
diff --git a/api/core/v1beta1/conversion.go b/api/core/v1beta1/conversion.go
index 9f67740c47ac..3c37703283e5 100644
--- a/api/core/v1beta1/conversion.go
+++ b/api/core/v1beta1/conversion.go
@@ -1854,6 +1854,10 @@ func Convert_v1beta1_MachinePoolSpec_To_v1beta2_MachinePoolSpec(in *MachinePoolS
 	return autoConvert_v1beta1_MachinePoolSpec_To_v1beta2_MachinePoolSpec(in, out, s)
 }
 
+func Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in *clusterv1.MachinePoolSpec, out *MachinePoolSpec, s apimachineryconversion.Scope) error {
+	return autoConvert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in, out, s)
+}
+
 func Convert_v1beta1_ClusterClassStatusVariableDefinition_To_v1beta2_ClusterClassStatusVariableDefinition(in *ClusterClassStatusVariableDefinition, out *clusterv1.ClusterClassStatusVariableDefinition, s apimachineryconversion.Scope) error {
 	if err := autoConvert_v1beta1_ClusterClassStatusVariableDefinition_To_v1beta2_ClusterClassStatusVariableDefinition(in, out, s); err != nil {
 		return err
diff --git a/api/core/v1beta1/conversion_test.go b/api/core/v1beta1/conversion_test.go
index 10d557048460..098b8e45e5f5 100644
--- a/api/core/v1beta1/conversion_test.go
+++ b/api/core/v1beta1/conversion_test.go
@@ -781,6 +781,7 @@ func spokeObjectReference(in *corev1.ObjectReference, c randfill.Continue) {
 
 func MachinePoolFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} {
 	return []interface{}{
+		hubMachinePoolSpec,
 		hubMachinePoolStatus,
 		hubMachineSpec,
 		spokeMachinePool,
@@ -789,6 +790,13 @@ func MachinePoolFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} {
 	}
 }
 
+func hubMachinePoolSpec(in *clusterv1.MachinePoolSpec, c randfill.Continue) {
+	c.FillNoCustom(in)
+
+	// Re-added in v1beta2 with a different type than in the v1alpha versions, no conversion possible
+	in.Remediation = clusterv1.MachinePoolRemediationSpec{}
+}
+
 func hubMachinePoolStatus(in *clusterv1.MachinePoolStatus, c randfill.Continue) {
 	c.FillNoCustom(in)
 	// Always create struct with at least one mandatory fields.
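Note on the fuzz function above: it has to zero spec.remediation because v1beta1 has no field to carry it, so a v1beta2 to v1beta1 to v1beta2 round trip silently drops the value. A minimal sketch of that loss follows (hypothetical example code, not part of the patch; the nil conversion scope and ignored errors are simplifications for illustration):

```go
package v1beta1

import (
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"

	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
)

// ExampleRemediationRoundTripLoss illustrates why hubMachinePoolSpec zeroes
// Remediation: the v1beta1 spoke type has no equivalent field, so the value
// cannot survive a hub -> spoke -> hub round trip.
func ExampleRemediationRoundTripLoss() {
	hub := clusterv1.MachinePoolSpec{
		Remediation: clusterv1.MachinePoolRemediationSpec{
			MaxInFlight: ptr.To(intstr.FromString("20%")),
		},
	}

	// Hub -> spoke: Remediation is dropped, as flagged by the
	// "requires manual conversion" warning in the generated code.
	spoke := MachinePoolSpec{}
	_ = Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(&hub, &spoke, nil)

	// Spoke -> hub: restored.Remediation is now the zero value; the
	// original "20%" setting is gone, so the fuzzer must clear the field
	// before comparing round-trip results.
	restored := clusterv1.MachinePoolSpec{}
	_ = Convert_v1beta1_MachinePoolSpec_To_v1beta2_MachinePoolSpec(&spoke, &restored, nil)
}
```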
diff --git a/api/core/v1beta1/machinepool_types.go b/api/core/v1beta1/machinepool_types.go index 5ecd8fa227c9..5f8515743f75 100644 --- a/api/core/v1beta1/machinepool_types.go +++ b/api/core/v1beta1/machinepool_types.go @@ -104,6 +104,17 @@ type MachinePoolSpec struct { FailureDomains []string `json:"failureDomains,omitempty"` } +// MachinePoolStrategy describes how to replace existing machines +// with new ones. +type MachinePoolStrategy struct { + // remediation controls the strategy of remediating unhealthy machines + // as marked by a MachineHealthCheck. This only applies to infrastructure + // providers supporting "MachinePool Machines". For other providers, + // no remediation is done. + // +optional + Remediation *RemediationStrategy `json:"remediation,omitempty"` +} + // MachinePoolStatus defines the observed state of MachinePool. type MachinePoolStatus struct { // nodeRefs will point to the corresponding Nodes if it they exist. diff --git a/api/core/v1beta1/zz_generated.conversion.go b/api/core/v1beta1/zz_generated.conversion.go index 00cf25f842ad..2cad1a403e1a 100644 --- a/api/core/v1beta1/zz_generated.conversion.go +++ b/api/core/v1beta1/zz_generated.conversion.go @@ -339,11 +339,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*MachinePoolVariables)(nil), (*v1beta2.MachinePoolVariables)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_MachinePoolVariables_To_v1beta2_MachinePoolVariables(a.(*MachinePoolVariables), b.(*v1beta2.MachinePoolVariables), scope) }); err != nil { @@ -874,6 +869,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta2.MachinePoolStatus)(nil), (*MachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta2_MachinePoolStatus_To_v1beta1_MachinePoolStatus(a.(*v1beta2.MachinePoolStatus), b.(*MachinePoolStatus), scope) }); err != nil { @@ -2803,14 +2803,10 @@ func autoConvert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in *v1beta2. } out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) + // WARNING: in.Remediation requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec is an autogenerated conversion function. 
-func Convert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in *v1beta2.MachinePoolSpec, out *MachinePoolSpec, s conversion.Scope) error { - return autoConvert_v1beta2_MachinePoolSpec_To_v1beta1_MachinePoolSpec(in, out, s) -} - func autoConvert_v1beta1_MachinePoolStatus_To_v1beta2_MachinePoolStatus(in *MachinePoolStatus, out *v1beta2.MachinePoolStatus, s conversion.Scope) error { out.NodeRefs = *(*[]corev1.ObjectReference)(unsafe.Pointer(&in.NodeRefs)) if err := v1.Convert_int32_To_Pointer_int32(&in.Replicas, &out.Replicas, s); err != nil { diff --git a/api/core/v1beta1/zz_generated.deepcopy.go b/api/core/v1beta1/zz_generated.deepcopy.go index a1b090669cb4..81aaaa8526b7 100644 --- a/api/core/v1beta1/zz_generated.deepcopy.go +++ b/api/core/v1beta1/zz_generated.deepcopy.go @@ -2213,6 +2213,26 @@ func (in *MachinePoolStatus) DeepCopy() *MachinePoolStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachinePoolStrategy) DeepCopyInto(out *MachinePoolStrategy) { + *out = *in + if in.Remediation != nil { + in, out := &in.Remediation, &out.Remediation + *out = new(RemediationStrategy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachinePoolStrategy. +func (in *MachinePoolStrategy) DeepCopy() *MachinePoolStrategy { + if in == nil { + return nil + } + out := new(MachinePoolStrategy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MachinePoolTopology) DeepCopyInto(out *MachinePoolTopology) { *out = *in diff --git a/api/core/v1beta1/zz_generated.openapi.go b/api/core/v1beta1/zz_generated.openapi.go index 723cf5a90c93..2ab3a737e5a3 100644 --- a/api/core/v1beta1/zz_generated.openapi.go +++ b/api/core/v1beta1/zz_generated.openapi.go @@ -97,6 +97,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolList": schema_cluster_api_api_core_v1beta1_MachinePoolList(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolSpec": schema_cluster_api_api_core_v1beta1_MachinePoolSpec(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolStatus": schema_cluster_api_api_core_v1beta1_MachinePoolStatus(ref), + "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolStrategy": schema_cluster_api_api_core_v1beta1_MachinePoolStrategy(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolTopology": schema_cluster_api_api_core_v1beta1_MachinePoolTopology(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolV1Beta2Status": schema_cluster_api_api_core_v1beta1_MachinePoolV1Beta2Status(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.MachinePoolVariables": schema_cluster_api_api_core_v1beta1_MachinePoolVariables(ref), @@ -3896,6 +3897,27 @@ func schema_cluster_api_api_core_v1beta1_MachinePoolStatus(ref common.ReferenceC } } +func schema_cluster_api_api_core_v1beta1_MachinePoolStrategy(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "MachinePoolStrategy describes how to replace existing machines with new ones.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "remediation": { + SchemaProps: spec.SchemaProps{ + Description: "remediation controls the strategy of remediating unhealthy machines as marked by a 
MachineHealthCheck. This only applies to infrastructure providers supporting \"MachinePool Machines\". For other providers, no remediation is done.", + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta1.RemediationStrategy"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "sigs.k8s.io/cluster-api/api/core/v1beta1.RemediationStrategy"}, + } +} + func schema_cluster_api_api_core_v1beta1_MachinePoolTopology(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/api/core/v1beta2/machinepool_types.go b/api/core/v1beta2/machinepool_types.go index a6d16aff1467..cfb86423ab1b 100644 --- a/api/core/v1beta2/machinepool_types.go +++ b/api/core/v1beta2/machinepool_types.go @@ -19,6 +19,7 @@ package v1beta2 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" capierrors "sigs.k8s.io/cluster-api/errors" ) @@ -97,6 +98,29 @@ type MachinePoolSpec struct { // +kubebuilder:validation:items:MinLength=1 // +kubebuilder:validation:items:MaxLength=256 FailureDomains []string `json:"failureDomains,omitempty"` + + // remediation controls how unhealthy Machines are remediated (through a MachineHealthCheck). + // This only applies to infrastructure providers supporting and enabling the + // "MachinePool Machines" feature. For other setups, no remediation is done. + // +optional + Remediation MachinePoolRemediationSpec `json:"remediation,omitempty,omitzero"` +} + +// MachinePoolRemediationSpec controls how unhealthy Machines are remediated (through a MachineHealthCheck). +// This only applies to infrastructure providers supporting and enabling the +// "MachinePool Machines" feature. For other setups, no remediation is done. +// +kubebuilder:validation:MinProperties=1 +type MachinePoolRemediationSpec struct { + // maxInFlight determines how many in flight remediations should happen at the same time. + // + // MaxInFlight can be set to a fixed number or a percentage. + // Example: when this is set to 20%, the MachinePool controller deletes at most 20% of + // the desired replicas. + // + // If not set, remediation is limited to all machines under the active MachinePool's management. + // + // +optional + MaxInFlight *intstr.IntOrString `json:"maxInFlight,omitempty"` } // MachinePoolStatus defines the observed state of MachinePool. diff --git a/api/core/v1beta2/zz_generated.deepcopy.go b/api/core/v1beta2/zz_generated.deepcopy.go index 0c5ee6208d3c..b79968cdd986 100644 --- a/api/core/v1beta2/zz_generated.deepcopy.go +++ b/api/core/v1beta2/zz_generated.deepcopy.go @@ -2977,6 +2977,26 @@ func (in *MachinePoolList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MachinePoolRemediationSpec) DeepCopyInto(out *MachinePoolRemediationSpec) { + *out = *in + if in.MaxInFlight != nil { + in, out := &in.MaxInFlight, &out.MaxInFlight + *out = new(intstr.IntOrString) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachinePoolRemediationSpec. +func (in *MachinePoolRemediationSpec) DeepCopy() *MachinePoolRemediationSpec { + if in == nil { + return nil + } + out := new(MachinePoolRemediationSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *MachinePoolSpec) DeepCopyInto(out *MachinePoolSpec) { *out = *in @@ -2996,6 +3016,7 @@ func (in *MachinePoolSpec) DeepCopyInto(out *MachinePoolSpec) { *out = make([]string, len(*in)) copy(*out, *in) } + in.Remediation.DeepCopyInto(&out.Remediation) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachinePoolSpec. diff --git a/api/core/v1beta2/zz_generated.openapi.go b/api/core/v1beta2/zz_generated.openapi.go index 42744b6836e6..0f62aa8ee2c7 100644 --- a/api/core/v1beta2/zz_generated.openapi.go +++ b/api/core/v1beta2/zz_generated.openapi.go @@ -147,6 +147,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolDeprecatedStatus": schema_cluster_api_api_core_v1beta2_MachinePoolDeprecatedStatus(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolInitializationStatus": schema_cluster_api_api_core_v1beta2_MachinePoolInitializationStatus(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolList": schema_cluster_api_api_core_v1beta2_MachinePoolList(ref), + "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolRemediationSpec": schema_cluster_api_api_core_v1beta2_MachinePoolRemediationSpec(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolSpec": schema_cluster_api_api_core_v1beta2_MachinePoolSpec(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolStatus": schema_cluster_api_api_core_v1beta2_MachinePoolStatus(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolTopology": schema_cluster_api_api_core_v1beta2_MachinePoolTopology(ref), @@ -5193,6 +5194,27 @@ func schema_cluster_api_api_core_v1beta2_MachinePoolList(ref common.ReferenceCal } } +func schema_cluster_api_api_core_v1beta2_MachinePoolRemediationSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "MachinePoolRemediationSpec controls how unhealthy Machines are remediated (through a MachineHealthCheck). This only applies to infrastructure providers supporting and enabling the \"MachinePool Machines\" feature. For other setups, no remediation is done.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "maxInFlight": { + SchemaProps: spec.SchemaProps{ + Description: "maxInFlight determines how many in flight remediations should happen at the same time.\n\nMaxInFlight can be set to a fixed number or a percentage. Example: when this is set to 20%, the MachinePool controller deletes at most 20% of the desired replicas.\n\nIf not set, remediation is limited to all machines under the active MachinePool's management.", + Ref: ref("k8s.io/apimachinery/pkg/util/intstr.IntOrString"), + }, + }, + }, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/util/intstr.IntOrString"}, + } +} + func schema_cluster_api_api_core_v1beta2_MachinePoolSpec(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -5261,12 +5283,19 @@ func schema_cluster_api_api_core_v1beta2_MachinePoolSpec(ref common.ReferenceCal }, }, }, + "remediation": { + SchemaProps: spec.SchemaProps{ + Description: "remediation controls how unhealthy Machines are remediated (through a MachineHealthCheck). This only applies to infrastructure providers supporting and enabling the \"MachinePool Machines\" feature. 
For other setups, no remediation is done.", + Default: map[string]interface{}{}, + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolRemediationSpec"), + }, + }, }, Required: []string{"clusterName", "template"}, }, }, Dependencies: []string{ - "sigs.k8s.io/cluster-api/api/core/v1beta2.MachineTemplateSpec"}, + "sigs.k8s.io/cluster-api/api/core/v1beta2.MachinePoolRemediationSpec", "sigs.k8s.io/cluster-api/api/core/v1beta2.MachineTemplateSpec"}, } } diff --git a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml index 9f287129fd04..b847ec939453 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinepools.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinepools.yaml @@ -1706,6 +1706,27 @@ spec: maxItems: 10000 type: array x-kubernetes-list-type: atomic + remediation: + description: |- + remediation controls how unhealthy Machines are remediated (through a MachineHealthCheck). + This only applies to infrastructure providers supporting and enabling the + "MachinePool Machines" feature. For other setups, no remediation is done. + minProperties: 1 + properties: + maxInFlight: + anyOf: + - type: integer + - type: string + description: |- + maxInFlight determines how many in flight remediations should happen at the same time. + + MaxInFlight can be set to a fixed number or a percentage. + Example: when this is set to 20%, the MachinePool controller deletes at most 20% of + the desired replicas. + + If not set, remediation is limited to all machines under the active MachinePool's management. + x-kubernetes-int-or-string: true + type: object replicas: description: |- replicas is the number of desired machines. Defaults to 1. diff --git a/go.mod b/go.mod index 32cbce0bcfde..e9d1d6ac8ef5 100644 --- a/go.mod +++ b/go.mod @@ -34,7 +34,7 @@ require ( go.etcd.io/etcd/client/pkg/v3 v3.6.5 go.etcd.io/etcd/client/v3 v3.6.5 go.uber.org/zap v1.27.0 - golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 golang.org/x/oauth2 v0.32.0 golang.org/x/text v0.30.0 gomodules.xyz/jsonpatch/v2 v2.5.0 diff --git a/internal/api/core/v1alpha3/conversion.go b/internal/api/core/v1alpha3/conversion.go index 8cda6f6a25f1..4d8cf01b8c0a 100644 --- a/internal/api/core/v1alpha3/conversion.go +++ b/internal/api/core/v1alpha3/conversion.go @@ -655,6 +655,16 @@ func (dst *MachinePool) ConvertFrom(srcRaw conversion.Hub) error { return utilconversion.MarshalData(src, dst) } +func Convert_v1alpha3_MachineDeploymentStrategy_To_v1beta2_MachinePoolStrategy(in *MachineDeploymentStrategy, out *clusterv1.MachinePoolRemediationSpec, _ apimachineryconversion.Scope) error { + // Subfields differ in v1beta2, no conversion possible + out.MaxInFlight = nil + return nil +} + +func Convert_v1beta2_MachinePoolStrategy_To_v1alpha3_MachineDeploymentStrategy(in *clusterv1.MachinePoolRemediationSpec, out *MachineDeploymentStrategy, _ apimachineryconversion.Scope) error { + return nil +} + func Convert_v1beta2_MachineSetStatus_To_v1alpha3_MachineSetStatus(in *clusterv1.MachineSetStatus, out *MachineSetStatus, _ apimachineryconversion.Scope) error { // Status.Conditions was introduced in v1alpha4, thus requiring a custom conversion function; the values is going to be preserved in an annotation thus allowing roundtrip without loosing informations // V1Beta2 was added in v1beta1. 
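For reference, the new spec.remediation field introduced in api/core/v1beta2/machinepool_types.go above accepts maxInFlight either as an absolute count or as a percentage of desired replicas, mirroring the existing MachineSet remediation API. A hedged sketch of both forms (illustrative values, not part of the patch):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"

	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
)

func main() {
	// Percentage form: at most 20% of the desired replicas (here: 2 of 10
	// machines) are remediated concurrently.
	spec := clusterv1.MachinePoolSpec{
		Replicas: ptr.To[int32](10),
		Remediation: clusterv1.MachinePoolRemediationSpec{
			MaxInFlight: ptr.To(intstr.FromString("20%")),
		},
	}

	// Fixed form: never more than two concurrent remediations, regardless
	// of the replica count.
	spec.Remediation.MaxInFlight = ptr.To(intstr.FromInt32(2))

	fmt.Println(spec.Remediation.MaxInFlight)
}
```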
@@ -973,7 +983,10 @@ func Convert_v1alpha3_MachineSetSpec_To_v1beta2_MachineSetSpec(in *MachineSetSpe return nil } -// Convert_v1alpha3_MachinePoolSpec_To_v1beta2_MachinePoolSpec is an autogenerated conversion function. +func Convert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in *clusterv1.MachinePoolSpec, out *MachinePoolSpec, s apimachineryconversion.Scope) error { + return autoConvert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in, out, s) +} + func Convert_v1alpha3_MachinePoolSpec_To_v1beta2_MachinePoolSpec(in *MachinePoolSpec, out *clusterv1.MachinePoolSpec, s apimachineryconversion.Scope) error { return autoConvert_v1alpha3_MachinePoolSpec_To_v1beta2_MachinePoolSpec(in, out, s) } diff --git a/internal/api/core/v1alpha3/conversion_test.go b/internal/api/core/v1alpha3/conversion_test.go index 49ee38de30bb..80b837bbdaa1 100644 --- a/internal/api/core/v1alpha3/conversion_test.go +++ b/internal/api/core/v1alpha3/conversion_test.go @@ -119,6 +119,13 @@ func hubMachineSpec(in *clusterv1.MachineSpec, c randfill.Continue) { } } +func hubMachinePoolSpec(in *clusterv1.MachinePoolSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // Subfields differ in v1beta2, no conversion possible + in.Remediation = clusterv1.MachinePoolRemediationSpec{} +} + func hubMachineStatus(in *clusterv1.MachineStatus, c randfill.Continue) { c.FillNoCustom(in) // Drop empty structs with only omit empty fields. @@ -470,6 +477,7 @@ func MachinePoolFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { spokeBootstrap, spokeObjectMeta, spokeMachinePoolSpec, + hubMachinePoolSpec, hubMachinePoolStatus, spokeMachineSpec, } @@ -503,8 +511,7 @@ func spokeMachinePool(in *MachinePool, c randfill.Continue) { func spokeMachinePoolSpec(in *MachinePoolSpec, c randfill.Continue) { c.FillNoCustom(in) - // These fields have been removed in v1beta1 - // data is going to be lost, so we're forcing zero values here. 
+ // Subfields differ in v1beta2, no conversion possible in.Strategy = nil } diff --git a/internal/api/core/v1alpha3/zz_generated.conversion.go b/internal/api/core/v1alpha3/zz_generated.conversion.go index 0c9c762839fe..f192e5e72019 100644 --- a/internal/api/core/v1alpha3/zz_generated.conversion.go +++ b/internal/api/core/v1alpha3/zz_generated.conversion.go @@ -164,11 +164,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*MachineSet)(nil), (*v1beta2.MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha3_MachineSet_To_v1beta2_MachineSet(a.(*MachineSet), b.(*v1beta2.MachineSet), scope) }); err != nil { @@ -349,6 +344,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta2.MachinePoolStatus)(nil), (*MachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta2_MachinePoolStatus_To_v1alpha3_MachinePoolStatus(a.(*v1beta2.MachinePoolStatus), b.(*MachinePoolStatus), scope) }); err != nil { @@ -1090,14 +1090,10 @@ func autoConvert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in *v1beta2 } out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) + // WARNING: in.Remediation requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec is an autogenerated conversion function. -func Convert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in *v1beta2.MachinePoolSpec, out *MachinePoolSpec, s conversion.Scope) error { - return autoConvert_v1beta2_MachinePoolSpec_To_v1alpha3_MachinePoolSpec(in, out, s) -} - func autoConvert_v1alpha3_MachinePoolStatus_To_v1beta2_MachinePoolStatus(in *MachinePoolStatus, out *v1beta2.MachinePoolStatus, s conversion.Scope) error { out.NodeRefs = *(*[]corev1.ObjectReference)(unsafe.Pointer(&in.NodeRefs)) if err := v1.Convert_int32_To_Pointer_int32(&in.Replicas, &out.Replicas, s); err != nil { diff --git a/internal/api/core/v1alpha4/conversion.go b/internal/api/core/v1alpha4/conversion.go index 57ac08a4ea98..f30c294995c7 100644 --- a/internal/api/core/v1alpha4/conversion.go +++ b/internal/api/core/v1alpha4/conversion.go @@ -790,6 +790,10 @@ func Convert_v1alpha4_LocalObjectTemplate_To_v1beta2_InfrastructureClass(in *Loc return nil } +func Convert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *clusterv1.MachinePoolSpec, out *MachinePoolSpec, s apimachineryconversion.Scope) error { + return autoConvert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in, out, s) +} + func Convert_v1beta2_MachineSpec_To_v1alpha4_MachineSpec(in *clusterv1.MachineSpec, out *MachineSpec, s apimachineryconversion.Scope) error { // spec.nodeDeletionTimeout was added in v1beta1. 
// ReadinessGates was added in v1beta1. diff --git a/internal/api/core/v1alpha4/conversion_test.go b/internal/api/core/v1alpha4/conversion_test.go index 126009a1d891..32106e0122cf 100644 --- a/internal/api/core/v1alpha4/conversion_test.go +++ b/internal/api/core/v1alpha4/conversion_test.go @@ -538,11 +538,19 @@ func MachinePoolFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { return []interface{}{ hubMachineSpec, spokeMachinePool, + hubMachinePoolSpec, hubMachinePoolStatus, spokeMachineSpec, } } +func hubMachinePoolSpec(in *clusterv1.MachinePoolSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // Subfields differ in v1beta2, no conversion possible + in.Remediation = clusterv1.MachinePoolRemediationSpec{} +} + func hubMachinePoolStatus(in *clusterv1.MachinePoolStatus, c randfill.Continue) { c.FillNoCustom(in) // Always create struct with at least one mandatory fields. diff --git a/internal/api/core/v1alpha4/zz_generated.conversion.go b/internal/api/core/v1alpha4/zz_generated.conversion.go index aca82dc6be1d..177763841b73 100644 --- a/internal/api/core/v1alpha4/zz_generated.conversion.go +++ b/internal/api/core/v1alpha4/zz_generated.conversion.go @@ -204,11 +204,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*MachineSet)(nil), (*v1beta2.MachineSet)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_MachineSet_To_v1beta2_MachineSet(a.(*MachineSet), b.(*v1beta2.MachineSet), scope) }); err != nil { @@ -479,6 +474,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1beta2.MachinePoolSpec)(nil), (*MachinePoolSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(a.(*v1beta2.MachinePoolSpec), b.(*MachinePoolSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1beta2.MachinePoolStatus)(nil), (*MachinePoolStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta2_MachinePoolStatus_To_v1alpha4_MachinePoolStatus(a.(*v1beta2.MachinePoolStatus), b.(*MachinePoolStatus), scope) }); err != nil { @@ -1460,14 +1460,10 @@ func autoConvert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *v1beta2 } out.ProviderIDList = *(*[]string)(unsafe.Pointer(&in.ProviderIDList)) out.FailureDomains = *(*[]string)(unsafe.Pointer(&in.FailureDomains)) + // WARNING: in.Remediation requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec is an autogenerated conversion function. 
-func Convert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in *v1beta2.MachinePoolSpec, out *MachinePoolSpec, s conversion.Scope) error { - return autoConvert_v1beta2_MachinePoolSpec_To_v1alpha4_MachinePoolSpec(in, out, s) -} - func autoConvert_v1alpha4_MachinePoolStatus_To_v1beta2_MachinePoolStatus(in *MachinePoolStatus, out *v1beta2.MachinePoolStatus, s conversion.Scope) error { out.NodeRefs = *(*[]corev1.ObjectReference)(unsafe.Pointer(&in.NodeRefs)) if err := v1.Convert_int32_To_Pointer_int32(&in.Replicas, &out.Replicas, s); err != nil { diff --git a/internal/controllers/machinepool/machinepool_controller_phases.go b/internal/controllers/machinepool/machinepool_controller_phases.go index 07596b643b41..146307dd09da 100644 --- a/internal/controllers/machinepool/machinepool_controller_phases.go +++ b/internal/controllers/machinepool/machinepool_controller_phases.go @@ -20,14 +20,17 @@ import ( "context" "fmt" "reflect" + "sort" "time" "github.com/pkg/errors" + "golang.org/x/exp/slices" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" "k8s.io/utils/ptr" @@ -43,6 +46,8 @@ import ( "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/collections" + "sigs.k8s.io/cluster-api/util/conditions" v1beta1conditions "sigs.k8s.io/cluster-api/util/conditions/deprecated/v1beta1" "sigs.k8s.io/cluster-api/util/labels" "sigs.k8s.io/cluster-api/util/labels/format" @@ -303,7 +308,10 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr // Get the nodeRefsMap from the cluster. s.nodeRefMap, getNodeRefsErr = r.getNodeRefMap(ctx, clusterClient) - err = r.reconcileMachines(ctx, s, infraConfig) + res := ctrl.Result{} + + reconcileMachinesRes, err := r.reconcileMachines(ctx, s, infraConfig) + res = util.LowestNonZeroResult(res, reconcileMachinesRes) if err != nil || getNodeRefsErr != nil { return ctrl.Result{}, kerrors.NewAggregate([]error{errors.Wrapf(err, "failed to reconcile Machines for MachinePool %s", klog.KObj(mp)), errors.Wrapf(getNodeRefsErr, "failed to get nodeRefs for MachinePool %s", klog.KObj(mp))}) @@ -311,7 +319,7 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr if !ptr.Deref(mp.Status.Initialization.InfrastructureProvisioned, false) { log.Info("Infrastructure provider is not yet ready", infraConfig.GetKind(), klog.KObj(infraConfig)) - return ctrl.Result{}, nil + return res, nil } var providerIDList []string @@ -346,7 +354,7 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr mp.Status.Deprecated.V1Beta1.UnavailableReplicas = ptr.Deref(mp.Status.Replicas, 0) } - return ctrl.Result{}, nil + return res, nil } // reconcileMachines reconciles Machines associated with a MachinePool. @@ -356,7 +364,7 @@ func (r *Reconciler) reconcileInfrastructure(ctx context.Context, s *scope) (ctr // infrastructure is created accordingly. // Note: When supported by the cloud provider implementation of the MachinePool, machines will provide a means to interact // with the corresponding infrastructure (e.g. delete a specific machine in case MachineHealthCheck detects it is unhealthy). 
-func (r *Reconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) error { +func (r *Reconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) (ctrl.Result, error) { log := ctrl.LoggerFrom(ctx) mp := s.machinePool @@ -364,10 +372,10 @@ func (r *Reconciler) reconcileMachines(ctx context.Context, s *scope, infraMachi if err := util.UnstructuredUnmarshalField(infraMachinePool, &infraMachineKind, "status", "infrastructureMachineKind"); err != nil { if errors.Is(err, util.ErrUnstructuredFieldNotFound) { log.V(4).Info("MachinePool Machines not supported, no infraMachineKind found") - return nil + return ctrl.Result{}, nil } - return errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp)) + return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp)) } infraMachineSelector := metav1.LabelSelector{ @@ -384,7 +392,7 @@ func (r *Reconciler) reconcileMachines(ctx context.Context, s *scope, infraMachi infraMachineList.SetAPIVersion(infraMachinePool.GetAPIVersion()) infraMachineList.SetKind(infraMachineKind + "List") if err := r.Client.List(ctx, &infraMachineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil { - return errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + return ctrl.Result{}, errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) } // Add watcher for infraMachine, if there isn't one already; this will allow this controller to reconcile @@ -395,21 +403,26 @@ func (r *Reconciler) reconcileMachines(ctx context.Context, s *scope, infraMachi // Add watcher for infraMachine, if there isn't one already. if err := r.externalTracker.Watch(log, sampleInfraMachine, handler.EnqueueRequestsFromMapFunc(r.infraMachineToMachinePoolMapper), predicates.ResourceIsChanged(r.Client.Scheme(), *r.externalTracker.PredicateLogger)); err != nil { - return err + return ctrl.Result{}, err } // Get the list of machines managed by this controller, and align it with the infra machines managed by // the InfraMachinePool controller. machineList := &clusterv1.MachineList{} if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil { - return err + return ctrl.Result{}, err } if err := r.createOrUpdateMachines(ctx, s, machineList.Items, infraMachineList.Items); err != nil { - return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) + return ctrl.Result{}, errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace) } - return nil + res, err := r.reconcileUnhealthyMachinePoolMachines(ctx, s, machineList.Items) + if err != nil { + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile unhealthy machines for MachinePool %s", klog.KObj(mp)) + } + + return res, nil } // createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef. 
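The reconcileUnhealthyMachinePoolMachines helper added in the next hunk turns maxInFlight into a concrete remediation budget via intstr.GetScaledValueFromIntOrPercent (a real apimachinery helper). A standalone sketch of that arithmetic, with illustrative values:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// Percentage values are scaled against the desired replica count:
	// 20% of 10 replicas, rounding up, yields a budget of 2 concurrent
	// remediations.
	pct := intstr.FromString("20%")
	budget, err := intstr.GetScaledValueFromIntOrPercent(&pct, 10, true)
	if err != nil {
		panic(err)
	}
	fmt.Println(budget) // 2

	// Plain integers are taken verbatim, independent of the replica count.
	fixed := intstr.FromInt32(3)
	budget, _ = intstr.GetScaledValueFromIntOrPercent(&fixed, 10, true)
	fmt.Println(budget) // 3
}
```

Machines already being deleted but still marked OwnerRemediated count against this budget as "in flight"; once the budget is exhausted, the controller requeues instead of deleting more machines.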
@@ -606,3 +619,119 @@ func (r *Reconciler) getNodeRefMap(ctx context.Context, c client.Client) (map[st return nodeRefsMap, nil } + +func (r *Reconciler) reconcileUnhealthyMachinePoolMachines(ctx context.Context, s *scope, machines []clusterv1.Machine) (ctrl.Result, error) { + if len(machines) == 0 { + return ctrl.Result{}, nil + } + + log := ctrl.LoggerFrom(ctx) + mp := s.machinePool + + machinesWithHealthCheck := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool { + return !conditions.Has(&machine, clusterv1.MachineHealthCheckSucceededCondition) + }) + if len(machinesWithHealthCheck) == 0 { + // This means there is no MachineHealthCheck selecting any machines + // of this machine pool. In this case, do not requeue so often, + // but still check regularly in case a MachineHealthCheck became + // deployed or activated. This long interval shouldn't be a problem + // at cluster creation, since newly-created nodes should anyway + // trigger MachinePool reconciliation as the infrastructure provider + // creates the InfraMachines. + log.V(4).Info("Skipping reconciliation of unhealthy MachinePool machines because there are no health-checked machines") + return ctrl.Result{RequeueAfter: 10 * time.Minute}, nil + } + + unhealthyMachines := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool { + return !collections.IsUnhealthyAndOwnerRemediated(&machine) + }) + log.V(4).Info("Reconciling unhealthy MachinePool machines", "unhealthyMachines", len(unhealthyMachines)) + + // Calculate how many in flight machines we should remediate. + // By default, we allow all machines to be remediated at the same time. + maxInFlight := len(unhealthyMachines) + if mp.Spec.Remediation.MaxInFlight != nil { + var err error + replicas := int(ptr.Deref(mp.Spec.Replicas, 1)) + maxInFlight, err = intstr.GetScaledValueFromIntOrPercent(mp.Spec.Remediation.MaxInFlight, replicas, true) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed to calculate maxInFlight to remediate machines: %v", err) + } + log = log.WithValues("maxInFlight", maxInFlight, "replicas", replicas) + } + + machinesToRemediate := make([]*clusterv1.Machine, 0, len(unhealthyMachines)) + inFlight := 0 + for _, m := range unhealthyMachines { + if !m.DeletionTimestamp.IsZero() { + if conditions.IsTrue(&m, clusterv1.MachineOwnerRemediatedCondition) { + // Machine has been remediated by this controller and still in flight. + inFlight++ + } + continue + } + if conditions.IsFalse(&m, clusterv1.MachineOwnerRemediatedCondition) { + machinesToRemediate = append(machinesToRemediate, &m) + } + } + log = log.WithValues("inFlight", inFlight) + + if len(machinesToRemediate) == 0 { + // There's a MachineHealthCheck monitoring the machines, but currently + // no action to be taken. A machine could require remediation at any + // time, so use a short interval until next reconciliation. + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + + if inFlight >= maxInFlight { + log.V(3).Info("Remediation strategy is set, and maximum in flight has been reached", "machinesToBeRemediated", len(machinesToRemediate)) + + // Check soon again whether the already-remediating (= deleting) machines are gone + // so that more machines can be remediated + return ctrl.Result{RequeueAfter: 15 * time.Second}, nil + } + + // Sort the machines from newest to oldest. + // We are trying to remediate machines failing to come up first because + // there is a chance that they are not hosting any workloads (minimize disruption). 
+ sort.SliceStable(machinesToRemediate, func(i, j int) bool { + return machinesToRemediate[i].CreationTimestamp.After(machinesToRemediate[j].CreationTimestamp.Time) + }) + + haveMoreMachinesToRemediate := false + if len(machinesToRemediate) > (maxInFlight - inFlight) { + haveMoreMachinesToRemediate = true + log.V(5).Info("Remediation strategy is set, limiting in flight operations", "machinesToBeRemediated", len(machinesToRemediate)) + machinesToRemediate = machinesToRemediate[:(maxInFlight - inFlight)] + } + + // Remediate unhealthy machines by deleting them + var errs []error + for _, m := range machinesToRemediate { + log.Info("Deleting unhealthy Machine", "Machine", klog.KObj(m)) + patch := client.MergeFrom(m.DeepCopy()) + if err := r.Client.Delete(ctx, m); err != nil { + if apierrors.IsNotFound(err) { + continue + } + errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m))) + continue + } + v1beta1conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition) + if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) { + errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m))) + } + } + + if len(errs) > 0 { + return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines") + } + + if haveMoreMachinesToRemediate { + // More machines need remediation, so reconcile again sooner + return ctrl.Result{RequeueAfter: 15 * time.Second}, nil + } + + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil +} diff --git a/internal/controllers/machinepool/machinepool_controller_phases_test.go b/internal/controllers/machinepool/machinepool_controller_phases_test.go index 6b618eef7ede..337e6a156803 100644 --- a/internal/controllers/machinepool/machinepool_controller_phases_test.go +++ b/internal/controllers/machinepool/machinepool_controller_phases_test.go @@ -41,8 +41,11 @@ import ( "sigs.k8s.io/cluster-api/controllers/external" externalfake "sigs.k8s.io/cluster-api/controllers/external/fake" "sigs.k8s.io/cluster-api/internal/util/ssa" + "sigs.k8s.io/cluster-api/util/conditions" + v1beta1conditions "sigs.k8s.io/cluster-api/util/conditions/deprecated/v1beta1" "sigs.k8s.io/cluster-api/util/kubeconfig" "sigs.k8s.io/cluster-api/util/labels/format" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/test/builder" ) @@ -1412,7 +1415,7 @@ func TestReconcileMachinePoolMachines(t *testing.T) { scope := &scope{ machinePool: &machinePool, } - err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + _, err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) @@ -1483,7 +1486,7 @@ func TestReconcileMachinePoolMachines(t *testing.T) { machinePool: &machinePool, } - err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + _, err = r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) @@ -1536,15 +1539,24 @@ func TestReconcileMachinePoolMachines(t *testing.T) { r := &Reconciler{ Client: env, ssaCache: ssa.NewCache(testController), + externalTracker: external.ObjectTracker{ + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: env.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), + }, } scope := &scope{ machinePool: &machinePool, } - err = 
r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + res, err := r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) r.reconcilePhase(&machinePool) g.Expect(err).ToNot(HaveOccurred()) + // Regular reconciliation makes no sense if infra provider + // doesn't support MachinePool machines + g.Expect(res.RequeueAfter).To(BeZero()) machineList := &clusterv1.MachineList{} labels := map[string]string{ @@ -1554,6 +1566,136 @@ func TestReconcileMachinePoolMachines(t *testing.T) { g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels(labels))).To(Succeed()) g.Expect(machineList.Items).To(BeEmpty()) }) + + t.Run("Should delete unhealthy machines", func(*testing.T) { + machinePool := getMachinePool(3, "machinepool-test-4", clusterName, ns.Name) + g.Expect(env.CreateAndWait(ctx, &machinePool)).To(Succeed()) + + infraMachines := getInfraMachines(3, machinePool.Name, clusterName, ns.Name) + for i := range infraMachines { + g.Expect(env.CreateAndWait(ctx, &infraMachines[i])).To(Succeed()) + } + + machines := getMachines(3, machinePool.Name, clusterName, ns.Name) + for i := range machines { + g.Expect(env.CreateAndWait(ctx, &machines[i])).To(Succeed()) + } + + // machines[0] isn't changed here (no conditions = considered healthy). + + // machines[1] is marked as unhealthy by conditions + patchHelper, err := patch.NewHelper(&machines[1], env) + unhealthyMachineName := machines[1].Name + v1beta1conditions.MarkFalse(&machines[1], clusterv1.MachineHealthCheckSucceededV1Beta1Condition, clusterv1.MachineHasFailureV1Beta1Reason, clusterv1.ConditionSeverityWarning, "") + conditions.Set(&machines[1], metav1.Condition{ + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineHasFailureV1Beta1Reason, + }) + v1beta1conditions.MarkFalse(&machines[1], clusterv1.MachineOwnerRemediatedV1Beta1Condition, clusterv1.WaitingForRemediationV1Beta1Reason, clusterv1.ConditionSeverityWarning, "") + conditions.Set(&machines[1], metav1.Condition{ + Type: clusterv1.MachineOwnerRemediatedCondition, + Status: metav1.ConditionFalse, + Reason: clusterv1.WaitingForRemediationV1Beta1Reason, + }) + g.Expect(err).ShouldNot(HaveOccurred()) + g.Expect(patchHelper.Patch(ctx, &machines[1], patch.WithStatusObservedGeneration{}, patch.WithOwnedConditions{Conditions: []string{ + clusterv1.MachineHealthCheckSucceededCondition, + clusterv1.MachineOwnerRemediatedCondition, + }}, patch.WithOwnedV1Beta1Conditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededV1Beta1Condition, + clusterv1.MachineOwnerRemediatedV1Beta1Condition, + }})).To(Succeed()) + + // machines[2] is marked as healthy by conditions + patchHelper, err = patch.NewHelper(&machines[2], env) + v1beta1conditions.MarkTrue(&machines[2], clusterv1.MachineHealthCheckSucceededV1Beta1Condition) + conditions.Set(&machines[2], metav1.Condition{ + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckSucceededReason, + }) + g.Expect(err).ShouldNot(HaveOccurred()) + g.Expect(patchHelper.Patch(ctx, &machines[2], patch.WithStatusObservedGeneration{}, patch.WithOwnedConditions{Conditions: []string{ + clusterv1.MachineHealthCheckSucceededCondition, + clusterv1.MachineOwnerRemediatedCondition, + }}, patch.WithOwnedV1Beta1Conditions{Conditions: []clusterv1.ConditionType{ + clusterv1.MachineHealthCheckSucceededV1Beta1Condition, + 
clusterv1.MachineOwnerRemediatedV1Beta1Condition, + }})).To(Succeed()) + + infraConfig := map[string]interface{}{ + "kind": builder.GenericInfrastructureMachinePoolKind, + "apiVersion": builder.InfrastructureGroupVersion.String(), + "metadata": map[string]interface{}{ + "name": "infra-config4", + "namespace": ns.Name, + }, + "spec": map[string]interface{}{ + "providerIDList": []interface{}{ + "test://id-1", + }, + }, + "status": map[string]interface{}{ + "ready": true, + "addresses": []interface{}{ + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.1", + }, + map[string]interface{}{ + "type": "InternalIP", + "address": "10.0.0.2", + }, + }, + "infrastructureMachineKind": builder.GenericInfrastructureMachineKind, + }, + } + g.Expect(env.CreateAndWait(ctx, &unstructured.Unstructured{Object: infraConfig})).To(Succeed()) + + r := &Reconciler{ + Client: env, + ssaCache: ssa.NewCache("test-controller"), + externalTracker: external.ObjectTracker{ + Controller: externalfake.Controller{}, + Cache: &informertest.FakeInformers{}, + Scheme: env.Scheme(), + PredicateLogger: ptr.To(logr.New(log.NullLogSink{})), + }, + } + scope := &scope{ + machinePool: &machinePool, + } + res, err := r.reconcileMachines(ctx, scope, &unstructured.Unstructured{Object: infraConfig}) + r.reconcilePhase(&machinePool) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(res.RequeueAfter).To(BeNumerically(">=", 0)) + + machineList := &clusterv1.MachineList{} + labels := map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + clusterv1.MachinePoolNameLabel: machinePool.Name, + } + g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels(labels))).To(Succeed()) + + // The unhealthy machine should have been remediated (= deleted) + g.Expect(machineList.Items).To(HaveLen(2)) + + for i := range machineList.Items { + machine := &machineList.Items[i] + + // Healthy machines should remain + g.Expect(machine.Name).ToNot(Equal(unhealthyMachineName)) + + _, err := external.Get(ctx, r.Client, &corev1.ObjectReference{ + APIVersion: builder.InfrastructureGroupVersion.String(), + Kind: machine.Spec.InfrastructureRef.Kind, + Namespace: machine.Namespace, + Name: machine.Spec.InfrastructureRef.Name, + }) + g.Expect(err).ToNot(HaveOccurred()) + } + }) }) } diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index 8fbd50edf2ba..fbaa223874ed 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -332,6 +332,7 @@ providers: # Add cluster templates - sourcePath: "../data/infrastructure-docker/main/cluster-template.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-md-remediation.yaml" + - sourcePath: "../data/infrastructure-docker/main/cluster-template-mp-remediation.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml" - sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml" diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/kustomization.yaml new file mode 100644 index 000000000000..b3bf33cb250c --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/kustomization.yaml @@ -0,0 +1,6 @@ +resources: + - ../cluster-template-machine-pool + - mhc.yaml + +patches: +- path: mp.yaml diff --git 
a/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mhc.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mhc.yaml new file mode 100644 index 000000000000..1c65f920dc68 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mhc.yaml @@ -0,0 +1,21 @@ +--- +# MachineHealthCheck object with +# - a selector that targets all the machines with label e2e.remediation.label="" +# - unhealthyNodeConditions triggering remediation after 10s the condition is set +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachineHealthCheck +metadata: + name: "${CLUSTER_NAME}-mhc-0" +spec: + clusterName: "${CLUSTER_NAME}" + selector: + matchLabels: + e2e.remediation.label: "" + checks: + unhealthyNodeConditions: + - type: e2e.remediation.condition + status: "False" + timeoutSeconds: 10 + remediation: + triggerIf: + unhealthyLessThanOrEqualTo: 100% diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mp.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mp.yaml new file mode 100644 index 000000000000..9ab2808942a0 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/main/cluster-template-mp-remediation/mp.yaml @@ -0,0 +1,9 @@ +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachinePool +metadata: + name: "${CLUSTER_NAME}-mp-0" +spec: + template: + metadata: + labels: + "e2e.remediation.label": "" diff --git a/test/e2e/mp_remediations.go b/test/e2e/mp_remediations.go new file mode 100644 index 000000000000..a5215b36db05 --- /dev/null +++ b/test/e2e/mp_remediations.go @@ -0,0 +1,142 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "os" + "path/filepath" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util" +) + +// MachinePoolRemediationSpecInput is the input for MachinePoolRemediationSpec. +type MachinePoolRemediationSpecInput struct { + E2EConfig *clusterctl.E2EConfig + ClusterctlConfigPath string + BootstrapClusterProxy framework.ClusterProxy + ArtifactFolder string + SkipCleanup bool + ControlPlaneWaiters clusterctl.ControlPlaneWaiters + + // InfrastructureProviders specifies the infrastructure to use for clusterctl + // operations (Example: get cluster templates). + // Note: In most cases this need not be specified. It only needs to be specified when + // multiple infrastructure providers are installed on the cluster as clusterctl will not be + // able to identify the default. + InfrastructureProvider *string + + // Flavor, if specified, must refer to a template that has a MachineHealthCheck + // resource configured to match the MachinePool managed Machines and be + // configured to treat "e2e.remediation.condition" "False" as an unhealthy + // condition with a short timeout. 
+ // If not specified, "mp-remediation" is used. + Flavor *string + + // Allows to inject a function to be run after test namespace is created. + // If not specified, this is a no-op. + PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string) +} + +// MachinePoolRemediationSpec implements a test that verifies that Machines are remediated by MHC during unhealthy conditions. +func MachinePoolRemediationSpec(ctx context.Context, inputGetter func() MachinePoolRemediationSpecInput) { + var ( + specName = "mp-remediation" + input MachinePoolRemediationSpecInput + namespace *corev1.Namespace + cancelWatches context.CancelFunc + clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult + ) + + BeforeEach(func() { + Expect(ctx).NotTo(BeNil(), "ctx is required for %s spec", specName) + input = inputGetter() + Expect(input.E2EConfig).ToNot(BeNil(), "Invalid argument. input.E2EConfig can't be nil when calling %s spec", specName) + Expect(input.ClusterctlConfigPath).To(BeAnExistingFile(), "Invalid argument. input.ClusterctlConfigPath must be an existing file when calling %s spec", specName) + Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName) + Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. input.ArtifactFolder can't be created for %s spec", specName) + Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersion)) + + // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events. + namespace, cancelWatches = framework.SetupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, input.PostNamespaceCreated) + clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + }) + + It("Should replace unhealthy machines", func() { + By("Creating a workload cluster") + + infrastructureProvider := clusterctl.DefaultInfrastructureProvider + if input.InfrastructureProvider != nil { + infrastructureProvider = *input.InfrastructureProvider + } + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(input.ArtifactFolder, "clusters", input.BootstrapClusterProxy.GetName()), + ClusterctlConfigPath: input.ClusterctlConfigPath, + KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: infrastructureProvider, + Flavor: ptr.Deref(input.Flavor, "mp-remediation"), + Namespace: namespace.Name, + ClusterName: fmt.Sprintf("%s-%s", specName, util.RandomString(6)), + KubernetesVersion: input.E2EConfig.MustGetVariable(KubernetesVersion), + ControlPlaneMachineCount: ptr.To[int64](1), + WorkerMachineCount: ptr.To[int64](1), + }, + ControlPlaneWaiters: input.ControlPlaneWaiters, + WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }, clusterResources) + + By("Setting a machine unhealthy and wait for MachinePool remediation") + framework.DiscoverMachineHealthChecksAndWaitForRemediation(ctx, framework.DiscoverMachineHealthCheckAndWaitForRemediationInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + WaitForMachineRemediation: 
input.E2EConfig.GetIntervals(specName, "wait-machine-remediation"), + }) + + Byf("Verify Cluster Available condition is true") + framework.VerifyClusterAvailable(ctx, framework.VerifyClusterAvailableInput{ + Getter: input.BootstrapClusterProxy.GetClient(), + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + }) + + Byf("Verify Machines Ready condition is true") + framework.VerifyMachinesReady(ctx, framework.VerifyMachinesReadyInput{ + Lister: input.BootstrapClusterProxy.GetClient(), + Name: clusterResources.Cluster.Name, + Namespace: clusterResources.Cluster.Namespace, + }) + + By("PASSED!") + }) + + AfterEach(func() { + // Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself. + framework.DumpSpecResourcesAndCleanup(ctx, specName, input.BootstrapClusterProxy, input.ClusterctlConfigPath, input.ArtifactFolder, namespace, cancelWatches, clusterResources.Cluster, input.E2EConfig.GetIntervals, input.SkipCleanup) + }) +} diff --git a/test/e2e/mp_remediations_test.go b/test/e2e/mp_remediations_test.go new file mode 100644 index 000000000000..384c1e1f89ed --- /dev/null +++ b/test/e2e/mp_remediations_test.go @@ -0,0 +1,36 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + . "github.com/onsi/ginkgo/v2" +) + +var _ = Describe("When testing MachinePool remediation", func() { + MachinePoolRemediationSpec(ctx, func() MachinePoolRemediationSpecInput { + return MachinePoolRemediationSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + } + }) +}) diff --git a/test/framework/machine_helpers.go b/test/framework/machine_helpers.go index 75d3364417ab..5abe3032a848 100644 --- a/test/framework/machine_helpers.go +++ b/test/framework/machine_helpers.go @@ -229,8 +229,8 @@ type PatchNodeConditionInput struct { Machine clusterv1.Machine } -// PatchNodeCondition patches a node condition to any one of the machines with a node ref. -func PatchNodeCondition(ctx context.Context, input PatchNodeConditionInput) { +// PatchNodeCondition patches a node condition to any one of the machines with a node ref. The node name is returned. +func PatchNodeCondition(ctx context.Context, input PatchNodeConditionInput) string { Expect(ctx).NotTo(BeNil(), "ctx is required for PatchNodeConditions") Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling PatchNodeConditions") Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. 
diff --git a/test/framework/machinehealthcheck_helpers.go b/test/framework/machinehealthcheck_helpers.go
index 1a147d4afcc6..f538fc48c29b 100644
--- a/test/framework/machinehealthcheck_helpers.go
+++ b/test/framework/machinehealthcheck_helpers.go
@@ -73,7 +73,7 @@ func DiscoverMachineHealthChecksAndWaitForRemediation(ctx context.Context, input
 			Status:             mhc.Spec.Checks.UnhealthyNodeConditions[0].Status,
 			LastTransitionTime: metav1.Time{Time: time.Now()},
 		}
-		PatchNodeCondition(ctx, PatchNodeConditionInput{
+		nodeName := PatchNodeCondition(ctx, PatchNodeConditionInput{
 			ClusterProxy:  input.ClusterProxy,
 			Cluster:       input.Cluster,
 			NodeCondition: unhealthyNodeCondition,
@@ -82,10 +82,11 @@ func DiscoverMachineHealthChecksAndWaitForRemediation(ctx context.Context, input
 
 		fmt.Fprintln(GinkgoWriter, "Waiting for remediation")
 		WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx, WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput{
-			ClusterProxy:       input.ClusterProxy,
-			Cluster:            input.Cluster,
-			MachineHealthCheck: mhc,
-			MachinesCount:      len(machines),
+			ClusterProxy:        input.ClusterProxy,
+			Cluster:             input.Cluster,
+			MachineHealthCheck:  mhc,
+			MachinesCount:       len(machines),
+			ExpectedDeletedNode: nodeName,
 		}, input.WaitForMachineRemediation...)
 	}
 }
@@ -122,10 +123,11 @@ func machineHealthCheckOptions(machineHealthCheck clusterv1.MachineHealthCheck)
 
 // WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput is the input for WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition.
 type WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput struct {
-	ClusterProxy       ClusterProxy
-	Cluster            *clusterv1.Cluster
-	MachineHealthCheck *clusterv1.MachineHealthCheck
-	MachinesCount      int
+	ClusterProxy        ClusterProxy
+	Cluster             *clusterv1.Cluster
+	MachineHealthCheck  *clusterv1.MachineHealthCheck
+	MachinesCount       int
+	ExpectedDeletedNode string
 }
 
 // WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition patches a node condition to any one of the machines with a node ref.
@@ -135,6 +137,7 @@ func WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx context.Cont
 	Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition")
 	Expect(input.MachineHealthCheck).NotTo(BeNil(), "Invalid argument. input.MachineHealthCheck can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition")
 	Expect(input.MachinesCount).NotTo(BeZero(), "Invalid argument. input.MachinesCount can't be zero when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition")
+	Expect(input.ExpectedDeletedNode).NotTo(BeEmpty(), "Invalid argument. input.ExpectedDeletedNode can't be empty when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition")
 
 	fmt.Fprintln(GinkgoWriter, "Waiting until the node with unhealthy node condition is remediated")
 	Eventually(func() bool {
@@ -164,6 +167,22 @@ func WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx context.Cont
 				return false
 			}
 		}
+
+		nodes := &corev1.NodeList{}
+		// This should not be an Expect(), because listing nodes may return an error while the machine is being deleted.
+		err := input.ClusterProxy.GetWorkloadCluster(ctx, input.Cluster.Namespace, input.Cluster.Name).GetClient().List(ctx, nodes)
+		if err != nil {
+			fmt.Fprintf(GinkgoWriter, "Failed to get nodes: %s\n", err)
+			return false
+		}
+		Expect(nodes.Items).NotTo(BeEmpty())
+		for _, existingNode := range nodes.Items {
+			if existingNode.Name == input.ExpectedDeletedNode {
+				// This node should have been deleted by remediation, but it still exists.
+				return false
+			}
+		}
+
 		return true
 	}, intervals...).Should(BeTrue())
 }
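The node-existence check added above runs inside an `Eventually` and deliberately treats list errors as "not remediated yet" rather than failing, since the workload API server can be briefly unreachable while the unhealthy machine is deleted. The same pattern in isolation, as a minimal sketch (`nodeGone` and its parameters are made-up names):

```go
package e2esketch // illustrative, not part of this PR

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// nodeGone reports whether the named node no longer exists in the workload
// cluster. Transient list errors count as "not gone yet", so an enclosing
// Eventually keeps polling instead of aborting the test mid-remediation.
func nodeGone(ctx context.Context, c client.Client, nodeName string) bool {
	nodes := &corev1.NodeList{}
	if err := c.List(ctx, nodes); err != nil {
		return false // API server may be briefly unreachable while machines churn
	}
	for i := range nodes.Items {
		if nodes.Items[i].Name == nodeName {
			return false // the remediated node is still present
		}
	}
	return true
}
```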
diff --git a/test/infrastructure/docker/config/default/manager_image_patch.yaml b/test/infrastructure/docker/config/default/manager_image_patch.yaml
index 4ebbfa36e52c..4158dc47be3d 100644
--- a/test/infrastructure/docker/config/default/manager_image_patch.yaml
+++ b/test/infrastructure/docker/config/default/manager_image_patch.yaml
@@ -8,5 +8,5 @@ spec:
     spec:
       containers:
       # Change the value of image field below to your controller image URL
-      - image: gcr.io/k8s-staging-cluster-api/capd-manager:main
+      - image: gcr.io/k8s-staging-cluster-api/capd-manager-arm64:dev
         name: manager
diff --git a/test/infrastructure/docker/config/default/manager_pull_policy.yaml b/test/infrastructure/docker/config/default/manager_pull_policy.yaml
index 74a0879c604a..cd7ae12c01ea 100644
--- a/test/infrastructure/docker/config/default/manager_pull_policy.yaml
+++ b/test/infrastructure/docker/config/default/manager_pull_policy.yaml
@@ -8,4 +8,4 @@ spec:
     spec:
       containers:
       - name: manager
-        imagePullPolicy: Always
+        imagePullPolicy: IfNotPresent