From 264f9e9c1c17f2acfd9b09d4f2831d6c0c8f4b4d Mon Sep 17 00:00:00 2001 From: Shalin Patel Date: Wed, 25 Jun 2025 17:28:07 -0700 Subject: [PATCH 1/4] fix: add tolerations and nodeAffinity overrides for registry addon --- .../registry/cncf-distribution/values-template.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml index 3c37dbdb1..22cb1bf02 100644 --- a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml +++ b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml @@ -25,3 +25,14 @@ statefulSet: cpu: 100m memory: 75Mi tlsSecretName: {{ .TLSSecretName }} +tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: Exists + effect: "NoSchedule" + +nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists From 3312d92050c9e75a92881c52aee7a72b2ffd2227 Mon Sep 17 00:00:00 2001 From: Shalin Patel Date: Fri, 27 Jun 2025 08:59:32 -0700 Subject: [PATCH 2/4] fix: add AntiAffinity rules for registry addon --- .../cncf-distribution/values-template.yaml | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml index 22cb1bf02..255cf9458 100644 --- a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml +++ b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml @@ -30,9 +30,20 @@ tolerations: operator: Exists effect: 
"NoSchedule" -nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists +podLabels: + caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} # ensure the labels match with pod AntiAffinity. + +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} \ No newline at end of file From ab718109f6c0b1cd69765733e8b2697b5ae7dabf Mon Sep 17 00:00:00 2001 From: Shalin Patel Date: Fri, 27 Jun 2025 16:34:47 -0700 Subject: [PATCH 3/4] test: e2e test to verify that anti-affinity is set --- .../cncf-distribution/values-template.yaml | 3 +- test/e2e/quick_start_test.go | 9 ++++ test/e2e/registry.go | 54 +++++++++++++++++++ test/e2e/self_hosted_test.go | 9 ++++ test/e2e/statefulset_helpers.go | 2 +- 5 files changed, 75 insertions(+), 2 deletions(-) diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml index 255cf9458..cf03f47e3 100644 --- a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml +++ b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml @@ -46,4 +46,5 @@ affinity: podAffinityTerm: labelSelector: matchLabels: - caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} \ No newline at end of file + caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} + topologyKey: kubernetes.io/hostname diff --git a/test/e2e/quick_start_test.go b/test/e2e/quick_start_test.go index f0b89865d..9af412bb6 100644 --- 
a/test/e2e/quick_start_test.go +++ b/test/e2e/quick_start_test.go @@ -315,6 +315,15 @@ var _ = Describe("Quick start", func() { ClusterProxy: proxy, }, ) + + EnsureAntiAffnityForRegistryAddon( + ctx, + EnsureAntiAffnityForRegistryAddonInput{ + Registry: addonsConfig.Registry, + WorkloadCluster: workloadCluster, + ClusterProxy: proxy, + }, + ) }, } }) diff --git a/test/e2e/registry.go b/test/e2e/registry.go index ae9668d03..a249e8ace 100644 --- a/test/e2e/registry.go +++ b/test/e2e/registry.go @@ -101,3 +101,57 @@ func EnsureClusterCAForRegistryAddon( const caCrtKey = "ca.crt" Expect(rootCASecret.Data[caCrtKey]).To(Equal(rootCASecret.Data[caCrtKey])) } + +type EnsureAntiAffnityForRegistryAddonInput struct { + Registry *v1alpha1.RegistryAddon + WorkloadCluster *clusterv1.Cluster + ClusterProxy framework.ClusterProxy +} + +func EnsureAntiAffnityForRegistryAddon( + ctx context.Context, + input EnsureAntiAffnityForRegistryAddonInput, +) { + if input.Registry == nil { + return + } + Log("Ensuring anti-affinity for registry addon in workload cluster") + workloadClusterClient := input.ClusterProxy.GetWorkloadCluster( + ctx, input.WorkloadCluster.Namespace, input.WorkloadCluster.Name, + ).GetClient() + + sts := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cncf-distribution-registry-docker-registry", + Namespace: "registry-system", + }, + } + err := workloadClusterClient.Get(ctx, ctrlclient.ObjectKeyFromObject(sts), sts) + Expect(err).NotTo(HaveOccurred()) + Expect(sts.Spec.Template.Spec.Affinity).ToNot(BeNil()) + Expect(sts.Spec.Template.Spec.Affinity.PodAntiAffinity).ToNot(BeNil()) + podAntiAffinity := sts.Spec.Template.Spec.Affinity.PodAntiAffinity + Expect( + podAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, + ).ToNot(BeEmpty()) + Expect( + podAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].Weight, + ).To(Equal(int32(100))) + podAffinityTerm := 
podAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution[0].PodAffinityTerm + Expect(podAffinityTerm).ToNot(BeNil()) + Expect(podAffinityTerm.TopologyKey).To(Equal("kubernetes.io/hostname")) + Expect(podAffinityTerm.LabelSelector).ToNot(BeNil()) + affinityLabels := podAffinityTerm.LabelSelector.MatchLabels + Expect( + affinityLabels[v1alpha1.ClusterUUIDAnnotationKey], + ).To(Equal(input.WorkloadCluster.Annotations[v1alpha1.ClusterUUIDAnnotationKey])) + + // test node affinity + nodeAffinity := sts.Spec.Template.Spec.Affinity.NodeAffinity + Expect(nodeAffinity).ToNot(BeNil()) + Expect(nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution).ToNot(BeNil()) + nodeSelectorTerm := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0] + Expect(nodeSelectorTerm).ToNot(BeNil()) + Expect(nodeSelectorTerm.MatchExpressions).ToNot(BeEmpty()) + Expect(nodeSelectorTerm.MatchExpressions[0].Key).To(Equal("node-role.kubernetes.io/control-plane")) +} diff --git a/test/e2e/self_hosted_test.go b/test/e2e/self_hosted_test.go index 26cc9b4a7..d9b44772c 100644 --- a/test/e2e/self_hosted_test.go +++ b/test/e2e/self_hosted_test.go @@ -177,6 +177,15 @@ var _ = Describe("Self-hosted", Serial, func() { ClusterProxy: proxy, }, ) + + EnsureAntiAffnityForRegistryAddon( + ctx, + EnsureAntiAffnityForRegistryAddonInput{ + Registry: addonsConfig.Registry, + WorkloadCluster: workloadCluster, + ClusterProxy: proxy, + }, + ) }, } }, diff --git a/test/e2e/statefulset_helpers.go b/test/e2e/statefulset_helpers.go index 1ed7344bd..1fa0b08cc 100644 --- a/test/e2e/statefulset_helpers.go +++ b/test/e2e/statefulset_helpers.go @@ -21,7 +21,7 @@ type WaitForStatefulSetAvailableInput struct { StatefulSet *appsv1.StatefulSet } -// WaitForStatefulSetsAvailable waits until the Deployment has observedGeneration equal to generation and +// WaitForStatefulSetsAvailable waits until the StatefulSet has observedGeneration equal to generation and // status.Available = True, that 
signals that all the desired replicas are in place. func WaitForStatefulSetsAvailable( ctx context.Context, input WaitForStatefulSetAvailableInput, intervals ...interface{}, From 446a932db01878e4f8b49f9cb99e0c231eb0c987 Mon Sep 17 00:00:00 2001 From: Shalin Patel Date: Tue, 1 Jul 2025 19:55:15 -0700 Subject: [PATCH 4/4] fix: use fixed pod affinity label --- .../addons/registry/cncf-distribution/values-template.yaml | 4 ++-- test/e2e/registry.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml index cf03f47e3..62089fe1a 100644 --- a/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml +++ b/charts/cluster-api-runtime-extensions-nutanix/addons/registry/cncf-distribution/values-template.yaml @@ -31,7 +31,7 @@ tolerations: effect: "NoSchedule" podLabels: - caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} # ensure the labels match with pod AntiAffinity. + cncf-distribution-registry: "true" # ensure the labels match with pod AntiAffinity. 
affinity: nodeAffinity: @@ -46,5 +46,5 @@ affinity: podAffinityTerm: labelSelector: matchLabels: - caren.nutanix.com/cluster-uuid: {{ .ClusterUUID }} + cncf-distribution-registry: "true" topologyKey: kubernetes.io/hostname diff --git a/test/e2e/registry.go b/test/e2e/registry.go index a249e8ace..357048097 100644 --- a/test/e2e/registry.go +++ b/test/e2e/registry.go @@ -143,8 +143,8 @@ func EnsureAntiAffnityForRegistryAddon( Expect(podAffinityTerm.LabelSelector).ToNot(BeNil()) affinityLabels := podAffinityTerm.LabelSelector.MatchLabels Expect( - affinityLabels[v1alpha1.ClusterUUIDAnnotationKey], - ).To(Equal(input.WorkloadCluster.Annotations[v1alpha1.ClusterUUIDAnnotationKey])) + affinityLabels["cncf-distribution-registry"], + ).To(Equal("true")) // Ensure the label matches the pod AntiAffinity. // test node affinity nodeAffinity := sts.Spec.Template.Spec.Affinity.NodeAffinity