From 313a440a37d69e11a861963c7ba7bf0cf69bcc71 Mon Sep 17 00:00:00 2001 From: tiffanny29631 Date: Tue, 19 Aug 2025 23:36:15 +0000 Subject: [PATCH 1/5] test: Add client side install method for restoring Config Sync To guarantee the e2e client claims ownership of all fields for objects that might have drifted, use client-side apply when reinstalling Config Sync and Webhook after ConfigManagement was previously installed and removed. --- e2e/nomostest/config_sync.go | 37 ++++++++++++++++++++++++++++++++---- e2e/testcases/cli_test.go | 24 ++++++++++++----------- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/e2e/nomostest/config_sync.go b/e2e/nomostest/config_sync.go index 58918a7803..6927dc67ef 100644 --- a/e2e/nomostest/config_sync.go +++ b/e2e/nomostest/config_sync.go @@ -83,10 +83,11 @@ var ( // // All paths must be relative to the test file that is running. There is probably // a more elegant way to do this. - baseDir = filepath.FromSlash("../..") - outputManifestsDir = filepath.Join(baseDir, ".output", "staging", "oss") - configSyncManifest = filepath.Join(outputManifestsDir, "config-sync-manifest.yaml") - multiConfigMaps = filepath.Join(baseDir, "e2e", "raw-nomos", configSyncManifests, multiConfigMapsName) + baseDir = filepath.FromSlash("../..") + outputManifestsDir = filepath.Join(baseDir, ".output", "staging", "oss") + configSyncManifest = filepath.Join(outputManifestsDir, "config-sync-manifest.yaml") + admissionWebhookManifest = filepath.Join(outputManifestsDir, "admission-webhook.yaml") + multiConfigMaps = filepath.Join(baseDir, "e2e", "raw-nomos", configSyncManifests, multiConfigMapsName) ) var ( @@ -245,6 +246,34 @@ func InstallConfigSync(nt *NT) error { return nil } +// InstallConfigSyncFromManifest installs ConfigSync on the test cluster by directly +// applying the manifest file using kubectl client-side apply +func InstallConfigSyncFromManifest(nt *NT) error { + nt.T.Log("[SETUP] Installing Config Sync directly from manifest file") + +
nt.T.Logf("Applying Config Sync manifest directly from %s", configSyncManifest) + + out, err := nt.Shell.Kubectl("apply", "-f", configSyncManifest) + if err != nil { + return fmt.Errorf("failed to apply Config Sync manifest: %v\n%s", err, out) + } + + nt.T.Logf("Applying multi-repo configmaps from %s", multiConfigMaps) + out, err = nt.Shell.Kubectl("apply", "-f", multiConfigMaps) + if err != nil { + return fmt.Errorf("failed to apply multi-repo configmaps: %v\n%s", err, out) + } + + // Apply the admission webhook manifest + nt.T.Logf("Applying admission webhook manifest from %s", admissionWebhookManifest) + out, err = nt.Shell.Kubectl("apply", "-f", admissionWebhookManifest) + if err != nil { + return fmt.Errorf("failed to apply admission webhook manifest: %v\n%s", err, out) + } + + return nil +} + // uninstallConfigSync uninstalls ConfigSync on the test cluster func uninstallConfigSync(nt *NT) error { nt.T.Log("[CLEANUP] Uninstalling Config Sync") diff --git a/e2e/testcases/cli_test.go b/e2e/testcases/cli_test.go index 854051e3e5..70ae1d1b0c 100644 --- a/e2e/testcases/cli_test.go +++ b/e2e/testcases/cli_test.go @@ -1297,13 +1297,14 @@ func TestApiResourceFormatting(t *testing.T) { } func TestNomosMigrate(t *testing.T) { - nt := nomostest.New(t, nomostesting.NomosCLI, ntopts.SkipConfigSyncInstall) + nt := nomostest.New(t, nomostesting.NomosCLI) nt.T.Cleanup(func() { // Restore state of Config Sync installation after test - if err := nomostest.InstallConfigSync(nt); err != nil { + if err := nomostest.InstallConfigSyncFromManifest(nt); err != nil { nt.T.Fatal(err) } + nt.Must(nt.WatchForAllSyncs()) }) nt.T.Cleanup(func() { cmObj := &unstructured.Unstructured{ @@ -1451,11 +1452,11 @@ func TestNomosMigrate(t *testing.T) { configmanagement.RGControllerName, configmanagement.RGControllerNamespace) }) tg.Go(func() error { - return nt.Watcher.WatchForNotFound(kinds.Deployment(), + return nt.Watcher.WatchForCurrentStatus(kinds.Deployment(), 
core.RootReconcilerName(configsync.RootSyncName), configsync.ControllerNamespace) }) tg.Go(func() error { - return nt.Watcher.WatchForNotFound(kinds.RootSyncV1Beta1(), + return nt.Watcher.WatchForCurrentStatus(kinds.RootSyncV1Beta1(), configsync.RootSyncName, configsync.ControllerNamespace) }) if err := tg.Wait(); err != nil { @@ -1464,14 +1465,14 @@ func TestNomosMigrate(t *testing.T) { } func TestNomosMigrateMonoRepo(t *testing.T) { - nt := nomostest.New(t, nomostesting.NomosCLI, ntopts.SkipConfigSyncInstall) + nt := nomostest.New(t, nomostesting.NomosCLI) nt.T.Cleanup(func() { // Restore state of Config Sync installation after test. - // This also emulates upgrading to the current version after migrating - if err := nomostest.InstallConfigSync(nt); err != nil { + if err := nomostest.InstallConfigSyncFromManifest(nt); err != nil { nt.T.Fatal(err) } + nt.Must(nt.WatchForAllSyncs()) }) nt.T.Cleanup(func() { crds := []string{ @@ -1707,13 +1708,14 @@ func TestNomosMigrateMonoRepo(t *testing.T) { // This test case validates the behavior of the uninstall script defined // at installation/uninstall_configmanagement.sh func TestACMUninstallScript(t *testing.T) { - nt := nomostest.New(t, nomostesting.NomosCLI, ntopts.SkipConfigSyncInstall) + nt := nomostest.New(t, nomostesting.NomosCLI) nt.T.Cleanup(func() { // Restore state of Config Sync installation after test - if err := nomostest.InstallConfigSync(nt); err != nil { + if err := nomostest.InstallConfigSyncFromManifest(nt); err != nil { nt.T.Fatal(err) } + nt.Must(nt.WatchForAllSyncs()) }) nt.T.Cleanup(func() { cmObj := &unstructured.Unstructured{ @@ -1861,11 +1863,11 @@ func TestACMUninstallScript(t *testing.T) { configmanagement.RGControllerName, configmanagement.RGControllerNamespace) }) tg.Go(func() error { - return nt.Watcher.WatchForNotFound(kinds.Deployment(), + return nt.Watcher.WatchForCurrentStatus(kinds.Deployment(), core.RootReconcilerName(configsync.RootSyncName), configsync.ControllerNamespace) }) 
tg.Go(func() error { - return nt.Watcher.WatchForNotFound(kinds.RootSyncV1Beta1(), + return nt.Watcher.WatchForCurrentStatus(kinds.RootSyncV1Beta1(), configsync.RootSyncName, configsync.ControllerNamespace) }) if err := tg.Wait(); err != nil { From 77a9062d208681afffe0bc63165bf82505525329 Mon Sep 17 00:00:00 2001 From: tiffanny29631 Date: Fri, 5 Sep 2025 04:14:38 +0000 Subject: [PATCH 2/5] Add server-side false flag --- e2e/nomostest/config_sync.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/e2e/nomostest/config_sync.go b/e2e/nomostest/config_sync.go index 6927dc67ef..293083995f 100644 --- a/e2e/nomostest/config_sync.go +++ b/e2e/nomostest/config_sync.go @@ -259,14 +259,14 @@ func InstallConfigSyncFromManifest(nt *NT) error { } nt.T.Logf("Applying multi-repo configmaps from %s", multiConfigMaps) - out, err = nt.Shell.Kubectl("apply", "-f", multiConfigMaps) + out, err = nt.Shell.Kubectl("apply", "--server-side=false", "-f", multiConfigMaps) if err != nil { return fmt.Errorf("failed to apply multi-repo configmaps: %v\n%s", err, out) } // Apply the admission webhook manifest nt.T.Logf("Applying admission webhook manifest from %s", admissionWebhookManifest) - out, err = nt.Shell.Kubectl("apply", "-f", admissionWebhookManifest) + out, err = nt.Shell.Kubectl("apply", "--server-side=false", "-f", admissionWebhookManifest) if err != nil { return fmt.Errorf("failed to apply admission webhook manifest: %v\n%s", err, out) } From 9e508cc366ac718302436c30ce32e95435cd8761 Mon Sep 17 00:00:00 2001 From: tiffanny29631 Date: Fri, 5 Sep 2025 04:14:38 +0000 Subject: [PATCH 3/5] Add server-side false flag --- e2e/nomostest/config_sync.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/nomostest/config_sync.go b/e2e/nomostest/config_sync.go index 293083995f..2f69cb548e 100644 --- a/e2e/nomostest/config_sync.go +++ b/e2e/nomostest/config_sync.go @@ -253,7 +253,7 @@ func InstallConfigSyncFromManifest(nt *NT) error { nt.T.Logf("Applying 
Config Sync manifest directly from %s", configSyncManifest) - out, err := nt.Shell.Kubectl("apply", "-f", configSyncManifest) + out, err := nt.Shell.Kubectl("apply", "--server-side=false", "-f", configSyncManifest) if err != nil { return fmt.Errorf("failed to apply Config Sync manifest: %v\n%s", err, out) } From d7aa3bc7007297481534acab4a5e91ad1097f2fa Mon Sep 17 00:00:00 2001 From: tiffanny29631 Date: Wed, 6 Aug 2025 22:36:21 +0000 Subject: [PATCH 4/5] Remove readiness probe from otel-agent sidecar The readiness probe on the otel-agent container was causing operational issues: - False alarms during slow cluster startup due to health check binding failures - Inconsistent with other containers (git-sync, reconciler) which don't use readiness probes - Redundant for a telemetry sidecar that doesn't provide direct user-facing services --- .../templates/reconciler-manager-configmap.yaml | 5 ----- manifests/templates/reconciler-manager.yaml | 4 ---- manifests/templates/resourcegroup-manifest.yaml | 4 ---- test/kustomization/expected.yaml | 13 ------------- 4 files changed, 26 deletions(-) diff --git a/manifests/templates/reconciler-manager-configmap.yaml b/manifests/templates/reconciler-manager-configmap.yaml index 92882fa746..c99f9bdf3f 100644 --- a/manifests/templates/reconciler-manager-configmap.yaml +++ b/manifests/templates/reconciler-manager-configmap.yaml @@ -195,11 +195,6 @@ data: volumeMounts: - name: otel-agent-config-reconciler-vol mountPath: /conf - readinessProbe: - httpGet: - path: / - port: 13133 # Health Check extension default port. 
- scheme: HTTP imagePullPolicy: IfNotPresent # These KUBE env vars help populate OTEL_RESOURCE_ATTRIBUTES which # is used by the otel-agent to populate resource attributes when diff --git a/manifests/templates/reconciler-manager.yaml b/manifests/templates/reconciler-manager.yaml index ecde8f5c75..a4c6b729a9 100644 --- a/manifests/templates/reconciler-manager.yaml +++ b/manifests/templates/reconciler-manager.yaml @@ -91,10 +91,6 @@ spec: volumeMounts: - name: otel-agent-config-vol mountPath: /conf - readinessProbe: - httpGet: - path: / - port: 13133 # Health Check extension default port. # These KUBE env vars help populate OTEL_RESOURCE_ATTRIBUTES which # is used by the otel-agent to populate resource attributes when # emiting metrics to the otel-collector. This is more efficient than diff --git a/manifests/templates/resourcegroup-manifest.yaml b/manifests/templates/resourcegroup-manifest.yaml index 49c95f66ba..8aa4099b91 100644 --- a/manifests/templates/resourcegroup-manifest.yaml +++ b/manifests/templates/resourcegroup-manifest.yaml @@ -275,10 +275,6 @@ spec: - containerPort: 55678 - containerPort: 8888 - containerPort: 13133 - readinessProbe: - httpGet: - path: / - port: 13133 resources: requests: cpu: 10m diff --git a/test/kustomization/expected.yaml b/test/kustomization/expected.yaml index 185079ec48..2675de64ed 100644 --- a/test/kustomization/expected.yaml +++ b/test/kustomization/expected.yaml @@ -5907,11 +5907,6 @@ data: volumeMounts: - name: otel-agent-config-reconciler-vol mountPath: /conf - readinessProbe: - httpGet: - path: / - port: 13133 # Health Check extension default port. 
- scheme: HTTP imagePullPolicy: IfNotPresent # These KUBE env vars help populate OTEL_RESOURCE_ATTRIBUTES which # is used by the otel-agent to populate resource attributes when @@ -6370,10 +6365,6 @@ spec: - containerPort: 55678 - containerPort: 8888 - containerPort: 13133 - readinessProbe: - httpGet: - path: / - port: 13133 resources: limits: cpu: 1 @@ -6494,10 +6485,6 @@ spec: - containerPort: 55678 - containerPort: 8888 - containerPort: 13133 - readinessProbe: - httpGet: - path: / - port: 13133 resources: requests: cpu: 10m From 42b9ab4b2b97edf35c03ca5bf65363042aeff92c Mon Sep 17 00:00:00 2001 From: tiffanny29631 Date: Thu, 7 Aug 2025 18:29:10 +0000 Subject: [PATCH 5/5] remove healthcheck from otel-agent --- manifests/otel-agent-cm.yaml | 4 ---- manifests/otel-agent-reconciler-cm.yaml | 4 ---- .../templates/reconciler-manager-configmap.yaml | 2 -- manifests/templates/reconciler-manager.yaml | 1 - manifests/templates/resourcegroup-manifest.yaml | 5 ----- test/kustomization/expected.yaml | 16 ---------------- 6 files changed, 32 deletions(-) diff --git a/manifests/otel-agent-cm.yaml b/manifests/otel-agent-cm.yaml index 82594daa96..eee4c2d46d 100644 --- a/manifests/otel-agent-cm.yaml +++ b/manifests/otel-agent-cm.yaml @@ -39,11 +39,7 @@ data: batch: resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] diff --git a/manifests/otel-agent-reconciler-cm.yaml b/manifests/otel-agent-reconciler-cm.yaml index ac56154b61..64710491c5 100644 --- a/manifests/otel-agent-reconciler-cm.yaml +++ b/manifests/otel-agent-reconciler-cm.yaml @@ -59,11 +59,7 @@ data: # the GCE metadata service, if available. 
resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] diff --git a/manifests/templates/reconciler-manager-configmap.yaml b/manifests/templates/reconciler-manager-configmap.yaml index c99f9bdf3f..6102428cc4 100644 --- a/manifests/templates/reconciler-manager-configmap.yaml +++ b/manifests/templates/reconciler-manager-configmap.yaml @@ -190,8 +190,6 @@ data: protocol: TCP - containerPort: 8888 # Metrics. protocol: TCP - - containerPort: 13133 # Health check - protocol: TCP volumeMounts: - name: otel-agent-config-reconciler-vol mountPath: /conf diff --git a/manifests/templates/reconciler-manager.yaml b/manifests/templates/reconciler-manager.yaml index a4c6b729a9..ade4143ed4 100644 --- a/manifests/templates/reconciler-manager.yaml +++ b/manifests/templates/reconciler-manager.yaml @@ -81,7 +81,6 @@ spec: ports: - containerPort: 55678 # Default OpenCensus receiver port. - containerPort: 8888 # Metrics. - - containerPort: 13133 # Health check securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true diff --git a/manifests/templates/resourcegroup-manifest.yaml b/manifests/templates/resourcegroup-manifest.yaml index 8aa4099b91..48ab9afc3f 100644 --- a/manifests/templates/resourcegroup-manifest.yaml +++ b/manifests/templates/resourcegroup-manifest.yaml @@ -167,11 +167,7 @@ data: # the GCE metadata service, if available. 
resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] @@ -274,7 +270,6 @@ spec: ports: - containerPort: 55678 - containerPort: 8888 - - containerPort: 13133 resources: requests: cpu: 10m diff --git a/test/kustomization/expected.yaml b/test/kustomization/expected.yaml index 2675de64ed..7c5fbedc2f 100644 --- a/test/kustomization/expected.yaml +++ b/test/kustomization/expected.yaml @@ -5650,11 +5650,7 @@ data: batch: resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] @@ -5708,11 +5704,7 @@ data: # the GCE metadata service, if available. resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] @@ -5902,8 +5894,6 @@ data: protocol: TCP - containerPort: 8888 # Metrics. protocol: TCP - - containerPort: 13133 # Health check - protocol: TCP volumeMounts: - name: otel-agent-config-reconciler-vol mountPath: /conf @@ -6029,11 +6019,7 @@ data: # the GCE metadata service, if available. resourcedetection: detectors: [env, gcp] - extensions: - health_check: - endpoint: 0.0.0.0:13133 service: - extensions: [health_check] pipelines: metrics: receivers: [opencensus] @@ -6364,7 +6350,6 @@ spec: ports: - containerPort: 55678 - containerPort: 8888 - - containerPort: 13133 resources: limits: cpu: 1 @@ -6484,7 +6469,6 @@ spec: ports: - containerPort: 55678 - containerPort: 8888 - - containerPort: 13133 resources: requests: cpu: 10m