Skip to content

Commit a746697

Browse files
fix(RHOAIENG-52863): avoid to install servicemonitor CRD if already present (#3266)
1 parent 8597da0 commit a746697

File tree

7 files changed

+200
-27
lines changed

7 files changed

+200
-27
lines changed

cmd/cloudmanager/app/cache.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,12 @@ func DefaultCacheOptions(scheme *runtime.Scheme) (cache.Options, error) {
5050
Scheme: scheme,
5151
DefaultNamespaces: nsConfig,
5252
ByObject: map[client.Object]cache.ByObject{
53-
&rbacv1.ClusterRole{}: clusterScopedConfig,
54-
&rbacv1.ClusterRoleBinding{}: clusterScopedConfig,
55-
&extv1.CustomResourceDefinition{}: clusterScopedConfig,
53+
&rbacv1.ClusterRole{}: clusterScopedConfig,
54+
&rbacv1.ClusterRoleBinding{}: clusterScopedConfig,
55+
// TODO: consider using a metadata-only cache for CRDs to reduce memory
56+
// usage now that all CRDs are cached (not just labeled ones).
57+
// See controller-runtime's support for metadata-only informers.
58+
&extv1.CustomResourceDefinition{}: {},
5659
resources.GvkToUnstructured(gvk.RoleBinding): {
5760
Namespaces: roleBindingCacheNamespaces,
5861
},

get_all_manifests.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ declare -A RHOAI_COMPONENT_MANIFESTS=(
6161

6262
# ODH Component Charts
6363
declare -A ODH_COMPONENT_CHARTS=(
64-
["cert-manager-operator"]="opendatahub-io:odh-gitops:main@f600f22bdec2fb6ee24ead6fa04b6b3d896781fb:charts/cert-manager-operator"
65-
["lws-operator"]="opendatahub-io:odh-gitops:main@f600f22bdec2fb6ee24ead6fa04b6b3d896781fb:charts/lws-operator"
66-
["sail-operator"]="opendatahub-io:odh-gitops:main@f600f22bdec2fb6ee24ead6fa04b6b3d896781fb:charts/sail-operator"
67-
["gateway-api"]="opendatahub-io:odh-gitops:main@f600f22bdec2fb6ee24ead6fa04b6b3d896781fb:charts/gateway-api"
64+
["cert-manager-operator"]="opendatahub-io:odh-gitops:main:charts/cert-manager-operator"
65+
["lws-operator"]="opendatahub-io:odh-gitops:main:charts/lws-operator"
66+
["sail-operator"]="opendatahub-io:odh-gitops:main:charts/sail-operator"
67+
["gateway-api"]="opendatahub-io:odh-gitops:main:charts/gateway-api"
6868
)
6969

7070
# RHOAI Component Charts

internal/controller/cloudmanager/azure/azurekubernetesengine_controller.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@ package azure
33
import (
44
"context"
55

6+
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
67
ctrl "sigs.k8s.io/controller-runtime"
78

89
ccmv1alpha1 "github.com/opendatahub-io/opendatahub-operator/v2/api/cloudmanager/azure/v1alpha1"
10+
"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/cloudmanager/common"
911
certmanager "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency/certmanager"
1012
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/cloudmanager"
13+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/handlers"
14+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources"
1115
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler"
1216
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
1317
)
@@ -16,6 +20,11 @@ func NewReconciler(ctx context.Context, mgr ctrl.Manager) error {
1620
resourceID := labels.NormalizePartOfValue(ccmv1alpha1.AzureKubernetesEngineKind)
1721
_, err := reconciler.ReconcilerFor(mgr, &ccmv1alpha1.AzureKubernetesEngine{}).
1822
WithDynamicOwnership().
23+
Watches(
24+
&extv1.CustomResourceDefinition{},
25+
reconciler.WithEventHandler(handlers.ToNamed(ccmv1alpha1.AzureKubernetesEngineInstanceName)),
26+
reconciler.WithPredicates(resources.CreatedOrUpdatedOrDeletedNamed(common.ServiceMonitorCRDName)),
27+
).
1928
WithAction(initialize).
2029
ComposeWith(certmanager.Bootstrap[*ccmv1alpha1.AzureKubernetesEngine](
2130
ccmv1alpha1.AzureKubernetesEngineInstanceName, certmanager.DefaultBootstrapConfig())).

internal/controller/cloudmanager/common/charts.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ func allChartDefs() []chartDef {
7575
"namespace": NamespaceLWSOperator,
7676
}),
7777
},
78-
PreApply: []types.HookFn{},
78+
PreApply: []types.HookFn{SkipCRDIfPresent(ServiceMonitorCRDName)},
7979
},
8080
},
8181
{

internal/controller/cloudmanager/common/hooks.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,29 @@ package common
22

33
import (
44
"context"
5+
"fmt"
56

67
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
78
k8serr "k8s.io/apimachinery/pkg/api/errors"
9+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
810
k8stypes "k8s.io/apimachinery/pkg/types"
911
"sigs.k8s.io/controller-runtime/pkg/client"
1012
logf "sigs.k8s.io/controller-runtime/pkg/log"
1113

14+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster"
15+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk"
1216
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types"
17+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
1318
)
1419

1520
const (
1621
sailOperatorIgnoreAnnotation = "sailoperator.io/ignore"
1722

1823
istioSidecarInjectorWebhook = "istio-sidecar-injector"
1924
istioValidatorWebhook = "istio-validator-istio-system"
25+
26+
// ServiceMonitorCRDName is the fully qualified name of the ServiceMonitor CRD.
27+
ServiceMonitorCRDName = "servicemonitors.monitoring.coreos.com"
2028
)
2129

2230
// AnnotateIstioWebhooksHook returns a PostApply hook that annotates Istio
@@ -50,6 +58,40 @@ func AnnotateIstioWebhooksHook() types.HookFn {
5058
}
5159
}
5260

61+
// SkipCRDIfPresent returns a PreApply hook that removes a CRD from the
62+
// rendered resources if it already exists in the cluster and is not managed
63+
// by this controller. This avoids SSA ForceOwnership conflicts with other
64+
// operators that may own the CRD (e.g., Prometheus Operator for ServiceMonitor).
65+
// If the CRD carries the infrastructure label it is kept in resources so it can
66+
// be updated via SSA. On clusters without the CRD, it is kept in resources and
67+
// deployed normally.
68+
func SkipCRDIfPresent(crdName string) types.HookFn {
69+
return func(ctx context.Context, rr *types.ReconciliationRequest) error {
70+
crd, err := cluster.GetCRD(ctx, rr.Client, crdName)
71+
if err != nil {
72+
if k8serr.IsNotFound(err) {
73+
return nil
74+
}
75+
76+
return fmt.Errorf("failed to check CRD %s: %w", crdName, err)
77+
}
78+
79+
// If the CRD is managed by this controller, keep it in resources
80+
// so it gets updated via SSA.
81+
if _, ok := crd.GetLabels()[labels.InfrastructurePartOf]; ok {
82+
return nil
83+
}
84+
85+
logger := logf.FromContext(ctx)
86+
logger.Info("CRD already exists, skipping installation", "crd", crdName)
87+
88+
return rr.RemoveResources(func(obj *unstructured.Unstructured) bool {
89+
return obj.GetKind() == gvk.CustomResourceDefinition.Kind &&
90+
obj.GetName() == crdName
91+
})
92+
}
93+
}
94+
5395
func ensureSailOperatorIgnoreAnnotation(ctx context.Context, c client.Client, name string, obj client.Object) error {
5496
logger := logf.FromContext(ctx)
5597

internal/controller/cloudmanager/common/hooks_test.go

Lines changed: 129 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,34 @@ import (
55
"context"
66
"testing"
77

8-
"github.com/stretchr/testify/assert"
9-
"github.com/stretchr/testify/require"
108
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
9+
corev1 "k8s.io/api/core/v1"
1110
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
11+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
1212
"k8s.io/apimachinery/pkg/types"
13+
"sigs.k8s.io/controller-runtime/pkg/client"
1314

15+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk"
1416
odhtypes "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/types"
17+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
1518
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/utils/test/fakeclient"
19+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/utils/test/mocks"
20+
21+
. "github.com/onsi/gomega"
22+
)
23+
24+
const (
25+
testCRDGroup = "test.example.com"
26+
testCRDVersion = "v1"
27+
testCRDKind = "Widget"
28+
testCRDComponent = "test"
1629
)
1730

1831
// TODO(OSSM-12397): Remove this test once the sail-operator ships a fix.
1932
func TestAnnotateIstioWebhooksHook(t *testing.T) {
2033
t.Run("should annotate webhooks when they exist without annotation", func(t *testing.T) {
34+
g := NewWithT(t)
35+
2136
mutatingWH := &admissionregistrationv1.MutatingWebhookConfiguration{
2237
ObjectMeta: metav1.ObjectMeta{
2338
Name: istioSidecarInjectorWebhook,
@@ -30,26 +45,28 @@ func TestAnnotateIstioWebhooksHook(t *testing.T) {
3045
}
3146

3247
cli, err := fakeclient.New(fakeclient.WithObjects(mutatingWH, validatingWH))
33-
require.NoError(t, err)
48+
g.Expect(err).ShouldNot(HaveOccurred())
3449

3550
rr := &odhtypes.ReconciliationRequest{Client: cli}
3651

3752
hook := AnnotateIstioWebhooksHook()
3853
err = hook(context.Background(), rr)
39-
require.NoError(t, err)
54+
g.Expect(err).ShouldNot(HaveOccurred())
4055

4156
var updatedMutating admissionregistrationv1.MutatingWebhookConfiguration
4257
err = cli.Get(context.Background(), types.NamespacedName{Name: istioSidecarInjectorWebhook}, &updatedMutating)
43-
require.NoError(t, err)
44-
assert.Equal(t, "true", updatedMutating.Annotations[sailOperatorIgnoreAnnotation])
58+
g.Expect(err).ShouldNot(HaveOccurred())
59+
g.Expect(updatedMutating.Annotations[sailOperatorIgnoreAnnotation]).Should(Equal("true"))
4560

4661
var updatedValidating admissionregistrationv1.ValidatingWebhookConfiguration
4762
err = cli.Get(context.Background(), types.NamespacedName{Name: istioValidatorWebhook}, &updatedValidating)
48-
require.NoError(t, err)
49-
assert.Equal(t, "true", updatedValidating.Annotations[sailOperatorIgnoreAnnotation])
63+
g.Expect(err).ShouldNot(HaveOccurred())
64+
g.Expect(updatedValidating.Annotations[sailOperatorIgnoreAnnotation]).Should(Equal("true"))
5065
})
5166

5267
t.Run("should be a no-op when webhooks already have the annotation", func(t *testing.T) {
68+
g := NewWithT(t)
69+
5370
mutatingWH := &admissionregistrationv1.MutatingWebhookConfiguration{
5471
ObjectMeta: metav1.ObjectMeta{
5572
Name: istioSidecarInjectorWebhook,
@@ -68,32 +85,36 @@ func TestAnnotateIstioWebhooksHook(t *testing.T) {
6885
}
6986

7087
cli, err := fakeclient.New(fakeclient.WithObjects(mutatingWH, validatingWH))
71-
require.NoError(t, err)
88+
g.Expect(err).ShouldNot(HaveOccurred())
7289

7390
rr := &odhtypes.ReconciliationRequest{Client: cli}
7491

7592
hook := AnnotateIstioWebhooksHook()
7693
err = hook(context.Background(), rr)
77-
require.NoError(t, err)
94+
g.Expect(err).ShouldNot(HaveOccurred())
7895

7996
var updatedMutating admissionregistrationv1.MutatingWebhookConfiguration
8097
err = cli.Get(context.Background(), types.NamespacedName{Name: istioSidecarInjectorWebhook}, &updatedMutating)
81-
require.NoError(t, err)
82-
assert.Equal(t, "true", updatedMutating.Annotations[sailOperatorIgnoreAnnotation])
98+
g.Expect(err).ShouldNot(HaveOccurred())
99+
g.Expect(updatedMutating.Annotations[sailOperatorIgnoreAnnotation]).Should(Equal("true"))
83100
})
84101

85102
t.Run("should be a no-op when webhooks do not exist", func(t *testing.T) {
103+
g := NewWithT(t)
104+
86105
cli, err := fakeclient.New()
87-
require.NoError(t, err)
106+
g.Expect(err).ShouldNot(HaveOccurred())
88107

89108
rr := &odhtypes.ReconciliationRequest{Client: cli}
90109

91110
hook := AnnotateIstioWebhooksHook()
92111
err = hook(context.Background(), rr)
93-
require.NoError(t, err)
112+
g.Expect(err).ShouldNot(HaveOccurred())
94113
})
95114

96115
t.Run("should preserve existing annotations when adding the ignore annotation", func(t *testing.T) {
116+
g := NewWithT(t)
117+
97118
mutatingWH := &admissionregistrationv1.MutatingWebhookConfiguration{
98119
ObjectMeta: metav1.ObjectMeta{
99120
Name: istioSidecarInjectorWebhook,
@@ -104,18 +125,107 @@ func TestAnnotateIstioWebhooksHook(t *testing.T) {
104125
}
105126

106127
cli, err := fakeclient.New(fakeclient.WithObjects(mutatingWH))
107-
require.NoError(t, err)
128+
g.Expect(err).ShouldNot(HaveOccurred())
108129

109130
rr := &odhtypes.ReconciliationRequest{Client: cli}
110131

111132
hook := AnnotateIstioWebhooksHook()
112133
err = hook(context.Background(), rr)
113-
require.NoError(t, err)
134+
g.Expect(err).ShouldNot(HaveOccurred())
114135

115136
var updatedMutating admissionregistrationv1.MutatingWebhookConfiguration
116137
err = cli.Get(context.Background(), types.NamespacedName{Name: istioSidecarInjectorWebhook}, &updatedMutating)
117-
require.NoError(t, err)
118-
assert.Equal(t, "true", updatedMutating.Annotations[sailOperatorIgnoreAnnotation])
119-
assert.Equal(t, "existing-value", updatedMutating.Annotations["existing-annotation"])
138+
g.Expect(err).ShouldNot(HaveOccurred())
139+
g.Expect(updatedMutating.Annotations[sailOperatorIgnoreAnnotation]).Should(Equal("true"))
140+
g.Expect(updatedMutating.Annotations["existing-annotation"]).Should(Equal("existing-value"))
141+
})
142+
}
143+
144+
func TestSkipCRDIfPresent(t *testing.T) {
145+
// Use a fake CRD name unrelated to any real resource.
146+
// mocks.NewMockCRD generates the name as "<plural>.<group>".
147+
testCRDName := mocks.NewMockCRD(testCRDGroup, testCRDVersion, testCRDKind, testCRDComponent).Name
148+
149+
t.Run("should keep resources when CRD does not exist in cluster", func(t *testing.T) {
150+
g := NewWithT(t)
151+
152+
cli, err := fakeclient.New()
153+
g.Expect(err).ShouldNot(HaveOccurred())
154+
155+
rr := &odhtypes.ReconciliationRequest{Client: cli}
156+
rr.Resources = newUnstructuredResources(t, cli)
157+
initialCount := len(rr.Resources)
158+
159+
hook := SkipCRDIfPresent(testCRDName)
160+
err = hook(context.Background(), rr)
161+
g.Expect(err).ShouldNot(HaveOccurred())
162+
g.Expect(rr.Resources).Should(HaveLen(initialCount))
120163
})
164+
165+
t.Run("should remove CRD from resources when it exists without infrastructure label", func(t *testing.T) {
166+
g := NewWithT(t)
167+
168+
crd := mocks.NewMockCRD(testCRDGroup, testCRDVersion, testCRDKind, testCRDComponent)
169+
170+
cli, err := fakeclient.New(fakeclient.WithObjects(crd))
171+
g.Expect(err).ShouldNot(HaveOccurred())
172+
173+
rr := &odhtypes.ReconciliationRequest{Client: cli}
174+
rr.Resources = newUnstructuredResources(t, cli)
175+
initialCount := len(rr.Resources)
176+
177+
hook := SkipCRDIfPresent(testCRDName)
178+
err = hook(context.Background(), rr)
179+
g.Expect(err).ShouldNot(HaveOccurred())
180+
g.Expect(rr.Resources).Should(HaveLen(initialCount - 1))
181+
182+
for _, res := range rr.Resources {
183+
if res.GetKind() == gvk.CustomResourceDefinition.Kind {
184+
g.Expect(res.GetName()).ShouldNot(Equal(testCRDName))
185+
}
186+
}
187+
})
188+
189+
t.Run("should keep resources when CRD exists with infrastructure label", func(t *testing.T) {
190+
g := NewWithT(t)
191+
192+
crd := mocks.NewMockCRD(testCRDGroup, testCRDVersion, testCRDKind, testCRDComponent)
193+
crd.Labels[labels.InfrastructurePartOf] = "test-component"
194+
195+
cli, err := fakeclient.New(fakeclient.WithObjects(crd))
196+
g.Expect(err).ShouldNot(HaveOccurred())
197+
198+
rr := &odhtypes.ReconciliationRequest{Client: cli}
199+
rr.Resources = newUnstructuredResources(t, cli)
200+
initialCount := len(rr.Resources)
201+
202+
hook := SkipCRDIfPresent(testCRDName)
203+
err = hook(context.Background(), rr)
204+
g.Expect(err).ShouldNot(HaveOccurred())
205+
g.Expect(rr.Resources).Should(HaveLen(initialCount))
206+
})
207+
}
208+
209+
// newUnstructuredResources creates a slice of unstructured resources containing
210+
// a CRD and a ConfigMap for use in hook tests.
211+
func newUnstructuredResources(t *testing.T, cli client.Client) []unstructured.Unstructured {
212+
t.Helper()
213+
214+
g := NewWithT(t)
215+
216+
rr := &odhtypes.ReconciliationRequest{Client: cli}
217+
218+
err := rr.AddResources(
219+
mocks.NewMockCRD(testCRDGroup, testCRDVersion, testCRDKind, testCRDComponent),
220+
mocks.NewMockCRD("other.example.com", "v1", "Gadget", "other"),
221+
&corev1.ConfigMap{
222+
ObjectMeta: metav1.ObjectMeta{
223+
Name: "test-configmap",
224+
Namespace: "default",
225+
},
226+
},
227+
)
228+
g.Expect(err).ShouldNot(HaveOccurred())
229+
230+
return rr.Resources
121231
}

internal/controller/cloudmanager/coreweave/coreweavekubernetesengine_controller.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@ package coreweave
33
import (
44
"context"
55

6+
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
67
ctrl "sigs.k8s.io/controller-runtime"
78

89
ccmv1alpha1 "github.com/opendatahub-io/opendatahub-operator/v2/api/cloudmanager/coreweave/v1alpha1"
10+
"github.com/opendatahub-io/opendatahub-operator/v2/internal/controller/cloudmanager/common"
911
certmanager "github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/actions/dependency/certmanager"
1012
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/cloudmanager"
13+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/handlers"
14+
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/predicates/resources"
1115
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/controller/reconciler"
1216
"github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels"
1317
)
@@ -16,6 +20,11 @@ func NewReconciler(ctx context.Context, mgr ctrl.Manager) error {
1620
resourceID := labels.NormalizePartOfValue(ccmv1alpha1.CoreWeaveKubernetesEngineKind)
1721
_, err := reconciler.ReconcilerFor(mgr, &ccmv1alpha1.CoreWeaveKubernetesEngine{}).
1822
WithDynamicOwnership().
23+
Watches(
24+
&extv1.CustomResourceDefinition{},
25+
reconciler.WithEventHandler(handlers.ToNamed(ccmv1alpha1.CoreWeaveKubernetesEngineInstanceName)),
26+
reconciler.WithPredicates(resources.CreatedOrUpdatedOrDeletedNamed(common.ServiceMonitorCRDName)),
27+
).
1928
WithAction(initialize).
2029
ComposeWith(certmanager.Bootstrap[*ccmv1alpha1.CoreWeaveKubernetesEngine](
2130
ccmv1alpha1.CoreWeaveKubernetesEngineInstanceName, certmanager.DefaultBootstrapConfig())).

0 commit comments

Comments
 (0)