Skip to content

Commit 7e46681

Browse files
committed
fix: remediator missing custom resource events
Prior to this change, the remediator watches were only being started for new custom resources after the apply attempt had fully completed. This left some time after the object was applied that the remediator could miss events made by third-parties. Normally, this would be fine, because the remediator would revert any change after the watch was started. But if a DELETE event was missed, the object wouldn't be recreated until the next apply attempt. This change adds a CRD Controller to the remediator that watches CRDs and executes any registered handlers when the CRD is established, unestablished, or deleted. The remediator now registers CRD handlers for each resource type it watches, starting and stopping watchers as soon as possible, without waiting for a new apply attempt or watch update retry.
1 parent efad112 commit 7e46681

File tree

14 files changed

+349
-161
lines changed

14 files changed

+349
-161
lines changed

cmd/reconciler-manager/main.go

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,9 @@ func main() {
9595
}
9696
watchFleetMembership := fleetMembershipCRDExists(dynamicClient, mgr.GetRESTMapper(), &setupLog)
9797

98-
crdController := controllers.NewCRDReconciler(
98+
crdObserver := &controllers.CRDStatusObserver{}
99+
100+
crdController := controllers.NewCRDController(crdObserver, mgr.GetCache(),
99101
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName("CRD"))
100102
if err := crdController.Register(mgr); err != nil {
101103
setupLog.Error(err, "failed to register controller", "controller", "CRD")
@@ -108,11 +110,14 @@ func main() {
108110
mgr.GetClient(), watcher, dynamicClient,
109111
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName(configsync.RepoSyncKind),
110112
mgr.GetScheme())
111-
crdController.SetCRDHandler(configsync.RepoSyncCRDName, func() error {
112-
if err := repoSyncController.Register(mgr, watchFleetMembership); err != nil {
113-
return fmt.Errorf("registering %s controller: %w", configsync.RepoSyncKind, err)
113+
crdObserver.SetHandler(configsync.RepoSyncCRDName, func(_ context.Context, established bool) error {
114+
if established {
115+
if err := repoSyncController.Register(mgr, watchFleetMembership); err != nil {
116+
return fmt.Errorf("registering %s controller: %w", configsync.RepoSyncKind, err)
117+
}
118+
setupLog.Info("RepoSync controller registration successful")
114119
}
115-
setupLog.Info("RepoSync controller registration successful")
120+
// TODO: unregister RepoSync controller if CRD is deleted?
116121
return nil
117122
})
118123
setupLog.Info("RepoSync controller registration scheduled")
@@ -122,11 +127,14 @@ func main() {
122127
mgr.GetClient(), watcher, dynamicClient,
123128
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName(configsync.RootSyncKind),
124129
mgr.GetScheme())
125-
crdController.SetCRDHandler(configsync.RootSyncCRDName, func() error {
126-
if err := rootSyncController.Register(mgr, watchFleetMembership); err != nil {
127-
return fmt.Errorf("registering %s controller: %w", configsync.RootSyncKind, err)
130+
crdObserver.SetHandler(configsync.RootSyncCRDName, func(_ context.Context, established bool) error {
131+
if established {
132+
if err := rootSyncController.Register(mgr, watchFleetMembership); err != nil {
133+
return fmt.Errorf("registering %s controller: %w", configsync.RootSyncKind, err)
134+
}
135+
setupLog.Info("RootSync controller registration successful")
128136
}
129-
setupLog.Info("RootSync controller registration successful")
137+
// TODO: unregister RootSync controller if CRD is deleted?
130138
return nil
131139
})
132140
setupLog.Info("RootSync controller registration scheduled")

manifests/base/kustomization.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ resources:
1919
# Applying hierarchyconfig-crd.yaml allows client-side validation of the HierarchyConfig resources.
2020
- ../hierarchyconfig-crd.yaml
2121
- ../namespace-selector-crd.yaml
22+
- ../ns-reconciler-cluster-scope-cluster-role.yaml
2223
- ../ns-reconciler-base-cluster-role.yaml
2324
- ../root-reconciler-base-cluster-role.yaml
2425
- ../otel-agent-cm.yaml
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# This ClusterRole is used by both root-reconcilers and ns-reconcilers.
16+
# It includes read access for cluster-scope resources.
17+
apiVersion: rbac.authorization.k8s.io/v1
18+
kind: ClusterRole
19+
metadata:
20+
name: configsync.gke.io:ns-reconciler:cluster-scope
21+
labels:
22+
configmanagement.gke.io/system: "true"
23+
configmanagement.gke.io/arch: "csmr"
24+
rules:
25+
- apiGroups: ["apiextensions.k8s.io"]
26+
resources: ["customresourcedefinitions"]
27+
verbs: ["get","list","watch"]

manifests/root-reconciler-base-cluster-role.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,6 @@ rules:
3232
- apiGroups: ["kpt.dev"]
3333
resources: ["resourcegroups/status"]
3434
verbs: ["*"]
35+
- apiGroups: ["apiextensions.k8s.io"]
36+
resources: ["customresourcedefinitions"]
37+
verbs: ["get","list","watch"]

pkg/reconciler/reconciler.go

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
"kpt.dev/configsync/pkg/parse/events"
3838
"kpt.dev/configsync/pkg/reconciler/finalizer"
3939
"kpt.dev/configsync/pkg/reconciler/namespacecontroller"
40+
"kpt.dev/configsync/pkg/reconcilermanager/controllers"
4041
"kpt.dev/configsync/pkg/remediator"
4142
"kpt.dev/configsync/pkg/remediator/conflict"
4243
"kpt.dev/configsync/pkg/remediator/watch"
@@ -219,10 +220,46 @@ func Run(opts Options) {
219220
klog.Fatalf("Error creating rest config for the remediator: %v", err)
220221
}
221222

223+
// Start listening to signals
224+
signalCtx := signals.SetupSignalHandler()
225+
226+
// Create the ControllerManager
227+
ctrl.SetLogger(textlogger.NewLogger(textlogger.NewConfig()))
228+
mgrOptions := ctrl.Options{
229+
Scheme: core.Scheme,
230+
MapperProvider: func(_ *rest.Config, _ *http.Client) (meta.RESTMapper, error) {
231+
return mapper, nil
232+
},
233+
BaseContext: func() context.Context {
234+
return signalCtx
235+
},
236+
}
237+
// For Namespaced Reconcilers, set the default namespace to watch.
238+
// Otherwise, all namespaced informers will watch at the cluster-scope.
239+
// This prevents Namespaced Reconcilers from needing cluster-scoped read
240+
// permissions.
241+
if opts.ReconcilerScope != declared.RootScope {
242+
mgrOptions.Cache.DefaultNamespaces = map[string]cache.Config{
243+
string(opts.ReconcilerScope): {},
244+
}
245+
}
246+
mgr, err := ctrl.NewManager(cfgForWatch, mgrOptions)
247+
if err != nil {
248+
klog.Fatalf("Instantiating Controller Manager: %v", err)
249+
}
250+
251+
crdStatusWatcher := &controllers.CRDStatusObserver{}
252+
253+
crdController := controllers.NewCRDController(crdStatusWatcher, mgr.GetCache(),
254+
textlogger.NewLogger(textlogger.NewConfig()).WithName("controllers").WithName("CRD"))
255+
if err := crdController.Register(mgr); err != nil {
256+
klog.Fatalf("Instantiating CRD Controller: %v", err)
257+
}
258+
222259
conflictHandler := conflict.NewHandler()
223260
fightHandler := fight.NewHandler()
224261

225-
rem, err := remediator.New(opts.ReconcilerScope, opts.SyncName, cfgForWatch, baseApplier, conflictHandler, fightHandler, decls, opts.NumWorkers)
262+
rem, err := remediator.New(opts.ReconcilerScope, opts.SyncName, cfgForWatch, baseApplier, conflictHandler, fightHandler, crdStatusWatcher, decls, opts.NumWorkers)
226263
if err != nil {
227264
klog.Fatalf("Instantiating Remediator: %v", err)
228265
}
@@ -303,34 +340,6 @@ func Run(opts Options) {
303340
parser = parse.NewNamespaceRunner(parseOpts)
304341
}
305342

306-
// Start listening to signals
307-
signalCtx := signals.SetupSignalHandler()
308-
309-
// Create the ControllerManager
310-
ctrl.SetLogger(textlogger.NewLogger(textlogger.NewConfig()))
311-
mgrOptions := ctrl.Options{
312-
Scheme: core.Scheme,
313-
MapperProvider: func(_ *rest.Config, _ *http.Client) (meta.RESTMapper, error) {
314-
return mapper, nil
315-
},
316-
BaseContext: func() context.Context {
317-
return signalCtx
318-
},
319-
}
320-
// For Namespaced Reconcilers, set the default namespace to watch.
321-
// Otherwise, all namespaced informers will watch at the cluster-scope.
322-
// This prevents Namespaced Reconcilers from needing cluster-scoped read
323-
// permissions.
324-
if opts.ReconcilerScope != declared.RootScope {
325-
mgrOptions.Cache.DefaultNamespaces = map[string]cache.Config{
326-
string(opts.ReconcilerScope): {},
327-
}
328-
}
329-
mgr, err := ctrl.NewManager(cfgForWatch, mgrOptions)
330-
if err != nil {
331-
klog.Fatalf("Instantiating Controller Manager: %v", err)
332-
}
333-
334343
// This cancelFunc will be used by the Finalizer to stop all the other
335344
// controllers (Parser & Remediator).
336345
ctx, stopControllers := context.WithCancel(signalCtx)

pkg/reconcilermanager/controllers/build_names.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,25 @@ import (
2222
)
2323

2424
const (
25+
// RepoSyncClusterScopeClusterRoleName is the name of the ClusterRole with
26+
// cluster-scoped read permissions for the namespace reconciler.
27+
// e.g. configsync.gke.io:ns-reconciler:cluster-scope
28+
RepoSyncClusterScopeClusterRoleName = configsync.GroupName + ":" + core.NsReconcilerPrefix + ":cluster-scope"
2529
// RepoSyncBaseClusterRoleName is the namespace reconciler permissions name.
2630
// e.g. configsync.gke.io:ns-reconciler
2731
RepoSyncBaseClusterRoleName = configsync.GroupName + ":" + core.NsReconcilerPrefix
2832
// RootSyncBaseClusterRoleName is the root reconciler base ClusterRole name.
2933
// e.g. configsync.gke.io:root-reconciler
3034
RootSyncBaseClusterRoleName = configsync.GroupName + ":" + core.RootReconcilerPrefix
35+
// RepoSyncClusterScopeClusterRoleBindingName is the name of the default
36+
// ClusterRoleBinding created for RepoSync objects. This contains basic
37+
// cluster-scoped permissions for RepoSync reconcilers
38+
// (e.g. CustomResourceDefinition watch).
39+
RepoSyncClusterScopeClusterRoleBindingName = RepoSyncClusterScopeClusterRoleName
3140
// RepoSyncBaseRoleBindingName is the name of the default RoleBinding created
32-
// for RepoSync objects. This contains basic permissions for RepoSync reconcilers
33-
//(e.g. RepoSync status update).
41+
// for RepoSync objects. This contains basic namespace-scoped permissions
42+
// for RepoSync reconcilers
43+
// (e.g. RepoSync status update).
3444
RepoSyncBaseRoleBindingName = RepoSyncBaseClusterRoleName
3545
// RootSyncLegacyClusterRoleBindingName is the name of the legacy ClusterRoleBinding created
3646
// for RootSync objects. It is always bound to cluster-admin.

0 commit comments

Comments
 (0)