Skip to content

Commit 3ea4a3d

Browse files
authored
Merge pull request #5395 from nrb/disable-controllers
✨ Allow disabling controllers
2 parents 8849d87 + bbd58d2 commit 3ea4a3d

File tree

4 files changed

+268
-23
lines changed

4 files changed

+268
-23
lines changed

controllers/disabled_controllers.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"fmt"
21+
"strings"
22+
)
23+
24+
// TODO: EKS and ROSA are excluded from this list for the time being because they are behind feature gates.
25+
// They should be added to this list when they graduate.
26+
const (
27+
Unmanaged = "unmanaged"
28+
)
29+
30+
// disabledControllers tracks which controllers are disabled.
31+
// A value of `false` (default) means a controller is _enabled_.
32+
var disabledControllers = map[string]bool{
33+
Unmanaged: false,
34+
}
35+
36+
var notValidErr = "%q is not a valid controller name"
37+
38+
// IsDisabled checks if a controller is disabled.
39+
// If the name provided is not in the map, this will return 'false'.
40+
func IsDisabled(name string) bool {
41+
return disabledControllers[name]
42+
}
43+
44+
// GetValidNames returns a list of controller names that are valid to disable.
45+
func GetValidNames() []string {
46+
ret := make([]string, 0, len(disabledControllers))
47+
for name := range disabledControllers {
48+
ret = append(ret, name)
49+
}
50+
return ret
51+
}
52+
53+
// ValidateNamesAndDisable validates a list of controller names against the known set, and disables valid names.
54+
func ValidateNamesAndDisable(names []string) error {
55+
// This list is not de-deduplicated, so in someone could specify valid names multiple times.
56+
for _, n := range names {
57+
// Make sure we're doing a case-insensitive comaparison
58+
name := strings.ToLower(n)
59+
if _, ok := disabledControllers[name]; !ok {
60+
return fmt.Errorf(notValidErr, name)
61+
}
62+
disabledControllers[name] = true
63+
}
64+
return nil
65+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"slices"
21+
"testing"
22+
23+
. "github.com/onsi/gomega"
24+
)
25+
26+
func TestIsDisabled(t *testing.T) {
27+
g := NewWithT(t)
28+
defer resetDefaults()
29+
30+
// Valid names default to false
31+
g.Expect(IsDisabled(Unmanaged)).To(BeFalse(), "unmanaged should default to false")
32+
33+
// Invalid names are also false
34+
g.Expect(IsDisabled("eks")).To(BeFalse(), "invalid controller name eks should report disabled")
35+
g.Expect(IsDisabled("rosa")).To(BeFalse(), "invalid controller name rosa should report disabled")
36+
37+
// Disable the known names
38+
disabledControllers = map[string]bool{
39+
Unmanaged: true,
40+
}
41+
42+
// Valid names
43+
g.Expect(IsDisabled(Unmanaged)).To(BeTrue(), "unmanaged should have been disabled")
44+
45+
// Invalid names are still false
46+
g.Expect(IsDisabled("eks")).To(BeFalse(), "invalid controller name eks should report disabled")
47+
g.Expect(IsDisabled("rosa")).To(BeFalse(), "invalid controller name rosa should report disabled")
48+
}
49+
50+
func TestGetValidNames(t *testing.T) {
51+
g := NewWithT(t)
52+
defer resetDefaults()
53+
54+
actual := GetValidNames()
55+
// Make sure we have a stable order for testing
56+
slices.Sort(actual)
57+
58+
g.Expect(actual).To(Equal([]string{
59+
Unmanaged,
60+
}), "should only have 1 name")
61+
}
62+
63+
func TestValidateNames(t *testing.T) {
64+
g := NewWithT(t)
65+
defer resetDefaults()
66+
67+
// Valid set of names. Will mutate the map.
68+
err := ValidateNamesAndDisable([]string{Unmanaged})
69+
g.Expect(err).To(BeNil())
70+
g.Expect(disabledControllers[Unmanaged]).To(BeTrue(), "should disable valid name unmanaged")
71+
72+
// TODO: This test should fail and require updating when EKS and ROSA controllers graduate.
73+
err = ValidateNamesAndDisable([]string{"eks", "rosa"})
74+
g.Expect(err.Error()).To(ContainSubstring("eks"), "should error on first invalid entry")
75+
g.Expect(disabledControllers[Unmanaged]).To(BeTrue(), "should not change existing key unmanaged")
76+
g.Expect(disabledControllers["eks"]).To(BeFalse(), "eks should not be in the default map")
77+
}
78+
79+
// resetDefaults returns the disabledControllers map to expected default state.
80+
func resetDefaults() {
81+
disabledControllers = map[string]bool{
82+
Unmanaged: false,
83+
}
84+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Problem
2+
3+
EKS and ROSA are implemented in (relatively) standalone sets of controllers. These are currently grouped with the associated feature gates.
4+
5+
However, the feature gate mechanism has no specifier for GA, meaning that both implementations have stayed in beta.
6+
7+
This proposal supercedes `docs/proposal/20250314-optional-controllers.md`.
8+
9+
# Background
10+
11+
Cluster API Provider for AWS offers a `--feature-flags` argument to manage introduction of new features.
12+
13+
This allows features to move through `alpha` and `beta` phases, but there is not currently a defined path to general availability.
14+
15+
Specifically of interest is controllers for the `EKS` and ROSA`, which offer optional features to the program.
16+
17+
This proposal focuses on how these controllers could be grouped in order to remain optional, while also giving a path to GA.
18+
19+
## Goals
20+
21+
- Allow features spanning groups of controllers to graduate, while also remaining optional
22+
23+
## Non-goals
24+
25+
- Provide a generalized path to general availbility
26+
27+
28+
## Proposed solution
29+
30+
Introduce a new argument, `--disable-controllers`, which allows controllers to be logically grouped as feature sets and turned on and off independently.
31+
32+
The groups and their disabled status will be tracked in a map within a private module, and can be queried via exposed functions.
33+
34+
The map's structure and functions would be as follows.
35+
36+
```go
37+
var disabledControllers = map[string]bool{
38+
ControllerGroupName: false,
39+
}
40+
41+
// IsDisabled checks if a controller is disabled.
42+
// If the name provided is not in the map, this will return 'false'.
43+
func IsDisabled(name string) bool
44+
45+
// GetValidNames returns a list of controller names that are valid to disable.
46+
// Note: these are the entries in the `disabledControllers` variable.
47+
// Used for error and help messages
48+
func GetValidNames() []string
49+
50+
// ValidateNamesAndDisable validates a list of controller names against the known set, and disables valid names.
51+
func ValidateNamesAndDisable(names []string) error
52+
```
53+
54+
Within `main.go`, `ValidateNamesAndDisable` will check against the contents of the `--disable-controllers` slice.
55+
Valid entires will then be marked as `true`, indicating they are disabled.
56+
57+
Before initializing a controller or group of controllers, `IsDisabled` can be checked to determine whether or not they should register with the manager and start.
58+
59+
If a controller is disabled, a log message indicating it is disabled should be emitted.
60+
This will aid users in troubleshooting, should the deployment behave unexpectedly.
61+
62+
## Logical groups
63+
64+
EKS and ROSA are currently behind feature gate checks.
65+
These checks can be updated to instead use `IsDisabled` and entries within the `disabledControllers` map can be made.
66+
67+
## Core controllers and alternatives
68+
69+
The proposal `2025-01-07-aws-self-managed-feature-gates.md` was merged and planned to add `AWSMachine` and `AWSCluster` feature gates.
70+
This merged with lazy concensus and the implementation was proposed in [PR #5284](https://github.com/kubernetes-sigs/cluster-api-provider-aws/pull/5284).
71+
72+
However, maintainers were opposed to moving these core controllers into feature gates, which would have put them into a permanent pre-GA phase.
73+
This leads to a conflict between the implementation and the proposal, which was indeed accepted.
74+
75+
This current proposal includes the ability to disable `AWSMachine` and `AWSCluster` as a compromise, using the `unmanaged` set; it achieves the goals of the original proposal, while addressing the objections to the proposed implementation.

main.go

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"net/http"
2626
_ "net/http/pprof"
2727
"os"
28+
"strings"
2829
"time"
2930

3031
"github.com/spf13/pflag"
@@ -109,6 +110,7 @@ var (
109110
webhookCertDir string
110111
healthAddr string
111112
serviceEndpoints string
113+
disabledControllers []string
112114

113115
// maxEKSSyncPeriod is the maximum allowed duration for the sync-period flag when using EKS. It is set to 10 minutes
114116
// because during resync it will create a new AWS auth token which can a maximum life of 15 minutes and this ensures
@@ -130,6 +132,11 @@ func main() {
130132
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
131133
pflag.Parse()
132134

135+
if err := controllers.ValidateNamesAndDisable(disabledControllers); err != nil {
136+
setupLog.Error(err, "unable to validate disabled controller names")
137+
os.Exit(1)
138+
}
139+
133140
if err := v1.ValidateAndApply(logOptions, nil); err != nil {
134141
setupLog.Error(err, "unable to validate and apply log options")
135142
os.Exit(1)
@@ -298,29 +305,36 @@ func main() {
298305
func setupReconcilersAndWebhooks(ctx context.Context, mgr ctrl.Manager, awsServiceEndpoints []scope.ServiceEndpoint,
299306
externalResourceGC, alternativeGCStrategy bool,
300307
) {
301-
if err := (&controllers.AWSMachineReconciler{
302-
Client: mgr.GetClient(),
303-
Log: ctrl.Log.WithName("controllers").WithName("AWSMachine"),
304-
Recorder: mgr.GetEventRecorderFor("awsmachine-controller"),
305-
Endpoints: awsServiceEndpoints,
306-
WatchFilterValue: watchFilterValue,
307-
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
308-
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsMachineConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
309-
setupLog.Error(err, "unable to create controller", "controller", "AWSMachine")
310-
os.Exit(1)
311-
}
308+
// Default case - unmanaged controllers are enabled.
309+
if !controllers.IsDisabled(controllers.Unmanaged) {
310+
if err := (&controllers.AWSMachineReconciler{
311+
Client: mgr.GetClient(),
312+
Log: ctrl.Log.WithName("controllers").WithName("AWSMachine"),
313+
Recorder: mgr.GetEventRecorderFor("awsmachine-controller"),
314+
Endpoints: awsServiceEndpoints,
315+
WatchFilterValue: watchFilterValue,
316+
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
317+
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsMachineConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
318+
setupLog.Error(err, "unable to create controller", "controller", "AWSMachine")
319+
os.Exit(1)
320+
}
321+
setupLog.Info("controller disabled", "controller", "AWSMachine", "congrollerGroup", controllers.Unmanaged)
312322

313-
if err := (&controllers.AWSClusterReconciler{
314-
Client: mgr.GetClient(),
315-
Recorder: mgr.GetEventRecorderFor("awscluster-controller"),
316-
Endpoints: awsServiceEndpoints,
317-
WatchFilterValue: watchFilterValue,
318-
ExternalResourceGC: externalResourceGC,
319-
AlternativeGCStrategy: alternativeGCStrategy,
320-
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
321-
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
322-
setupLog.Error(err, "unable to create controller", "controller", "AWSCluster")
323-
os.Exit(1)
323+
if err := (&controllers.AWSClusterReconciler{
324+
Client: mgr.GetClient(),
325+
Recorder: mgr.GetEventRecorderFor("awscluster-controller"),
326+
Endpoints: awsServiceEndpoints,
327+
WatchFilterValue: watchFilterValue,
328+
ExternalResourceGC: externalResourceGC,
329+
AlternativeGCStrategy: alternativeGCStrategy,
330+
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
331+
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
332+
setupLog.Error(err, "unable to create controller", "controller", "AWSCluster")
333+
os.Exit(1)
334+
}
335+
} else {
336+
setupLog.Info("controller disabled", "controller", "AWSMachine", "congrollerGroup", controllers.Unmanaged)
337+
setupLog.Info("controller disabled", "controller", "AWSCluster", "controller-group", controllers.Unmanaged)
324338
}
325339

326340
if feature.Gates.Enabled(feature.MachinePool) {
@@ -594,7 +608,7 @@ func initFlags(fs *pflag.FlagSet) {
594608
fs.StringVar(&serviceEndpoints,
595609
"service-endpoints",
596610
"",
597-
"Set custom AWS service endpoins in semi-colon separated format: ${SigningRegion1}:${ServiceID1}=${URL},${ServiceID2}=${URL};${SigningRegion2}...",
611+
"Set custom AWS service endpoints in semi-colon separated format: ${SigningRegion1}:${ServiceID1}=${URL},${ServiceID2}=${URL};${SigningRegion2}...",
598612
)
599613

600614
fs.StringVar(
@@ -604,6 +618,13 @@ func initFlags(fs *pflag.FlagSet) {
604618
fmt.Sprintf("Label value that the controller watches to reconcile cluster-api objects. Label key is always %s. If unspecified, the controller watches for all cluster-api objects.", clusterv1.WatchLabel),
605619
)
606620

621+
fs.StringSliceVar(
622+
&disabledControllers,
623+
"disable-controllers",
624+
nil,
625+
fmt.Sprintf("Sets of controllers that should be disabled for this instance of the controller manager in a comma-separated list. Options are: %q", strings.Join(controllers.GetValidNames(), ",")),
626+
)
627+
607628
logs.AddFlags(fs, logs.SkipLoggingConfigurationFlags())
608629
v1.AddFlags(logOptions, fs)
609630

0 commit comments

Comments
 (0)