Skip to content

Commit bbd58d2

Browse files
committed
Allow disabling controllers
Start with "unmanaged", or non-hosted control planes. Other controllers that can be optional, such as the EKS, ROSA, and MachinePool ones, are currently managed with feature flags. When they graudate, they should be controlled by the `--disable-controllers` flag.
1 parent 084f726 commit bbd58d2

File tree

4 files changed

+268
-23
lines changed

4 files changed

+268
-23
lines changed

controllers/disabled_controllers.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"fmt"
21+
"strings"
22+
)
23+
24+
// TODO: EKS and ROSA are excluded from this list for the time being because they are behind feature gates.
25+
// They should be added to this list when they graduate.
26+
const (
27+
Unmanaged = "unmanaged"
28+
)
29+
30+
// disabledControllers tracks which controllers are disabled.
31+
// A value of `false` (default) means a controller is _enabled_.
32+
var disabledControllers = map[string]bool{
33+
Unmanaged: false,
34+
}
35+
36+
var notValidErr = "%q is not a valid controller name"
37+
38+
// IsDisabled checks if a controller is disabled.
39+
// If the name provided is not in the map, this will return 'false'.
40+
func IsDisabled(name string) bool {
41+
return disabledControllers[name]
42+
}
43+
44+
// GetValidNames returns a list of controller names that are valid to disable.
45+
func GetValidNames() []string {
46+
ret := make([]string, 0, len(disabledControllers))
47+
for name := range disabledControllers {
48+
ret = append(ret, name)
49+
}
50+
return ret
51+
}
52+
53+
// ValidateNamesAndDisable validates a list of controller names against the known set, and disables valid names.
54+
func ValidateNamesAndDisable(names []string) error {
55+
// This list is not de-deduplicated, so in someone could specify valid names multiple times.
56+
for _, n := range names {
57+
// Make sure we're doing a case-insensitive comaparison
58+
name := strings.ToLower(n)
59+
if _, ok := disabledControllers[name]; !ok {
60+
return fmt.Errorf(notValidErr, name)
61+
}
62+
disabledControllers[name] = true
63+
}
64+
return nil
65+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package controllers
18+
19+
import (
20+
"slices"
21+
"testing"
22+
23+
. "github.com/onsi/gomega"
24+
)
25+
26+
func TestIsDisabled(t *testing.T) {
27+
g := NewWithT(t)
28+
defer resetDefaults()
29+
30+
// Valid names default to false
31+
g.Expect(IsDisabled(Unmanaged)).To(BeFalse(), "unmanaged should default to false")
32+
33+
// Invalid names are also false
34+
g.Expect(IsDisabled("eks")).To(BeFalse(), "invalid controller name eks should report disabled")
35+
g.Expect(IsDisabled("rosa")).To(BeFalse(), "invalid controller name rosa should report disabled")
36+
37+
// Disable the known names
38+
disabledControllers = map[string]bool{
39+
Unmanaged: true,
40+
}
41+
42+
// Valid names
43+
g.Expect(IsDisabled(Unmanaged)).To(BeTrue(), "unmanaged should have been disabled")
44+
45+
// Invalid names are still false
46+
g.Expect(IsDisabled("eks")).To(BeFalse(), "invalid controller name eks should report disabled")
47+
g.Expect(IsDisabled("rosa")).To(BeFalse(), "invalid controller name rosa should report disabled")
48+
}
49+
50+
func TestGetValidNames(t *testing.T) {
51+
g := NewWithT(t)
52+
defer resetDefaults()
53+
54+
actual := GetValidNames()
55+
// Make sure we have a stable order for testing
56+
slices.Sort(actual)
57+
58+
g.Expect(actual).To(Equal([]string{
59+
Unmanaged,
60+
}), "should only have 1 name")
61+
}
62+
63+
func TestValidateNames(t *testing.T) {
64+
g := NewWithT(t)
65+
defer resetDefaults()
66+
67+
// Valid set of names. Will mutate the map.
68+
err := ValidateNamesAndDisable([]string{Unmanaged})
69+
g.Expect(err).To(BeNil())
70+
g.Expect(disabledControllers[Unmanaged]).To(BeTrue(), "should disable valid name unmanaged")
71+
72+
// TODO: This test should fail and require updating when EKS and ROSA controllers graduate.
73+
err = ValidateNamesAndDisable([]string{"eks", "rosa"})
74+
g.Expect(err.Error()).To(ContainSubstring("eks"), "should error on first invalid entry")
75+
g.Expect(disabledControllers[Unmanaged]).To(BeTrue(), "should not change existing key unmanaged")
76+
g.Expect(disabledControllers["eks"]).To(BeFalse(), "eks should not be in the default map")
77+
}
78+
79+
// resetDefaults returns the disabledControllers map to expected default state.
80+
func resetDefaults() {
81+
disabledControllers = map[string]bool{
82+
Unmanaged: false,
83+
}
84+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Problem
2+
3+
EKS and ROSA are implemented in (relatively) standalone sets of controllers. These are currently grouped with the associated feature gates.
4+
5+
However, the feature gate mechanism has no specifier for GA, meaning that both implementations have stayed in beta.
6+
7+
This proposal supercedes `docs/proposal/20250314-optional-controllers.md`.
8+
9+
# Background
10+
11+
Cluster API Provider for AWS offers a `--feature-flags` argument to manage introduction of new features.
12+
13+
This allows features to move through `alpha` and `beta` phases, but there is not currently a defined path to general availability.
14+
15+
Specifically of interest is controllers for the `EKS` and ROSA`, which offer optional features to the program.
16+
17+
This proposal focuses on how these controllers could be grouped in order to remain optional, while also giving a path to GA.
18+
19+
## Goals
20+
21+
- Allow features spanning groups of controllers to graduate, while also remaining optional
22+
23+
## Non-goals
24+
25+
- Provide a generalized path to general availbility
26+
27+
28+
## Proposed solution
29+
30+
Introduce a new argument, `--disable-controllers`, which allows controllers to be logically grouped as feature sets and turned on and off independently.
31+
32+
The groups and their disabled status will be tracked in a map within a private module, and can be queried via exposed functions.
33+
34+
The map's structure and functions would be as follows.
35+
36+
```go
37+
var disabledControllers = map[string]bool{
38+
ControllerGroupName: false,
39+
}
40+
41+
// IsDisabled checks if a controller is disabled.
42+
// If the name provided is not in the map, this will return 'false'.
43+
func IsDisabled(name string) bool
44+
45+
// GetValidNames returns a list of controller names that are valid to disable.
46+
// Note: these are the entries in the `disabledControllers` variable.
47+
// Used for error and help messages
48+
func GetValidNames() []string
49+
50+
// ValidateNamesAndDisable validates a list of controller names against the known set, and disables valid names.
51+
func ValidateNamesAndDisable(names []string) error
52+
```
53+
54+
Within `main.go`, `ValidateNamesAndDisable` will check against the contents of the `--disable-controllers` slice.
55+
Valid entires will then be marked as `true`, indicating they are disabled.
56+
57+
Before initializing a controller or group of controllers, `IsDisabled` can be checked to determine whether or not they should register with the manager and start.
58+
59+
If a controller is disabled, a log message indicating it is disabled should be emitted.
60+
This will aid users in troubleshooting, should the deployment behave unexpectedly.
61+
62+
## Logical groups
63+
64+
EKS and ROSA are currently behind feature gate checks.
65+
These checks can be updated to instead use `IsDisabled` and entries within the `disabledControllers` map can be made.
66+
67+
## Core controllers and alternatives
68+
69+
The proposal `2025-01-07-aws-self-managed-feature-gates.md` was merged and planned to add `AWSMachine` and `AWSCluster` feature gates.
70+
This merged with lazy concensus and the implementation was proposed in [PR #5284](https://github.com/kubernetes-sigs/cluster-api-provider-aws/pull/5284).
71+
72+
However, maintainers were opposed to moving these core controllers into feature gates, which would have put them into a permanent pre-GA phase.
73+
This leads to a conflict between the implementation and the proposal, which was indeed accepted.
74+
75+
This current proposal includes the ability to disable `AWSMachine` and `AWSCluster` as a compromise, using the `unmanaged` set; it achieves the goals of the original proposal, while addressing the objections to the proposed implementation.

main.go

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"net/http"
2626
_ "net/http/pprof"
2727
"os"
28+
"strings"
2829
"time"
2930

3031
"github.com/spf13/pflag"
@@ -109,6 +110,7 @@ var (
109110
webhookCertDir string
110111
healthAddr string
111112
serviceEndpoints string
113+
disabledControllers []string
112114

113115
// maxEKSSyncPeriod is the maximum allowed duration for the sync-period flag when using EKS. It is set to 10 minutes
114116
// because during resync it will create a new AWS auth token which can a maximum life of 15 minutes and this ensures
@@ -130,6 +132,11 @@ func main() {
130132
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
131133
pflag.Parse()
132134

135+
if err := controllers.ValidateNamesAndDisable(disabledControllers); err != nil {
136+
setupLog.Error(err, "unable to validate disabled controller names")
137+
os.Exit(1)
138+
}
139+
133140
if err := v1.ValidateAndApply(logOptions, nil); err != nil {
134141
setupLog.Error(err, "unable to validate and apply log options")
135142
os.Exit(1)
@@ -298,29 +305,36 @@ func main() {
298305
func setupReconcilersAndWebhooks(ctx context.Context, mgr ctrl.Manager, awsServiceEndpoints []scope.ServiceEndpoint,
299306
externalResourceGC, alternativeGCStrategy bool,
300307
) {
301-
if err := (&controllers.AWSMachineReconciler{
302-
Client: mgr.GetClient(),
303-
Log: ctrl.Log.WithName("controllers").WithName("AWSMachine"),
304-
Recorder: mgr.GetEventRecorderFor("awsmachine-controller"),
305-
Endpoints: awsServiceEndpoints,
306-
WatchFilterValue: watchFilterValue,
307-
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
308-
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsMachineConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
309-
setupLog.Error(err, "unable to create controller", "controller", "AWSMachine")
310-
os.Exit(1)
311-
}
308+
// Default case - unmanaged controllers are enabled.
309+
if !controllers.IsDisabled(controllers.Unmanaged) {
310+
if err := (&controllers.AWSMachineReconciler{
311+
Client: mgr.GetClient(),
312+
Log: ctrl.Log.WithName("controllers").WithName("AWSMachine"),
313+
Recorder: mgr.GetEventRecorderFor("awsmachine-controller"),
314+
Endpoints: awsServiceEndpoints,
315+
WatchFilterValue: watchFilterValue,
316+
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
317+
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsMachineConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
318+
setupLog.Error(err, "unable to create controller", "controller", "AWSMachine")
319+
os.Exit(1)
320+
}
321+
setupLog.Info("controller disabled", "controller", "AWSMachine", "congrollerGroup", controllers.Unmanaged)
312322

313-
if err := (&controllers.AWSClusterReconciler{
314-
Client: mgr.GetClient(),
315-
Recorder: mgr.GetEventRecorderFor("awscluster-controller"),
316-
Endpoints: awsServiceEndpoints,
317-
WatchFilterValue: watchFilterValue,
318-
ExternalResourceGC: externalResourceGC,
319-
AlternativeGCStrategy: alternativeGCStrategy,
320-
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
321-
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
322-
setupLog.Error(err, "unable to create controller", "controller", "AWSCluster")
323-
os.Exit(1)
323+
if err := (&controllers.AWSClusterReconciler{
324+
Client: mgr.GetClient(),
325+
Recorder: mgr.GetEventRecorderFor("awscluster-controller"),
326+
Endpoints: awsServiceEndpoints,
327+
WatchFilterValue: watchFilterValue,
328+
ExternalResourceGC: externalResourceGC,
329+
AlternativeGCStrategy: alternativeGCStrategy,
330+
TagUnmanagedNetworkResources: feature.Gates.Enabled(feature.TagUnmanagedNetworkResources),
331+
}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: awsClusterConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil {
332+
setupLog.Error(err, "unable to create controller", "controller", "AWSCluster")
333+
os.Exit(1)
334+
}
335+
} else {
336+
setupLog.Info("controller disabled", "controller", "AWSMachine", "congrollerGroup", controllers.Unmanaged)
337+
setupLog.Info("controller disabled", "controller", "AWSCluster", "controller-group", controllers.Unmanaged)
324338
}
325339

326340
if feature.Gates.Enabled(feature.MachinePool) {
@@ -594,7 +608,7 @@ func initFlags(fs *pflag.FlagSet) {
594608
fs.StringVar(&serviceEndpoints,
595609
"service-endpoints",
596610
"",
597-
"Set custom AWS service endpoins in semi-colon separated format: ${SigningRegion1}:${ServiceID1}=${URL},${ServiceID2}=${URL};${SigningRegion2}...",
611+
"Set custom AWS service endpoints in semi-colon separated format: ${SigningRegion1}:${ServiceID1}=${URL},${ServiceID2}=${URL};${SigningRegion2}...",
598612
)
599613

600614
fs.StringVar(
@@ -604,6 +618,13 @@ func initFlags(fs *pflag.FlagSet) {
604618
fmt.Sprintf("Label value that the controller watches to reconcile cluster-api objects. Label key is always %s. If unspecified, the controller watches for all cluster-api objects.", clusterv1.WatchLabel),
605619
)
606620

621+
fs.StringSliceVar(
622+
&disabledControllers,
623+
"disable-controllers",
624+
nil,
625+
fmt.Sprintf("Sets of controllers that should be disabled for this instance of the controller manager in a comma-separated list. Options are: %q", strings.Join(controllers.GetValidNames(), ",")),
626+
)
627+
607628
logs.AddFlags(fs, logs.SkipLoggingConfigurationFlags())
608629
v1.AddFlags(logOptions, fs)
609630

0 commit comments

Comments
 (0)