Skip to content

Commit baf309a

Browse files
authored
feat: allow to configure the namespaces to watch (#1351)
**Description**

Allows configuring the namespaces watched by the controller, to minimize the permissions and resources needed. The following properties have been added to the Helm values:

```yaml
controller:
  # Configuration for how the Kubernetes controllers watch the different resources.
  watch:
    # Namespaces to watch. An empty list means to watch all namespaces.
    # Default is an empty list, to watch all namespaces.
    namespaces: []
    # Sync timeout for the Kubernetes cache. If the cache is not synced within this time, the controller will exit.
    # Default is 2 minutes.
    cacheSyncTimeout: 2m
```

**Related Issues/PRs (if applicable)**

Fixes #1334

**Special notes for reviewers (if applicable)**

N/A

---------

Signed-off-by: Ignasi Barrera <[email protected]>
1 parent df7280c commit baf309a

File tree

11 files changed

+474
-24
lines changed

11 files changed

+474
-24
lines changed

.github/workflows/build_and_test.yaml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,36 @@ jobs:
321321
# do not depend on the EG version.
322322
EG_VERSION: v1.5.0
323323

324+
test_e2e_namespaced:
325+
needs: changes
326+
if: ${{ needs.changes.outputs.code == 'true' }}
327+
name: E2E Test for Namespaced Controller
328+
# TODO: make it possible to run this job on macOS as well, which is a bit tricky because nested
329+
# virtualization is not supported on macOS runners.
330+
# E.g. Use https://github.com/douglascamata/setup-docker-macos-action per the comment in
331+
# https://github.com/actions/runner-images/issues/17#issuecomment-1971073406
332+
runs-on: ubuntu-latest
333+
steps:
334+
- uses: actions/checkout@v4
335+
- uses: actions/setup-go@v5
336+
with:
337+
cache: false
338+
go-version-file: go.mod
339+
- uses: actions/cache@v4
340+
with:
341+
path: |
342+
~/.cache/go-build
343+
~/.cache/golangci-lint
344+
~/go/pkg/mod
345+
~/go/bin
346+
key: e2e-test-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }}
347+
- uses: docker/setup-buildx-action@v3
348+
- run: make test-e2e-namespaced
349+
env:
350+
# We only need to test with the latest stable version of EG, since these e2e tests
351+
# do not depend on the EG version.
352+
EG_VERSION: v1.5.0
353+
324354
test_e2e_aigw:
325355
needs: changes
326356
name: E2E Test for aigw CLI

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ ACCESS_LOG_PATH
1313

1414
tests/e2e/logs/
1515
tests/e2e-inference-extension/logs/
16+
tests/e2e-namespaced/logs/
1617
tests/e2e-upgrade/logs/
1718

1819
# Files and directories to ignore in the site directory

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,12 @@ test-e2e-upgrade: build-e2e
187187
@echo "Run E2E upgrade tests"
188188
@go test -v ./tests/e2e-upgrade/... $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS)
189189

190+
# This runs the end-to-end namespaced tests for the controller and extproc with a local kind cluster.
191+
.PHONY: test-e2e-namespaced
192+
test-e2e-namespaced: build-e2e
193+
@echo "Run E2E namespaced tests"
194+
@go test -v ./tests/e2e-namespaced/... $(GO_TEST_ARGS) $(GO_TEST_E2E_ARGS)
195+
190196
# This runs the MCP end-to-end tests.
191197
.PHONY: test-e2e-aigw
192198
test-e2e-aigw: build.aigw ## Run MCP end-to-end tests.

cmd/controller/main.go

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414
"net"
1515
"os"
1616
"path/filepath"
17+
"strings"
18+
"time"
1719

1820
egextension "github.com/envoyproxy/gateway/proto/extension"
1921
"go.uber.org/zap/zapcore"
@@ -22,7 +24,9 @@ import (
2224
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
2325
corev1 "k8s.io/api/core/v1"
2426
ctrl "sigs.k8s.io/controller-runtime"
27+
"sigs.k8s.io/controller-runtime/pkg/cache"
2528
"sigs.k8s.io/controller-runtime/pkg/client"
29+
"sigs.k8s.io/controller-runtime/pkg/config"
2630
"sigs.k8s.io/controller-runtime/pkg/log/zap"
2731
"sigs.k8s.io/controller-runtime/pkg/webhook"
2832

@@ -51,7 +55,9 @@ type flags struct {
5155
// extProcMaxRecvMsgSize is the maximum message size in bytes that the gRPC server can receive.
5256
extProcMaxRecvMsgSize int
5357
// maxRecvMsgSize is the maximum message size in bytes that the gRPC extension server can receive.
54-
maxRecvMsgSize int
58+
maxRecvMsgSize int
59+
watchNamespaces []string
60+
cacheSyncTimeout time.Duration
5561
}
5662

5763
// parsePullPolicy parses string into a k8s PullPolicy.
@@ -64,6 +70,18 @@ func parsePullPolicy(s string) (corev1.PullPolicy, error) {
6470
}
6571
}
6672

73+
// parseWatchNamespaces parses a comma-separated list of namespaces into a slice of strings.
// Each entry is trimmed of surrounding whitespace and entries that are empty after trimming
// are skipped. Duplicate entries are kept as-is. The result is nil (not an empty slice) when
// the input contains no non-empty entry, e.g. "" or ",,,".
74+
func parseWatchNamespaces(s string) []string {
75+
var namespaces []string
76+
for _, n := range strings.Split(s, ",") {
77+
ns := strings.TrimSpace(n)
78+
if ns != "" {
79+
namespaces = append(namespaces, ns)
80+
}
81+
}
82+
return namespaces
83+
}
84+
6785
// parseAndValidateFlags parses the command-line arguments provided in args,
6886
// validates them, and returns the parsed configuration.
6987
func parseAndValidateFlags(args []string) (flags, error) {
@@ -159,6 +177,16 @@ func parseAndValidateFlags(args []string) (flags, error) {
159177
4*1024*1024,
160178
"Maximum message size in bytes that the gRPC extension server can receive. Default is 4MB.",
161179
)
180+
watchNamespaces := fs.String(
181+
"watchNamespaces",
182+
"",
183+
"Comma-separated list of namespaces to watch. If not set, the controller watches all namespaces.",
184+
)
185+
cacheSyncTimeout := fs.Duration(
186+
"cacheSyncTimeout",
187+
2*time.Minute, // This is the controller-runtime default
188+
"Maximum time to wait for k8s caches to sync",
189+
)
162190

163191
if err := fs.Parse(args); err != nil {
164192
err = fmt.Errorf("failed to parse flags: %w", err)
@@ -238,41 +266,47 @@ func parseAndValidateFlags(args []string) (flags, error) {
238266
extProcImagePullSecrets: *extProcImagePullSecrets,
239267
extProcMaxRecvMsgSize: *extProcMaxRecvMsgSize,
240268
maxRecvMsgSize: *maxRecvMsgSize,
269+
watchNamespaces: parseWatchNamespaces(*watchNamespaces),
270+
cacheSyncTimeout: *cacheSyncTimeout,
241271
}, nil
242272
}
243273

244274
func main() {
245275
setupLog := ctrl.Log.WithName("setup")
246276

247-
flags, err := parseAndValidateFlags(os.Args[1:])
277+
parsedFlags, err := parseAndValidateFlags(os.Args[1:])
248278
if err != nil {
249279
setupLog.Error(err, "failed to parse and validate flags")
250280
os.Exit(1)
251281
}
252282

253283
// Warn if deprecated flag is being used.
254-
if flags.metricsRequestHeaderLabels != "" {
284+
if parsedFlags.metricsRequestHeaderLabels != "" {
255285
setupLog.Info("The --metricsRequestHeaderLabels flag is deprecated and will be removed in a future release. Please use --metricsRequestHeaderAttributes instead.")
256286
}
257287

258-
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{Development: true, Level: flags.logLevel})))
288+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{Development: true, Level: parsedFlags.logLevel})))
259289
k8sConfig := ctrl.GetConfigOrDie()
260290

261-
lis, err := net.Listen("tcp", flags.extensionServerPort)
291+
lis, err := net.Listen("tcp", parsedFlags.extensionServerPort)
262292
if err != nil {
263-
setupLog.Error(err, "failed to listen", "port", flags.extensionServerPort)
293+
setupLog.Error(err, "failed to listen", "port", parsedFlags.extensionServerPort)
264294
os.Exit(1)
265295
}
266296

297+
setupLog.Info("configuring kubernetes cache", "watch-namespaces", parsedFlags.watchNamespaces, "sync-timeout", parsedFlags.cacheSyncTimeout)
298+
267299
ctx := ctrl.SetupSignalHandler()
268300
mgrOpts := ctrl.Options{
301+
Cache: setupCache(parsedFlags),
302+
Controller: config.Controller{CacheSyncTimeout: parsedFlags.cacheSyncTimeout},
269303
Scheme: controller.Scheme,
270-
LeaderElection: flags.enableLeaderElection,
304+
LeaderElection: parsedFlags.enableLeaderElection,
271305
LeaderElectionID: "envoy-ai-gateway-controller",
272306
WebhookServer: webhook.NewServer(webhook.Options{
273-
CertDir: flags.tlsCertDir,
274-
CertName: flags.tlsCertName,
275-
KeyName: flags.tlsKeyName,
307+
CertDir: parsedFlags.tlsCertDir,
308+
CertName: parsedFlags.tlsCertName,
309+
KeyName: parsedFlags.tlsKeyName,
276310
Port: 9443,
277311
}),
278312
}
@@ -287,14 +321,14 @@ func main() {
287321
setupLog.Error(err, "failed to create client")
288322
os.Exit(1)
289323
}
290-
if err := maybePatchAdmissionWebhook(ctx, cli, filepath.Join(flags.tlsCertDir, flags.caBundleName)); err != nil {
324+
if err := maybePatchAdmissionWebhook(ctx, cli, filepath.Join(parsedFlags.tlsCertDir, parsedFlags.caBundleName)); err != nil {
291325
setupLog.Error(err, "failed to patch admission webhook")
292326
os.Exit(1)
293327
}
294328

295329
// Start the extension server running alongside the controller.
296330
const extProcUDSPath = "/etc/ai-gateway-extproc-uds/run.sock"
297-
s := grpc.NewServer(grpc.MaxRecvMsgSize(flags.maxRecvMsgSize))
331+
s := grpc.NewServer(grpc.MaxRecvMsgSize(parsedFlags.maxRecvMsgSize))
298332
extSrv := extensionserver.New(mgr.GetClient(), ctrl.Log, extProcUDSPath, false)
299333
egextension.RegisterEnvoyGatewayExtensionServer(s, extSrv)
300334
grpc_health_v1.RegisterHealthServer(s, extSrv)
@@ -310,17 +344,17 @@ func main() {
310344

311345
// Start the controller.
312346
if err := controller.StartControllers(ctx, mgr, k8sConfig, ctrl.Log.WithName("controller"), controller.Options{
313-
ExtProcImage: flags.extProcImage,
314-
ExtProcImagePullPolicy: flags.extProcImagePullPolicy,
315-
ExtProcLogLevel: flags.extProcLogLevel,
316-
EnableLeaderElection: flags.enableLeaderElection,
347+
ExtProcImage: parsedFlags.extProcImage,
348+
ExtProcImagePullPolicy: parsedFlags.extProcImagePullPolicy,
349+
ExtProcLogLevel: parsedFlags.extProcLogLevel,
350+
EnableLeaderElection: parsedFlags.enableLeaderElection,
317351
UDSPath: extProcUDSPath,
318-
MetricsRequestHeaderAttributes: flags.metricsRequestHeaderAttributes,
319-
TracingRequestHeaderAttributes: flags.spanRequestHeaderAttributes,
320-
RootPrefix: flags.rootPrefix,
321-
ExtProcExtraEnvVars: flags.extProcExtraEnvVars,
322-
ExtProcImagePullSecrets: flags.extProcImagePullSecrets,
323-
ExtProcMaxRecvMsgSize: flags.extProcMaxRecvMsgSize,
352+
MetricsRequestHeaderAttributes: parsedFlags.metricsRequestHeaderAttributes,
353+
TracingRequestHeaderAttributes: parsedFlags.spanRequestHeaderAttributes,
354+
RootPrefix: parsedFlags.rootPrefix,
355+
ExtProcExtraEnvVars: parsedFlags.extProcExtraEnvVars,
356+
ExtProcImagePullSecrets: parsedFlags.extProcImagePullSecrets,
357+
ExtProcMaxRecvMsgSize: parsedFlags.extProcMaxRecvMsgSize,
324358
}); err != nil {
325359
setupLog.Error(err, "failed to start controller")
326360
}
@@ -356,3 +390,19 @@ func maybePatchAdmissionWebhook(ctx context.Context, cli client.Client, bundlePa
356390
}
357391
return nil
358392
}
393+
394+
// setupCache sets up the cache options based on the provided flags.
// When f.watchNamespaces is non-empty, DefaultNamespaces gets one entry per listed namespace,
// each mapped to an empty cache.Config. Otherwise DefaultNamespaces is left nil, which is the
// "watch all namespaces" case described by the --watchNamespaces flag.
// DefaultTransform is always set to cache.TransformStripManagedFields().
395+
func setupCache(f flags) cache.Options {
396+
var namespaceCacheConfig map[string]cache.Config
397+
if len(f.watchNamespaces) > 0 {
398+
namespaceCacheConfig = make(map[string]cache.Config, len(f.watchNamespaces))
399+
for _, ns := range f.watchNamespaces {
400+
namespaceCacheConfig[ns] = cache.Config{}
401+
}
402+
}
403+
404+
return cache.Options{
405+
DefaultNamespaces: namespaceCacheConfig,
406+
DefaultTransform: cache.TransformStripManagedFields(),
407+
}
408+
}

cmd/controller/main_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ package main
88
import (
99
"os"
1010
"testing"
11+
"time"
1112

1213
"github.com/stretchr/testify/require"
1314
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
1415
corev1 "k8s.io/api/core/v1"
1516
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17+
"sigs.k8s.io/controller-runtime/pkg/cache"
1618
"sigs.k8s.io/controller-runtime/pkg/client"
1719
"sigs.k8s.io/controller-runtime/pkg/client/fake"
1820
)
@@ -51,6 +53,8 @@ func Test_parseAndValidateFlags(t *testing.T) {
5153
tc.dash + "extProcExtraEnvVars=OTEL_SERVICE_NAME=test;OTEL_TRACES_EXPORTER=console",
5254
tc.dash + "spanRequestHeaderAttributes=x-session-id:session.id",
5355
tc.dash + "maxRecvMsgSize=33554432",
56+
tc.dash + "watchNamespaces=default,envoy-ai-gateway-system",
57+
tc.dash + "cacheSyncTimeout=5m",
5458
}
5559
f, err := parseAndValidateFlags(args)
5660
require.Equal(t, "debug", f.extProcLogLevel)
@@ -62,6 +66,8 @@ func Test_parseAndValidateFlags(t *testing.T) {
6266
require.Equal(t, "OTEL_SERVICE_NAME=test;OTEL_TRACES_EXPORTER=console", f.extProcExtraEnvVars)
6367
require.Equal(t, "x-session-id:session.id", f.spanRequestHeaderAttributes)
6468
require.Equal(t, 32*1024*1024, f.maxRecvMsgSize)
69+
require.Equal(t, []string{"default", "envoy-ai-gateway-system"}, f.watchNamespaces)
70+
require.Equal(t, 5*time.Minute, f.cacheSyncTimeout)
6571
require.NoError(t, err)
6672
})
6773
}
@@ -243,3 +249,53 @@ func Test_parseAndValidateFlags_extProcImagePullSecrets(t *testing.T) {
243249
})
244250
}
245251
}
252+
253+
// Test_parseAndValidateFlags_watchNamespaces checks that parseAndValidateFlags turns the
// --watchNamespaces flag value into the expected namespace slice. Cases cover the flag being
// unset, single and multiple namespaces, surrounding whitespace, empty entries between commas,
// and comma-only input; inputs with no usable namespace are expected to yield nil.
func Test_parseAndValidateFlags_watchNamespaces(t *testing.T) {
254+
tests := []struct {
255+
name string
256+
flags []string
257+
expected []string
258+
}{
259+
{"no watch namespaces", []string{}, nil},
260+
{"single watch namespace", []string{"--watchNamespaces=default"}, []string{"default"}},
261+
{"multiple watch namespaces", []string{"--watchNamespaces=default,envoy-ai-gateway-system"}, []string{"default", "envoy-ai-gateway-system"}},
262+
{"watch namespaces with spaces", []string{"--watchNamespaces= default , envoy-ai-gateway-system "}, []string{"default", "envoy-ai-gateway-system"}},
263+
{"empty string", []string{"--watchNamespaces="}, nil},
264+
{"empty namespace names", []string{"--watchNamespaces=default,,envoy-ai-gateway-system"}, []string{"default", "envoy-ai-gateway-system"}},
265+
{"only commas", []string{"--watchNamespaces=,,,"}, nil},
266+
}
267+
268+
for _, tt := range tests {
269+
t.Run(tt.name, func(t *testing.T) {
270+
f, err := parseAndValidateFlags(tt.flags)
271+
require.NoError(t, err)
272+
require.Equal(t, tt.expected, f.watchNamespaces)
273+
})
274+
}
275+
}
276+
277+
// TestSetupCache checks that setupCache always sets DefaultTransform, leaves DefaultNamespaces
// nil when no watch namespaces are given (zero-value flags or an empty slice), and otherwise
// creates one empty cache.Config entry per watched namespace.
func TestSetupCache(t *testing.T) {
278+
t.Run("default", func(t *testing.T) {
279+
c := setupCache(flags{})
280+
281+
require.NotNil(t, c.DefaultTransform)
282+
require.Nil(t, c.DefaultNamespaces)
283+
})
284+
285+
t.Run("empty watch namespaces", func(t *testing.T) {
286+
c := setupCache(flags{watchNamespaces: []string{}})
287+
288+
require.NotNil(t, c.DefaultTransform)
289+
require.Nil(t, c.DefaultNamespaces)
290+
})
291+
292+
t.Run("watch namespaces", func(t *testing.T) {
293+
c := setupCache(flags{watchNamespaces: []string{"default", "envoy-ai-gateway-system"}})
294+
295+
require.NotNil(t, c.DefaultTransform)
296+
require.Equal(t, map[string]cache.Config{
297+
"default": {},
298+
"envoy-ai-gateway-system": {},
299+
}, c.DefaultNamespaces)
300+
})
301+
}

manifests/charts/ai-gateway-helm/templates/deployment.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ spec:
6868
{{- if ne .Values.controller.maxRecvMsgSize nil }}
6969
- --maxRecvMsgSize={{ .Values.controller.maxRecvMsgSize }}
7070
{{- end }}
71+
- --cacheSyncTimeout={{ .Values.controller.watch.cacheSyncTimeout }}
72+
- --watchNamespaces={{ join "," .Values.controller.watch.namespaces }}
7173
livenessProbe:
7274
grpc:
7375
port: 1063

manifests/charts/ai-gateway-helm/values.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ controller:
6767
leaderElection:
6868
enabled: true
6969

70+
# Configuration for how the Kubernetes controllers watch the different resources.
71+
watch:
72+
# Namespaces to watch. An empty list means to watch all namespaces.
73+
# Default is an empty list, to watch all namespaces.
74+
namespaces: []
75+
# Sync timeout for the Kubernetes cache. If the cache is not synced within this time, the controller will exit.
76+
# Default is 2 minutes.
77+
cacheSyncTimeout: 2m
78+
7079
# -- Deployment configs --
7180
image:
7281
repository: docker.io/envoyproxy/ai-gateway-controller

0 commit comments

Comments
 (0)