kubernetes-sigs · k8s-ci-robot · Jul 31, 2025 · Jul 29, 2025 · Jul 29, 2025 · nirrozenbaum
diff --git a/config/charts/inferencepool/templates/rbac.yaml b/config/charts/inferencepool/templates/rbac.yaml
@@ -5,15 +5,6 @@ metadata:
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels", "inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -40,6 +31,43 @@ roleRef:
   kind: ClusterRole
   name: {{ include "gateway-api-inference-extension.name" . }}
 ---
+# Namespace-scoped, read-only permissions for the release namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+# Inference resources in the inference.networking.x-k8s.io API group.
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "watch", "list"]
+# InferencePools in the inference.networking.k8s.io API group.
+- apiGroups: ["inference.networking.k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+# Pods (core API group).
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+---
+# Grants the Role above to the ServiceAccount of the same name in the release namespace.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+subjects:
+- kind: ServiceAccount
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ include "gateway-api-inference-extension.name" . }}
+---
 apiVersion: v1
 kind: ServiceAccount
 metadata:

diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
@@ -29,6 +29,12 @@ spec:
       appProtocol: http2
   type: ClusterIP
 ---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: default
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -46,6 +52,7 @@ spec:
       labels:
         app: vllm-llama3-8b-instruct-epp
     spec:
+      serviceAccountName: vllm-llama3-8b-instruct-epp
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
       terminationGracePeriodSeconds: 130
       containers:
@@ -174,23 +181,41 @@ data:
         weight: 1
       - pluginRef: max-score-picker
 ---
-kind: ClusterRole
+kind: Role
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: pod-read
+  namespace: default
+rules:
+- apiGroups: [ "inference.networking.x-k8s.io" ]
+  resources: [ "inferencemodels", "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "inference.networking.k8s.io" ]
+  resources: [ "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "" ]
+  resources: [ "pods" ]
+  verbs: [ "get", "watch", "list" ]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+  namespace: default
+subjects:
+- kind: ServiceAccount
+  name: vllm-llama3-8b-instruct-epp
+  namespace: default
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: pod-read
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: auth-reviewer
 rules:
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -207,12 +232,12 @@ rules:
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: pod-read-binding
+  name: auth-reviewer-binding
 subjects:
 - kind: ServiceAccount
-  name: default
+  name: vllm-llama3-8b-instruct-epp
   namespace: default
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: pod-read
+  name: auth-reviewer
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
@@ -392,14 +392,29 @@ func createInferExt(k8sClient client.Client, filePath string) {
 	ginkgo.By("Creating inference extension resources from manifest: " + filePath)
 	createObjsFromYaml(k8sClient, outManifests)
 
+	// Wait for the serviceaccount to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.ServiceAccount{})
+	}, existsTimeout, interval)
+
+	// Wait for the role to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read"}, &rbacv1.Role{})
+	}, existsTimeout, interval)
+
+	// Wait for the rolebinding to exist.
+	testutils.EventuallyExists(ctx, func() error {
+		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read-binding"}, &rbacv1.RoleBinding{})
+	}, existsTimeout, interval)
+
 	// Wait for the clusterrole to exist.
 	testutils.EventuallyExists(ctx, func() error {
-		return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{})
+		return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer"}, &rbacv1.ClusterRole{})
 	}, existsTimeout, interval)
 
 	// Wait for the clusterrolebinding to exist.
 	testutils.EventuallyExists(ctx, func() error {
-		return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{})
+		return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer-binding"}, &rbacv1.ClusterRoleBinding{})
 	}, existsTimeout, interval)
 
 	// Wait for the deployment to exist.

diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
@@ -26,6 +26,12 @@ spec:
       appProtocol: http2
   type: ClusterIP
 ---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -43,6 +49,7 @@ spec:
       labels:
         app: vllm-llama3-8b-instruct-epp
     spec:
+      serviceAccountName: vllm-llama3-8b-instruct-epp
       # Conservatively, this timeout should mirror the longest grace period of the pods within the pool
       terminationGracePeriodSeconds: 130
       containers:
@@ -171,23 +178,41 @@ data:
         weight: 1
       - pluginRef: max-score-picker
 ---
-kind: ClusterRole
+kind: Role
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
   name: pod-read
+  namespace: $E2E_NS
+rules:
+- apiGroups: [ "inference.networking.x-k8s.io" ]
+  resources: [ "inferencemodels", "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "inference.networking.k8s.io" ]
+  resources: [ "inferencepools" ]
+  verbs: [ "get", "watch", "list" ]
+- apiGroups: [ "" ]
+  resources: [ "pods" ]
+  verbs: [ "get", "watch", "list" ]
+---
+kind: RoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+  namespace: $E2E_NS
+subjects:
+- kind: ServiceAccount
+  name: vllm-llama3-8b-instruct-epp
+  namespace: $E2E_NS
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: pod-read
+---
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: auth-reviewer
 rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
 - apiGroups:
   - authentication.k8s.io
   resources:
@@ -204,12 +229,12 @@ rules:
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: pod-read-binding
+  name: auth-reviewer-binding
 subjects:
 - kind: ServiceAccount
-  name: default
+  name: vllm-llama3-8b-instruct-epp
   namespace: $E2E_NS
 roleRef:
   apiGroup: rbac.authorization.k8s.io
   kind: ClusterRole
-  name: pod-read
+  name: auth-reviewer
diff --git a/test/utils/utils.go b/test/utils/utils.go
@@ -45,7 +45,7 @@ import (
 func DeleteClusterResources(ctx context.Context, cli client.Client) error {
 	binding := &rbacv1.ClusterRoleBinding{
 		ObjectMeta: metav1.ObjectMeta{
-			Name: "pod-read-binding",
+			Name: "auth-reviewer-binding",
 		},
 	}
 	err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground))
@@ -54,7 +54,7 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error {
 	}
 	role := &rbacv1.ClusterRole{
 		ObjectMeta: metav1.ObjectMeta{
-			Name: "pod-read",
+			Name: "auth-reviewer",
 		},
 	}
 	err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground))