Skip to content

Commit ccaa5e5

Browse files
committed
fix: split EPP RBAC into cluster-scoped and namespace-scoped permissions
Signed-off-by: Ernest Wong <[email protected]>
1 parent f33ff43 commit ccaa5e5

File tree

5 files changed: +124 / -45 lines changed

config/charts/inferencepool/templates/rbac.yaml

Lines changed: 29 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,6 @@ metadata:
55
labels:
66
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
77
rules:
8-
- apiGroups: ["inference.networking.x-k8s.io"]
9-
resources: ["inferencemodels", "inferencepools"]
10-
verbs: ["get", "watch", "list"]
11-
- apiGroups: ["inference.networking.k8s.io"]
12-
resources: ["inferencepools"]
13-
verbs: ["get", "watch", "list"]
14-
- apiGroups: [""]
15-
resources: ["pods"]
16-
verbs: ["get", "watch", "list"]
178
- apiGroups:
189
- authentication.k8s.io
1910
resources:
@@ -40,6 +31,35 @@ roleRef:
4031
kind: ClusterRole
4132
name: {{ include "gateway-api-inference-extension.name" . }}
4233
---
34+
apiVersion: rbac.authorization.k8s.io/v1
35+
kind: Role
36+
metadata:
37+
name: {{ include "gateway-api-inference-extension.name" . }}
38+
namespace: {{ .Release.Namespace }}
39+
labels:
40+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
41+
rules:
42+
- apiGroups: ["inference.networking.x-k8s.io"]
43+
resources: ["inferencemodels", "inferencepools"]
44+
verbs: ["get", "watch", "list"]
45+
- apiGroups: [""]
46+
resources: ["pods"]
47+
verbs: ["get", "watch", "list"]
48+
---
49+
apiVersion: rbac.authorization.k8s.io/v1
50+
kind: RoleBinding
51+
metadata:
52+
name: {{ include "gateway-api-inference-extension.name" . }}
53+
namespace: {{ .Release.Namespace }}
54+
subjects:
55+
- kind: ServiceAccount
56+
name: {{ include "gateway-api-inference-extension.name" . }}
57+
namespace: {{ .Release.Namespace }}
58+
roleRef:
59+
apiGroup: rbac.authorization.k8s.io
60+
kind: Role
61+
name: {{ include "gateway-api-inference-extension.name" . }}
62+
---
4363
apiVersion: v1
4464
kind: ServiceAccount
4565
metadata:

config/manifests/inferencepool-resources.yaml

Lines changed: 38 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,12 @@ spec:
2929
appProtocol: http2
3030
type: ClusterIP
3131
---
32+
apiVersion: v1
33+
kind: ServiceAccount
34+
metadata:
35+
name: vllm-llama3-8b-instruct-epp
36+
namespace: default
37+
---
3238
apiVersion: apps/v1
3339
kind: Deployment
3440
metadata:
@@ -46,6 +52,7 @@ spec:
4652
labels:
4753
app: vllm-llama3-8b-instruct-epp
4854
spec:
55+
serviceAccountName: vllm-llama3-8b-instruct-epp
4956
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
5057
terminationGracePeriodSeconds: 130
5158
containers:
@@ -174,23 +181,38 @@ data:
174181
weight: 1
175182
- pluginRef: max-score-picker
176183
---
177-
kind: ClusterRole
184+
kind: Role
178185
apiVersion: rbac.authorization.k8s.io/v1
179186
metadata:
180187
name: pod-read
188+
namespace: default
189+
rules:
190+
- apiGroups: [ "inference.networking.x-k8s.io" ]
191+
resources: [ "inferencepools", "inferencemodels" ]
192+
verbs: [ "get", "watch", "list" ]
193+
- apiGroups: [ "" ]
194+
resources: [ "pods" ]
195+
verbs: [ "get", "watch", "list" ]
196+
---
197+
kind: RoleBinding
198+
apiVersion: rbac.authorization.k8s.io/v1
199+
metadata:
200+
name: pod-read-binding
201+
namespace: default
202+
subjects:
203+
- kind: ServiceAccount
204+
name: vllm-llama3-8b-instruct-epp
205+
namespace: default
206+
roleRef:
207+
apiGroup: rbac.authorization.k8s.io
208+
kind: Role
209+
name: pod-read
210+
---
211+
kind: ClusterRole
212+
apiVersion: rbac.authorization.k8s.io/v1
213+
metadata:
214+
name: auth-reviewer
181215
rules:
182-
- apiGroups: ["inference.networking.k8s.io"]
183-
resources: ["inferencepools"]
184-
verbs: ["get", "watch", "list"]
185-
- apiGroups: ["inference.networking.x-k8s.io"]
186-
resources: ["inferencepools"]
187-
verbs: ["get", "watch", "list"]
188-
- apiGroups: ["inference.networking.x-k8s.io"]
189-
resources: ["inferencemodels"]
190-
verbs: ["get", "watch", "list"]
191-
- apiGroups: [""]
192-
resources: ["pods"]
193-
verbs: ["get", "watch", "list"]
194216
- apiGroups:
195217
- authentication.k8s.io
196218
resources:
@@ -207,12 +229,12 @@ rules:
207229
kind: ClusterRoleBinding
208230
apiVersion: rbac.authorization.k8s.io/v1
209231
metadata:
210-
name: pod-read-binding
232+
name: auth-reviewer-binding
211233
subjects:
212234
- kind: ServiceAccount
213-
name: default
235+
name: vllm-llama3-8b-instruct-epp
214236
namespace: default
215237
roleRef:
216238
apiGroup: rbac.authorization.k8s.io
217239
kind: ClusterRole
218-
name: pod-read
240+
name: auth-reviewer

test/e2e/epp/e2e_suite_test.go

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -392,14 +392,29 @@ func createInferExt(k8sClient client.Client, filePath string) {
392392
ginkgo.By("Creating inference extension resources from manifest: " + filePath)
393393
createObjsFromYaml(k8sClient, outManifests)
394394

395+
// Wait for the serviceaccount to exist.
396+
testutils.EventuallyExists(ctx, func() error {
397+
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: inferExtName}, &corev1.ServiceAccount{})
398+
}, existsTimeout, interval)
399+
400+
// Wait for the role to exist.
401+
testutils.EventuallyExists(ctx, func() error {
402+
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read"}, &rbacv1.Role{})
403+
}, existsTimeout, interval)
404+
405+
// Wait for the rolebinding to exist.
406+
testutils.EventuallyExists(ctx, func() error {
407+
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "pod-read-binding"}, &rbacv1.RoleBinding{})
408+
}, existsTimeout, interval)
409+
395410
// Wait for the clusterrole to exist.
396411
testutils.EventuallyExists(ctx, func() error {
397-
return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read"}, &rbacv1.ClusterRole{})
412+
return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer"}, &rbacv1.ClusterRole{})
398413
}, existsTimeout, interval)
399414

400415
// Wait for the clusterrolebinding to exist.
401416
testutils.EventuallyExists(ctx, func() error {
402-
return k8sClient.Get(ctx, types.NamespacedName{Name: "pod-read-binding"}, &rbacv1.ClusterRoleBinding{})
417+
return k8sClient.Get(ctx, types.NamespacedName{Name: "auth-reviewer-binding"}, &rbacv1.ClusterRoleBinding{})
403418
}, existsTimeout, interval)
404419

405420
// Wait for the deployment to exist.

test/testdata/inferencepool-e2e.yaml

Lines changed: 38 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,12 @@ spec:
2626
appProtocol: http2
2727
type: ClusterIP
2828
---
29+
apiVersion: v1
30+
kind: ServiceAccount
31+
metadata:
32+
name: vllm-llama3-8b-instruct-epp
33+
namespace: $E2E_NS
34+
---
2935
apiVersion: apps/v1
3036
kind: Deployment
3137
metadata:
@@ -43,6 +49,7 @@ spec:
4349
labels:
4450
app: vllm-llama3-8b-instruct-epp
4551
spec:
52+
serviceAccountName: vllm-llama3-8b-instruct-epp
4653
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4754
terminationGracePeriodSeconds: 130
4855
containers:
@@ -171,23 +178,38 @@ data:
171178
weight: 1
172179
- pluginRef: max-score-picker
173180
---
174-
kind: ClusterRole
181+
kind: Role
175182
apiVersion: rbac.authorization.k8s.io/v1
176183
metadata:
177184
name: pod-read
185+
namespace: $E2E_NS
186+
rules:
187+
- apiGroups: [ "inference.networking.x-k8s.io" ]
188+
resources: [ "inferencepools", "inferencemodels" ]
189+
verbs: [ "get", "watch", "list" ]
190+
- apiGroups: [ "" ]
191+
resources: [ "pods" ]
192+
verbs: [ "get", "watch", "list" ]
193+
---
194+
kind: RoleBinding
195+
apiVersion: rbac.authorization.k8s.io/v1
196+
metadata:
197+
name: pod-read-binding
198+
namespace: $E2E_NS
199+
subjects:
200+
- kind: ServiceAccount
201+
name: vllm-llama3-8b-instruct-epp
202+
namespace: $E2E_NS
203+
roleRef:
204+
apiGroup: rbac.authorization.k8s.io
205+
kind: Role
206+
name: pod-read
207+
---
208+
kind: ClusterRole
209+
apiVersion: rbac.authorization.k8s.io/v1
210+
metadata:
211+
name: auth-reviewer
178212
rules:
179-
- apiGroups: ["inference.networking.x-k8s.io"]
180-
resources: ["inferencepools"]
181-
verbs: ["get", "watch", "list"]
182-
- apiGroups: ["inference.networking.x-k8s.io"]
183-
resources: ["inferencemodels"]
184-
verbs: ["get", "watch", "list"]
185-
- apiGroups: ["inference.networking.k8s.io"]
186-
resources: ["inferencepools"]
187-
verbs: ["get", "watch", "list"]
188-
- apiGroups: [""]
189-
resources: ["pods"]
190-
verbs: ["get", "watch", "list"]
191213
- apiGroups:
192214
- authentication.k8s.io
193215
resources:
@@ -204,12 +226,12 @@ rules:
204226
kind: ClusterRoleBinding
205227
apiVersion: rbac.authorization.k8s.io/v1
206228
metadata:
207-
name: pod-read-binding
229+
name: auth-reviewer-binding
208230
subjects:
209231
- kind: ServiceAccount
210-
name: default
232+
name: vllm-llama3-8b-instruct-epp
211233
namespace: $E2E_NS
212234
roleRef:
213235
apiGroup: rbac.authorization.k8s.io
214236
kind: ClusterRole
215-
name: pod-read
237+
name: auth-reviewer

test/utils/utils.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ import (
4545
func DeleteClusterResources(ctx context.Context, cli client.Client) error {
4646
binding := &rbacv1.ClusterRoleBinding{
4747
ObjectMeta: metav1.ObjectMeta{
48-
Name: "pod-read-binding",
48+
Name: "auth-reviewer-binding",
4949
},
5050
}
5151
err := cli.Delete(ctx, binding, client.PropagationPolicy(metav1.DeletePropagationForeground))
@@ -54,7 +54,7 @@ func DeleteClusterResources(ctx context.Context, cli client.Client) error {
5454
}
5555
role := &rbacv1.ClusterRole{
5656
ObjectMeta: metav1.ObjectMeta{
57-
Name: "pod-read",
57+
Name: "auth-reviewer",
5858
},
5959
}
6060
err = cli.Delete(ctx, role, client.PropagationPolicy(metav1.DeletePropagationForeground))

0 commit comments

Comments
 (0)