Skip to content

Commit b3a76c2

Browse files
committed
Address PR comments
Signed-off-by: Ernest Wong <[email protected]>
1 parent 5957b15 commit b3a76c2

File tree

2 files changed

+127
-77
lines changed

2 files changed

+127
-77
lines changed

config/manifests/inferencepool-resources.yaml

Lines changed: 94 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ spec:
2929
appProtocol: http2
3030
type: ClusterIP
3131
---
32+
apiVersion: v1
33+
kind: ServiceAccount
34+
metadata:
35+
name: vllm-llama3-8b-instruct-epp
36+
namespace: default
37+
---
3238
apiVersion: apps/v1
3339
kind: Deployment
3440
metadata:
@@ -46,51 +52,52 @@ spec:
4652
labels:
4753
app: vllm-llama3-8b-instruct-epp
4854
spec:
55+
serviceAccountName: vllm-llama3-8b-instruct-epp
4956
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
5057
terminationGracePeriodSeconds: 130
5158
containers:
52-
- name: epp
53-
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
54-
imagePullPolicy: Always
55-
args:
56-
- -poolName
57-
- "vllm-llama3-8b-instruct"
58-
- "-poolNamespace"
59-
- "default"
60-
- -v
61-
- "4"
62-
- --zap-encoder
63-
- "json"
64-
- -grpcPort
65-
- "9002"
66-
- -grpcHealthPort
67-
- "9003"
68-
- "-configFile"
69-
- "/config/default-plugins.yaml"
70-
ports:
71-
- containerPort: 9002
72-
- containerPort: 9003
73-
- name: metrics
74-
containerPort: 9090
75-
livenessProbe:
76-
grpc:
77-
port: 9003
78-
service: inference-extension
79-
initialDelaySeconds: 5
80-
periodSeconds: 10
81-
readinessProbe:
82-
grpc:
83-
port: 9003
84-
service: inference-extension
85-
initialDelaySeconds: 5
86-
periodSeconds: 10
87-
volumeMounts:
88-
- name: plugins-config-volume
89-
mountPath: "/config"
59+
- name: epp
60+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
61+
imagePullPolicy: Always
62+
args:
63+
- -poolName
64+
- "vllm-llama3-8b-instruct"
65+
- "-poolNamespace"
66+
- "default"
67+
- -v
68+
- "4"
69+
- --zap-encoder
70+
- "json"
71+
- -grpcPort
72+
- "9002"
73+
- -grpcHealthPort
74+
- "9003"
75+
- "-configFile"
76+
- "/config/default-plugins.yaml"
77+
ports:
78+
- containerPort: 9002
79+
- containerPort: 9003
80+
- name: metrics
81+
containerPort: 9090
82+
livenessProbe:
83+
grpc:
84+
port: 9003
85+
service: inference-extension
86+
initialDelaySeconds: 5
87+
periodSeconds: 10
88+
readinessProbe:
89+
grpc:
90+
port: 9003
91+
service: inference-extension
92+
initialDelaySeconds: 5
93+
periodSeconds: 10
94+
volumeMounts:
95+
- name: plugins-config-volume
96+
mountPath: "/config"
9097
volumes:
91-
- name: plugins-config-volume
92-
configMap:
93-
name: plugins-config
98+
- name: plugins-config-volume
99+
configMap:
100+
name: plugins-config
94101
---
95102
apiVersion: v1
96103
kind: ConfigMap
@@ -174,42 +181,60 @@ data:
174181
weight: 1
175182
- pluginRef: max-score-picker
176183
---
177-
kind: ClusterRole
184+
kind: Role
178185
apiVersion: rbac.authorization.k8s.io/v1
179186
metadata:
180187
name: pod-read
188+
namespace: default
181189
rules:
182-
- apiGroups: ["inference.networking.x-k8s.io"]
183-
resources: ["inferencepools"]
184-
verbs: ["get", "watch", "list"]
185-
- apiGroups: ["inference.networking.x-k8s.io"]
186-
resources: ["inferencemodels"]
187-
verbs: ["get", "watch", "list"]
188-
- apiGroups: [""]
189-
resources: ["pods"]
190-
verbs: ["get", "watch", "list"]
191-
- apiGroups:
192-
- authentication.k8s.io
193-
resources:
194-
- tokenreviews
195-
verbs:
196-
- create
197-
- apiGroups:
198-
- authorization.k8s.io
199-
resources:
200-
- subjectaccessreviews
201-
verbs:
202-
- create
203-
---
204-
kind: ClusterRoleBinding
190+
- apiGroups: [ "inference.networking.x-k8s.io" ]
191+
resources: [ "inferencepools", "inferencemodels" ]
192+
verbs: [ "get", "watch", "list" ]
193+
- apiGroups: [ "" ]
194+
resources: [ "pods" ]
195+
verbs: [ "get", "watch", "list" ]
196+
---
197+
kind: RoleBinding
205198
apiVersion: rbac.authorization.k8s.io/v1
206199
metadata:
207200
name: pod-read-binding
208-
subjects:
209-
- kind: ServiceAccount
210-
name: default
211201
namespace: default
202+
subjects:
203+
- kind: ServiceAccount
204+
name: vllm-llama3-8b-instruct-epp
205+
namespace: default
212206
roleRef:
213207
apiGroup: rbac.authorization.k8s.io
214-
kind: ClusterRole
208+
kind: Role
215209
name: pod-read
210+
---
211+
kind: ClusterRole
212+
apiVersion: rbac.authorization.k8s.io/v1
213+
metadata:
214+
name: auth-reviewer
215+
rules:
216+
- apiGroups:
217+
- authentication.k8s.io
218+
resources:
219+
- tokenreviews
220+
verbs:
221+
- create
222+
- apiGroups:
223+
- authorization.k8s.io
224+
resources:
225+
- subjectaccessreviews
226+
verbs:
227+
- create
228+
---
229+
kind: ClusterRoleBinding
230+
apiVersion: rbac.authorization.k8s.io/v1
231+
metadata:
232+
name: auth-reviewer-binding
233+
subjects:
234+
- kind: ServiceAccount
235+
name: vllm-llama3-8b-instruct-epp
236+
namespace: default
237+
roleRef:
238+
apiGroup: rbac.authorization.k8s.io
239+
kind: ClusterRole
240+
name: auth-reviewer

test/testdata/inferencepool-e2e.yaml

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ spec:
2626
appProtocol: http2
2727
type: ClusterIP
2828
---
29+
apiVersion: v1
30+
kind: ServiceAccount
31+
metadata:
32+
name: vllm-llama3-8b-instruct-epp
33+
namespace: $E2E_NS
34+
---
2935
apiVersion: apps/v1
3036
kind: Deployment
3137
metadata:
@@ -43,6 +49,7 @@ spec:
4349
labels:
4450
app: vllm-llama3-8b-instruct-epp
4551
spec:
52+
serviceAccountName: vllm-llama3-8b-instruct-epp
4653
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
4754
terminationGracePeriodSeconds: 130
4855
containers:
@@ -171,20 +178,38 @@ data:
171178
weight: 1
172179
- pluginRef: max-score-picker
173180
---
174-
kind: ClusterRole
181+
kind: Role
175182
apiVersion: rbac.authorization.k8s.io/v1
176183
metadata:
177184
name: pod-read
185+
namespace: $E2E_NS
178186
rules:
179187
- apiGroups: ["inference.networking.x-k8s.io"]
180-
resources: ["inferencepools"]
181-
verbs: ["get", "watch", "list"]
182-
- apiGroups: ["inference.networking.x-k8s.io"]
183-
resources: ["inferencemodels"]
188+
resources: ["inferencepools", "inferencemodels"]
184189
verbs: ["get", "watch", "list"]
185190
- apiGroups: [""]
186191
resources: ["pods"]
187192
verbs: ["get", "watch", "list"]
193+
---
194+
kind: RoleBinding
195+
apiVersion: rbac.authorization.k8s.io/v1
196+
metadata:
197+
name: pod-read-binding
198+
namespace: $E2E_NS  # must be the namespace of the Role named in roleRef; a RoleBinding only resolves and grants within its own namespace
199+
subjects:
200+
- kind: ServiceAccount
201+
name: vllm-llama3-8b-instruct-epp
202+
namespace: $E2E_NS
203+
roleRef:
204+
apiGroup: rbac.authorization.k8s.io
205+
kind: Role
206+
name: pod-read
207+
---
208+
kind: ClusterRole
209+
apiVersion: rbac.authorization.k8s.io/v1
210+
metadata:
211+
name: auth-reviewer
212+
rules:
188213
- apiGroups:
189214
- authentication.k8s.io
190215
resources:
@@ -201,12 +226,12 @@ rules:
201226
kind: ClusterRoleBinding
202227
apiVersion: rbac.authorization.k8s.io/v1
203228
metadata:
204-
name: pod-read-binding
229+
name: auth-reviewer-binding
205230
subjects:
206231
- kind: ServiceAccount
207-
name: default
232+
name: vllm-llama3-8b-instruct-epp
208233
namespace: $E2E_NS
209234
roleRef:
210235
apiGroup: rbac.authorization.k8s.io
211236
kind: ClusterRole
212-
name: pod-read
237+
name: auth-reviewer

0 commit comments

Comments
 (0)