Commit fe5e457

Merge remote-tracking branch 'upstream/main'

2 parents 9b71c4a + ae001d6

File tree: 2 files changed, +122 −0 lines changed

tests/ilab/README.md

Lines changed: 4 additions & 0 deletions

@@ -12,6 +12,10 @@

 * Installed Go 1.21

+### Sample Judge Model Deployment
+
+* The sample manifest for deploying the judge model can be found here: `tests/ilab/resources/judge_model_deployment.yaml`
+
 ## Required environment variables

 ### Environment variables to download SDG and upload trained model
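The hunk above only names the environment-variable sections; the variable list itself is not part of this diff. As a rough pre-flight sketch, the S3-related names that appear in `test-secret` in the manifest below can be checked before running the tests. Treat the exact list as an assumption, not the README's authoritative set:

```python
# A minimal pre-flight sketch, assuming the S3 variables named in the
# test-secret of the manifest below are the ones the README requires.
# The README's own variable list is not shown in this hunk, so treat
# these names as an assumption.
import os
import sys

REQUIRED_VARS = [
    "AWS_S3_ENDPOINT",
    "AWS_S3_BUCKET",
    "AWS_ACCESS_KEY_ID",
    "AWS_SECRET_ACCESS_KEY",
    "AWS_DEFAULT_REGION",
]

missing = [name for name in REQUIRED_VARS if not os.environ.get(name)]
if missing:
    sys.exit(f"Missing required environment variables: {', '.join(missing)}")
```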
tests/ilab/resources/judge_model_deployment.yaml

Lines changed: 118 additions & 0 deletions (new file)

@@ -0,0 +1,118 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: custom-vllm-runtime
  namespace: test-namespace # replace this with your namespace name
spec:
  annotations:
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
    serving.knative.openshift.io/enablePassthrough: "true"
    opendatahub.io/dashboard: "true"
    openshift.io/display-name: "vLLM OpenAI entry point"
    prometheus.io/port: '8080'
    prometheus.io/path: "/metrics/"
  builtInAdapter:
    modelLoadingTimeoutMillis: 90000
  containers:
    - args:
        - '--port=8080'
        - '--distributed-executor-backend=mp'
        - '--model=/mnt/models'
      image: quay.io/opendatahub/vllm:stable
      name: kserve-container
      command:
        - python3
        - '-m'
        - "vllm.entrypoints.openai.api_server"
      ports:
        - containerPort: 8080
          name: http1
          protocol: TCP
  multiModel: false
  supportedModelFormats:
    - autoSelect: true
      name: vLLM

---
apiVersion: v1
kind: Secret
metadata:
  name: test-secret
  namespace: test-namespace # replace this with your namespace name
type: Opaque
stringData:
  AWS_S3_ENDPOINT: https://s3.us-east-1.amazonaws.com # replace this with your storage bucket endpoint URL
  AWS_S3_BUCKET: <storage-bucket-name> # add your storage bucket name
  AWS_ACCESS_KEY_ID: <access-key-id> # add your storage bucket access key ID
  AWS_SECRET_ACCESS_KEY: <secret-access-key> # add your storage bucket secret access key
  AWS_DEFAULT_REGION: us-east-1 # replace this with your storage bucket region

---
apiVersion: v1
kind: Secret
metadata:
  name: storage-config
  namespace: test-namespace # replace this with your namespace name
type: Opaque
stringData:
  rhelai-s3-data: '{"access_key_id":"<access-key-id>","bucket":"<storage-bucket-name>","default_bucket":"<storage-bucket-name>","endpoint_url":"https://s3.us-east-1.amazonaws.com","region":"us-east-1","secret_access_key":"<secret-access-key>","type":"s3"}'
  # replace the storage-config secret data above with the actual S3 bucket credentials

---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: test-service-account
  namespace: test-namespace # replace this with your namespace name

---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: test-cluster-role-binding
  namespace: test-namespace # replace this with your namespace name
subjects:
  - kind: ServiceAccount
    name: test-service-account
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: view

---
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: mistral
  namespace: test-namespace # replace this with your namespace name
  annotations:
    serving.kserve.io/deploymentMode: "Serverless"
    security.opendatahub.io/enable-auth: "true"
    opendatahub.io/dashboard: 'true'
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    serviceAccountName: test-service-account # replace this with the service account created above
    model:
      modelFormat:
        name: vLLM
      name: ''
      runtime: custom-vllm-runtime # replace this with the custom serving runtime created above
      resources:
        limits:
          cpu: '8'
          memory: 10Gi
          nvidia.com/gpu: '1'
        requests:
          cpu: '4'
          memory: 8Gi
          nvidia.com/gpu: '1'
      storage:
        key: test-secret
        path: mistral/model/
    tolerations:
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists