Skip to content

Commit f2b3e26

Browse files
authored
Fix race issue (#46)
* Reverting back to using jobs for fixing race condition. The issue has been fixed in rhods 2.16 * Remove minio dependency
1 parent 98f0663 commit f2b3e26

File tree

8 files changed

+442

-263

lines changed

charts/all/llm-serving-service/templates/accelerator-profile.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
---
21
apiVersion: dashboard.opendatahub.io/v1
32
kind: AcceleratorProfile
43
metadata:
Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,36 @@
1-
apiVersion: serving.kserve.io/v1beta1
2-
kind: InferenceService
3-
metadata:
4-
annotations:
5-
openshift.io/display-name: mistral-7b-instruct
6-
serving.knative.openshift.io/enablePassthrough: 'true'
7-
sidecar.istio.io/inject: 'true'
8-
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
9-
argocd.argoproj.io/sync-wave: "60"
10-
name: mistral-7b-instruct
11-
namespace: rag-llm
12-
labels:
13-
opendatahub.io/dashboard: 'true'
14-
spec:
15-
predictor:
16-
restartPolicy: OnFailure
17-
maxReplicas: 1
18-
minReplicas: 1
19-
model:
20-
modelFormat:
21-
name: vLLM
22-
name: ''
23-
resources:
24-
limits:
25-
cpu: '8'
26-
memory: 10Gi
27-
nvidia.com/gpu: '1'
28-
requests:
29-
cpu: '2'
30-
memory: 8Gi
31-
nvidia.com/gpu: '1'
32-
runtime: mistral-7b-instruct
33-
tolerations:
34-
- effect: NoSchedule
35-
key: odh-notebook
36-
operator: Exists
1+
# apiVersion: serving.kserve.io/v1beta1
2+
# kind: InferenceService
3+
# metadata:
4+
# annotations:
5+
# openshift.io/display-name: mistral-7b-instruct
6+
# serving.knative.openshift.io/enablePassthrough: 'true'
7+
# sidecar.istio.io/inject: 'true'
8+
# sidecar.istio.io/rewriteAppHTTPProbers: 'true'
9+
# argocd.argoproj.io/sync-wave: "60"
10+
# name: mistral-7b-instruct
11+
# namespace: rag-llm
12+
# labels:
13+
# opendatahub.io/dashboard: 'true'
14+
# spec:
15+
# predictor:
16+
# restartPolicy: OnFailure
17+
# maxReplicas: 1
18+
# minReplicas: 1
19+
# model:
20+
# modelFormat:
21+
# name: vLLM
22+
# name: ''
23+
# resources:
24+
# limits:
25+
# cpu: '8'
26+
# memory: 10Gi
27+
# nvidia.com/gpu: '1'
28+
# requests:
29+
# cpu: '2'
30+
# memory: 8Gi
31+
# nvidia.com/gpu: '1'
32+
# runtime: mistral-7b-instruct
33+
# tolerations:
34+
# - effect: NoSchedule
35+
# key: odh-notebook
36+
# operator: Exists
Lines changed: 69 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,69 @@
1-
apiVersion: serving.kserve.io/v1alpha1
2-
kind: ServingRuntime
3-
metadata:
4-
annotations:
5-
opendatahub.io/accelerator-name: nvidia-gpu
6-
opendatahub.io/apiProtocol: REST
7-
opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
8-
openshift.io/display-name: mistral-7b-instruct
9-
argocd.argoproj.io/sync-wave: "50"
10-
name: mistral-7b-instruct
11-
namespace: rag-llm
12-
labels:
13-
opendatahub.io/dashboard: 'true'
14-
spec:
15-
annotations:
16-
prometheus.io/path: /metrics
17-
prometheus.io/port: '8080'
18-
containers:
19-
- args:
20-
- '--port=8080'
21-
- '--model=$(MODEL_ID)'
22-
- '--download-dir=/cache'
23-
- '--distributed-executor-backend=mp'
24-
- '--served-model-name=mistral-7b-instruct'
25-
- '--max-model-len=4096'
26-
- '--dtype=half'
27-
- '--gpu-memory-utilization'
28-
- '0.98'
29-
- '--enforce-eager'
30-
command:
31-
- python
32-
- '-m'
33-
- vllm.entrypoints.openai.api_server
34-
env:
35-
- name: HF_HOME
36-
value: /tmp/hf_home
37-
- name: HF_TOKEN
38-
valueFrom:
39-
secretKeyRef:
40-
name: huggingface-secret
41-
key: hftoken
42-
- name: MODEL_ID
43-
valueFrom:
44-
secretKeyRef:
45-
name: huggingface-secret
46-
key: modelId
47-
- name: HF_HUB_OFFLINE
48-
value: "0"
49-
image: 'quay.io/modh/vllm@sha256:b51fde66f162f1a78e8c027320dddf214732d5345953b1599a84fe0f0168c619'
50-
name: kserve-container
51-
ports:
52-
- containerPort: 8080
53-
protocol: TCP
54-
volumeMounts:
55-
- mountPath: /dev/shm
56-
name: shm
57-
- mountPath: /cache
58-
name: cache
59-
multiModel: false
60-
supportedModelFormats:
61-
- autoSelect: true
62-
name: vLLM
63-
volumes:
64-
- emptyDir:
65-
medium: Memory
66-
sizeLimit: 2Gi
67-
name: shm
68-
- emptyDir: {}
69-
name: cache
1+
# apiVersion: serving.kserve.io/v1alpha1
2+
# kind: ServingRuntime
3+
# metadata:
4+
# annotations:
5+
# opendatahub.io/accelerator-name: nvidia-gpu
6+
# opendatahub.io/apiProtocol: REST
7+
# opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
8+
# openshift.io/display-name: mistral-7b-instruct
9+
# argocd.argoproj.io/sync-wave: "50"
10+
# name: mistral-7b-instruct
11+
# namespace: rag-llm
12+
# labels:
13+
# opendatahub.io/dashboard: 'true'
14+
# spec:
15+
# annotations:
16+
# prometheus.io/path: /metrics
17+
# prometheus.io/port: '8080'
18+
# containers:
19+
# - args:
20+
# - '--port=8080'
21+
# - '--model=$(MODEL_ID)'
22+
# - '--download-dir=/cache'
23+
# - '--distributed-executor-backend=mp'
24+
# - '--served-model-name=mistral-7b-instruct'
25+
# - '--max-model-len=4096'
26+
# - '--dtype=half'
27+
# - '--gpu-memory-utilization'
28+
# - '0.98'
29+
# - '--enforce-eager'
30+
# command:
31+
# - python
32+
# - '-m'
33+
# - vllm.entrypoints.openai.api_server
34+
# env:
35+
# - name: HF_HOME
36+
# value: /tmp/hf_home
37+
# - name: HF_TOKEN
38+
# valueFrom:
39+
# secretKeyRef:
40+
# name: huggingface-secret
41+
# key: hftoken
42+
# - name: MODEL_ID
43+
# valueFrom:
44+
# secretKeyRef:
45+
# name: huggingface-secret
46+
# key: modelId
47+
# - name: HF_HUB_OFFLINE
48+
# value: "0"
49+
# image: 'quay.io/modh/vllm@sha256:b51fde66f162f1a78e8c027320dddf214732d5345953b1599a84fe0f0168c619'
50+
# name: kserve-container
51+
# ports:
52+
# - containerPort: 8080
53+
# protocol: TCP
54+
# volumeMounts:
55+
# - mountPath: /dev/shm
56+
# name: shm
57+
# - mountPath: /cache
58+
# name: cache
59+
# multiModel: false
60+
# supportedModelFormats:
61+
# - autoSelect: true
62+
# name: vLLM
63+
# volumes:
64+
# - emptyDir:
65+
# medium: Memory
66+
# sizeLimit: 2Gi
67+
# name: shm
68+
# - emptyDir: {}
69+
# name: cache
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
---
2+
apiVersion: batch/v1
3+
kind: Job
4+
metadata:
5+
name: create-vllm
6+
spec:
7+
selector: {}
8+
template:
9+
spec:
10+
containers:
11+
- args:
12+
- -ec
13+
- |-
14+
cat << EOF | oc apply -f-
15+
apiVersion: serving.kserve.io/v1alpha1
16+
kind: ServingRuntime
17+
metadata:
18+
annotations:
19+
opendatahub.io/accelerator-name: nvidia-gpu
20+
opendatahub.io/apiProtocol: REST
21+
opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
22+
openshift.io/display-name: mistral-7b-instruct
23+
name: mistral-7b-instruct
24+
namespace: rag-llm
25+
labels:
26+
opendatahub.io/dashboard: 'true'
27+
spec:
28+
annotations:
29+
prometheus.io/path: /metrics
30+
prometheus.io/port: '8080'
31+
containers:
32+
- args:
33+
- '--port=8080'
34+
- '--model=\$(MODEL_ID)'
35+
- '--download-dir=/cache'
36+
- '--distributed-executor-backend=mp'
37+
- '--served-model-name=mistral-7b-instruct'
38+
- '--max-model-len=4096'
39+
- '--dtype=half'
40+
- '--gpu-memory-utilization'
41+
- '0.98'
42+
- '--enforce-eager'
43+
command:
44+
- python
45+
- '-m'
46+
- vllm.entrypoints.openai.api_server
47+
env:
48+
- name: HF_HOME
49+
value: /tmp/hf_home
50+
- name: HF_TOKEN
51+
valueFrom:
52+
secretKeyRef:
53+
name: huggingface-secret
54+
key: hftoken
55+
- name: MODEL_ID
56+
valueFrom:
57+
secretKeyRef:
58+
name: huggingface-secret
59+
key: modelId
60+
- name: HF_HUB_OFFLINE
61+
value: "0"
62+
image: 'quay.io/modh/vllm@sha256:b51fde66f162f1a78e8c027320dddf214732d5345953b1599a84fe0f0168c619'
63+
name: kserve-container
64+
ports:
65+
- containerPort: 8080
66+
protocol: TCP
67+
volumeMounts:
68+
- mountPath: /dev/shm
69+
name: shm
70+
- mountPath: /cache
71+
name: cache
72+
multiModel: false
73+
supportedModelFormats:
74+
- autoSelect: true
75+
name: vLLM
76+
volumes:
77+
- emptyDir:
78+
medium: Memory
79+
sizeLimit: 2Gi
80+
name: shm
81+
- emptyDir: {}
82+
name: cache
83+
EOF
84+
cat << EOF | oc apply -f-
85+
apiVersion: serving.kserve.io/v1beta1
86+
kind: InferenceService
87+
metadata:
88+
annotations:
89+
openshift.io/display-name: mistral-7b-instruct
90+
serving.knative.openshift.io/enablePassthrough: 'true'
91+
sidecar.istio.io/inject: 'true'
92+
sidecar.istio.io/rewriteAppHTTPProbers: 'true'
93+
name: mistral-7b-instruct
94+
namespace: rag-llm
95+
labels:
96+
opendatahub.io/dashboard: 'true'
97+
spec:
98+
predictor:
99+
restartPolicy: OnFailure
100+
maxReplicas: 1
101+
minReplicas: 1
102+
model:
103+
modelFormat:
104+
name: vLLM
105+
name: ''
106+
resources:
107+
limits:
108+
cpu: '8'
109+
memory: 10Gi
110+
nvidia.com/gpu: '1'
111+
requests:
112+
cpu: '2'
113+
memory: 8Gi
114+
nvidia.com/gpu: '1'
115+
runtime: mistral-7b-instruct
116+
tolerations:
117+
- effect: NoSchedule
118+
key: odh-notebook
119+
operator: Exists
120+
EOF
121+
command:
122+
- /bin/bash
123+
image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest
124+
imagePullPolicy: IfNotPresent
125+
name: create-vllm
126+
envFrom:
127+
- secretRef:
128+
name: minio-secret
129+
- secretRef:
130+
name: huggingface-secret
131+
initContainers:
132+
- args:
133+
- -ec
134+
- |-
135+
echo -n 'Waiting for openshift-ai initialize'
136+
while ! oc describe sub rhods-operator -n redhat-ods-operator 2>/dev/null | grep -qF rhods-operator; do
137+
echo -n .
138+
sleep 15
139+
done; echo
140+
echo -n 'openshift-ai initialized';echo
141+
echo -n 'Waiting for dscinitialization/default-dsci to initialize'
142+
echo
143+
oc wait --for=jsonpath='{.status.phase}'=Ready --timeout=600s -n redhat-ods-operator dscinitialization/default-dsci
144+
sleep 10
145+
echo -n 'dscinitialization/default-dsci initialized';echo
146+
command:
147+
- /bin/bash
148+
image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest
149+
imagePullPolicy: IfNotPresent
150+
name: wait-for-openshift
151+
restartPolicy: Never
152+
serviceAccount: demo-setup
153+
serviceAccountName: demo-setup

0 commit comments

Comments (0)