File tree Expand file tree Collapse file tree 1 file changed +31
-33
lines changed Expand file tree Collapse file tree 1 file changed +31
-33
lines changed Original file line number Diff line number Diff line change @@ -16,41 +16,40 @@ spec:
16
16
app : llama-3-8b
17
17
spec :
18
18
volumes :
19
- - name : cache-volume
20
- persistentVolumeClaim :
21
- claimName : llama-3-8b
22
- # vLLM needs to access the host's shared memory for tensor parallel inference.
23
- - name : shm
24
- emptyDir :
25
- medium : Memory
26
- sizeLimit : " 2Gi"
27
- containers :
28
- - name : llama-3-8b
29
- image : vllm/vllm-openai:latest
30
- command : ["/bin/sh", "-c", "while true; do sleep 10000; done"]
31
- env :
32
- - name : HUGGING_FACE_HUB_TOKEN
33
- valueFrom :
34
- secretKeyRef :
35
- name : hf-token-secret-llama3
36
- key : token
37
- resources :
38
- limits :
39
- cpu : " 10"
40
- memory : 20G
41
- nvidia.com/gpu : " 1"
42
- requests :
43
- cpu : " 2"
44
- memory : 6G
45
- nvidia.com/gpu : " 1"
46
- volumeMounts :
47
- - mountPath : /root/.cache/huggingface
48
- name : cache-volume
19
+ - name : cache-volume
20
+ persistentVolumeClaim :
21
+ claimName : llama-3-8b
22
+ # vLLM needs to access the host's shared memory for tensor parallel inference.
49
23
- name : shm
50
- mountPath : /dev/shm
24
+ emptyDir :
25
+ medium : Memory
26
+ sizeLimit : " 2Gi"
27
+ containers :
28
+ - name : llama-3-8b
29
+ image : vllm/vllm-openai:latest
30
+ command : ["/bin/sh", "-c", "while true; do sleep 10000; done"]
31
+ env :
32
+ - name : HUGGING_FACE_HUB_TOKEN
33
+ valueFrom :
34
+ secretKeyRef :
35
+ name : hf-token-secret-llama3
36
+ key : token
37
+ resources :
38
+ limits :
39
+ cpu : " 10"
40
+ memory : 200G
41
+ nvidia.com/gpu : " 1"
42
+ requests :
43
+ cpu : " 2"
44
+ memory : 60G
45
+ nvidia.com/gpu : " 1"
46
+ volumeMounts :
47
+ - mountPath : /root/.cache/huggingface
48
+ name : cache-volume
49
+ - name : shm
50
+ mountPath : /dev/shm
51
51
52
52
---
53
-
54
53
apiVersion : v1
55
54
kind : PersistentVolumeClaim
56
55
metadata :
63
62
requests :
64
63
storage : 300Gi
65
64
storageClassName : ocs-storagecluster-cephfs
66
-
You can’t perform that action at this time.
0 commit comments