Skip to content

Commit 012d956

Browse files
Increase memory of pod
Signed-off-by: Diego-Castan <[email protected]>
1 parent b400366 commit 012d956

File tree

1 file changed

+31
-33
lines changed

1 file changed

+31
-33
lines changed

install_pod_files/v3/vm-fm-vllm.yaml

Lines changed: 31 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -16,41 +16,40 @@ spec:
16 16
app: llama-3-8b
17 17
spec:
18 18
volumes:
19-
- name: cache-volume
20-
persistentVolumeClaim:
21-
claimName: llama-3-8b
22-
# vLLM needs to access the host's shared memory for tensor parallel inference.
23-
- name: shm
24-
emptyDir:
25-
medium: Memory
26-
sizeLimit: "2Gi"
27-
containers:
28-
- name: llama-3-8b
29-
image: vllm/vllm-openai:latest
30-
command: ["/bin/sh", "-c", "while true; do sleep 10000; done"]
31-
env:
32-
- name: HUGGING_FACE_HUB_TOKEN
33-
valueFrom:
34-
secretKeyRef:
35-
name: hf-token-secret-llama3
36-
key: token
37-
resources:
38-
limits:
39-
cpu: "10"
40-
memory: 20G
41-
nvidia.com/gpu: "1"
42-
requests:
43-
cpu: "2"
44-
memory: 6G
45-
nvidia.com/gpu: "1"
46-
volumeMounts:
47-
- mountPath: /root/.cache/huggingface
48-
name: cache-volume
19+
- name: cache-volume
20+
persistentVolumeClaim:
21+
claimName: llama-3-8b
22+
# vLLM needs to access the host's shared memory for tensor parallel inference.
49 23
- name: shm
50-
mountPath: /dev/shm
24+
emptyDir:
25+
medium: Memory
26+
sizeLimit: "2Gi"
27+
containers:
28+
- name: llama-3-8b
29+
image: vllm/vllm-openai:latest
30+
command: ["/bin/sh", "-c", "while true; do sleep 10000; done"]
31+
env:
32+
- name: HUGGING_FACE_HUB_TOKEN
33+
valueFrom:
34+
secretKeyRef:
35+
name: hf-token-secret-llama3
36+
key: token
37+
resources:
38+
limits:
39+
cpu: "10"
40+
memory: 200G
41+
nvidia.com/gpu: "1"
42+
requests:
43+
cpu: "2"
44+
memory: 60G
45+
nvidia.com/gpu: "1"
46+
volumeMounts:
47+
- mountPath: /root/.cache/huggingface
48+
name: cache-volume
49+
- name: shm
50+
mountPath: /dev/shm
51 51

52 52
---
53-
54 53
apiVersion: v1
55 54
kind: PersistentVolumeClaim
56 55
metadata:
@@ -63,4 +62,3 @@ spec:
63 62
requests:
64 63
storage: 300Gi
65 64
storageClassName: ocs-storagecluster-cephfs
66-

0 commit comments

Comments
 (0)