-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmistral-vllm.yml
More file actions
30 lines (30 loc) · 774 Bytes
/
mistral-vllm.yml
File metadata and controls
30 lines (30 loc) · 774 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Docker Compose service that runs the vllm/vllm-openai image with the
# mistralai/Mistral-7B-v0.3 model, reserving one NVIDIA GPU (device id "0").
services:
  vllm:
    image: vllm/vllm-openai:latest
    runtime: nvidia
    shm_size: 1g
    ports:
      # host:container; kept quoted so it is always read as a string.
      - "8000:8000"
    volumes:
      # Mount the host's Hugging Face cache into the container.
      - ~/.cache/huggingface:/root/.cache/huggingface
    environment:
      # ${HF_TOKEN} is interpolated by Compose when the file is loaded.
      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
    # Arguments passed to the container as its command.
    command:
      - --host
      - 0.0.0.0
      - --model
      - mistralai/Mistral-7B-v0.3  # full precision
      # Alternative models (use one in place of the line above):
      # - neuralmagic/Mistral-7B-Instruct-v0.3-quantized.w8a16  # quantized 1
      # - neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit
      - --gpu-memory-utilization
      - "0.95"
      - --max-model-len
      - "8192"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              # Use either `count` or `device_ids`, not both.
              # count: all
              device_ids: ["0"]
              capabilities: [gpu]