We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ec910bd · commit 5fcfb48 — Copy full SHA for 5fcfb48
.runpod/hub.json
@@ -12,7 +12,7 @@
12
"allowedCudaVersions": ["12.7", "12.6", "12.5", "12.4"],
13
"env": [
14
{
15
- "key": "MODEL_PATH",
+ "key": "MODEL_NAME",
16
"input": {
17
"name": "Model",
18
"type": "huggingface",
Dockerfile
@@ -18,14 +18,14 @@ COPY handler.py engine.py utils.py download_model.py test_input.json ./
COPY public/ ./public/
19
20
# Setup for Option 2: Building the Image with the Model included
21
-ARG MODEL_PATH=""
+ARG MODEL_NAME=""
22
ARG TOKENIZER_NAME=""
23
ARG BASE_PATH="/runpod-volume"
24
ARG QUANTIZATION=""
25
ARG MODEL_REVISION=""
26
ARG TOKENIZER_REVISION=""
27
28
-ENV MODEL_PATH=$MODEL_PATH \
+ENV MODEL_NAME=$MODEL_NAME \
29
MODEL_REVISION=$MODEL_REVISION \
30
TOKENIZER_NAME=$TOKENIZER_NAME \
31
TOKENIZER_REVISION=$TOKENIZER_REVISION \
@@ -42,7 +42,7 @@ RUN --mount=type=secret,id=HF_TOKEN,required=false \
42
if [ -f /run/secrets/HF_TOKEN ]; then \
43
export HF_TOKEN=$(cat /run/secrets/HF_TOKEN); \
44
fi && \
45
- if [ -n "$MODEL_PATH" ]; then \
+ if [ -n "$MODEL_NAME" ]; then \
46
python3 download_model.py; \
47
fi
48
README.md
@@ -14,7 +14,7 @@ All behaviour is controlled through environment variables:
| Environment Variable | Description | Default | Options |
| --------------------------------- | ------------------------------------------------- | ------------------------------------- | ----------------------------------------------------------------------------------------- |
-| `MODEL_PATH` | Path of the model weights | "meta-llama/Meta-Llama-3-8B-Instruct" | Local folder or Hugging Face repo ID |
+| `MODEL_NAME` | Hugging Face model name or local path | "meta-llama/Meta-Llama-3-8B-Instruct" | Hugging Face repo ID or local folder path |
| `HF_TOKEN` | HuggingFace access token for gated/private models | | Your HuggingFace access token |
| `TOKENIZER_PATH` | Path of the tokenizer | | |
| `TOKENIZER_MODE` | Tokenizer mode | "auto" | "auto", "slow" |
docker-compose.yml
@@ -14,7 +14,7 @@ services:
environment:
- HOST=0.0.0.0
- PORT=30000
- - MODEL_PATH=meta-llama/Llama-3.2-1B-Instruct
+ - MODEL_NAME=meta-llama/Llama-3.2-1B-Instruct
- TRUST_REMOTE_CODE=true
- ATTENTION_BACKEND=flashinfer
- SAMPLING_BACKEND=flashinfer
docs/conventions.md
@@ -45,7 +45,7 @@
feat(docker): add github workflow for automated builds
fix(handler): resolve openai compatibility issue
docs(readme): update installation instructions
-refactor(engine): migrate from MODEL_NAME to MODEL_PATH
+refactor(engine): migrate from MODEL_PATH to MODEL_NAME
49
chore(deps): update requirements.txt
50
```
51
download_model.py
@@ -17,7 +17,7 @@ def setup_env():
os.environ.update(
"HF_HOME": f"{BASE_DIR}/hf_cache",
- "MODEL_PATH": "openchat/openchat-3.5-0106",
+ "MODEL_NAME": "openchat/openchat-3.5-0106",
"HF_HUB_ENABLE_HF_TRANSFER": "1",
"TENSORIZE": "1",
"TENSORIZER_NUM_GPUS": "1",
@@ -52,7 +52,7 @@ def download(name, revision, type, cache_dir):
52
setup_env()
53
cache_dir = os.getenv("HF_HOME")
54
model_name, model_revision = (
55
- os.getenv("MODEL_PATH"),
+ os.getenv("MODEL_NAME"),
56
os.getenv("MODEL_REVISION") or None,
57
)
58
tokenizer_name, tokenizer_revision = (
@@ -63,7 +63,7 @@ def download(name, revision, type, cache_dir):
63
model_path_downloaded = download(model_name, model_revision, "model", cache_dir)
64
65
metadata = {
66
- "MODEL_PATH": model_path_downloaded,
+ "MODEL_NAME": model_path_downloaded,
67
"MODEL_REVISION": os.getenv("MODEL_REVISION"),
68
"QUANTIZATION": os.getenv("QUANTIZATION"),
69
}
engine.py
@@ -10,7 +10,7 @@
10
class SGlangEngine:
11
def __init__(
self,
- model=os.getenv("MODEL_PATH"),
+ model=os.getenv("MODEL_NAME"),
host=os.getenv("HOST", "0.0.0.0"),
port=int(os.getenv("PORT", 30000)),
):
@@ -33,7 +33,7 @@ def start_server(self):
33
34
# Dictionary of all possible options and their corresponding env var names
35
options = {
36
- "MODEL_PATH": "--model-path",
+ "MODEL_NAME": "--model-path",
37
"TOKENIZER_PATH": "--tokenizer-path",
38
"TOKENIZER_MODE": "--tokenizer-mode",
39
"LOAD_FORMAT": "--load-format",
0 commit comments