Skip to content

Commit 9635daf

Browse files
committed
update worker config
Signed-off-by: pandyamarut <pandyamarut@gmail.com>
1 parent fb4e700 commit 9635daf

File tree

1 file changed

+63
-2
lines changed

1 file changed

+63
-2
lines changed

worker-config.json

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"versions": {
3-
"0.7.0": {
4-
"imageName": "runpod/worker-v1-vllm:v1.9.0stable-cuda12.1.0",
3+
"0.7.3": {
4+
"imageName": "runpod/worker-v1-vllm:v2.1.0stable-cuda12.1.0",
55
"minimumCudaVersion": "12.1",
66
"categories": [
77
{
@@ -122,6 +122,67 @@
122122
}
123123
]
124124
},
125+
"0.7.0": {
126+
"imageName": "runpod/worker-v1-vllm:v1.9.0stable-cuda12.1.0",
127+
"minimumCudaVersion": "12.1",
128+
"categories": [
129+
{
130+
"title": "LLM Settings",
131+
"settings": [
132+
"TOKENIZER", "TOKENIZER_MODE", "SKIP_TOKENIZER_INIT", "TRUST_REMOTE_CODE",
133+
"DOWNLOAD_DIR", "LOAD_FORMAT", "DTYPE", "KV_CACHE_DTYPE", "QUANTIZATION_PARAM_PATH",
134+
"MAX_MODEL_LEN", "GUIDED_DECODING_BACKEND", "DISTRIBUTED_EXECUTOR_BACKEND",
135+
"WORKER_USE_RAY", "RAY_WORKERS_USE_NSIGHT", "PIPELINE_PARALLEL_SIZE",
136+
"TENSOR_PARALLEL_SIZE", "MAX_PARALLEL_LOADING_WORKERS", "ENABLE_PREFIX_CACHING",
137+
"DISABLE_SLIDING_WINDOW", "NUM_LOOKAHEAD_SLOTS",
138+
"SEED", "NUM_GPU_BLOCKS_OVERRIDE", "MAX_NUM_BATCHED_TOKENS", "MAX_NUM_SEQS",
139+
"MAX_LOGPROBS", "DISABLE_LOG_STATS", "QUANTIZATION", "ROPE_SCALING", "ROPE_THETA",
140+
"TOKENIZER_POOL_SIZE", "TOKENIZER_POOL_TYPE", "TOKENIZER_POOL_EXTRA_CONFIG",
141+
"ENABLE_LORA", "MAX_LORAS", "MAX_LORA_RANK", "LORA_EXTRA_VOCAB_SIZE",
142+
"LORA_DTYPE", "LONG_LORA_SCALING_FACTORS", "MAX_CPU_LORAS", "FULLY_SHARDED_LORAS",
143+
"DEVICE", "SCHEDULER_DELAY_FACTOR", "ENABLE_CHUNKED_PREFILL", "SPECULATIVE_MODEL",
144+
"NUM_SPECULATIVE_TOKENS", "SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE",
145+
"SPECULATIVE_MAX_MODEL_LEN", "SPECULATIVE_DISABLE_BY_BATCH_SIZE",
146+
"NGRAM_PROMPT_LOOKUP_MAX", "NGRAM_PROMPT_LOOKUP_MIN", "SPEC_DECODING_ACCEPTANCE_METHOD",
147+
"TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD", "TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA",
148+
"MODEL_LOADER_EXTRA_CONFIG", "PREEMPTION_MODE", "PREEMPTION_CHECK_PERIOD",
149+
"PREEMPTION_CPU_CAPACITY", "MAX_LOG_LEN", "DISABLE_LOGGING_REQUEST",
150+
"ENABLE_AUTO_TOOL_CHOICE", "TOOL_CALL_PARSER"
151+
]
152+
},
153+
{
154+
"title": "Tokenizer Settings",
155+
"settings": [
156+
"TOKENIZER_NAME", "TOKENIZER_REVISION", "CUSTOM_CHAT_TEMPLATE"
157+
]
158+
},
159+
{
160+
"title": "System Settings",
161+
"settings": [
162+
"GPU_MEMORY_UTILIZATION", "MAX_PARALLEL_LOADING_WORKERS", "BLOCK_SIZE",
163+
"SWAP_SPACE", "ENFORCE_EAGER", "MAX_SEQ_LEN_TO_CAPTURE", "DISABLE_CUSTOM_ALL_REDUCE"
164+
]
165+
},
166+
{
167+
"title": "Streaming Settings",
168+
"settings": [
169+
"DEFAULT_BATCH_SIZE", "DEFAULT_MIN_BATCH_SIZE", "DEFAULT_BATCH_SIZE_GROWTH_FACTOR"
170+
]
171+
},
172+
{
173+
"title": "OpenAI Settings",
174+
"settings": [
175+
"RAW_OPENAI_OUTPUT", "OPENAI_RESPONSE_ROLE", "OPENAI_SERVED_MODEL_NAME_OVERRIDE"
176+
]
177+
},
178+
{
179+
"title": "Serverless Settings",
180+
"settings": [
181+
"MAX_CONCURRENCY", "DISABLE_LOG_STATS", "DISABLE_LOG_REQUESTS"
182+
]
183+
}
184+
]
185+
},
125186
"0.6.4": {
126187
"imageName": "runpod/worker-v1-vllm:v1.7.0stable-cuda12.1.0",
127188
"minimumCudaVersion": "12.1",

0 commit comments

Comments (0)