|
1 | 1 | { |
2 | 2 | "versions": { |
3 | | - "0.7.0": { |
4 | | - "imageName": "runpod/worker-v1-vllm:v1.9.0stable-cuda12.1.0", |
| 3 | + "0.7.3": { |
| 4 | + "imageName": "runpod/worker-v1-vllm:v2.1.0stable-cuda12.1.0", |
5 | 5 | "minimumCudaVersion": "12.1", |
6 | 6 | "categories": [ |
7 | 7 | { |
|
122 | 122 | } |
123 | 123 | ] |
124 | 124 | }, |
| 125 | + "0.7.0": { |
| 126 | + "imageName": "runpod/worker-v1-vllm:v1.9.0stable-cuda12.1.0", |
| 127 | + "minimumCudaVersion": "12.1", |
| 128 | + "categories": [ |
| 129 | + { |
| 130 | + "title": "LLM Settings", |
| 131 | + "settings": [ |
| 132 | + "TOKENIZER", "TOKENIZER_MODE", "SKIP_TOKENIZER_INIT", "TRUST_REMOTE_CODE", |
| 133 | + "DOWNLOAD_DIR", "LOAD_FORMAT", "DTYPE", "KV_CACHE_DTYPE", "QUANTIZATION_PARAM_PATH", |
| 134 | + "MAX_MODEL_LEN", "GUIDED_DECODING_BACKEND", "DISTRIBUTED_EXECUTOR_BACKEND", |
| 135 | + "WORKER_USE_RAY", "RAY_WORKERS_USE_NSIGHT", "PIPELINE_PARALLEL_SIZE", |
| 136 | + "TENSOR_PARALLEL_SIZE", "MAX_PARALLEL_LOADING_WORKERS", "ENABLE_PREFIX_CACHING", |
| 137 | + "DISABLE_SLIDING_WINDOW", "NUM_LOOKAHEAD_SLOTS", |
| 138 | + "SEED", "NUM_GPU_BLOCKS_OVERRIDE", "MAX_NUM_BATCHED_TOKENS", "MAX_NUM_SEQS", |
| 139 | + "MAX_LOGPROBS", "DISABLE_LOG_STATS", "QUANTIZATION", "ROPE_SCALING", "ROPE_THETA", |
| 140 | + "TOKENIZER_POOL_SIZE", "TOKENIZER_POOL_TYPE", "TOKENIZER_POOL_EXTRA_CONFIG", |
| 141 | + "ENABLE_LORA", "MAX_LORAS", "MAX_LORA_RANK", "LORA_EXTRA_VOCAB_SIZE", |
| 142 | + "LORA_DTYPE", "LONG_LORA_SCALING_FACTORS", "MAX_CPU_LORAS", "FULLY_SHARDED_LORAS", |
| 143 | + "DEVICE", "SCHEDULER_DELAY_FACTOR", "ENABLE_CHUNKED_PREFILL", "SPECULATIVE_MODEL", |
| 144 | + "NUM_SPECULATIVE_TOKENS", "SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE", |
| 145 | + "SPECULATIVE_MAX_MODEL_LEN", "SPECULATIVE_DISABLE_BY_BATCH_SIZE", |
| 146 | + "NGRAM_PROMPT_LOOKUP_MAX", "NGRAM_PROMPT_LOOKUP_MIN", "SPEC_DECODING_ACCEPTANCE_METHOD", |
| 147 | + "TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD", "TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA", |
| 148 | + "MODEL_LOADER_EXTRA_CONFIG", "PREEMPTION_MODE", "PREEMPTION_CHECK_PERIOD", |
| 149 | + "PREEMPTION_CPU_CAPACITY", "MAX_LOG_LEN", "DISABLE_LOGGING_REQUEST", |
| 150 | + "ENABLE_AUTO_TOOL_CHOICE", "TOOL_CALL_PARSER" |
| 151 | + ] |
| 152 | + }, |
| 153 | + { |
| 154 | + "title": "Tokenizer Settings", |
| 155 | + "settings": [ |
| 156 | + "TOKENIZER_NAME", "TOKENIZER_REVISION", "CUSTOM_CHAT_TEMPLATE" |
| 157 | + ] |
| 158 | + }, |
| 159 | + { |
| 160 | + "title": "System Settings", |
| 161 | + "settings": [ |
| 162 | + "GPU_MEMORY_UTILIZATION", "MAX_PARALLEL_LOADING_WORKERS", "BLOCK_SIZE", |
| 163 | + "SWAP_SPACE", "ENFORCE_EAGER", "MAX_SEQ_LEN_TO_CAPTURE", "DISABLE_CUSTOM_ALL_REDUCE" |
| 164 | + ] |
| 165 | + }, |
| 166 | + { |
| 167 | + "title": "Streaming Settings", |
| 168 | + "settings": [ |
| 169 | + "DEFAULT_BATCH_SIZE", "DEFAULT_MIN_BATCH_SIZE", "DEFAULT_BATCH_SIZE_GROWTH_FACTOR" |
| 170 | + ] |
| 171 | + }, |
| 172 | + { |
| 173 | + "title": "OpenAI Settings", |
| 174 | + "settings": [ |
| 175 | + "RAW_OPENAI_OUTPUT", "OPENAI_RESPONSE_ROLE", "OPENAI_SERVED_MODEL_NAME_OVERRIDE" |
| 176 | + ] |
| 177 | + }, |
| 178 | + { |
| 179 | + "title": "Serverless Settings", |
| 180 | + "settings": [ |
| 181 | + "MAX_CONCURRENCY", "DISABLE_LOG_STATS", "DISABLE_LOG_REQUESTS" |
| 182 | + ] |
| 183 | + } |
| 184 | + ] |
| 185 | + }, |
125 | 186 | "0.6.4": { |
126 | 187 | "imageName": "runpod/worker-v1-vllm:v1.7.0stable-cuda12.1.0", |
127 | 188 | "minimumCudaVersion": "12.1", |
|
0 commit comments