File tree Expand file tree Collapse file tree 2 files changed +1
-10
lines changed
Expand file tree Collapse file tree 2 files changed +1
-10
lines changed Original file line number Diff line number Diff line change 280280 "advanced" : true
281281 }
282282 },
283- {
284- "key" : " NUM_GPU_BLOCKS_OVERRIDE" ,
285- "input" : {
286- "name" : " Num GPU Blocks Override" ,
287- "type" : " number" ,
288- "description" : " If specified, ignore GPU profiling result and use this number of GPU blocks." ,
289- "advanced" : true
290- }
291- },
292283 {
293284 "key" : " MAX_NUM_BATCHED_TOKENS" ,
294285 "input" : {
Original file line number Diff line number Diff line change @@ -7,7 +7,7 @@ RUN ldconfig /usr/local/cuda-12.9/compat/
77
88# Install vLLM with FlashInfer - use CUDA 12.9 PyTorch wheels (cu129 index, compatible with vLLM 0.16.0)
99RUN python3 -m pip install --upgrade pip && \
10- python3 -m pip install "vllm[flashinfer]==0.15.1" --extra-index-url https://download.pytorch.org/whl/cu129
10+ python3 -m pip install "vllm[flashinfer]==0.16.0" --extra-index-url https://download.pytorch.org/whl/cu129
1111
1212
1313
You can’t perform that action at this time.
0 commit comments