Skip to content

Commit 17efb0e

Browse files
Merge pull request #272 from runpod-workers/feat/vllm-0.16.0
feat: Update to 0.16.0
2 parents 13fa718 + 2b5f07d commit 17efb0e

File tree

2 files changed

+1
-10
lines changed

2 files changed

+1
-10
lines changed

.runpod/hub.json

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -280,15 +280,6 @@
280280
"advanced": true
281281
}
282282
},
283-
{
284-
"key": "NUM_GPU_BLOCKS_OVERRIDE",
285-
"input": {
286-
"name": "Num GPU Blocks Override",
287-
"type": "number",
288-
"description": "If specified, ignore GPU profiling result and use this number of GPU blocks.",
289-
"advanced": true
290-
}
291-
},
292283
{
293284
"key": "MAX_NUM_BATCHED_TOKENS",
294285
"input": {

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ RUN ldconfig /usr/local/cuda-12.9/compat/
77

88
# Install vLLM with FlashInfer - use the CUDA 12.9 (cu129) PyTorch wheel index, matching the pinned vLLM version below
99
RUN python3 -m pip install --upgrade pip && \
10-
python3 -m pip install "vllm[flashinfer]==0.15.1" --extra-index-url https://download.pytorch.org/whl/cu129
10+
python3 -m pip install "vllm[flashinfer]==0.16.0" --extra-index-url https://download.pytorch.org/whl/cu129
1111

1212

1313

0 commit comments

Comments
 (0)