Skip to content

Commit cd3bb41

Browse files
committed
patch vLLM
1 parent 94896be commit cd3bb41

File tree

9 files changed

+4458
-2
lines changed

9 files changed

+4458
-2
lines changed

server/Dockerfile.cuda

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ RUN pip install -r requirements.txt
1818
# RUN pip install .
1919
ENV ACCELERATOR=CUDA
2020
# Copy our application source code
21-
COPY vllm_patched /usr/local/lib/python3.12/dist-packages/vllm
2221
COPY ./homl_server ./homl_server
22+
# Patch files for the installed vLLM package. The destination is absolute so it
# always matches the /app/patches/... paths read by the patch steps below,
# independent of the current WORKDIR.
COPY ./vllm_patches /app/patches
23+
24+
# Apply the api_server patch to the installed vLLM package. `patch -d` changes
# into the target directory itself, avoiding the `RUN cd ... &&` pattern.
RUN patch -d /usr/local/lib/python3.12/dist-packages/vllm -p1 < /app/patches/api_server.patch
25+
# Apply the model-registry patch to the installed vLLM package. `patch -d`
# changes into the target directory itself, avoiding the `RUN cd ... &&` pattern.
RUN patch -d /usr/local/lib/python3.12/dist-packages/vllm -p1 < /app/patches/registry.patch
26+
2327
WORKDIR /app/homl_server
2428
# The base image exposes port 8000, so we don't need to do it again.
2529
# EXPOSE 8000

server/Dockerfile.cuda-gptoss

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ RUN pip install -r requirements.txt
1818
ENV ACCELERATOR=CUDA
1919
# Copy our application source code
2020
COPY ./homl_server ./homl_server
21+
# Patch files for the installed vLLM package. The destination is absolute so it
# always matches the /app/patches/... paths read by the patch steps below,
# independent of the current WORKDIR.
COPY ./vllm_patches /app/patches
22+
23+
# Apply the api_server patch to the installed vLLM package. `patch -d` changes
# into the target directory itself, avoiding the `RUN cd ... &&` pattern.
RUN patch -d /usr/local/lib/python3.12/dist-packages/vllm -p1 < /app/patches/api_server.patch
24+
# Apply the model-registry patch to the installed vLLM package. `patch -d`
# changes into the target directory itself, avoiding the `RUN cd ... &&` pattern.
RUN patch -d /usr/local/lib/python3.12/dist-packages/vllm -p1 < /app/patches/registry.patch
25+
2126
WORKDIR /app/homl_server
2227
# The base image exposes port 8000, so we don't need to do it again.
2328
# EXPOSE 8000

server/create_patch.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Regenerate vllm_patches/api_server.patch as a unified diff of the original vs.
# patched api_server.py. All paths are relative, so run this from the directory
# that contains vllm_origin/, vllm_patched/ and vllm_patches/. Each side carries
# exactly one leading directory component (vllm_origin/ or vllm_patched/), which
# is what the `patch -p1` step in the Dockerfiles strips.
diff -u vllm_origin/entrypoints/openai/api_server.py vllm_patched/entrypoints/openai/api_server.py > vllm_patches/api_server.patch
2+
# Regenerate vllm_patches/registry.patch as a unified diff of the original vs.
# patched model registry. Paths are relative to the current directory, and the
# single leading component (vllm_origin/ or vllm_patched/) is what the
# `patch -p1` step in the Dockerfiles strips.
diff -u vllm_origin/model_executor/models/registry.py vllm_patched/model_executor/models/registry.py > vllm_patches/registry.patch

0 commit comments

Comments
 (0)