@@ -41,6 +41,14 @@ ENV NVCC_THREADS=$nvcc_threads
41
41
42
42
RUN python3 setup.py build_ext --inplace
43
43
44
+ # megablocks publishes no pre-built wheels, so build one from source at a pinned commit (refresh apt lists in the same layer as the install).
45
+ # https://github.com/stanford-futuredata/megablocks/commit/5897cd6f254b7b3edf7a708a3a3314ecb54b6f78
46
+ RUN apt-get update -y && apt-get install -y git && \
47
+ git clone https://github.com/stanford-futuredata/megablocks.git && \
48
+ cd megablocks && \
49
+ git checkout 5897cd6f254b7b3edf7a708a3a3314ecb54b6f78 && \
50
+ MAX_JOBS=8 NVCC_THREADS=8 python3 setup.py bdist_wheel
51
+
44
52
# image to run the unit test suite
45
53
FROM dev AS test
46
54
@@ -73,12 +81,16 @@ ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
73
81
74
82
# openai api server alternative
75
83
FROM vllm-base AS vllm-openai
76
- # install additional dependencies for openai api server, and mixtral
84
+ # install additional dependencies for the OpenAI API server
77
85
RUN --mount=type=cache,target=/root/.cache/pip \
78
- pip install accelerate megablocks
86
+ pip install accelerate
79
87
80
- COPY --from=build /workspace/vllm/*.so /workspace/vllm/
81
88
COPY vllm vllm
89
+ COPY --from=build /workspace/vllm/*.so /workspace/vllm/
90
+ # Install the megablocks wheel through a bind mount from the build stage so the wheel never lands in an image layer; the glob avoids hard-coding version/Python/platform tags.
91
+ RUN --mount=type=cache,target=/root/.cache/pip \
92
+ --mount=type=bind,from=build,source=/workspace/megablocks/dist,target=/tmp/megablocks-dist \
93
+ pip install /tmp/megablocks-dist/megablocks-*.whl
82
94
83
95
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
84
96
0 commit comments