try it again

ef0xa · ef0xa · commit 0bccda3500e2 · 2025-02-09T18:42:25.000-08:00
diff --git a/Dockerfile b/Dockerfile
@@ -7,20 +7,22 @@ RUN apt-get update -y \
 
 RUN ldconfig /usr/local/cuda-12.1/compat/
 
-# install sglang's dependencies
+# update pip
+RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade pip
 
+# install sglang's dependencies
 # EFRON:
 # these guys are unbelivably huge - >80GiB. Took well over ten minutes to install on my machine and used 28GiB(!) of RAM.
 # we should consider having a base image with them pre-installed or seeing if we can knock it down a little bit.
 RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install "sglang[all]" 
 RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3
+
+# install our own python dependencies
 COPY requirements.txt ./requirements.txt
-RUN --mount=type=cache,target=/root/.cache/pip \
-     python3 -m pip install --upgrade pip && \
-     python3 -m pip install --upgrade -r app/requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade -r ./requirements.txt
 
 # not sure why this is here: is a vllm-workspace even in our image?
- ENV PYTHONPATH="/:/vllm-workspace" 
+# ENV PYTHONPATH="/:/vllm-workspace" 
 COPY ./src/handler.py ./handler.py
 
 # run the serverless worker