1- FROM nvidia/cuda:12.1.0-base-ubuntu22.04
1+ FROM nvidia/cuda:12.1.0-base-ubuntu22.04 as deps
22
33ENV DEBIAN_FRONTEND=noninteractive
44RUN apt-get update -y \
@@ -12,31 +12,16 @@ RUN ldconfig /usr/local/cuda-12.1/compat/
1212# EFRON:
1313# these guys are unbelievably huge - >80GiB. Took well over ten minutes to install on my machine and used 28GiB(!) of RAM.
1414# we should consider having a base image with them pre-installed or seeing if we can knock it down a little bit.
15- RUN python3 -m pip install "sglang[all]"
16- RUN python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3
17-
18-
19- # install _our_ dependencies
15+ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install "sglang[all]"
16+ RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3
17+ COPY requirements.txt ./requirements.txt
2018RUN --mount=type=cache,target=/root/.cache/pip \
21- python3 -m pip install --upgrade pip && \
22- python3 -m pip install --upgrade -r /requirements.txt
23-
24- RUN mkdir app
25- COPY requirements.txt ./app/requirements.txt
26-
27- # EFRON: no idea what this is doing: leaving it in in case it's important
28- ENV BASE_PATH=$BASE_PATH
29- ENV HF_DATASETS_CACHE="${BASE_PATH}/huggingface-cache/datasets"
30- ENV HF_HOME="${BASE_PATH}/huggingface-cache/hub"
31- ENV HF_HUB_ENABLE_HF_TRANSFER=1
32- ENV HUGGINGFACE_HUB_CACHE="${BASE_PATH}/huggingface-cache/hub"
33- ENV MODEL_NAME=$MODEL_NAME
34- ENV MODEL_REVISION=$MODEL_REVISION
35- ENV QUANTIZATION=$QUANTIZATION
36- ENV TOKENIZER_NAME=$TOKENIZER_NAME
37- ENV TOKENIZER_REVISION=$TOKENIZER_REVISION
19+ python3 -m pip install --upgrade pip && \
20+ python3 -m pip install --upgrade -r app/requirements.txt
3821
3922# not sure why this is here: is a vllm-workspace even in our image?
40- ENV PYTHONPATH="/:/vllm-workspace"
41- COPY ./src/handler.py ./app/handler.py
42- CMD ["python3" , "./app/handler.py" ] # actually run the handler
23+ ENV PYTHONPATH="/:/vllm-workspace"
24+ COPY ./src/handler.py ./handler.py
25+
26+ # run the serverless worker
27+ CMD ["python3" , "./handler.py" ]
0 commit comments