Skip to content

Commit 530370f

Browse files
committed
2 parents d369492 + 3b4614d commit 530370f

File tree

4 files changed

+65
-27
lines changed

4 files changed

+65
-27
lines changed

model-deployment/containers/llama2/Dockerfile.vllm

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
1-
FROM nvidia/cuda:11.8.0-base-ubuntu20.04 as base
1+
FROM nvcr.io/nvidia/cuda@sha256:3bbed06f530534a5f797a2a09df9b609783796d323663c94bc7ebe082c64a81f as base
22
ARG DEBIAN_FRONTEND=noninteractive
33

4-
RUN apt-get update && apt-get -y install tzdata && apt-get install -y curl && apt-get install -y git
4+
# nvidia-container-runtime
5+
ENV NVIDIA_VISIBLE_DEVICES all
6+
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
7+
ENV NVIDIA_REQUIRE_CUDA "cuda>=11.6"
8+
9+
RUN apt-get update && apt-get -y install tzdata && apt-get install -y curl && apt-get install -y git && apt-get clean && apt-get autoremove
510
RUN curl -L https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
611
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
712
ENV PATH="/miniconda/bin:$PATH"
8-
RUN mkdir -p /opt/vllm
913

14+
# install oci-cli
15+
RUN bash -c "$(curl -L https://raw.githubusercontent.com/oracle/oci-cli/master/scripts/install/install.sh)" -- --accept-all-defaults
16+
17+
RUN mkdir -p /opt/vllm
1018
ARG INSTALL_DIR=/opt/vllm
1119
COPY vllm-env.yaml /opt/vllm/environment.yaml
1220
RUN conda env create --name vllm -f ${INSTALL_DIR}/environment.yaml
21+
RUN conda clean -a
1322

1423
ENV TMPDIR=/home/datascience
1524
WORKDIR /home/datascience
@@ -20,6 +29,11 @@ COPY vllm-log-config.yaml ${INSTALL_DIR}/vllm-log-config.yaml
2029
ENV UVICORN_LOG-CONFIG=${INSTALL_DIR}/vllm-log-config.yaml
2130
ENV UVICORN_LOG_CONFIG=${INSTALL_DIR}/vllm-log-config.yaml
2231

32+
# for debugging
33+
RUN mkdir -p /aiapps
34+
COPY runner.sh /aiapps/
35+
RUN chmod +x /aiapps/runner.sh
36+
2337
# Default location where downloaded models are mapped on model container. No need to override, if using model catalog.
2438
ENV MODEL /opt/ds/model/deployed_model
2539

model-deployment/containers/llama2/Makefile

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,24 @@
1+
# Initial setup to create the version file if it doesn't exist
2+
init:
3+
@if [ ! -f version.txt ]; then \
4+
echo 0 > version.txt; \
5+
fi
6+
7+
increment_version:
8+
@echo "Reading current version..."
9+
$(eval VERSION=$(shell cat version.txt || echo "0"))
10+
@echo "Current version is $(VERSION)"
11+
$(eval NEW_VERSION=$(shell echo $$(($(VERSION) + 1))))
12+
@echo $(NEW_VERSION) > version.txt
13+
@echo "Version incremented to $(NEW_VERSION)"
14+
115
TENANCY:=${TENANCY_NAME}
216
CONTAINER_REGISTRY:=${REGION_KEY}.ocir.io
317

418
TGI_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/text-generation-interface-odsc:0.9.3
519
TGI_CONTAINER_NAME:=tgi-odsc
620

7-
VLLM_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/vllm-odsc:0.1.4
21+
VLLM_INFERENCE_IMAGE:=${CONTAINER_REGISTRY}/${TENANCY}/vllm-odsc:0.2.2-v
822
VLLM_CONTAINER_NAME:=vllm-odsc
923

1024
MODEL_DIR:=${PWD}/hfdata
@@ -31,8 +45,8 @@ check-env:
3145
@echo "Both TENANCY_NAME and REGION_KEY are set and have values."
3246
build.tgi:
3347
docker build --network host -t ${TGI_INFERENCE_IMAGE} -f Dockerfile.tgi .
34-
build.vllm: check-env
35-
docker build --network host -t ${VLLM_INFERENCE_IMAGE} -f Dockerfile.vllm .
48+
build.vllm: check-env init increment_version
49+
docker build --network host -t ${VLLM_INFERENCE_IMAGE}$(shell cat version.txt) -f Dockerfile.vllm .
3650
run.tgi.hf:
3751
docker run --rm -it --gpus all --shm-size 1g -p ${port}:${port} -e PORT=${port} -e TOKEN_FILE=${target_token} -e PARAMS=${params} -e MODEL=${model} -v ${MODEL_DIR}:${TARGET_DIR} -v ${token}:${target_token} --name ${TGI_CONTAINER_NAME} ${TGI_INFERENCE_IMAGE}
3852
run.tgi.oci:
@@ -48,6 +62,6 @@ stop.vllm:
4862
push.tgi:
4963
docker push ${TGI_INFERENCE_IMAGE}
5064
push.vllm:
51-
docker push ${VLLM_INFERENCE_IMAGE}
65+
docker push ${VLLM_INFERENCE_IMAGE}$(shell cat version.txt)
5266
app:
53-
MODEL=${model} gradio app.py
67+
MODEL=${model} gradio app.py
Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,17 @@
11
#!/bin/bash
22

3-
if [ -z "$TOKEN_FILE" ] ; then
4-
echo "No authentication token is provided. Weights are assumed to be downloaded from OCI Model Catalog."
3+
if [[ -z "${MODEL_DEPLOYMENT_OCID}" ]]; then
4+
auth_method=instance_principal
55
else
6+
auth_method=resource_principal
7+
fi
8+
9+
if [ -n "$BUCKET" ]; then
10+
echo "BUCKET variable is set."
11+
#oci os object sync --auth resource_principal --bucket-name genai --dest-dir /home/datascience/llma2/
12+
/root/bin/oci os object sync --auth $auth_method --bucket-name $BUCKET --dest-dir /home/datascience/model/
13+
MODEL="/home/datascience/model/$MODEL"
14+
elif [ -n "$TOKEN_FILE" ]; then
615
export HUGGING_FACE_HUB_TOKEN=$(cat $TOKEN_FILE)
716
echo "The md5 of token is $(md5sum $TOKEN_FILE)"
817
mkdir -p /home/datascience/.cache/huggingface
@@ -14,17 +23,19 @@ else
1423
echo $(df -h /home/datascience)
1524
df -h
1625
echo "Checking internet connection: "
17-
curl -sI -v https://www.wikipedia.org
26+
curl -s --connect-timeout 15 http://example.com > /dev/null && echo "Connected" || echo "Not connected"
1827
echo $(du -sh /home/datascience/*)
28+
else
29+
echo "No bucket or authentication token is provided. Weights are assumed to be downloaded from OCI Model Catalog."
1930
fi
2031

2132
echo "Starting vllm engine..."
2233
source activate vllm
23-
WEB_CONCURRENCY=1 python $VLLM_DIR/vllm-api-server.py --port ${PORT} --host 0.0.0.0 --log-config $VLLM_DIR/vllm-log-config.yaml --model ${MODEL} --tensor-parallel-size ${TENSOR_PARALLELISM} ${PARAMS}
34+
WEB_CONCURRENCY=1 python $VLLM_DIR/vllm-api-server.py --port ${PORT} --host 0.0.0.0 --log-config $VLLM_DIR/vllm-log-config.yaml --model ${MODEL} --tensor-parallel-size ${TENSOR_PARALLELISM}
2435

2536

2637
echo "Exiting vLLM. Here is the disk utilization of /home/datascience - "
2738
echo $(du -sh /home/datascience)
2839
echo "server logs: "
2940
ls -lah /home/datascience
30-
cat /home/datascience/server.log
41+
cat /home/datascience/server.log
Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
1+
channels:
2+
- pytorch
3+
- nvidia
4+
- main
5+
- conda-forge
16
dependencies:
2-
- main::python=3.8
3-
- main::pip
4-
- nvidia::cuda-cccl=11.8.89
5-
- nvidia::cuda-compiler=11.8.0
6-
- nvidia::cuda-cuobjdump=11.8.86
7-
- nvidia::cuda-cuxxfilt=11.8.86
8-
- nvidia::cuda-nvcc=11.8.89
9-
- nvidia::cuda-nvdisasm=11.8.86
10-
- nvidia::cuda-nvml-dev=11.8.86
7+
- python=3.9
8+
- pip
119
- pip:
12-
- vllm
13-
- transformers
14-
- pandas
15-
- flask
16-
- GPUtil
10+
- https://github.com/vllm-project/vllm/releases/download/v0.2.1.post1/vllm-0.2.1.post1-cp39-cp39-manylinux1_x86_64.whl
11+
- transformers >= 4.34.0
12+
- pandas==2.1.3
13+
- fastapi==0.104.1
14+
- GPUtil==1.4.0
15+
- oci-cli

0 commit comments

Comments
 (0)