|
#!/bin/bash

# This script builds the Ascend NPU docker image and runs the offline inference inside the container.
# It serves as a sanity check for compilation and basic model usage.
set -ex

# vllm-ascend repository that provides the test run configuration file
# (the base docker image name itself is read from that config file later).
VLLM_ASCEND_REPO="https://github.com/vllm-project/vllm-ascend.git"
# Path of the configuration file inside the vllm-ascend repository.
CONFIG_FILE_REMOTE_PATH="tests/e2e/vllm_interface/vllm_test.cfg"
# Local destination the configuration file is copied to (and later sourced from).
TEST_RUN_CONFIG_FILE="vllm_test.cfg"
# Temporary clone directory; assigned inside fetch_vllm_test_cfg().
VLLM_ASCEND_TMP_DIR=
# Fetch the test run configuration file from the vllm-ascend repository.
# Clones a shallow copy into a temporary directory, copies the config file
# to ${TEST_RUN_CONFIG_FILE} in the current directory, then removes the clone.
fetch_vllm_test_cfg() {
    VLLM_ASCEND_TMP_DIR=$(mktemp -d)
    # Ensure that the temporary directory is cleaned up when an exception occurs during configuration file retrieval
    cleanup() {
        # ':?' guards against an unset/empty variable turning this into 'rm -rf ""'
        rm -rf -- "${VLLM_ASCEND_TMP_DIR:?}"
    }
    trap cleanup EXIT

    GIT_TRACE=1 git clone -v --depth 1 "${VLLM_ASCEND_REPO}" "${VLLM_ASCEND_TMP_DIR}"
    if [ ! -f "${VLLM_ASCEND_TMP_DIR}/${CONFIG_FILE_REMOTE_PATH}" ]; then
        echo "Error: file '${CONFIG_FILE_REMOTE_PATH}' does not exist in the repository" >&2
        exit 1
    fi

    # If the file already exists locally, just overwrite it
    cp "${VLLM_ASCEND_TMP_DIR}/${CONFIG_FILE_REMOTE_PATH}" "${TEST_RUN_CONFIG_FILE}"
    echo "Copied ${CONFIG_FILE_REMOTE_PATH} to ${TEST_RUN_CONFIG_FILE}"

    # The EXIT trap is overwritten later in this script (remove_docker_container),
    # and by the time we get here the clone has served its purpose, so delete
    # the temporary resources manually and drop the trap.
    rm -rf -- "${VLLM_ASCEND_TMP_DIR}"
    trap - EXIT
}
| 36 | + |
# Loads the previously fetched test run configuration file into the current
# shell environment. Expects ${TEST_RUN_CONFIG_FILE} to exist (created by
# fetch_vllm_test_cfg) and to define at least BASE_IMAGE_NAME.
get_config() {
    if [ ! -f "${TEST_RUN_CONFIG_FILE}" ]; then
        echo "Error: file '${TEST_RUN_CONFIG_FILE}' does not exist locally" >&2
        exit 1
    fi
    source "${TEST_RUN_CONFIG_FILE}"
    # Fail fast with a clear message instead of later passing an empty name
    # to the Dockerfile FROM line.
    : "${BASE_IMAGE_NAME:?Error: BASE_IMAGE_NAME is not set by ${TEST_RUN_CONFIG_FILE}}"
    echo "Base docker image name that get from configuration: ${BASE_IMAGE_NAME}"
    return 0
}
| 48 | + |
# Get the test running configuration.
# Both functions call 'exit 1' on failure and the script runs under 'set -e',
# so a trailing 'if [ $? -ne 0 ]' check could never trigger (dead code) and
# has been removed.
fetch_vllm_test_cfg
get_config
| 56 | + |
# Unique image/container names so concurrent CI runs on the same host cannot collide.
image_name="npu/vllm-ci:${BUILDKITE_COMMIT}_${EPOCHSECONDS}"
container_name="npu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

# BUILDKITE_AGENT_NAME format is {hostname}-{agent_idx}-{npu_card_num}cards
agent_idx=$(echo "${BUILDKITE_AGENT_NAME}" | awk -F'-' '{print $(NF-1)}')
echo "agent_idx: ${agent_idx}"
builder_name="cachebuilder${agent_idx}"
builder_cache_dir="/mnt/docker-cache${agent_idx}"
# Quote the expansion so an unexpected value cannot word-split (SC2086).
mkdir -p "${builder_cache_dir}"
| 66 | + |
# Try building the docker image.
# The heredoc below is the Dockerfile: ${...} expansions are resolved NOW by
# this script (cache host/port, builder, image/base names), while \$-escaped
# variables are left for docker/the container shell to resolve at build time.
cat <<EOF | DOCKER_BUILDKIT=1 docker build \
  --add-host cache-service-vllm.nginx-pypi-cache.svc.cluster.local:${PYPI_CACHE_HOST} \
  --builder ${builder_name} --cache-from type=local,src=${builder_cache_dir} \
  --cache-to type=local,dest=${builder_cache_dir},mode=max \
  --progress=plain --load -t ${image_name} -f - .
FROM ${BASE_IMAGE_NAME}

# Define environments
ENV DEBIAN_FRONTEND=noninteractive

RUN pip config set global.index-url http://cache-service-vllm.nginx-pypi-cache.svc.cluster.local:${PYPI_CACHE_PORT}/pypi/simple && \
    pip config set global.trusted-host cache-service-vllm.nginx-pypi-cache.svc.cluster.local && \
    apt-get update -y && \
    apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \
    rm -rf /var/cache/apt/* && \
    rm -rf /var/lib/apt/lists/*

# Install for pytest to make the docker build cache layer always valid
# NOTE: the version specifier must be quoted; unquoted, the container shell
# parses ">=6.0" as an output redirection and silently drops the constraint.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install 'pytest>=6.0' modelscope

WORKDIR /workspace/vllm

# Install vLLM dependencies in advance. Effect: As long as common.txt remains unchanged, the docker cache layer will be valid.
COPY requirements/common.txt /workspace/vllm/requirements/common.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements/common.txt

COPY . .

# Install vLLM
RUN --mount=type=cache,target=/root/.cache/pip \
    VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
    python3 -m pip uninstall -y triton

# Install vllm-ascend
WORKDIR /workspace
ARG VLLM_ASCEND_REPO=https://github.com/vllm-project/vllm-ascend.git
ARG VLLM_ASCEND_TAG=main
RUN git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf "https://github.com/" && \
    git clone --depth 1 \$VLLM_ASCEND_REPO --branch \$VLLM_ASCEND_TAG /workspace/vllm-ascend

# Install vllm dependencies in advance. Effect: As long as common.txt remains unchanged, the docker cache layer will be valid.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r /workspace/vllm-ascend/requirements.txt

# NOTE(review): the backticked "uname -i" below expands on the build HOST when
# this heredoc is generated, not inside the container -- assumed safe because
# host and image share the same architecture; confirm if cross-building.
RUN --mount=type=cache,target=/root/.cache/pip \
    export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
    source /usr/local/Ascend/nnal/atb/set_env.sh && \
    export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
    python3 -m pip install -v -e /workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/

ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
ENV VLLM_USE_MODELSCOPE=True

WORKDIR /workspace/vllm-ascend

CMD ["/bin/bash"]

EOF
| 129 | + |
# Cleanup handler: always remove the CI container and image on exit so that
# failed runs do not leak resources on the build host.
remove_docker_container() {
    docker rm -f "${container_name}" || true
    docker image rm -f "${image_name}" || true
    docker system prune -f || true
}
trap remove_docker_container EXIT
| 137 | + |
# Generate corresponding --device args based on BUILDKITE_AGENT_NAME.
# Ascend NPU BUILDKITE_AGENT_NAME format is {hostname}-{agent_idx}-{npu_card_num}cards, and agent_idx starts from 1.
# e.g. atlas-a2-001-1-2cards means this is the first agent on the atlas-a2-001 host, and it has 2 NPU cards.
# Prints e.g. "--device /dev/davinci0 --device /dev/davinci1" to stdout; returns 1 on parse failure.
parse_and_gen_devices() {
    local input="$1"
    local index cards_num
    if [[ "$input" =~ ([0-9]+)-([0-9]+)cards$ ]]; then
        index="${BASH_REMATCH[1]}"
        cards_num="${BASH_REMATCH[2]}"
    else
        echo "parse error" >&2
        return 1
    fi

    # Build the flag list in an array; joining with "${arr[*]}" yields the
    # space-separated string without any leading-space trimming.
    # A C-style for loop is used instead of "(( i++ ))", whose zero-valued
    # post-increment returns status 1 and can trip 'set -e'.
    local -a devices=()
    local i
    for (( i = 0; i < cards_num; i++ )); do
        devices+=("--device" "/dev/davinci$(( (index - 1) * cards_num + i ))")
    done

    # Output devices: captured by the caller via command substitution.
    printf '%s' "${devices[*]}"
}
| 166 | + |
# Generate the --device flags for this agent's NPU cards; abort on parse failure.
devices=$(parse_and_gen_devices "${BUILDKITE_AGENT_NAME}") || exit 1

# Run the image and execute the Out-Of-Tree (OOT) platform interface test case on Ascend NPU hardware.
# This test checks whether the OOT platform interface is functioning properly in conjunction with
# the hardware plugin vllm-ascend.
model_cache_dir="/mnt/modelscope${agent_idx}"
mkdir -p "${model_cache_dir}"
# shellcheck disable=SC2086 -- ${devices} must stay unquoted so it word-splits
# into the repeated "--device /dev/davinciN" arguments.
docker run \
    ${devices} \
    --device /dev/davinci_manager \
    --device /dev/devmm_svm \
    --device /dev/hisi_hdc \
    -v /usr/local/dcmi:/usr/local/dcmi \
    -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
    -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
    -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
    -v /etc/ascend_install.info:/etc/ascend_install.info \
    -v "${model_cache_dir}":/root/.cache/modelscope \
    --entrypoint="" \
    --name "${container_name}" \
    "${image_name}" \
    bash -c '
    set -e
    pytest -v -s tests/e2e/vllm_interface/
'
0 commit comments