Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/_schedule_image_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ jobs:
driver: docker-container
use: true

# Re-export ACTIONS_RESULTS_URL and ACTIONS_RUNTIME_TOKEN from the script's process
# environment as workflow environment variables (empty string when unset), so that
# later steps can reference them through `env.*`.
- name: Configure sccache
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_RESULTS_URL', process.env.ACTIONS_RESULTS_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Build and push
uses: docker/build-push-action@v7
id: build
Expand All @@ -89,6 +96,11 @@ jobs:
outputs: type=image,name=quay.io/ascend/vllm-ascend,push-by-digest=true,name-canonical=true,push=${{ inputs.should_push }}
build-args: |
PIP_INDEX_URL=https://pypi.org/simple
SCCACHE_GHA_ENABLED=true
MATRIX_ARCH=${{ matrix.arch }}
secrets: |
ACTIONS_RESULTS_URL=${{ env.ACTIONS_RESULTS_URL }}
ACTIONS_RUNTIME_TOKEN=${{ env.ACTIONS_RUNTIME_TOKEN }}
provenance: false

- name: Export digest
Expand Down
27 changes: 24 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ FROM quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG MOONCAKE_TAG="v0.3.8.post1"
ARG SOC_VERSION="ascend910b1"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

# Define environments
ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -31,17 +33,28 @@ WORKDIR /workspace

COPY . /vllm-workspace/vllm-ascend/

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

# Install Mooncake dependencies
RUN apt-get update -y && \
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
apt-get update -y && \
apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
ARCH=$(uname -m) && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON -DENABLE_SCCACHE=ON && \
make -j$(nproc) && make install && \
sccache --show-stats && \
rm -fr /vllm-workspace/Mooncake/build && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*
Expand All @@ -60,11 +73,19 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm
# Install vllm-ascend
# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
16 changes: 15 additions & 1 deletion Dockerfile.310p
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ FROM quay.io/ascend/cann:8.5.1-310p-ubuntu22.04-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG SOC_VERSION="ascend310p1"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

# Define environments
ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -36,6 +38,10 @@ WORKDIR /workspace

COPY . /vllm-workspace/vllm-ascend/

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

RUN pip config set global.index-url ${PIP_INDEX_URL}

# Install vLLM
Expand All @@ -50,11 +56,19 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm
# Install vllm-ascend
# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
16 changes: 15 additions & 1 deletion Dockerfile.310p.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ FROM quay.io/ascend/cann:8.5.1-310p-openeuler24.03-py3.11

ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG SOC_VERSION="ascend310p1"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

ENV SOC_VERSION=$SOC_VERSION \
TASK_QUEUE_ENABLE=1 \
Expand All @@ -34,6 +36,10 @@ WORKDIR /workspace

COPY . /vllm-workspace/vllm-ascend/

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

# Install vLLM
ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
ARG VLLM_TAG=v0.18.0
Expand All @@ -45,12 +51,20 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[a

# Install vllm-ascend
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
26 changes: 23 additions & 3 deletions Dockerfile.a3
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ FROM quay.io/ascend/cann:8.5.1-a3-ubuntu22.04-py3.11
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG MOONCAKE_TAG=v0.3.8.post1
ARG SOC_VERSION="ascend910_9391"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

COPY . /vllm-workspace/vllm-ascend/
# Define environments
Expand All @@ -32,17 +34,28 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}

WORKDIR /workspace

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

# Install Mooncake dependencies
RUN apt-get update -y && \
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
apt-get update -y && \
apt-get install -y git vim wget net-tools gcc g++ cmake libnuma-dev libjemalloc2 && \
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
cd /vllm-workspace/Mooncake && bash mooncake_installer.sh -y && \
ARCH=$(uname -m) && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON -DENABLE_SCCACHE=ON && \
make -j$(nproc) && make install && \
sccache --show-stats && \
rm -fr /vllm-workspace/Mooncake/build && \
rm -rf /var/cache/apt/* && \
rm -rf /var/lib/apt/lists/*
Expand All @@ -59,11 +72,18 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm
# Install vllm-ascend
# Append `libascend_hal.so` path (devlib) to LD_LIBRARY_PATH
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed (so cache hit/miss counts appear
# in the build log) and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
27 changes: 24 additions & 3 deletions Dockerfile.a3.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ FROM quay.io/ascend/cann:8.5.1-a3-openeuler24.03-py3.11
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG MOONCAKE_TAG="v0.3.8.post1"
ARG SOC_VERSION="ascend910_9391"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

ENV SOC_VERSION=$SOC_VERSION \
TASK_QUEUE_ENABLE=1 \
Expand All @@ -31,20 +33,31 @@ WORKDIR /workspace

COPY . /vllm-workspace/vllm-ascend/

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

SHELL ["/bin/bash", "-c"]

RUN yum update -y && \
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
yum update -y && \
yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
ARCH=$(uname -m) && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
cd /vllm-workspace/Mooncake && \
bash mooncake_installer.sh -y && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON -DENABLE_SCCACHE=ON && \
make -j$(nproc) && make install && \
sccache --show-stats && \
rm -fr /vllm-workspace/Mooncake/build && \
rm -rf /var/cache/yum/*

Expand All @@ -59,12 +72,20 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[a

# Install vllm-ascend
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
27 changes: 24 additions & 3 deletions Dockerfile.openEuler
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ FROM quay.io/ascend/cann:8.5.1-910b-openeuler24.03-py3.11
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
ARG MOONCAKE_TAG="v0.3.8.post1"
ARG SOC_VERSION="ascend910b1"
ARG SCCACHE_GHA_ENABLED
ARG MATRIX_ARCH

ENV SOC_VERSION=$SOC_VERSION \
TASK_QUEUE_ENABLE=1 \
Expand All @@ -31,20 +33,31 @@ WORKDIR /workspace

COPY . /vllm-workspace/vllm-ascend/

# Run the repository's sccache installer script. Both secrets are mounted with
# required=false, so the mount itself does not fail when they are not supplied.
# NOTE(review): what the script installs, and whether it reads the mounted secrets,
# is not visible here — confirm in tools/sccache_installer.sh.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
bash /vllm-workspace/vllm-ascend/tools/sccache_installer.sh

SHELL ["/bin/bash", "-c"]

RUN yum update -y && \
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
yum update -y && \
yum install -y git vim wget net-tools gcc gcc-c++ make cmake numactl-devel jemalloc && \
git clone --depth 1 --branch ${MOONCAKE_TAG} https://github.com/kvcache-ai/Mooncake /vllm-workspace/Mooncake && \
cp /vllm-workspace/vllm-ascend/tools/mooncake_installer.sh /vllm-workspace/Mooncake/ && \
ARCH=$(uname -m) && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/devlib:/usr/local/Ascend/ascend-toolkit/latest/${ARCH}-linux/lib64:$LD_LIBRARY_PATH && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/${ARCH}-openEuler-linux && \
cd /vllm-workspace/Mooncake && \
bash mooncake_installer.sh -y && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON && \
mkdir -p build && cd build && cmake .. -DUSE_ASCEND_DIRECT=ON -DENABLE_SCCACHE=ON && \
make -j$(nproc) && make install && \
sccache --show-stats && \
rm -fr /vllm-workspace/Mooncake/build && \
rm -rf /var/cache/yum/*

Expand All @@ -59,12 +72,20 @@ RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -e /vllm-workspace/vllm/[a

# Install vllm-ascend
# Installing vllm-ascend on x86 can pull upstream triton back in alongside triton-ascend. Remove it immediately after this step.
RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
# The optional secret mounts supply ACTIONS_RESULTS_URL / ACTIONS_RUNTIME_TOKEN; each
# is exported as an empty string when its secret file is missing.
# SCCACHE_GHA_ENABLED is forwarded from the build arg of the same name.
# After the install, sccache statistics are printed and /usr/bin/sccache is removed.
# NOTE(review): presumably sccache was installed by an earlier RUN layer, in which case
# removing it here does not reclaim image size — confirm.
RUN --mount=type=secret,id=ACTIONS_RESULTS_URL,required=false \
--mount=type=secret,id=ACTIONS_RUNTIME_TOKEN,required=false \
export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh && \
source /usr/local/Ascend/nnal/atb/set_env.sh && \
export ACTIONS_RESULTS_URL=$(cat /run/secrets/ACTIONS_RESULTS_URL 2>/dev/null || echo "") && \
export ACTIONS_RUNTIME_TOKEN=$(cat /run/secrets/ACTIONS_RUNTIME_TOKEN 2>/dev/null || echo "") && \
export ACTIONS_CACHE_SERVICE_V2=on && \
export SCCACHE_GHA_ENABLED=${SCCACHE_GHA_ENABLED} && \
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/`uname -i`-linux/devlib && \
export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/c++/12:/usr/include/c++/12/`uname -i`-openEuler-linux && \
python3 -m pip install -v -e /vllm-workspace/vllm-ascend/ --extra-index https://download.pytorch.org/whl/cpu/ && \
sccache --show-stats && \
rm -f /usr/bin/sccache && \
if [ "$(uname -i)" = "x86_64" ]; then python3 -m pip uninstall -y triton; fi && \
python3 -m pip cache purge

Expand Down
2 changes: 1 addition & 1 deletion csrc/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ CUSTOM_OPTION="${CUSTOM_OPTION} -DCUSTOM_ASCEND_CANN_PACKAGE_PATH=${ASCEND_CANN_
set_env
clean

ccache_system=$(which ccache || true)
ccache_system=$(command -v sccache 2>/dev/null || command -v ccache 2>/dev/null || true)
if [ -n "${ccache_system}" ];then
CUSTOM_OPTION="${CUSTOM_OPTION} -DENABLE_CCACHE=ON -DCUSTOM_CCACHE=${ccache_system}"
gen_bisheng ${ccache_system}
Expand Down
Loading
Loading