Skip to content

Commit c428b05

Browse files
authored
test: Separate test and build workflows (#23)
- Add GitHub action to free up disk space
- Split build and test into separate workflows
- Run Python tests and integration tests in parallel
- Build test image once, reuse it for Python tests and integration tests (`check-test-image` vs `build-test-image`)
- Build all stages of Dockerfile sequentially to capture build logs
- Limit number of parallel jobs for compilation of `flash-attention` builds to avoid OOM error
- Use same PyTorch build as used for flash-attn v2 wheels
- Build on push to main
- Add build status badge to README.md

---------

Signed-off-by: Christian Kadner <[email protected]>
1 parent 6fbf742 commit c428b05

File tree

7 files changed

+273
-27
lines changed

7 files changed

+273
-27
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: "Free up disk space"
2+
description: "Removes non-essential tools, libraries and cached files from GitHub action runner node"
3+
4+
runs:
5+
using: "composite"
6+
steps:
7+
- name: "Remove non-essential tools and libraries"
8+
shell: bash
9+
run: |
10+
# https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
11+
echo "Disk usage before cleanup:"
12+
df -h
13+
echo "Removing non-essential tools and libraries ..."
14+
sudo rm -rf /opt/ghc
15+
sudo rm -rf /usr/share/dotnet
16+
# sudo rm -rf /usr/local/share/boost
17+
echo "Deleting libraries for Android (12G), CodeQL (5.3G), PowerShell (1.3G), Swift (1.7G) ..."
18+
sudo rm -rf /usr/local/lib/android
19+
sudo rm -rf "${AGENT_TOOLSDIRECTORY}/CodeQL"
20+
sudo rm -rf /usr/local/share/powershell
21+
sudo rm -rf /usr/share/swift
22+
echo "Disk usage after cleanup:"
23+
df -h
24+
25+
- name: "Prune docker images"
26+
shell: bash
27+
run: |
28+
echo "Pruning docker images ..."
29+
docker image prune -a -f
30+
docker system df
31+
echo "Disk usage after pruning docker images:"
32+
df -h

.github/workflows/build.yml

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
name: "Build"
2+
3+
on:
4+
workflow_dispatch:
5+
push:
6+
branches:
7+
- "main"
8+
paths-ignore:
9+
- "**.md"
10+
- "proto/**"
11+
pull_request:
12+
branches:
13+
- "main"
14+
paths-ignore:
15+
- "**.md"
16+
- "proto/**"
17+
18+
defaults:
19+
run:
20+
shell: bash
21+
22+
env:
23+
CI: true
24+
DOCKER_BUILDKIT: 1
25+
SERVER_IMAGE_NAME: "text-gen-server:0"
26+
27+
jobs:
28+
build:
29+
runs-on: ubuntu-latest
30+
env:
31+
BUILDKIT_INLINE_CACHE: 1
32+
33+
steps:
34+
- name: "Checkout"
35+
uses: actions/checkout@v4
36+
37+
- name: "Free up disk space"
38+
uses: ./.github/actions/free-up-disk-space
39+
40+
- name: "Set up QEMU"
41+
uses: docker/setup-qemu-action@v3
42+
43+
- name: "Set up Docker Buildx"
44+
uses: docker/setup-buildx-action@v3
45+
46+
- name: "Generate job steps to build stages sequentially"
47+
run: |
48+
build_targets=$(grep -iE "^FROM .+ as .*$" Dockerfile | grep -E -o "[^ ]+$")
49+
for t in $build_targets; do
50+
echo
51+
echo " - name: \"Docker build ${t}\""
52+
echo " run: docker build --target=$t -t $t ."
53+
done
54+
55+
- name: "Docker build base"
56+
run: docker build --target=base -t base .
57+
58+
- name: "Docker build cuda-base"
59+
run: docker build --target=cuda-base -t cuda-base .
60+
61+
- name: "Docker build cuda-devel"
62+
run: docker build --target=cuda-devel -t cuda-devel .
63+
64+
- name: "Docker build python-builder"
65+
run: docker build --target=python-builder -t python-builder .
66+
67+
- name: "Docker build flash-att-v2-builder"
68+
run: docker build --target=flash-att-v2-builder -t flash-att-v2-builder .
69+
70+
- name: "Docker build flash-att-builder"
71+
run: docker build --target=flash-att-builder -t flash-att-builder .
72+
73+
- name: "Docker build flash-att-cache"
74+
run: docker build --target=flash-att-cache -t flash-att-cache .
75+
76+
- name: "Docker build flash-att-v2-cache"
77+
run: docker build --target=flash-att-v2-cache -t flash-att-v2-cache .
78+
79+
- name: "Docker build auto-gptq-installer"
80+
run: docker build --target=auto-gptq-installer -t auto-gptq-installer .
81+
82+
- name: "Docker build auto-gptq-cache"
83+
run: docker build --target=auto-gptq-cache -t auto-gptq-cache .
84+
85+
- name: "Docker build cuda-runtime"
86+
run: docker build --target=cuda-runtime -t cuda-runtime .
87+
88+
- name: "Docker build rust-builder"
89+
run: docker build --target=rust-builder -t rust-builder .
90+
91+
- name: "Docker build router-builder"
92+
run: docker build --target=router-builder -t router-builder .
93+
94+
- name: "Docker build launcher-builder"
95+
run: docker build --target=launcher-builder -t launcher-builder .
96+
97+
- name: "Docker build test-base"
98+
run: docker build --target=test-base -t test-base .
99+
100+
- name: "Docker build cpu-tests"
101+
run: docker build --target=cpu-tests -t cpu-tests .
102+
103+
- name: "Docker build build"
104+
run: docker build --target=build -t build .
105+
106+
- name: "Docker build exllama-kernels-builder"
107+
run: docker build --target=exllama-kernels-builder -t exllama-kernels-builder .
108+
109+
- name: "Docker build exllamav2-kernels-builder"
110+
run: docker build --target=exllamav2-kernels-builder -t exllamav2-kernels-builder .
111+
112+
- name: "Docker build server-release"
113+
run: docker build --target=server-release -t server-release .
114+
115+
- name: "List docker images"
116+
run: docker images
117+
118+
- name: "Check disk usage"
119+
shell: bash
120+
run: |
121+
docker system df
122+
df -h

.github/workflows/test.yml

Lines changed: 88 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,106 @@
1-
name: Test
1+
name: "Test"
22

33
on:
44
workflow_dispatch:
55
pull_request:
66
branches:
77
- main
88
paths-ignore:
9-
# - '.github/**'
10-
- '**.md'
11-
- 'proto/**'
9+
- "**.md"
10+
- "proto/**"
11+
12+
defaults:
13+
run:
14+
shell: bash
15+
16+
env:
17+
CI: true
18+
DOCKER_BUILDKIT: 1
19+
TEST_IMAGE_NAME: "cpu-tests:0"
1220

1321
jobs:
14-
test:
22+
build:
1523
runs-on: ubuntu-latest
16-
env:
17-
CI: true
18-
DOCKER_BUILDKIT: 1
19-
2024
steps:
2125
- name: "Checkout"
22-
uses: actions/checkout@v3
26+
uses: actions/checkout@v4
27+
28+
- name: "Free up disk space"
29+
uses: ./.github/actions/free-up-disk-space
30+
31+
- name: "Set up QEMU"
32+
uses: docker/setup-qemu-action@v3
2333

2434
- name: "Setup Docker Buildx"
25-
uses: docker/setup-buildx-action@v2
35+
uses: docker/setup-buildx-action@v3
36+
37+
- name: "Build test image"
38+
uses: docker/build-push-action@v5
39+
with:
40+
context: .
41+
file: ./Dockerfile
42+
target: "cpu-tests"
43+
tags: ${{ env.TEST_IMAGE_NAME }}
44+
outputs: type=docker,dest=/tmp/test_image.tar
45+
46+
- name: "Upload test image"
47+
uses: actions/upload-artifact@v4
48+
with:
49+
name: "test-image"
50+
path: /tmp/test_image.tar
51+
retention-days: 1
52+
53+
test-python:
54+
runs-on: ubuntu-latest
55+
needs: build
56+
steps:
57+
- name: "Checkout"
58+
uses: actions/checkout@v4
59+
60+
- name: "Free up disk space"
61+
uses: ./.github/actions/free-up-disk-space
62+
63+
- name: "Setup Docker Buildx"
64+
uses: docker/setup-buildx-action@v3
65+
66+
- name: "Download test image"
67+
uses: actions/download-artifact@v4
68+
with:
69+
name: "test-image"
70+
path: /tmp
71+
72+
- name: "Load Docker image"
73+
run: |
74+
docker load --input /tmp/test_image.tar
75+
docker image ls -a
76+
77+
- name: "Run Python tests"
78+
run: |
79+
make python-tests
80+
81+
integration-tests:
82+
runs-on: ubuntu-latest
83+
needs: build
84+
steps:
85+
- name: "Checkout"
86+
uses: actions/checkout@v4
87+
88+
- name: "Free up disk space"
89+
uses: ./.github/actions/free-up-disk-space
2690

27-
- name: "Build"
28-
run: make build
91+
- name: "Setup Docker Buildx"
92+
uses: docker/setup-buildx-action@v3
2993

30-
- name: "Build test-image"
31-
run: make build-test-image
94+
- name: "Download test image"
95+
uses: actions/download-artifact@v4
96+
with:
97+
name: "test-image"
98+
path: /tmp
3299

33-
- name: "Python tests"
34-
run: make python-tests
100+
- name: "Load Docker image"
101+
run: |
102+
docker load --input /tmp/test_image.tar
103+
docker image ls -a
35104
36-
- name: "Integration tests"
37-
run: make integration-tests
105+
- name: "Run integration tests"
106+
run: make integration-tests

Dockerfile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@ ARG BASE_UBI_IMAGE_TAG=9.3-1552
33
ARG PROTOC_VERSION=25.2
44
ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
55
# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
6+
7+
# match PyTorch version that was used to compile flash-attention v2 pre-built wheels
8+
# e.g. flash-attn v2.5.2 => torch ['1.12.1', '1.13.1', '2.0.1', '2.1.2', '2.2.0', '2.3.0.dev20240126']
9+
# https://github.com/Dao-AILab/flash-attention/blob/v2.5.2/.github/workflows/publish.yml#L47
10+
# use nightly build index for torch .dev pre-release versions
611
ARG PYTORCH_VERSION=2.2.0
12+
713
ARG PYTHON_VERSION=3.11
814

915
## Base Layer ##################################################################
@@ -201,6 +207,7 @@ ENV PATH=/opt/tgis/bin/:$PATH
201207

202208
# Install specific version of torch
203209
RUN pip install ninja==1.11.1.1 --no-cache-dir
210+
RUN pip install packaging --no-cache-dir
204211
RUN pip install torch==$PYTORCH_VERSION+cu118 --index-url "${PYTORCH_INDEX}/cu118" --no-cache-dir
205212

206213

@@ -211,7 +218,8 @@ ARG FLASH_ATT_VERSION=v2.5.2
211218
WORKDIR /usr/src/flash-attention-v2
212219

213220
# Download the wheel or build it if a pre-compiled release doesn't exist
214-
RUN MAX_JOBS=4 pip --verbose wheel flash-attn==${FLASH_ATT_VERSION} \
221+
# MAX_JOBS: for CI, limit the number of parallel compilation jobs; otherwise the GitHub runner runs out of memory (OOM)
222+
RUN MAX_JOBS=2 pip --verbose wheel flash-attn==${FLASH_ATT_VERSION} \
215223
--no-build-isolation --no-deps --no-cache-dir
216224

217225
## Build flash attention ######################################################
@@ -220,6 +228,10 @@ FROM python-builder as flash-att-builder
220228
WORKDIR /usr/src
221229

222230
COPY server/Makefile-flash-att Makefile
231+
232+
# For CI, limit the number of parallel compilation jobs; otherwise the GitHub runner runs out of memory (OOM)
233+
ENV MAX_JOBS=2
234+
223235
RUN make build-flash-attention
224236

225237
## Install auto-gptq ###########################################################

Makefile

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
SHELL := /bin/bash
22

3+
# Export so the recipe shells (and therefore `docker build`) see the variable.
# A plain Make assignment only reaches the environment of recipes when the
# variable was already set in the caller's environment.
export DOCKER_BUILDKIT := 1
4+
TEST_IMAGE_NAME ?= 'cpu-tests:0'
5+
SERVER_IMAGE_NAME ?= 'text-gen-server:0'
36
GIT_COMMIT_HASH := $(shell git rev-parse --short HEAD)
47

58
build:
6-
DOCKER_BUILDKIT=1 docker build --progress=plain --target=server-release --build-arg GIT_COMMIT_HASH=$(GIT_COMMIT_HASH) -t text-gen-server:0 .
9+
docker build --progress=plain --target=server-release --build-arg GIT_COMMIT_HASH=$(GIT_COMMIT_HASH) -t $(SERVER_IMAGE_NAME) .
710
docker images
811

912
all: help
@@ -44,19 +47,25 @@ run-bloom-quantize:
4447
text-generation-launcher --model-name bigscience/bloom --num-shard 8 --dtype-str int8
4548

4649
build-test-image:
47-
DOCKER_BUILDKIT=1 docker build --progress=plain --target=cpu-tests -t cpu-tests:0 .
50+
docker build --progress=plain --target=cpu-tests -t $(TEST_IMAGE_NAME) .
4851

49-
integration-tests: build-test-image
52+
check-test-image:
53+
@docker image inspect $(TEST_IMAGE_NAME) >/dev/null 2>&1 || $(MAKE) build-test-image
54+
55+
integration-tests: check-test-image
5056
mkdir -p /tmp/transformers_cache
5157
docker run --rm -v /tmp/transformers_cache:/transformers_cache \
5258
-e HUGGINGFACE_HUB_CACHE=/transformers_cache \
53-
-e TRANSFORMERS_CACHE=/transformers_cache -w /usr/src/integration_tests cpu-tests:0 make test
59+
-e TRANSFORMERS_CACHE=/transformers_cache \
60+
-w /usr/src/integration_tests \
61+
$(TEST_IMAGE_NAME) make test
5462

55-
python-tests: build-test-image
63+
python-tests: check-test-image
5664
mkdir -p /tmp/transformers_cache
5765
docker run --rm -v /tmp/transformers_cache:/transformers_cache \
5866
-e HUGGINGFACE_HUB_CACHE=/transformers_cache \
59-
-e TRANSFORMERS_CACHE=/transformers_cache cpu-tests:0 pytest -sv --ignore=server/tests/test_utils.py server/tests
67+
-e TRANSFORMERS_CACHE=/transformers_cache \
68+
$(TEST_IMAGE_NAME) pytest -sv --ignore=server/tests/test_utils.py server/tests
6069

6170
clean:
6271
rm -rf target

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
[![Build](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml/badge.svg)](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml)
2+
13
## Text Generation Inference Server
24

35
This repo is an early fork of https://github.com/huggingface/text-generation-inference.

integration_tests/text_generation_tests/test_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def start_server(
2727
num_shard: int,
2828
port: int,
2929
master_port: int,
30-
timeout=20,
30+
timeout=30,
3131
model_path=None,
3232
include_cache_env_vars=True,
3333
output_special_tokens=False,

0 commit comments

Comments
 (0)