test: Separate test and build workflows (#23)

ckadner · web-flow · commit c428b0518d24 · 2024-02-09T14:09:25.000-08:00
- Add GitHub action to free up disk space
- Split build and test into separate workflows
- Run Python tests and integration tests in parallel
- Build test image once, reuse it for Python tests and
  integration tests (`check-test-image` vs `build-test-image`)
- Build all stages of Dockerfile sequentially to capture build logs
- Limit number of parallel jobs for compilation of `flash-attention` builds
  to avoid OOM error
- Use same PyTorch build as used for flash-attn v2 wheels
- Build on push to main
- Add build status badge to README.md

---------

Signed-off-by: Christian Kadner <ckadner@us.ibm.com>
diff --git a/.github/actions/free-up-disk-space/action.yml b/.github/actions/free-up-disk-space/action.yml
@@ -0,0 +1,32 @@
+name: "Free up disk space"
+description: "Removes non-essential tools, libraries and cached files from GitHub action runner node"
+
+runs:
+  using: "composite"
+  steps:
+    - name: "Remove non-essential tools and libraries"
+      shell: bash
+      run: |
+        # https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+        echo "Disk usage before cleanup:"
+        df -h
+        echo "Removing non-essential tools and libraries ..."
+        sudo rm -rf /opt/ghc
+        sudo rm -rf /usr/share/dotnet
+        # sudo rm -rf /usr/local/share/boost
+        echo "Deleting libraries for Android (12G), CodeQL (5.3G), PowerShell (1.3G), Swift (1.7G) ..."
+        sudo rm -rf /usr/local/lib/android
+        sudo rm -rf "${AGENT_TOOLSDIRECTORY}/CodeQL"
+        sudo rm -rf /usr/local/share/powershell
+        sudo rm -rf /usr/share/swift
+        echo "Disk usage after cleanup:"
+        df -h
+
+    - name: "Prune docker images"
+      shell: bash
+      run: |
+        echo "Pruning docker images ..."
+        docker image prune -a -f
+        docker system df
+        echo "Disk usage after pruning docker images:"
+        df -h
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,122 @@
+name: "Build"
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - "main"
+    paths-ignore:
+      - "**.md"
+      - "proto/**"
+  pull_request:
+    branches:
+      - "main"
+    paths-ignore:
+      - "**.md"
+      - "proto/**"
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  CI: true
+  DOCKER_BUILDKIT: 1
+  SERVER_IMAGE_NAME: "text-gen-server:0"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    env:
+      BUILDKIT_INLINE_CACHE: 1
+
+    steps:
+    - name: "Checkout"
+      uses: actions/checkout@v4
+
+    - name: "Free up disk space"
+      uses: ./.github/actions/free-up-disk-space
+
+    - name: "Set up QEMU"
+      uses: docker/setup-qemu-action@v3
+
+    - name: "Set up Docker Buildx"
+      uses: docker/setup-buildx-action@v3
+
+    - name: "Generate job steps to build stages sequentially"
+      run: |
+        build_targets=$(grep -iE "^FROM .+ as .*$" Dockerfile | grep -E -o "[^ ]+$")
+        for t in $build_targets; do
+          echo
+          echo "    - name: \"Docker build ${t}\""
+          echo "      run: docker build --target=$t -t $t ."
+        done
+
+    - name: "Docker build base"
+      run: docker build --target=base -t base .
+
+    - name: "Docker build cuda-base"
+      run: docker build --target=cuda-base -t cuda-base .
+
+    - name: "Docker build cuda-devel"
+      run: docker build --target=cuda-devel -t cuda-devel .
+
+    - name: "Docker build python-builder"
+      run: docker build --target=python-builder -t python-builder .
+
+    - name: "Docker build flash-att-v2-builder"
+      run: docker build --target=flash-att-v2-builder -t flash-att-v2-builder .
+
+    - name: "Docker build flash-att-builder"
+      run: docker build --target=flash-att-builder -t flash-att-builder .
+
+    - name: "Docker build flash-att-cache"
+      run: docker build --target=flash-att-cache -t flash-att-cache .
+
+    - name: "Docker build flash-att-v2-cache"
+      run: docker build --target=flash-att-v2-cache -t flash-att-v2-cache .
+
+    - name: "Docker build auto-gptq-installer"
+      run: docker build --target=auto-gptq-installer -t auto-gptq-installer .
+
+    - name: "Docker build auto-gptq-cache"
+      run: docker build --target=auto-gptq-cache -t auto-gptq-cache .
+
+    - name: "Docker build cuda-runtime"
+      run: docker build --target=cuda-runtime -t cuda-runtime .
+
+    - name: "Docker build rust-builder"
+      run: docker build --target=rust-builder -t rust-builder .
+
+    - name: "Docker build router-builder"
+      run: docker build --target=router-builder -t router-builder .
+
+    - name: "Docker build launcher-builder"
+      run: docker build --target=launcher-builder -t launcher-builder .
+
+    - name: "Docker build test-base"
+      run: docker build --target=test-base -t test-base .
+
+    - name: "Docker build cpu-tests"
+      run: docker build --target=cpu-tests -t cpu-tests .
+
+    - name: "Docker build build"
+      run: docker build --target=build -t build .
+
+    - name: "Docker build exllama-kernels-builder"
+      run: docker build --target=exllama-kernels-builder -t exllama-kernels-builder .
+
+    - name: "Docker build exllamav2-kernels-builder"
+      run: docker build --target=exllamav2-kernels-builder -t exllamav2-kernels-builder .
+
+    - name: "Docker build server-release"
+      run: docker build --target=server-release -t server-release .
+
+    - name: "List docker images"
+      run: docker images
+
+    - name: "Check disk usage"
+      shell: bash
+      run: |
+        docker system df
+        df -h
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,37 +1,106 @@
-name: Test
+name: "Test"
 
 on:
   workflow_dispatch:
   pull_request:
     branches:
       - main
     paths-ignore:
-#      - '.github/**'
-      - '**.md'
-      - 'proto/**'
+      - "**.md"
+      - "proto/**"
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  CI: true
+  DOCKER_BUILDKIT: 1
+  TEST_IMAGE_NAME: "cpu-tests:0"
 
 jobs:
-  test:
+  build:
     runs-on: ubuntu-latest
-    env:
-      CI: true
-      DOCKER_BUILDKIT: 1
-
     steps:
     - name: "Checkout"
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
+
+    - name: "Free up disk space"
+      uses: ./.github/actions/free-up-disk-space
+
+    - name: "Set up QEMU"
+      uses: docker/setup-qemu-action@v3
 
     - name: "Setup Docker Buildx"
-      uses: docker/setup-buildx-action@v2
+      uses: docker/setup-buildx-action@v3
+
+    - name: "Build test image"
+      uses: docker/build-push-action@v5
+      with:
+        context: .
+        file: ./Dockerfile
+        target: "cpu-tests"
+        tags: ${{ env.TEST_IMAGE_NAME }}
+        outputs: type=docker,dest=/tmp/test_image.tar
+
+    - name: "Upload test image"
+      uses: actions/upload-artifact@v4
+      with:
+        name: "test-image"
+        path: /tmp/test_image.tar
+        retention-days: 1
+
+  test-python:
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@v4  # same major version as the build job's checkout
+
+      - name: "Free up disk space"
+        uses: ./.github/actions/free-up-disk-space
+
+      - name: "Setup Docker Buildx"
+        uses: docker/setup-buildx-action@v3
+
+      - name: "Download test image"
+        uses: actions/download-artifact@v4
+        with:
+          name: "test-image"
+          path: /tmp
+
+      - name: "Load Docker image"
+        run: |
+          docker load --input /tmp/test_image.tar
+          docker image ls -a
+
+      - name: "Run Python tests"
+        run: |
+          make python-tests
+
+  integration-tests:
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@v4  # same major version as the build job's checkout
+
+      - name: "Free up disk space"
+        uses: ./.github/actions/free-up-disk-space
 
-    - name: "Build"
-      run: make build
+      - name: "Setup Docker Buildx"
+        uses: docker/setup-buildx-action@v3
 
-    - name: "Build test-image"
-      run: make build-test-image
+      - name: "Download test image"
+        uses: actions/download-artifact@v4
+        with:
+          name: "test-image"
+          path: /tmp
 
-    - name: "Python tests"
-      run: make python-tests
+      - name: "Load Docker image"
+        run: |
+          docker load --input /tmp/test_image.tar
+          docker image ls -a
 
-    - name: "Integration tests"
-      run: make integration-tests
+      - name: "Run integration tests"
+        run: make integration-tests
diff --git a/Dockerfile b/Dockerfile
@@ -3,7 +3,13 @@ ARG BASE_UBI_IMAGE_TAG=9.3-1552
 ARG PROTOC_VERSION=25.2
 ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
 # ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+
+# match PyTorch version that was used to compile flash-attention v2 pre-built wheels
+# e.g. flash-attn v2.5.2 => torch ['1.12.1', '1.13.1', '2.0.1', '2.1.2', '2.2.0', '2.3.0.dev20240126']
+# https://github.com/Dao-AILab/flash-attention/blob/v2.5.2/.github/workflows/publish.yml#L47
+# use nightly build index for torch .dev pre-release versions
 ARG PYTORCH_VERSION=2.2.0
+
 ARG PYTHON_VERSION=3.11
 
 ## Base Layer ##################################################################
@@ -201,6 +207,7 @@ ENV PATH=/opt/tgis/bin/:$PATH
 
 # Install specific version of torch
 RUN pip install ninja==1.11.1.1 --no-cache-dir
+RUN pip install packaging --no-cache-dir
 RUN pip install torch==$PYTORCH_VERSION+cu118 --index-url "${PYTORCH_INDEX}/cu118" --no-cache-dir
 
 
@@ -211,7 +218,8 @@ ARG FLASH_ATT_VERSION=v2.5.2
 WORKDIR /usr/src/flash-attention-v2
 
 # Download the wheel or build it if a pre-compiled release doesn't exist
-RUN MAX_JOBS=4 pip --verbose wheel flash-attn==${FLASH_ATT_VERSION} \
+# MAX_JOBS: For CI, limit number of parallel compilation threads otherwise the github runner goes OOM
+RUN MAX_JOBS=2 pip --verbose wheel flash-attn==${FLASH_ATT_VERSION} \
     --no-build-isolation --no-deps --no-cache-dir
 
 ## Build flash attention  ######################################################
@@ -220,6 +228,10 @@ FROM python-builder as flash-att-builder
 WORKDIR /usr/src
 
 COPY server/Makefile-flash-att Makefile
+
+# For CI, limit number of parallel compilation threads otherwise the github runner goes OOM
+ENV MAX_JOBS=2
+
 RUN make build-flash-attention
 
 ## Install auto-gptq ###########################################################
diff --git a/Makefile b/Makefile
@@ -1,9 +1,12 @@
 SHELL := /bin/bash
 
+export DOCKER_BUILDKIT := 1
+TEST_IMAGE_NAME ?= 'cpu-tests:0'
+SERVER_IMAGE_NAME ?= 'text-gen-server:0'
 GIT_COMMIT_HASH := $(shell git rev-parse --short HEAD)
 
 build:
-	DOCKER_BUILDKIT=1 docker build --progress=plain --target=server-release --build-arg GIT_COMMIT_HASH=$(GIT_COMMIT_HASH) -t text-gen-server:0 .
+	docker build --progress=plain --target=server-release --build-arg GIT_COMMIT_HASH=$(GIT_COMMIT_HASH) -t $(SERVER_IMAGE_NAME) .
 	docker images
 
 all: help
@@ -44,19 +47,25 @@ run-bloom-quantize:
 	text-generation-launcher --model-name bigscience/bloom --num-shard 8 --dtype-str int8
 
 build-test-image:
-	DOCKER_BUILDKIT=1 docker build --progress=plain --target=cpu-tests -t cpu-tests:0 .
+	docker build --progress=plain --target=cpu-tests -t $(TEST_IMAGE_NAME) .
 
-integration-tests: build-test-image
+check-test-image:
+	@docker image inspect $(TEST_IMAGE_NAME) >/dev/null 2>&1 || $(MAKE) build-test-image
+
+integration-tests: check-test-image
 	mkdir -p /tmp/transformers_cache
 	docker run --rm -v /tmp/transformers_cache:/transformers_cache \
 		-e HUGGINGFACE_HUB_CACHE=/transformers_cache \
-		-e TRANSFORMERS_CACHE=/transformers_cache -w /usr/src/integration_tests cpu-tests:0 make test
+		-e TRANSFORMERS_CACHE=/transformers_cache \
+		-w /usr/src/integration_tests \
+		$(TEST_IMAGE_NAME) make test
 
-python-tests: build-test-image
+python-tests: check-test-image
 	mkdir -p /tmp/transformers_cache
 	docker run --rm -v /tmp/transformers_cache:/transformers_cache \
 		-e HUGGINGFACE_HUB_CACHE=/transformers_cache \
-		-e TRANSFORMERS_CACHE=/transformers_cache cpu-tests:0 pytest -sv --ignore=server/tests/test_utils.py server/tests
+		-e TRANSFORMERS_CACHE=/transformers_cache \
+		$(TEST_IMAGE_NAME) pytest -sv --ignore=server/tests/test_utils.py server/tests
 
 clean:
 	rm -rf target
diff --git a/README.md b/README.md
@@ -1,3 +1,5 @@
+[![Build](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml/badge.svg)](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml)
+
 ## Text Generation Inference Server
 
 This repo is an early fork of https://github.com/huggingface/text-generation-inference.
diff --git a/integration_tests/text_generation_tests/test_server.py b/integration_tests/text_generation_tests/test_server.py
@@ -27,7 +27,7 @@ def start_server(
     num_shard: int,
     port: int,
     master_port: int,
-    timeout=20,
+    timeout=30,
     model_path=None,
     include_cache_env_vars=True,
     output_special_tokens=False,

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+[![Build](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml/badge.svg)](https://github.com/IBM/text-generation-inference/actions/workflows/build.yml)`
	`2`	`+`
`1`	`3`	`## Text Generation Inference Server`
`2`	`4`
`3`	`5`	`This repo is an early fork of https://github.com/huggingface/text-generation-inference.`