From 77db26723842adc657c17906e89540ebeea46220 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 17:33:07 -0400 Subject: [PATCH 01/10] Run unit tests on GH Actions --- .github/scripts/build-cuda.sh | 13 ++- .github/workflows/tests.yml | 172 ++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/tests.yml diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 9c92e0d01..315bcf60d 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -2,14 +2,19 @@ declare build_arch declare build_os declare cuda_version +declare cuda_targets set -xeuo pipefail -# By default, target Maxwell through Hopper. -build_capability="50;52;60;61;70;75;80;86;89;90" +if [ -n "${cuda_targets}" ]; then + # By default, target Maxwell through Hopper. + build_capability="50;52;60;61;70;75;80;86;89;90" -# CUDA 12.8: Add sm100 and sm120; remove < sm75 to align with PyTorch 2.7+cu128 minimum -[[ "${cuda_version}" == 12.8.* ]] && build_capability="75;80;86;89;90;100;120" + # CUDA 12.8: Add sm100 and sm120; remove < sm75 to align with PyTorch 2.7+cu128 minimum + [[ "${cuda_version}" == 12.8.* ]] && build_capability="75;80;86;89;90;100;120" +else + build_capability="${cuda_targets}" +fi [[ "${build_os}" = windows-* ]] && python3 -m pip install ninja diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..d322d0006 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,172 @@ +name: Unit tests + +on: + workflow_dispatch: + + +jobs: + + build-cpu: + strategy: + matrix: + os: [ubuntu-22.04, windows-2025] + arch: [x86_64] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - name: Setup MSVC + if: startsWith(matrix.os, 'windows') + uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl + + - name: Build C++ + run: bash .github/scripts/build-cpu.sh + env: + build_os: ${{ matrix.os }} + build_arch: ${{ matrix.arch }} + + - name: Upload build artifact + uses: actions/upload-artifact@v4 + with: + name: lib_cpu_${{ matrix.os }}_${{ matrix.arch }} + path: output/* + retention-days: 7 + + build-cuda: + strategy: + matrix: + cuda_version: ["11.8.0", "12.8.1"] + include: + - os: ubuntu-22.04 + arch: x86_64 + - os: windows-2025 + arch: x86_64 + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + + - name: Install CUDA Toolkit + uses: Jimver/cuda-toolkit@v0.2.23 + if: startsWith(matrix.os, 'windows') + id: cuda-toolkit + with: + cuda: ${{ matrix.cuda_version }} + method: "network" + sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]' + + - name: Setup MSVC + if: startsWith(matrix.os, 'windows') + uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl + + # We're running on T4 only for now, so we only target sm75. + - name: Build C++ / CUDA + run: bash .github/scripts/build-cuda.sh + env: + build_os: ${{ matrix.os }} + build_arch: x86_64 + cuda_version: ${{ matrix.cuda_version }} + cuda_targets: "75" + + - name: Upload build artifact + uses: actions/upload-artifact@v4 + with: + name: lib_cuda_${{matrix.cuda_version}}_${{ matrix.os }}_${{ matrix.arch }} + path: output/* + retention-days: 7 + + cpu-tests: + needs: build-cpu + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, windows-2025] + arch: [x86_64] + torch_version: ["2.4.1", "2.7.0"] + runs-on: ${{ matrix.os }} + env: + BNB_TEST_DEVICE: cpu + steps: + - name: Show CPU Information + run: | + if [[ $RUNNER_OS == 'Linux' ]]; then + lscpu + else + systeminfo | findstr /C:"Processor" + fi + + - uses: actions/checkout@v4 + + - name: Download build artifact + uses: actions/download-artifact@v4 + with: + name: lib_cpu_${{ matrix.os }}_${{ matrix.arch }} + path: bitsandbytes/ + merge-multiple: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/cpu + pip install -e ".[test]" + pip install pytest-cov + + - name: Show installed packages + run: pip list + + - name: Run tests + run: pytest + + cuda-tests: + needs: build-cuda + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04, windows-2025] + arch: [x86_64] + torch_version: ["2.4.1", "2.7.0"] + include: + - torch_version: "2.4.1" + cuda_version: "11.8.0" + pypi_index: "https://download.pytorch.org/whl/cu118" + - torch_version: "2.7.0" + cuda_version: "12.8.1" + pypi_index: "https://download.pytorch.org/whl/cu128" + runs-on: + labels: ${{ contains(matrix.os, 'windows') && 'CUDA-Windows-x64' || 'CUDA-Linux-x64' }} + env: + BNB_TEST_DEVICE: cuda + steps: + - name: Show GPU Information + run: nvidia-smi + + - uses: actions/checkout@v4 + + - name: Download build artifact + uses: actions/download-artifact@v4 + with: + name: lib_cuda_${{ matrix.cuda_version }}_${{ matrix.os }}_${{ matrix.arch }} + path: bitsandbytes/ + merge-multiple: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Install dependencies + run: | + pip install torch==${{ matrix.torch_version }} --index-url ${{ matrix.pypi_index }} + pip install -e ".[test]" + pip install pytest-cov + + - name: Show installed packages + run: pip list + + - name: Run tests + run: pytest From 18c07bdff7138bad4a4c5f143f80bd363db9d59c Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 17:36:28 -0400 Subject: [PATCH 02/10] fix --- .github/scripts/build-cuda.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 315bcf60d..430058285 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -6,14 +6,14 @@ declare cuda_targets set -xeuo pipefail -if [ -n "${cuda_targets}" ]; then +if [[ -v cuda_targets ]; then + build_capability="${cuda_targets}" +else # By default, target Maxwell through Hopper. build_capability="50;52;60;61;70;75;80;86;89;90" # CUDA 12.8: Add sm100 and sm120; remove < sm75 to align with PyTorch 2.7+cu128 minimum [[ "${cuda_version}" == 12.8.* ]] && build_capability="75;80;86;89;90;100;120" -else - build_capability="${cuda_targets}" fi [[ "${build_os}" = windows-* ]] && python3 -m pip install ninja From ad90a8429c0aca535af92b682644c1964fc5f1e0 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 17:37:24 -0400 Subject: [PATCH 03/10] fix --- .github/scripts/build-cuda.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/build-cuda.sh b/.github/scripts/build-cuda.sh index 430058285..be8e98704 100644 --- a/.github/scripts/build-cuda.sh +++ b/.github/scripts/build-cuda.sh @@ -6,7 +6,7 @@ declare cuda_targets set -xeuo pipefail -if [[ -v cuda_targets ]; then +if [[ -v cuda_targets ]]; then build_capability="${cuda_targets}" else # By default, target Maxwell through Hopper. From f1bd41bc03f31662b400ad542482f978e703b733 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 17:53:32 -0400 Subject: [PATCH 04/10] trigger workflow --- .github/workflows/tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d322d0006..4219d75ba 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,6 +2,8 @@ name: Unit tests on: workflow_dispatch: + push: + branches: [testing-ci] jobs: From 03a27b0d7356fdc4e2e70cce36466037d1bcef71 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:00:00 -0400 Subject: [PATCH 05/10] Update --- .github/workflows/tests.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4219d75ba..85ac1faad 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,12 +38,8 @@ jobs: strategy: matrix: cuda_version: ["11.8.0", "12.8.1"] - include: - - os: ubuntu-22.04 - arch: x86_64 - - os: windows-2025 - arch: x86_64 - + os: [ubuntu-22.04, windows-2025] + arch: [x86_64] runs-on: ${{ matrix.os }} steps: @@ -57,6 +53,7 @@ jobs: cuda: ${{ matrix.cuda_version }} method: "network" sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]' + use-github-cache: false - name: Setup MSVC if: startsWith(matrix.os, 'windows') @@ -91,6 +88,7 @@ jobs: BNB_TEST_DEVICE: cpu steps: - name: Show CPU Information + shell: bash run: | if [[ $RUNNER_OS == 'Linux' ]]; then lscpu From 2df730c623c1d4080f891cfcb9f6d4018cb0f69c Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:38:48 -0400 Subject: [PATCH 06/10] Update --- .github/workflows/tests.yml | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 85ac1faad..373895327 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,6 +5,9 @@ on: push: branches: [testing-ci] +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true jobs: @@ -31,7 +34,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: lib_cpu_${{ matrix.os }}_${{ matrix.arch }} - path: output/* + path: output/${{ matrix.os }}/${{ matrix.arch }}/* retention-days: 7 build-cuda: @@ -72,7 +75,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: lib_cuda_${{matrix.cuda_version}}_${{ matrix.os }}_${{ matrix.arch }} - path: output/* + path: output/${{ matrix.os }}/${{ matrix.arch }}/* retention-days: 7 cpu-tests: @@ -87,15 +90,6 @@ jobs: env: BNB_TEST_DEVICE: cpu steps: - - name: Show CPU Information - shell: bash - run: | - if [[ $RUNNER_OS == 'Linux' ]]; then - lscpu - else - systeminfo | findstr /C:"Processor" - fi - - uses: actions/checkout@v4 - name: Download build artifact @@ -154,6 +148,10 @@ jobs: path: bitsandbytes/ merge-multiple: true + - name: Show files + run: ls -lR . + shell: bash + - name: Setup Python uses: actions/setup-python@v5 with: From 08871a42f1857e78f2d16c3137c35526359fab6c Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Mon, 28 Apr 2025 20:22:11 -0400 Subject: [PATCH 07/10] Update --- .github/workflows/tests.yml | 6 +----- tests/test_functional.py | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 373895327..de3c7b2af 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -85,7 +85,7 @@ jobs: matrix: os: [ubuntu-22.04, windows-2025] arch: [x86_64] - torch_version: ["2.4.1", "2.7.0"] + torch_version: ["2.7.0"] runs-on: ${{ matrix.os }} env: BNB_TEST_DEVICE: cpu @@ -148,10 +148,6 @@ jobs: path: bitsandbytes/ merge-multiple: true - - name: Show files - run: ls -lR . - shell: bash - - name: Setup Python uses: actions/setup-python@v5 with: diff --git a/tests/test_functional.py b/tests/test_functional.py index ee2b52429..c8a390733 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -728,6 +728,9 @@ def test_int8_double_quant(self, dim1, dim2): ), ) def test_integrated_int8_linear_matmul(self, device, dim1, dim4, inner): + if device == "cpu" and inner > 2048: + pytest.skip("Slow on CPU") + for i in range(k): A = torch.randn(dim1, inner, device=device).half() B = torch.randn(dim4, inner, device=device).half() @@ -1316,7 +1319,18 @@ def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double if dtype == torch.float16: if dim <= 512: assert err1 < 7e-5 - assert relerr1 < 0.0008 + + # TODO(matthewdouglas): On T4, dim=128-fp16-fc2-fp4-DQ will have relerror ~ 0.00092727 + if ( + device == "cuda" + and double_quant + and storage_type == "fp4" + and kind == "fc2" + and torch.cuda.get_device_capability() == (7, 5) + ): + assert relerr1 < 0.00093 + else: + assert relerr1 < 0.0008 else: assert err1 < 6e-5 assert relerr1 < 2e-4 From 5b6a9b98cc82feb7628bde5452873de44ed19624 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 29 Apr 2025 12:34:04 -0400 Subject: [PATCH 08/10] Run tests nightly --- .github/workflows/tests.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index de3c7b2af..9aa2acaf0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,11 +2,14 @@ name: Unit tests on: workflow_dispatch: + schedule: + # Every day at 02:15 AM UTC + - cron: "15 2 * * *" push: branches: [testing-ci] concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: @@ -123,13 +126,13 @@ jobs: matrix: os: [ubuntu-22.04, windows-2025] arch: [x86_64] - torch_version: ["2.4.1", "2.7.0"] + cuda_version: ["11.8.0", "12.8.1"] include: - - torch_version: "2.4.1" - cuda_version: "11.8.0" + - cuda_version: "11.8.0" + torch_version: "2.4.1" pypi_index: "https://download.pytorch.org/whl/cu118" - - torch_version: "2.7.0" - cuda_version: "12.8.1" + - cuda_version: "12.8.1" + torch_version: "2.7.0" pypi_index: "https://download.pytorch.org/whl/cu128" runs-on: labels: ${{ contains(matrix.os, 'windows') && 'CUDA-Windows-x64' || 'CUDA-Linux-x64' }} From fb85e9ce5592c055c622fad91f51b9d04fe9d2e3 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 29 Apr 2025 12:42:43 -0400 Subject: [PATCH 09/10] Disable paged optimizer test on Windows --- tests/test_optim.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_optim.py b/tests/test_optim.py index 9358a2e9b..0d86da7d8 100644 --- a/tests/test_optim.py +++ b/tests/test_optim.py @@ -1,6 +1,7 @@ import os from os.path import join import shutil +import sys import time import uuid @@ -168,6 +169,9 @@ def rm_path(path): @pytest.mark.parametrize("dim1", [1024], ids=id_formatter("dim1")) @pytest.mark.parametrize("dim2", [32, 1024, 4097, 1], ids=id_formatter("dim2")) def test_optimizer32bit(requires_cuda, dim1, dim2, gtype, optim_name): + if optim_name.startswith("paged_") and sys.platform == "win32": + pytest.skip("Paged optimizers can have issues on Windows.") + if gtype == torch.bfloat16 and optim_name in ["momentum", "rmsprop"]: pytest.skip() if dim1 == 1 and dim2 == 1: From 818144b85284e2f76f37b7f4eb2e47a988fa6fc4 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Tue, 29 Apr 2025 13:17:54 -0400 Subject: [PATCH 10/10] Skip unit tests on Windows for CUDA 12.x (driver on runner is too old) --- .github/workflows/tests.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9aa2acaf0..a2917b9bb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -134,6 +134,11 @@ jobs: - cuda_version: "12.8.1" torch_version: "2.7.0" pypi_index: "https://download.pytorch.org/whl/cu128" + exclude: + # Our current T4 Windows runner has a driver too old (471.11) + # and cannot support CUDA 12+. Skip for now. + - os: windows-2025 + cuda_version: "12.8.1" runs-on: labels: ${{ contains(matrix.os, 'windows') && 'CUDA-Windows-x64' || 'CUDA-Linux-x64' }} env: