diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index b8e9860aec63..e2228fdacf30 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -347,6 +347,64 @@ jobs: pip install slack_sdk tabulate python utils/log_reports.py >> $GITHUB_STEP_SUMMARY + run_nightly_quantization_tests: + name: Torch quantization nightly tests + strategy: + fail-fast: false + max-parallel: 2 + matrix: + config: + - backend: "bitsandbytes" + test_location: "bnb" + runs-on: + group: aws-g6e-xlarge-plus + container: + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "20gb" --ipc host --gpus 0 + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + - name: NVIDIA-SMI + run: nvidia-smi + - name: Install dependencies + run: | + python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" + python -m uv pip install -e [quality,test] + python -m uv pip install -U ${{ matrix.config.backend }} + python -m uv pip install pytest-reportlog + - name: Environment + run: | + python utils/print_env.py + - name: ${{ matrix.config.backend }} quantization tests on GPU + env: + HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} + # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms + CUBLAS_WORKSPACE_CONFIG: :16:8 + BIG_GPU_MEMORY: 40 + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \ + --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \ + tests/quantization/${{ matrix.config.test_location }} + - name: Failure short reports + if: ${{ failure() }} + run: | + cat reports/tests_${{ matrix.config.backend }}_torch_cuda_stats.txt + cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt + - name: Test suite reports artifacts + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: torch_cuda_${{ matrix.config.backend }}_reports + path: reports + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python utils/log_reports.py >> $GITHUB_STEP_SUMMARY + # M1 runner currently not well supported # TODO: (Dhruv) add these back when we setup better testing for Apple Silicon # run_nightly_tests_apple_m1: diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py index 7b553434fbe9..b548b03be31d 100644 --- a/tests/quantization/bnb/test_4bit.py +++ b/tests/quantization/bnb/test_4bit.py @@ -432,7 +432,6 @@ def test_quality(self): expected_slice = np.array([0.1123, 0.1296, 0.1609, 0.1042, 0.1230, 0.1274, 0.0928, 0.1165, 0.1216]) max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) - print(f"{max_diff=}") self.assertTrue(max_diff < 1e-2) def test_generate_quality_dequantize(self): diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py index ba2402461c87..a67e8d38e961 100644 --- a/tests/quantization/bnb/test_mixed_int8.py +++ b/tests/quantization/bnb/test_mixed_int8.py @@ -369,7 +369,7 @@ def test_quality(self): output_type="np", ).images out_slice = output[0, -3:, -3:, -1].flatten() - expected_slice = np.array([0.0149, 0.0322, 0.0073, 0.0134, 0.0332, 0.011, 0.002, 0.0232, 0.0193]) + expected_slice = np.array([0.0376, 0.0359, 0.0015, 0.0449, 0.0479, 0.0098, 0.0083, 0.0295, 0.0295]) max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice) self.assertTrue(max_diff < 1e-2)