@@ -180,6 +180,62 @@ jobs:
180180 pip install slack_sdk tabulate
181181 python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
182182
# Runs the pytest selection marked `big_gpu_with_torch_cuda` inside the
# diffusers CUDA container on the `aws-g6e-xlarge-plus` runner group, then
# uploads the `reports` directory and appends the report script's output to
# the job summary.
run_big_gpu_torch_tests:
  name: Torch tests on big GPU
  # NOTE(review): no `matrix` is declared for this job, so these strategy
  # settings look inert -- confirm whether a matrix is planned.
  strategy:
    fail-fast: false
    max-parallel: 2
  runs-on:
    group: aws-g6e-xlarge-plus
  container:
    image: diffusers/diffusers-pytorch-cuda
    options: --shm-size "16gb" --ipc host --gpus 0
  steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

    # Fails the job early if the GPU is not visible inside the container.
    - name: NVIDIA-SMI
      run: nvidia-smi

    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        # Install the checked-out project itself (".") in editable mode with
        # the `quality` and `test` extras; quoted so the shell never treats
        # the brackets as a glob pattern.
        python -m uv pip install -e ".[quality,test]"
        python -m uv pip install peft@git+https://github.com/huggingface/peft.git
        # Replace any preinstalled accelerate with the current main branch.
        pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
        python -m uv pip install pytest-reportlog

    - name: Environment
      run: |
        python utils/print_env.py

    - name: Selected Torch CUDA Test on big GPU
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        # Quoted: the value starts with ":" and must stay a literal string.
        CUBLAS_WORKSPACE_CONFIG: ":16:8"
        # NOTE(review): presumably the GPU memory (in GB) the big-GPU tests
        # require -- confirm against the test utilities that read it.
        BIG_GPU_MEMORY: "40"
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          -m "big_gpu_with_torch_cuda" \
          --make-reports=tests_big_gpu_torch_cuda \
          --report-log=tests_big_gpu_torch_cuda.log \
          tests/

    # Only on failure: print the short reports into the job log.
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_big_gpu_torch_cuda_stats.txt
        cat reports/tests_big_gpu_torch_cuda_failures_short.txt

    # Always upload, so reports are available for failed runs too.
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: torch_cuda_big_gpu_test_reports
        path: reports

    - name: Generate Report and Notify Channel
      if: always()
      run: |
        pip install slack_sdk tabulate
        python utils/log_reports.py >> "$GITHUB_STEP_SUMMARY"
238+
183239 run_flax_tpu_tests :
184240 name : Nightly Flax TPU Tests
185241 runs-on : docker-tpu
@@ -291,6 +347,64 @@ jobs:
291347 pip install slack_sdk tabulate
292348 python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
293349
# Runs the quantization test suite once per matrix entry inside the diffusers
# CUDA container on the `aws-g6e-xlarge-plus` runner group, then uploads the
# `reports` directory and appends the report script's output to the job
# summary.
run_nightly_quantization_tests:
  name: Torch quantization nightly tests
  strategy:
    fail-fast: false
    max-parallel: 2
    matrix:
      # `backend` is used as the package name to install and as part of the
      # report/artifact names; `test_location` is the sub-directory of
      # tests/quantization to run. Both are interpolated verbatim into shell
      # commands and file names, so they must not contain surrounding
      # whitespace.
      config:
        - backend: "bitsandbytes"
          test_location: "bnb"
  runs-on:
    group: aws-g6e-xlarge-plus
  container:
    image: diffusers/diffusers-pytorch-cuda
    options: --shm-size "20gb" --ipc host --gpus 0
  steps:
    - name: Checkout diffusers
      uses: actions/checkout@v3
      with:
        fetch-depth: 2

    # Fails the job early if the GPU is not visible inside the container.
    - name: NVIDIA-SMI
      run: nvidia-smi

    - name: Install dependencies
      run: |
        python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
        # Install the checked-out project itself (".") in editable mode with
        # the `quality` and `test` extras; quoted so the shell never treats
        # the brackets as a glob pattern.
        python -m uv pip install -e ".[quality,test]"
        # Install (or upgrade to) the latest release of the backend under test.
        python -m uv pip install -U ${{ matrix.config.backend }}
        python -m uv pip install pytest-reportlog

    - name: Environment
      run: |
        python utils/print_env.py

    - name: ${{ matrix.config.backend }} quantization tests on GPU
      env:
        HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
        # Quoted: the value starts with ":" and must stay a literal string.
        CUBLAS_WORKSPACE_CONFIG: ":16:8"
        # NOTE(review): presumably the GPU memory (in GB) the tests
        # require -- confirm against the test utilities that read it.
        BIG_GPU_MEMORY: "40"
      run: |
        python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
          --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
          --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
          tests/quantization/${{ matrix.config.test_location }}

    # Only on failure: print the short reports into the job log.
    - name: Failure short reports
      if: ${{ failure() }}
      run: |
        cat reports/tests_${{ matrix.config.backend }}_torch_cuda_stats.txt
        cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt

    # Always upload, so reports are available for failed runs too. The
    # backend is part of the artifact name, keeping names distinct per
    # matrix entry.
    - name: Test suite reports artifacts
      if: ${{ always() }}
      uses: actions/upload-artifact@v4
      with:
        name: torch_cuda_${{ matrix.config.backend }}_reports
        path: reports

    - name: Generate Report and Notify Channel
      if: always()
      run: |
        pip install slack_sdk tabulate
        python utils/log_reports.py >> "$GITHUB_STEP_SUMMARY"
407+
294408# M1 runner currently not well supported
295409# TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
296410# run_nightly_tests_apple_m1:
0 commit comments