File tree (2 files changed: +2 −2 lines changed) Original file line number Diff line number Diff line change @@ -58,7 +58,7 @@ run_test_config(){
5858 run_default_fa 1 test_deferred_init.py
5959 run_default_fa 1 test_float8tensor.py
6060 run_default_fa 1 test_float8_current_scaling_exact.py
61- run_default_fa 1 test_cpu_offloading.py
61+ test $_fus_attn = auto -o $_fus_attn = ck -o $_fus_attn = aotriton && NVTE_FLASH_ATTN=0 run 1 test_cpu_offloading.py
6262 run_default_fa 1 test_fused_rope.py
6363 run_default_fa 1 test_fusible_ops.py
6464 run_default_fa 3 test_gemm_autotune.py
Original file line number Diff line number Diff line change 2929
3030# Flash attention saves some internal tensor for the backward pass
3131# that cannot be offloaded to CPU.
32- assert os .getenv ("NVTE_FLASH_ATTN" ) == "0"
32+ assert os .getenv ("NVTE_FLASH_ATTN" , "1" ) == "0"
3333
3434# Offloading is supported for attention only for fused and flash attention backends,
3535# so the use of bfloat16 is required.
You can't perform that action at this time.
0 commit comments