@@ -639,7 +639,7 @@ def kernel(input_desc, FAILURE: ttgl.constexpr):
     kernel[(1, )](input_desc, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_store_wait_load(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -692,7 +692,7 @@ def ws_kernel(output, FAILURE: ttgl.constexpr):
     ws_kernel[(1, )](output, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_load_wait_store(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -745,7 +745,7 @@ def ws_kernel(output, FAILURE: ttgl.constexpr):
     ws_kernel[(1, )](output, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("MISSING_BAR", ["none", "1", "2"])
 def test_ws_two_loads_two_bars(MISSING_BAR, device, run_wrapper):
     if run_wrapper:
@@ -807,7 +807,7 @@ def kernel(output, MISSING_BAR: ttgl.constexpr):
     kernel[(1, )](output, MISSING_BAR=MISSING_BAR, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_two_loads_one_bar(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -866,7 +866,7 @@ def kernel(output, FAILURE: ttgl.constexpr):
     kernel[(1, )](output, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("MISSING_BAR", ["none", "0", "1", "2", "3"])
 def test_ws_two_loads_two_bars_loop(MISSING_BAR, device, run_wrapper):
     if run_wrapper:
@@ -949,7 +949,7 @@ def kernel(output, MISSING_BAR: ttgl.constexpr):
     kernel[(1, )](output, MISSING_BAR=MISSING_BAR, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_load_ordering(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -1013,7 +1013,7 @@ def kernel(output, FAILURE: ttgl.constexpr):
     kernel[(1, )](output, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("MISSING_BAR", ["none", "T2", "T3"])
 def test_ws_two_producers_two_consumers(MISSING_BAR, device, run_wrapper):
     if run_wrapper:
@@ -1100,7 +1100,7 @@ def kernel(output, MISSING_BAR: ttgl.constexpr):
     kernel[(1, )](output, MISSING_BAR=MISSING_BAR, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("MISSING_BAR", ["none", "1", "2"])
 def test_ws_different_warp_sizes(MISSING_BAR, device, run_wrapper):
     if run_wrapper:
@@ -1168,7 +1168,8 @@ def kernel(output, MISSING_BAR: ttgl.constexpr):
     kernel[(1, )](output, MISSING_BAR=MISSING_BAR, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper or newer")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper or newer",
+                   run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_async_copy_commits(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -1229,7 +1230,8 @@ def kernel(input, FAILURE: ttgl.constexpr):
     kernel[(1, )](input, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper or newer")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper or newer",
+                   run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_async_copy_wait_visibility(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -1283,7 +1285,7 @@ def kernel(input, FAILURE: ttgl.constexpr):
     kernel[(1, )](input, FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] != 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] != 9, reason="Requires hopper", run=False)
 @pytest.mark.parametrize("FAILURE", [True, False])
 def test_ws_wgmma_wait_visibility(FAILURE, device, run_wrapper):
     if run_wrapper:
@@ -1337,7 +1339,7 @@ def kernel(FAILURE: ttgl.constexpr):
     kernel[(1, )](FAILURE=FAILURE, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_deadlock_two_partitions(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_deadlock_two_partitions, (device, False))
@@ -1371,7 +1373,7 @@ def kernel():
     kernel[(1, )](num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_deadlock_overarrival(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_deadlock_overarrival, (device, False))
@@ -1400,7 +1402,7 @@ def kernel():
     kernel[(1, )](num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_deadlock_underarrival(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_deadlock_underarrival, (device, False))
@@ -1436,7 +1438,7 @@ def kernel():
     kernel[(1, )](num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_deadlock_different_phases(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_deadlock_different_phases, (device, False))
@@ -1471,7 +1473,7 @@ def kernel():
     kernel[(1, )](num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_deadlock_exempt_when_tma_signals(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_deadlock_exempt_when_tma_signals, (device, False))
@@ -1514,7 +1516,7 @@ def kernel(input_desc):
     kernel[(1, )](input_desc, num_warps=4)


-@pytest.mark.skipif(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper")
+@pytest.mark.xfail(not is_cuda() or torch.cuda.get_device_capability()[0] < 9, reason="Requires hopper", run=False)
 def test_barrier_underflow(device, run_wrapper):
     if run_wrapper:
         result = run_in_process(test_barrier_underflow, (device, False))
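
The diff above swaps pytest.mark.skipif for pytest.mark.xfail(..., run=False) on every hardware-gated test. A minimal sketch of the difference in marker semantics, assuming standard pytest behavior; ON_UNSUPPORTED_HW is a hypothetical stand-in for the CUDA capability check used in the tests:

import pytest

# Hypothetical stand-in for: not is_cuda() or capability < 9
ON_UNSUPPORTED_HW = True

# skipif: when the condition is true, the test body never runs and the
# test is reported as "skipped".
@pytest.mark.skipif(ON_UNSUPPORTED_HW, reason="Requires hopper")
def test_reported_as_skipped():
    assert True

# xfail(run=False): when the condition is true, the test body also never
# runs, but the test is reported as "xfailed" (an expected failure)
# instead of being folded into the skip count, so it stays visible in
# reports as a known gap. When the condition is false, both markers are
# inert and the test runs normally.
@pytest.mark.xfail(ON_UNSUPPORTED_HW, reason="Requires hopper", run=False)
def test_reported_as_xfailed():
    assert True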