@@ -4461,7 +4461,7 @@ def test_dot3d(B, num_warps, M, N, K, BLOCK_M, BLOCK_N, in_dtype_str, out_dtype_
     shared_mem_accum = B * (BLOCK_M * K + K * BLOCK_N) * get_src_element_ty_size(in_dtype_str)
     if not is_interpreter() and triton.runtime.driver.active.utils.get_device_properties(
             triton.runtime.driver.active.get_current_device())["max_shared_mem"] < shared_mem_accum:
-        pytest.skip("Skipped due to insufficient shared memory on this GPU.")
+        pytest.xfail("Skipped due to insufficient shared memory on this GPU.")

     @triton.jit
     def kernel(
@@ -6210,7 +6210,7 @@ def test_convert2d(M, N, src_layout, interm_layout, dst_layout, dtype, device, t
     int32_size = 4
     # skip even if scratch buffer equal to shared mem size, because real scratch buffer is typically larger due to padding
     if scratch_shape[0] * scratch_shape[1] * int32_size >= shared_mem_size:
-        pytest.skip("Scratch buffer is too large")
+        pytest.xfail("Scratch buffer is too large")

     layouts = f"""
     #src = {src_layout}
@@ -6397,8 +6397,8 @@ def test_local_load_store(M, N, K, dist_layout, shared_layout, device, tmp_path:

 @pytest.mark.parametrize("M, N, M_tile_size, N_tile_size",
                          [[128, 128, 64, 64], [128, 128, 64, 32], [128, 64, 64, 32], [256, 128, 64, 64]])
-def test_split_subview(M, N, M_tile_size, N_tile_size, device='cuda'):
-    if not is_hip():
+def test_split_subview(M, N, M_tile_size, N_tile_size, device):
+    if not is_hip() and not is_xpu():
         pytest.skip("the test is temporary disabled for the Nvidia backend.")

     threads_per_warp = 64 if is_hip() else 32
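
Note on the skip-to-xfail change in the hunks above: an imperative `pytest.skip(...)` records the test as SKIPPED (not attempted), while an imperative `pytest.xfail(...)` ends the test immediately and records it as XFAIL (expected failure), which test summaries count separately from skips. The sketch below is not part of the diff; the budget constant, helper, and test names are hypothetical and only mirror the shared-memory guard pattern used in these tests.

import pytest

# Hypothetical shared-memory budget in bytes; the real tests query the active driver instead.
ASSUMED_MAX_SHARED_MEM = 48 * 1024


def tile_shared_mem_bytes(block_m, block_n, k, elem_size):
    # Rough analogue of the accounting in test_dot3d: two operand tiles resident in shared memory.
    return (block_m * k + k * block_n) * elem_size


def test_guarded_tile():
    needed = tile_shared_mem_bytes(block_m=128, block_n=128, k=64, elem_size=4)
    if needed > ASSUMED_MAX_SHARED_MEM:
        # pytest.skip(...) would record this test as SKIPPED.
        # pytest.xfail(...) raises immediately and records it as XFAIL instead.
        pytest.xfail("insufficient shared memory for this tile size")
    # ... the real kernel launch and checks would go here ...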