@@ -111,20 +111,20 @@ def test_simple_matmul(dtype_src_str, dtype_dst_str, BLOCK_M, BLOCK_N, BLOCK_K,
     if is_hip() and (not is_hip_cdna3()) and dtype_src_str == "tensorfloat32":
         pytest.skip("tensorfloat32 is only supported on HIP CDNA3")
     if dtype_src_str == "float8e5" and BLOCK_K == 16:
-        pytest.skip("Skipping cases small K for float8")
+        pytest.xfail("Skipping cases small K for float8")
     if dtype_src_str == "float8e5" and device == "cuda" and torch.cuda.get_device_capability()[0] < 9:
         pytest.skip("Float8 requires compute capability >= 9")
     if (dtype_src_str == "float64") != (dtype_dst_str == "float64"):
-        pytest.skip("Skipping unsupported case")
-    if dtype_src_str == "float64" and not is_cuda():
+        pytest.xfail("Skipping unsupported case")
+    if not is_xpu() and dtype_src_str == "float64" and not is_cuda():
         pytest.skip("Float64 not supported on HIP yet")
     if "float32" in dtype_src_str and dtype_dst_str == "float16":
-        pytest.skip("Skipping unsupported case")
+        pytest.xfail("Skipping unsupported case")
     if "float32" == dtype_src_str and NUM_CTAS > 1:
         pytest.skip("FMA matmul not supported for multiple CTAs")
     if (BLOCK_M < 64 or (BLOCK_M == 64 and BLOCK_N == 16)) and NUM_CTAS > 1:
         pytest.skip("multi-CTAs is broken for mmav2")
-    if EPILOGUE_SUBTILE and not is_xpu() and (is_hip() or NUM_CTAS > 1 or BLOCK_N >= 512):
+    if not is_xpu() and EPILOGUE_SUBTILE and (is_hip() or NUM_CTAS > 1 or BLOCK_N >= 512):
         pytest.skip("creates convert layout too big to fit in smem")
     if LAYOUT_16x256 and (not is_cuda() or torch.cuda.get_device_capability()[0] < 10):
         pytest.xfail("skip forcing tmem layout on non blackwell targets.")