Skip to content

Commit 53bc0ef

Browse files
authored
Improve triton kernels pass rate on rolling and xe2 (#5080)
Pass rate: 89.04% -> 97.93%

---------

Signed-off-by: Anatoly Myachev <[email protected]>
1 parent: 1b4b914; commit: 53bc0ef

File tree

3 files changed

+56
-8
lines changed

3 files changed

+56
-8
lines changed

python/triton_kernels/tests/test_matmul.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# testing utilities
2121
from triton_kernels.testing import assert_close, compute_actual_scale
2222
# target-specific utilities
23-
from triton_kernels.target_info import is_hip, is_hip_cdna3, is_cuda, is_xpu, is_hip_cdna4
23+
from triton_kernels.target_info import is_hip, is_hip_cdna3, is_cuda, is_hip_cdna4
2424

2525
# ---------------
2626
# initialize data
@@ -294,15 +294,11 @@ def test_op(m, n, k, split_k, do_gather, do_scatter, fused_scatter, has_y_gammas
294294
if split_k > 1:
295295
pytest.skip("splitK hasn't been fully tested on AMD GPU.")
296296

297-
elif is_xpu():
298-
if split_k > 1:
299-
pytest.skip("FIXME: https://github.com/intel/intel-xpu-backend-for-triton/issues/5074")
300-
301297
if "float8_e4m3fnuz" in (weight_dtype_str, act_dtype_str) and not is_hip_cdna3():
302-
pytest.skip("float8_e4m3fnuz only tested on AMD CDNA3 Platform")
298+
pytest.xfail("float8_e4m3fnuz only tested on AMD CDNA3 Platform")
303299

304300
if fused_scatter and split_k > 1:
305-
pytest.skip("fused scatter scratchpad not supported with split_k")
301+
pytest.xfail("fused scatter scratchpad not supported with split_k")
306302

307303
if hbm_swizzling:
308304
if is_hip():
@@ -430,7 +426,7 @@ def test_op(m, n, k, split_k, do_gather, do_scatter, fused_scatter, has_y_gammas
430426
try:
431427
tri_y = matmul_ogs(x_tri, w_tri, bias_tri, rdata, gindx, sindx, precision_opt, gammas=gs1_ref, epilogue=epilogue)
432428
except (opt_flags.InapplicableConstraint, NotImplementedError):
433-
pytest.skip("inapplicable opt_flags constraint")
429+
pytest.xfail("inapplicable opt_flags constraint")
434430
# If split_k > 1, then the intermediate tensor is fp32.
435431
sep_gather = mode == "ragged" and do_gather and n_expts_act > 1 and split_k == 1
436432
sep_scatter = mode == "ragged" and do_scatter and n_expts_act > 1 and split_k == 1
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,35 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/5074
2+
tests/test_matmul.py::test_op[False-False-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
3+
tests/test_matmul.py::test_op[False-False-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
4+
tests/test_matmul.py::test_op[False-False-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
5+
tests/test_matmul.py::test_op[False-False-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
6+
tests/test_matmul.py::test_op[False-False-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
7+
tests/test_matmul.py::test_op[False-False-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
8+
tests/test_matmul.py::test_op[False-False-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
9+
tests/test_matmul.py::test_op[False-False-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
10+
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
11+
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
212
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
13+
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
14+
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
315
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
416
tests/test_matmul.py::test_op[False-False-True-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
517
tests/test_matmul.py::test_op[False-False-True-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
18+
tests/test_matmul.py::test_op[False-True-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
19+
tests/test_matmul.py::test_op[False-True-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
20+
tests/test_matmul.py::test_op[False-True-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
21+
tests/test_matmul.py::test_op[False-True-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
22+
tests/test_matmul.py::test_op[False-True-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
23+
tests/test_matmul.py::test_op[False-True-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
24+
tests/test_matmul.py::test_op[False-True-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
25+
tests/test_matmul.py::test_op[False-True-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
26+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-700-700-ragged-bfloat16-mxfloat4_e2m1-8-2-1-9-False-None]
27+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
28+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
629
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
30+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-700-700-ragged-bfloat16-mxfloat4_e2m1-8-2-1-9-False-None]
31+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
32+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
733
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
834
tests/test_matmul.py::test_op[False-True-True-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
935
tests/test_matmul.py::test_op[False-True-True-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,35 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/5074
2+
tests/test_matmul.py::test_op[False-False-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
3+
tests/test_matmul.py::test_op[False-False-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
4+
tests/test_matmul.py::test_op[False-False-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
5+
tests/test_matmul.py::test_op[False-False-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
6+
tests/test_matmul.py::test_op[False-False-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
7+
tests/test_matmul.py::test_op[False-False-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
8+
tests/test_matmul.py::test_op[False-False-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
9+
tests/test_matmul.py::test_op[False-False-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
10+
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
11+
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
212
tests/test_matmul.py::test_op[False-False-False-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
13+
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
14+
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
315
tests/test_matmul.py::test_op[False-False-False-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
416
tests/test_matmul.py::test_op[False-False-True-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
517
tests/test_matmul.py::test_op[False-False-True-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
18+
tests/test_matmul.py::test_op[False-True-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
19+
tests/test_matmul.py::test_op[False-True-False-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
20+
tests/test_matmul.py::test_op[False-True-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
21+
tests/test_matmul.py::test_op[False-True-False-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
22+
tests/test_matmul.py::test_op[False-True-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
23+
tests/test_matmul.py::test_op[False-True-True-False-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
24+
tests/test_matmul.py::test_op[False-True-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
25+
tests/test_matmul.py::test_op[False-True-True-False-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
26+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-700-700-ragged-bfloat16-mxfloat4_e2m1-8-2-1-9-False-None]
27+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
28+
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
629
tests/test_matmul.py::test_op[False-True-False-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
30+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-700-700-ragged-bfloat16-mxfloat4_e2m1-8-2-1-9-False-None]
31+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-False-None]
32+
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-704-800-ragged-mxfloat8_e4m3fn-mxfloat4_e2m1-8-2-1-9-True-None]
733
tests/test_matmul.py::test_op[False-True-False-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
834
tests/test_matmul.py::test_op[False-True-True-True-False-16-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]
935
tests/test_matmul.py::test_op[False-True-True-True-False-128-1000-400-400-ragged-float8_e4m3fn-float8_e4m3fn-3-1-1-1-False-None]

0 commit comments

Comments
 (0)