Skip to content

Commit 3de296d

Browse files
committed
[TEST_DEBUG] Enable test_device_assert
1 parent f674b76 commit 3de296d

File tree

9 files changed

+91
-29
lines changed

9 files changed

+91
-29
lines changed

python/test/unit/test_debug.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import torch
33
import triton.language as tl
44
import triton
5+
import sys
6+
import subprocess
7+
import os
58

69

710
@pytest.mark.parametrize('cond', [True, False])
@@ -10,29 +13,35 @@
1013
@pytest.mark.parametrize('env_var', [True, False])
1114
@pytest.mark.parametrize('jit_flag', [True, False])
1215
@pytest.mark.forked
13-
def test_device_assert(monkeypatch, cond, mask, opt_flag, env_var, jit_flag, device):
14-
monkeypatch.setenv("TRITON_DEBUG", str(int(env_var)))
15-
triton.knobs.refresh_knobs()
16-
torch.zeros([1], dtype=torch.int32, device=device)
17-
18-
@triton.jit(debug=jit_flag)
19-
def _kernel(COND: tl.constexpr, MASK: tl.constexpr):
20-
tl.device_assert(COND, 'test', mask=MASK)
16+
def test_device_assert(cond, mask, opt_flag, env_var, jit_flag, device):
    """Check ``tl.device_assert`` by running the kernel in a child Python process.

    Temporary subprocess solution due to:
    https://github.com/pytorch/pytorch/issues/142135

    The parameters are serialized with ``str()`` and passed on the command line
    to ``test_debug_kernels.py`` (located next to this file), which launches the
    kernel and reports the outcome through its exit status.

    Args:
        cond: Condition handed to the kernel's ``tl.device_assert``.
        mask: Mask handed to ``tl.device_assert`` (``True``, ``False`` or ``None``).
        opt_flag: Per-launch ``debug`` override; ``None`` means no override.
        env_var: Whether the child should run with ``TRITON_DEBUG`` set to 1.
        jit_flag: ``debug`` flag given to ``triton.jit`` in the child.
        device: Device name forwarded to the child process.
    """
    import signal  # local import: only needed to name the SIGABRT exit status

    is_debug = env_var or (opt_flag if opt_flag is not None else jit_flag)
    should_fail = not cond and is_debug and mask is not False

    kernel_file = os.path.join(os.path.dirname(__file__), "test_debug_kernels.py")
    # str(None) is already "None", so every argument can be serialized uniformly.
    command = [
        sys.executable, kernel_file, "device_assert",
        str(cond), str(mask), str(opt_flag),
        str(jit_flag), device,
        str(env_var),
    ]
    result = subprocess.run(command, capture_output=True, text=True)
    captured = f"stdout: {result.stdout}, stderr: {result.stderr}"

    if should_fail:
        # 1 is the status the helper script returns for a RuntimeError; a negative
        # status means the child was terminated by that signal (here SIGABRT).
        accepted_codes = (1, -signal.SIGABRT)
        assert result.returncode in accepted_codes, (
            f"Expected runtime error or abort signal but got unexpected exit code {result.returncode}. "
            f"{captured}")
    else:
        assert result.returncode == 0, (f"Expected success but got unexpected exit code {result.returncode}. "
                                        f"{captured}")
3645

3746

3847
def test_device_assert_barrier(monkeypatch, device):
@@ -70,10 +79,14 @@ def _test_overflow(x, y, x_dtype, y_dtype, debug, should_overflow, tri_func, ref
7079
y = torch.tensor([y], dtype=getattr(torch, y_dtype), device=device)
7180
z = torch.empty_like(x)
7281
if should_overflow and debug:
73-
with pytest.raises(RuntimeError) as exc_info:
82+
# with pytest.raises(RuntimeError) as exc_info:
83+
try:
7484
tri_func[(1, )](x, y, z, debug=debug)
7585
getattr(torch, device).synchronize()
76-
assert "device-side assert" in str(exc_info.value)
86+
except RuntimeError as e:
87+
assert True
88+
assert "device-side assert" in str(e) #str(exc_info.value)
89+
assert False
7790
else:
7891
tri_func[(1, )](x, y, z, debug=debug)
7992
getattr(torch, device).synchronize()
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""
2+
Helper module containing Triton kernels for test_debug.py.
3+
These kernels are separated so they can be called from subprocesses.
4+
"""
5+
import torch
6+
import triton
7+
import triton.language as tl
8+
import sys
9+
import os
10+
11+
12+
def run_device_assert_kernel(cond, mask, opt_flag, jit_flag, device):
    """Launch a single-program kernel that calls ``tl.device_assert`` and report the outcome.

    The kernel is created with ``triton.jit(debug=jit_flag)`` and launched on a
    ``(1, )`` grid, after which ``getattr(torch, device).synchronize()`` is
    called; both steps run inside the same ``try`` block.

    Args:
        cond: Value passed as the kernel's ``COND`` constexpr (the asserted condition).
        mask: Value passed as the ``MASK`` constexpr and forwarded as
            ``tl.device_assert(..., mask=MASK)``.
        opt_flag: When not ``None``, forwarded to the launch as ``debug=opt_flag``;
            when ``None`` no ``debug`` keyword is passed at launch.
        jit_flag: ``debug`` argument given to ``triton.jit``.
        device: Name of the ``torch`` attribute whose ``synchronize()`` is called.

    Returns:
        ``0`` when launch and synchronize complete, ``1`` when either raises
        ``RuntimeError``, ``2`` for any other ``Exception``.
    """

    @triton.jit(debug=jit_flag)
    def _kernel(COND: tl.constexpr, MASK: tl.constexpr):
        tl.device_assert(COND, 'test', mask=MASK)

    launch_kwargs = {} if opt_flag is None else {"debug": opt_flag}

    try:
        _kernel[(1, )](cond, mask, **launch_kwargs)
        getattr(torch, device).synchronize()
    except RuntimeError as err:
        # Do not swallow the error silently: echo it on stderr so whoever
        # inspects this process's output can see why status 1 was returned.
        print(f"RuntimeError: {err}", file=sys.stderr)
        return 1
    except Exception as err:
        # Diagnostics belong on stderr, not stdout.
        print(f"Unexpected error: {type(err).__name__}: {err}", file=sys.stderr)
        return 2
    return 0
31+
32+
33+
def _parse_bool_or_none(arg_str):
    """Decode a tri-state argument: ``"None"`` -> ``None``, ``"True"`` -> ``True``, anything else -> ``False``."""
    if arg_str == "None":
        return None
    return arg_str == "True"


def main(argv):
    """Run the test selected by ``argv[1]`` and return the process exit status.

    Expected command line for the only supported test type::

        <script> device_assert <cond> <mask> <opt_flag> <jit_flag> <device> <env_var>

    ``cond``, ``jit_flag`` and ``env_var`` are true only for the literal
    ``"True"``; ``mask`` and ``opt_flag`` additionally accept ``"None"``.

    Args:
        argv: Full argument vector including the program name (``sys.argv``).

    Returns:
        The status from ``run_device_assert_kernel`` for ``device_assert``, or
        ``3`` for an unknown test type or a malformed command line.
    """
    # Validate argv up front: an IndexError traceback would exit with status 1,
    # which is the same status used to report a RuntimeError from the kernel.
    if len(argv) < 2:
        print("Missing test type argument", file=sys.stderr)
        return 3

    test_type = argv[1]
    if test_type != "device_assert":
        print(f"Unknown test type: {test_type}")
        return 3

    if len(argv) < 8:
        print(f"device_assert expects 6 arguments but got {len(argv) - 2}", file=sys.stderr)
        return 3

    cond = argv[2] == "True"
    mask = _parse_bool_or_none(argv[3])
    opt_flag = _parse_bool_or_none(argv[4])
    jit_flag = argv[5] == "True"
    device = argv[6]
    env_var = argv[7] == "True"

    # Set the variable, then refresh the knobs before the kernel is built.
    os.environ["TRITON_DEBUG"] = str(int(env_var))
    triton.knobs.refresh_knobs()
    return run_device_assert_kernel(cond, mask, opt_flag, jit_flag, device)


if __name__ == "__main__":
    sys.exit(main(sys.argv))

scripts/skiplist/a770/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/arl-h/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/arl-s/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/default/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/lts/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/mtl/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

scripts/skiplist/xe2/debug.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# https://github.com/intel/intel-xpu-backend-for-triton/issues/2755
2-
python/test/unit/test_debug.py::test_device_assert[r"^(False|True)-(False|True)-True-(True|None)-False$|^(False|True)-True-None-(True|None)-False$|^(False|True)-True-False-(True|None)-False$|^True-False-None-(True|None)-False$"]@regexp
32
python/test/unit/test_debug.py::test_sanitize_int_add_overflow[r".*True-True$"]@regexp
43
python/test/unit/test_debug.py::test_sanitize_int_mul_overflow[r".*True-True$"]@regexp
54
python/test/unit/test_debug.py::test_sanitize_int_sub_overflow[2147483647--1-int32-int32-True-True]

0 commit comments

Comments
 (0)