@@ -86,40 +86,43 @@ def failing_kernel(input):
8686 ampere .async_copy .wait_group (0 )
8787
8888
89- def run_failing_kernel (device ):
89+ def run_failing_kernel (device , enable_consan , mode ):
9090 # ConSan requires a global memory allocation
9191 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
9292 return torch .empty (size , device = "cuda" , dtype = torch .int8 )
9393
9494 triton .set_allocator (alloc_fn )
9595
96+ if enable_consan :
97+ if mode == "env" :
98+ os .environ ["TRITON_INSTRUMENTATION_MODE" ] = "consan"
99+ knobs .refresh_knobs ()
100+ elif mode == "knob" :
101+ knobs .compilation .instrumentation_mode = "consan"
102+
96103 input = torch .randn ((XBLOCK , XBLOCK ), device = device , dtype = torch .float16 )
97104 failing_kernel [(1 , )](input )
98105
99106
100107@pytest .mark .xfail (not is_cuda () or torch .cuda .get_device_capability ()[0 ] < 9 , reason = "Requires hopper" , run = False )
101- def test_cache_miss_knob (device , fresh_knobs , monkeypatch ):
108+ def test_cache_miss_knob (device , monkeypatch ):
102109 # First run without consan
103- knobs .compilation .enable_experimental_consan = False
104- run_failing_kernel (device )
110+ run_in_process (run_failing_kernel , (device , False , "knob" ))
105111
106112 # Then run with consan and assert that if fails
107- knobs .compilation .enable_experimental_consan = True
108113 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
109- result = run_in_process (run_failing_kernel , (device , ))
114+ result = run_in_process (run_failing_kernel , (device , True , "knob" ))
110115 assert "device-side assert" in str (result .exc )
111116
112117
113118@pytest .mark .xfail (not is_cuda () or torch .cuda .get_device_capability ()[0 ] < 9 , reason = "Requires hopper" , run = False )
114119def test_cache_miss_env (device , monkeypatch ):
115120 # First run without consan
116- knobs .compilation .enable_experimental_consan = False
117- run_failing_kernel (device )
121+ run_in_process (run_failing_kernel , (device , False , "env" ))
118122
119123 # Then run with consan and assert that if fails
120- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN" , "1" )
121124 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
122- result = run_in_process (run_failing_kernel , (device , ))
125+ result = run_in_process (run_failing_kernel , (device , True , "env" ))
123126 assert "device-side assert" in str (result .exc )
124127
125128
@@ -134,8 +137,9 @@ def test_async_tma_kernel(FAILURE, device, run_wrapper, monkeypatch):
134137 assert "Buffer being accessed has outstanding writes" in result .driver_stderr_output
135138 return
136139
137- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
140+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
138141 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
142+ knobs .refresh_knobs ()
139143
140144 # ConSan requires a global memory allocation
141145 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -184,8 +188,9 @@ def test_tma_interleave_kernel(FAILURE, device, run_wrapper, monkeypatch):
184188 assert result .driver_stderr_output == ""
185189 return
186190
187- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
191+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
188192 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
193+ knobs .refresh_knobs ()
189194
190195 # ConSan requires a global memory allocation
191196 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -243,8 +248,9 @@ def test_async_copy(FAILURE, device, run_wrapper, monkeypatch):
243248 assert result .driver_stderr_output == ""
244249 return
245250
246- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
251+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
247252 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
253+ knobs .refresh_knobs ()
248254
249255 # ConSan requires a global memory allocation
250256 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -296,8 +302,9 @@ def test_tcgen5_mma(FAILURE, MEM_ACCESS_KIND, device, run_wrapper, monkeypatch):
296302 assert result .driver_stderr_output == ""
297303 return
298304
299- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
305+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
300306 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
307+ knobs .refresh_knobs ()
301308
302309 # ConSan requires a global memory allocation
303310 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -359,8 +366,9 @@ def test_warpgroup_mma(FAILURE, device, run_wrapper, monkeypatch):
359366 assert result .driver_stderr_output == ""
360367 return
361368
362- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
369+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
363370 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
371+ knobs .refresh_knobs ()
364372
365373 # ConSan requires a global memory allocation
366374 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -403,8 +411,9 @@ def test_warpgroup_mma2(FAILURE, device, run_wrapper, monkeypatch):
403411 assert result .driver_stderr_output == ""
404412 return
405413
406- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
414+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
407415 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
416+ knobs .refresh_knobs ()
408417
409418 # ConSan requires a global memory allocation
410419 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -452,8 +461,9 @@ def test_tcgen5_mma_multibar(BUF_IDX, BAR_IDX, device, run_wrapper, monkeypatch)
452461 assert result .exc is None
453462 assert result .driver_stderr_output == ""
454463 return
455- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
464+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
456465 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
466+ knobs .refresh_knobs ()
457467
458468 # ConSan requires a global memory allocation
459469 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -511,8 +521,9 @@ def test_multibuffered_loop(FAILURE, device, run_wrapper, monkeypatch):
511521 assert result .driver_stderr_output == ""
512522 return
513523
514- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
524+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
515525 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
526+ knobs .refresh_knobs ()
516527
517528 # ConSan requires a global memory allocation
518529 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -625,8 +636,9 @@ def test_multibuffered_wgmma_loop(FAILURE, device, run_wrapper, monkeypatch):
625636 assert result .driver_stderr_output == ""
626637 return
627638
628- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
639+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
629640 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
641+ knobs .refresh_knobs ()
630642
631643 # ConSan requires a global memory allocation
632644 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -705,8 +717,9 @@ def test_ws_store_wait_load(FAILURE, device, run_wrapper, monkeypatch):
705717 assert result .exc is None
706718 assert result .driver_stderr_output == ""
707719 return
708- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
720+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
709721 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
722+ knobs .refresh_knobs ()
710723
711724 # ConSan requires a global memory allocation
712725 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -758,8 +771,9 @@ def test_ws_load_wait_store(FAILURE, device, run_wrapper, monkeypatch):
758771 assert result .exc is None
759772 assert result .driver_stderr_output == ""
760773 return
761- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
774+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
762775 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
776+ knobs .refresh_knobs ()
763777
764778 # ConSan requires a global memory allocation
765779 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -811,8 +825,9 @@ def test_ws_two_loads_two_bars(MISSING_BAR, device, run_wrapper, monkeypatch):
811825 assert result .exc is None
812826 assert result .driver_stderr_output == ""
813827 return
814- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
828+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
815829 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
830+ knobs .refresh_knobs ()
816831
817832 # ConSan requires a global memory allocation
818833 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -873,8 +888,9 @@ def test_ws_two_loads_one_bar(FAILURE, device, run_wrapper, monkeypatch):
873888 assert result .exc is None
874889 assert result .driver_stderr_output == ""
875890 return
876- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
891+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
877892 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
893+ knobs .refresh_knobs ()
878894
879895 # ConSan requires a global memory allocation
880896 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -935,8 +951,9 @@ def test_ws_two_loads_two_bars_loop(MISSING_BAR, device, run_wrapper, monkeypatc
935951 assert result .exc is None
936952 assert result .driver_stderr_output == ""
937953 return
938- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
954+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
939955 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
956+ knobs .refresh_knobs ()
940957
941958 # ConSan requires a global memory allocation
942959 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1015,8 +1032,9 @@ def test_ws_load_ordering(FAILURE, device, run_wrapper, monkeypatch):
10151032 assert result .exc is None
10161033 assert result .driver_stderr_output == ""
10171034 return
1018- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1035+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
10191036 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1037+ knobs .refresh_knobs ()
10201038
10211039 # ConSan requires a global memory allocation
10221040 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1079,8 +1097,9 @@ def test_ws_two_producers_two_consumers(MISSING_BAR, device, run_wrapper, monkey
10791097 assert result .exc is None
10801098 assert result .driver_stderr_output == ""
10811099 return
1082- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1100+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
10831101 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1102+ knobs .refresh_knobs ()
10841103
10851104 # ConSan requires a global memory allocation
10861105 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1166,8 +1185,9 @@ def test_ws_different_warp_sizes(MISSING_BAR, device, run_wrapper, monkeypatch):
11661185 assert result .exc is None
11671186 assert result .driver_stderr_output == ""
11681187 return
1169- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1188+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
11701189 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1190+ knobs .refresh_knobs ()
11711191
11721192 # ConSan requires a global memory allocation
11731193 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1236,8 +1256,9 @@ def test_ws_async_copy_commits(FAILURE, device, run_wrapper, monkeypatch):
12361256 assert result .driver_stderr_output == ""
12371257 return
12381258
1239- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1259+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
12401260 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1261+ knobs .refresh_knobs ()
12411262
12421263 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
12431264 return torch .empty (size , device = "cuda" , dtype = torch .int8 )
@@ -1300,8 +1321,9 @@ def test_ws_async_copy_wait_visibility(FAILURE, device, run_wrapper, monkeypatch
13001321 assert result .driver_stderr_output == ""
13011322 return
13021323
1303- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1324+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
13041325 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1326+ knobs .refresh_knobs ()
13051327
13061328 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
13071329 return torch .empty (size , device = "cuda" , dtype = torch .int8 )
@@ -1352,8 +1374,9 @@ def test_ws_wgmma_wait_visibility(FAILURE, device, run_wrapper, monkeypatch):
13521374 assert result .driver_stderr_output == ""
13531375 return
13541376
1355- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1377+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
13561378 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1379+ knobs .refresh_knobs ()
13571380
13581381 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
13591382 return torch .empty (size , device = "cuda" , dtype = torch .int8 )
@@ -1400,8 +1423,9 @@ def test_deadlock_two_partitions(device, run_wrapper, monkeypatch):
14001423 assert "device-side assert" in str (result .exc )
14011424 assert "Deadlock detected" in result .driver_stderr_output
14021425 return
1403- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1426+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
14041427 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1428+ knobs .refresh_knobs ()
14051429
14061430 # ConSan requires a global memory allocation
14071431 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1434,8 +1458,9 @@ def test_deadlock_overarrival(device, run_wrapper, monkeypatch):
14341458 assert "device-side assert" in str (result .exc )
14351459 assert "Deadlock detected" in result .driver_stderr_output
14361460 return
1437- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1461+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
14381462 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1463+ knobs .refresh_knobs ()
14391464
14401465 # ConSan requires a global memory allocation
14411466 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1463,8 +1488,9 @@ def test_deadlock_underarrival(device, run_wrapper, monkeypatch):
14631488 assert "device-side assert" in str (result .exc )
14641489 assert "Deadlock detected" in result .driver_stderr_output
14651490 return
1466- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1491+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
14671492 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1493+ knobs .refresh_knobs ()
14681494
14691495 # ConSan requires a global memory allocation
14701496 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1499,8 +1525,9 @@ def test_deadlock_different_phases(device, run_wrapper, monkeypatch):
14991525 assert result .exc is None
15001526 assert result .driver_stderr_output == ""
15011527 return
1502- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1528+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
15031529 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1530+ knobs .refresh_knobs ()
15041531
15051532 # ConSan requires a global memory allocation
15061533 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1534,8 +1561,9 @@ def test_deadlock_exempt_when_tma_signals(device, run_wrapper, monkeypatch):
15341561 assert result .exc is None
15351562 assert result .driver_stderr_output == ""
15361563 return
1537- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1564+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
15381565 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1566+ knobs .refresh_knobs ()
15391567
15401568 # ConSan requires a global memory allocation
15411569 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
@@ -1577,8 +1605,9 @@ def test_barrier_underflow(device, run_wrapper, monkeypatch):
15771605 assert "device-side assert" in str (result .exc )
15781606 assert "Barrier arrive underflow: current count would become negative" in result .driver_stderr_output
15791607 return
1580- monkeypatch .setenv ("TRITON_ENABLE_EXPERIMENTAL_CONSAN " , "1 " )
1608+ monkeypatch .setenv ("TRITON_INSTRUMENTATION_MODE " , "consan " )
15811609 monkeypatch .setenv ("CUDA_LAUNCH_BLOCKING" , "1" )
1610+ knobs .refresh_knobs ()
15821611
15831612 # ConSan requires a global memory allocation
15841613 def alloc_fn (size : int , alignment : int , stream : Optional [int ]):
0 commit comments