
Commit 971a52a

[Tests] Don't run autotuning when running attention tutorial as a test (#6867)
1 parent 629fd50 commit 971a52a

2 files changed: 7 additions, 1 deletion

Makefile

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ test-unit: all
 	$(PYTEST) -s -n 8 python/triton_kernels/tests/
 	TRITON_DISABLE_LINE_INFO=0 $(PYTEST) -s python/test/unit/language/test_line_info.py
 	# Run attention separately to avoid out of gpu memory
-	TRITON_PRINT_AUTOTUNING=1 $(PYTEST) -vs python/tutorials/06-fused-attention.py
+	$(PYTEST) -vs python/tutorials/06-fused-attention.py
 	TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=python/triton/instrumentation/libGPUInstrumentationTestLib.so \
 		$(PYTEST) --capture=tee-sys -rfs -vvv python/test/unit/instrumentation/test_gpuhello.py

python/tutorials/06-fused-attention.py

Lines changed: 6 additions & 0 deletions
@@ -15,6 +15,7 @@
 
 import pytest
 import torch
+import sys
 
 import triton
 import triton.language as tl
@@ -111,6 +112,11 @@ def _host_descriptor_pre_hook(nargs):
     for s in NUM_STAGES_OPTIONS \
     for w in [4, 8]\
 ]
+if "pytest" in sys.modules:
+    # Use a single config in testing for reproducibility
+    configs = [
+        triton.Config(dict(BLOCK_M=64, BLOCK_N=64), num_stages=4, num_warps=4, pre_hook=_host_descriptor_pre_hook),
+    ]
 
 
 def keep(conf):
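
The change works because importing pytest (which happens whenever the tutorial is collected as a test) leaves "pytest" in sys.modules, so the module can detect test runs and replace the full autotuning search space with a single pinned config. Below is a minimal sketch of that same pattern applied to a hypothetical element-wise kernel; it is not part of this commit, and add_kernel, BLOCK, and the config values are illustrative assumptions.

import sys

import torch
import triton
import triton.language as tl

if "pytest" in sys.modules:
    # Under pytest, pin one config so every run compiles the same kernel
    # and no autotuning sweep is performed.
    configs = [triton.Config({"BLOCK": 1024}, num_warps=4)]
else:
    # Outside of tests, let the autotuner pick the fastest config on first launch.
    configs = [
        triton.Config({"BLOCK": b}, num_warps=w)
        for b in (256, 1024, 4096)
        for w in (4, 8)
    ]


@triton.autotune(configs=configs, key=["n_elements"])
@triton.jit
def add_kernel(x_ptr, y_ptr, out_ptr, n_elements, BLOCK: tl.constexpr):
    pid = tl.program_id(0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    y = tl.load(y_ptr + offsets, mask=mask)
    tl.store(out_ptr + offsets, x + y, mask=mask)


def add(x, y):
    out = torch.empty_like(x)
    n = out.numel()
    # The grid depends on whichever BLOCK the autotuner (or the pinned config) selects,
    # so it is computed lazily from the kernel meta-parameters.
    grid = lambda meta: (triton.cdiv(n, meta["BLOCK"]),)
    add_kernel[grid](x, y, out, n)
    return out

A single pinned config keeps test timings and results reproducible, which is also why the Makefile no longer needs TRITON_PRINT_AUTOTUNING=1 for this tutorial.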
