
Commit eda7506

apaszke authored and Google-ML-Automation committed
[Pallas MGPU] Disable XLA:GPU autotuning in attention tests
We don't care about the performance of the reference implementation; we only use it for correctness testing. More importantly, this works around a compile-time deadlock that sometimes occurs when testing large batch sizes.

PiperOrigin-RevId: 703521029
1 parent: 8b65620
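For reference, the same flag can be applied outside Bazel by exporting XLA_FLAGS before JAX initializes its GPU backend. A minimal sketch, assuming a standalone Python reproduction script (only the flag itself comes from this commit; everything else is illustrative):

import os
# Set before importing JAX so XLA:GPU reads the flag when the backend initializes.
os.environ["XLA_FLAGS"] = "--xla_gpu_autotune_level=0"
import jax.numpy as jnp
# Any subsequent GPU compilation now skips XLA:GPU autotuning.
print(jnp.dot(jnp.ones((8, 8)), jnp.ones((8, 8))))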

File tree: 1 file changed (+2, -0 lines)

tests/pallas/BUILD

Lines changed: 2 additions & 0 deletions
@@ -494,6 +494,7 @@ jax_multiplatform_test(
     srcs = ["//jax/experimental/pallas/ops/gpu:attention_mgpu.py"],
     enable_backends = [],
     enable_configs = ["gpu_h100_x32"],
+    env = {"XLA_FLAGS": "--xla_gpu_autotune_level=0"},
     tags = [
         "manual",
         "notap",
@@ -509,6 +510,7 @@ jax_multiplatform_test(
     srcs = ["mgpu_attention_test.py"],
     enable_backends = [],
     enable_configs = ["gpu_h100_x32"],
+    env = {"XLA_FLAGS": "--xla_gpu_autotune_level=0"},
     deps = [
         "//jax:pallas",
         "//jax:pallas_experimental_gpu_ops",
