Fix warp num lookup segfault for empty modules (#4322)

kurapov-peter · web-flow · commit 2426ba72d224 · 2025-05-27T19:01:56.000+02:00
I recently noticed a subtle problem with pattern application in the
conversion-to-LLVM pass. The symptom is that when two modules, each of
which lowers to LLVM correctly on its own, are put into a single test
case, the matcher fails to find legalization patterns. While trying to
create a minimal reproducer, I found that merely adding an empty module
makes our conversion fail with a segfault: the warp-count lookup
dereferenced `mod.getOps().begin()`, which is invalid when the module
contains no operations. Looking the value up on the module itself avoids
this. This small fix also helps with the initial problem I had (although
I'm not exactly sure why just yet).
diff --git a/test/Conversion/intel/tritonintelgpu_to_llvm.mlir b/test/Conversion/intel/tritonintelgpu_to_llvm.mlir
@@ -1,11 +1,11 @@
-// RUN: triton-opt %s --convert-triton-intel-gpu-to-llvm | FileCheck %s
+// RUN: triton-opt %s -split-input-file --convert-triton-intel-gpu-to-llvm | FileCheck %s
 
 #blocked = #ttg.blocked<{sizePerThread = [1], threadsPerWarp = [16], warpsPerCTA = [4], order = [0]}>
 module attributes { "ttg.threads-per-warp" = 16 : i32, "ttg.num-warps" = 4 : i32 } {
   // As the assert message is shared, a single instance is emitted.
 
   // CHECK-DAG:         llvm.mlir.global internal constant @assertFunc_("unknown\00") {addr_space = 1 : i32}
-  // CHECK-DAG:         llvm.mlir.global internal constant @assertFile_("{{.*}}tritonintelgpu_to_llvm.mlir\00") {addr_space = 1 : i32}
+  // CHECK-DAG:         llvm.mlir.global internal constant @assertFile_("{{.*}}tritonintelgpu_to_llvm.mlir{{.*}}\00") {addr_space = 1 : i32}
   // CHECK-DAG:         llvm.mlir.global internal constant @assertMessage_("assert text\00") {addr_space = 1 : i32}
   // CHECK-DAG:         llvm.mlir.global internal constant @assertMessage_3("different assert text\00") {addr_space = 1 : i32}
   // CHECK-DAG:         llvm.func spir_funccc @__assert_fail(!llvm.ptr<4>, !llvm.ptr<4>, i32, !llvm.ptr<4>)
@@ -84,3 +84,8 @@ module attributes { "ttg.threads-per-warp" = 16 : i32, "ttg.num-warps" = 4 : i32
     tt.return
   }
 }
+
+// -----
+
+// Sanity check for the conversion pass to correctly process even empty modules
+module attributes { "ttg.threads-per-warp" = 16 : i32, "ttg.num-warps" = 4 : i32 } {}
diff --git a/third_party/intel/lib/TritonIntelGPUToLLVM/TritonGPUToLLVM.cpp b/third_party/intel/lib/TritonIntelGPUToLLVM/TritonGPUToLLVM.cpp
@@ -101,7 +101,7 @@ struct ConvertTritonGPUToLLVM
     TritonIntelGPUToLLVMTypeConverter typeConverter(
         context, option, *targetInfo, isAdvancedPathEnabled);
     TritonLLVMConversionTarget convTarget(*context);
-    int numWarps = triton::gpu::lookupNumWarps(&*mod.getOps().begin());
+    int numWarps = triton::gpu::lookupNumWarps(mod);
     int numCTAs = triton::gpu::TritonGPUDialect::getNumCTAs(mod);
     int threadsPerWarp = triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod);