Address code review comments

etiotto · etiotto · commit b059f2c270a2 · 2025-10-22T18:12:03.000Z
Signed-off-by: Ettore Tiotto <ettore.tiotto@intel.com>
diff --git a/test/Triton/Intel/RemoveBoundaryChecks/remove-boundary-checks.mlir b/test/Triton/Intel/RemoveBoundaryChecks/remove-boundary-checks.mlir
@@ -0,0 +1,57 @@
+// RUN: triton-opt %s -split-input-file -triton-intel-remove-boundary-checks | FileCheck %s
+
+module {
+tt.func public @simple_load(%load_ptr: !tt.ptr<f16> {tt.divisibility = 16 : i32}, %store_ptr: !tt.ptr<f16> {tt.divisibility = 16 : i32}) {
+  %c1_i64 = arith.constant 1 : i64
+  %c64_i64 = arith.constant 64 : i64
+  %c512_i64 = arith.constant 512 : i64
+  %c1024_i64 = arith.constant 1024 : i64
+  %c0_i32 = arith.constant 0 : i32
+  %x = arith.constant 10 : i32
+  %in = tt.make_tensor_ptr %load_ptr, [%c1_i64, %c64_i64, %c1024_i64], [%c512_i64, %c64_i64, %c1_i64], [%c0_i32, %c0_i32, %x] {order = array<i32: 2, 1, 0>} : <tensor<1x64x64xf16>>
+  %load = tt.load %in {boundaryCheck = array<i32: 2>} : !tt.ptr<tensor<1x64x64xf16>>
+  tt.return
+}
+// CHECK-LABEL: simple_load
+// CHECK: [[PTR:%.*]] = tt.make_tensor_ptr
+// CHECK: tt.load [[PTR]] : !tt.ptr<tensor<1x64x64xf16>>
+}
+
+// -----
+
+module {
+tt.func public @load_in_for_loop(%load_ptr0: !tt.ptr<f16> {tt.divisibility = 16 : i32}, %load_ptr1: !tt.ptr<f16> {tt.divisibility = 16 : i32}, %store_ptr: !tt.ptr<f16> {tt.divisibility = 16 : i32}) {
+  %c0_i32 = arith.constant 0 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %c20_i32 = arith.constant 20 : i32
+  %c64_i32 = arith.constant 64 : i32
+  %c1024_i32 = arith.constant 1024 : i32
+  scf.for %x = %c0_i32 to %c20_i32 step %c1_i32 : i32 {
+    %pid = tt.get_program_id x : i32
+    %c0_i64 = arith.constant 0 : i64
+    %c1_i64 = arith.constant 1 : i64
+    %c512_i64 = arith.constant 512 : i64
+    %c1024_i64 = arith.constant 1024 : i64
+    %c64_i64 = arith.constant 64 : i64
+    %c65536_i64 = arith.constant 65536 : i64
+    %ptr0 = tt.make_tensor_ptr %load_ptr0, [%c512_i64, %c1024_i64, %c64_i64], [%c65536_i64, %c64_i64, %c1_i64], [%x, %pid, %c0_i32] {order = array<i32: 2, 1, 0>} : <tensor<1x512x64xf16>>
+    %load0 = tt.load %ptr0 {boundaryCheck = array<i32: 1, 2>, padding = 1 : i32} : !tt.ptr<tensor<1x512x64xf16>>
+    %9 = arith.bitcast %c0_i32 : i32 to i32
+    %10 = arith.bitcast %c1024_i32 : i32 to i32
+    %11 = arith.bitcast %c64_i32 : i32 to i32
+    scf.for %z = %9 to %10 step %11 iter_args() -> ()  : i32 {
+      %ptr1 = tt.make_tensor_ptr %load_ptr1, [%c512_i64, %c64_i64, %c1024_i64], [%c65536_i64, %c1_i64, %c64_i64], [%x, %c0_i32, %z] {order = array<i32: 2, 0, 1>} : <tensor<1x64x64xf16>>
+      //   a. boundaryCheck = 1 checks the block ptr offset at index 2 (%z)
+      //   b. boundaryCheck = 2 checks the block ptr offset at index 1 (%c0_i32)
+      // Check (a) is unnecessary because max(%z) = 960, which is less than %c1024_i64 (1024)
+      // Check (b) is trivially unnecessary because %c0_i32 (zero) < %c64_i64 (64)
+      %load1 = tt.load %ptr1 {boundaryCheck = array<i32: 1, 2>} : !tt.ptr<tensor<1x64x64xf16>>
+    }
+  }
+  tt.return
+}
+// CHECK-LABEL: load_in_for_loop
+// CHECK-COUNT-2: scf.for
+// CHECK: [[PTR:%.*]] = tt.make_tensor_ptr
+// CHECK: tt.load [[PTR]] : !tt.ptr<tensor<1x64x64xf16>>
+}
diff --git a/third_party/intel/include/Dialect/Triton/Transforms/Passes.td b/third_party/intel/include/Dialect/Triton/Transforms/Passes.td
@@ -80,7 +80,7 @@ def TritonIntelRemoveBoundaryChecks
     %lb = arith.bitcast %c0_i32 : i32 to i32
     %ub = arith.bitcast %c1024_i32 : i32 to i32
     %st = arith.bitcast %c64_i32 : i32 to i32
-    scf.for %i = %lb to %ub step %st : i32 {
+    scf.for %iv = %lb to %ub step %st : i32 {
       %s0 = arith.constant 512 : i64
       %s1 = arith.constant 64 : i64
       %s2 = arith.constant 1024 : i64
@@ -98,7 +98,7 @@ def TritonIntelRemoveBoundaryChecks
     The transformation would drop the boundary check on the load operation because:
       - `%ptr` is never advanced in the loop
       - `%iv` has values [0, 64, 128, ..., 960], max(%iv) = 960
-      - `%s2` is qual to 1014
+      - `%s2` is equal to 1024
       - the boundary check expression `%iv` < `%s2` is always true
   }];
 
diff --git a/third_party/intel/lib/Dialect/Triton/Transforms/RemoveBoundaryChecks.cpp b/third_party/intel/lib/Dialect/Triton/Transforms/RemoveBoundaryChecks.cpp
@@ -1,4 +1,3 @@
-
 #include "intel/include/Dialect/Triton/Transforms/Passes.h"
 #include "intel/include/Utils/Utility.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`#include "intel/include/Dialect/Triton/Transforms/Passes.h"`
`3`	`2`	`#include "intel/include/Utils/Utility.h"`
`4`	`3`	`#include "mlir/Dialect/Arith/IR/Arith.h"`