[ANALYSIS] Fix the divisibility of rem op with pessimistic analysis (#7441)

Jokeren · web-flow · commit 973461ca2228 · 2025-07-09T18:37:47.000-04:00
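When `rhs` has a contiguity > 1, it's difficult to specialize conditions
to get optimal divisibility estimates. The analysis is therefore made
pessimistic: the gcd-based estimate is kept only when `rhs` has a
constancy > 1, and a divisibility of 1 is reported otherwise.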

For example,
`[128, 128, 128, ..., 128] % [0, 1, 2, 3, ...] = [0, 0, 0, 2, ...]`
diff --git a/lib/Analysis/AxisInfo.cpp b/lib/Analysis/AxisInfo.cpp
@@ -480,11 +480,19 @@ class RemOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
 
   int64_t getDivisibility(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
                           int dim) override {
-    // lhs: d_lhs * k = gcd(d_lhs, d_rhs) * k' * k = gcd(d_lhs, d_rhs) * k''
-    // rhs: d_rhs * p = gcd(d_lhs, d_rhs) * p' * p = gcd(d_lhs, d_rhs) * p''
-    // lhs = gcd(d_lhs, d_rhs) * k'' = gcd(d_lhs, d_rhs) * d + r
-    // r must be divisible by gcd(d_lhs, d_rhs)
-    return gcd(lhs.getDivisibility(dim), rhs.getDivisibility(dim));
+    auto resTy = dyn_cast<RankedTensorType>(op.getType());
+    if (rhs.getConstancy(dim) > 1) {
+      // lhs: d_lhs * k = gcd(d_lhs, d_rhs) * k' * k = gcd(d_lhs, d_rhs) * k''
+      // rhs: d_rhs * p = gcd(d_lhs, d_rhs) * p' * p = gcd(d_lhs, d_rhs) * p''
+      // lhs = gcd(d_lhs, d_rhs) * k'' = gcd(d_lhs, d_rhs) * d + r
+      // r must be divisible by gcd(d_lhs, d_rhs)
+      return gcd(lhs.getDivisibility(dim), rhs.getDivisibility(dim));
+    }
+    // Otherwise we shouldn't assume any divisibility.
+    // For example:
+    // lhs: [2, 2, 4, 4], rhs: [0, 1, 2, 3]
+    // lhs % rhs = [0, 0, 0, 1]
+    return 1;
   };
 
   int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
diff --git a/test/Analysis/test-alignment.mlir b/test/Analysis/test-alignment.mlir
@@ -190,12 +190,28 @@ tt.func @rem() {
   %4 = arith.constant dense<64> : tensor<128xi32>
   // expected-remark @below {{contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>}}
   %5 = arith.remsi %0, %4 : tensor<128xi32>
-  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [1], constant_value = <none>}}
+  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
   %6 = arith.remsi %4, %0 : tensor<128xi32>
   // expected-remark @below {{contiguity = [1], divisibility = [2], constancy = [128], constant_value = 66}}
   %7 = arith.constant dense<66> : tensor<128xi32>
   // expected-remark @below {{contiguity = [2], divisibility = [2], constancy = [1], constant_value = <none>}}
   %8 = arith.remui %0, %7 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [1], divisibility = [64], constancy = [128], constant_value = 192}}
+  %9 = arith.constant dense<192> : tensor<128xi32>
+  // expected-remark @below {{contiguity = [64], divisibility = [64], constancy = [1], constant_value = <none>}}
+  %10 = arith.remsi %0, %9 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
+  %11 = arith.remsi %9, %0 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [128], divisibility = [32], constancy = [1], constant_value = <none>}}
+  %12 = tt.make_range {end = 160 : i32, start = 32 : i32} : tensor<128xi32>
+  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
+  %13 = arith.remsi %0, %12 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
+  %14 = arith.remsi %12, %0 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [32], divisibility = [32], constancy = [1], constant_value = <none>}}
+  %15 = arith.remsi %12, %4 : tensor<128xi32>
+  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = <none>}}
+  %16 = arith.remsi %4, %12 : tensor<128xi32>
   tt.return
 }