Commit ecd33fe

[BACKEND] Improve constant analysis in AxisInfo (#8502)
We improve the constancy analysis in a number of cases. The code is quite messy and the whole pass could do with a full rewrite, but we are not doing that at the moment. This PR was mostly vibecoded, with a cleanup pass from me afterwards.
1 parent bad2576 commit ecd33fe
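As an illustration of the kind of case this improves (a sketch mirroring the new @mul_zero_constancy test added below; the function name here is made up), multiplying a contiguous range by an all-zero splat now produces a result with a known constant value, so its constancy covers the whole tensor instead of collapsing to 1:

tt.func @zero_times_range_sketch() {
  // Contiguous range: contiguity = [128], constancy = [1].
  %range = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
  // All-zero splat: constancy = [128], constant_value = 0.
  %zeros = arith.constant dense<0> : tensor<128xi32>
  // MulIOp's getConstantValue now folds x * 0 to 0 even though %range is not
  // constant, so the analysis reports constancy = [128] and constant_value = 0
  // here, where the old gcd-of-constancies fallback gave constancy = [1].
  %product = arith.muli %zeros, %range : tensor<128xi32>
  tt.return
}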

2 files changed: +96 −57 lines

lib/Analysis/AxisInfo.cpp

Lines changed: 49 additions & 56 deletions
@@ -91,23 +91,26 @@ class BinaryOpVisitorImpl : public AxisInfoVisitorImpl<OpTy> {
     auto lhsInfo = operands[0]->getValue();
     auto rhsInfo = operands[1]->getValue();
     auto rank = lhsInfo.getRank();
+    assert(isa<RankedTensorType>(op.getType()) ||
+           rank == 1 && "Expected ranked tensor or scalar");
     assert(operands.size() == 2 && "Expected two operands");
+    auto constantValue = getConstantValue(op, lhsInfo, rhsInfo);
+    if (constantValue.has_value()) {
+      auto resTy = dyn_cast<RankedTensorType>(op.getType());
+      AxisInfo::DimVectorT constancy =
+          resTy ? to_vector(resTy.getShape()) : AxisInfo::DimVectorT(rank, 1);
+      AxisInfo::DimVectorT contiguity(rank, 1);
+      AxisInfo::DimVectorT divisibility(
+          rank, highestPowOf2Divisor<int64_t>(constantValue.value()));
+      return AxisInfo(contiguity, divisibility, constancy, constantValue);
+    }
     AxisInfo::DimVectorT contiguity;
     AxisInfo::DimVectorT divisibility;
     AxisInfo::DimVectorT constancy;
-    auto constantValue = getConstantValue(op, lhsInfo, rhsInfo);
     for (auto d = 0; d < rank; ++d) {
-      if (constantValue.has_value()) {
-        contiguity.push_back(1);
-        constancy.push_back(
-            std::max(lhsInfo.getConstancy(d), rhsInfo.getConstancy(d)));
-        divisibility.push_back(
-            highestPowOf2Divisor<int64_t>(constantValue.value()));
-      } else {
-        contiguity.push_back(getContiguity(op, lhsInfo, rhsInfo, d));
-        constancy.push_back(getConstancy(op, lhsInfo, rhsInfo, d));
-        divisibility.push_back(getDivisibility(op, lhsInfo, rhsInfo, d));
-      }
+      contiguity.push_back(getContiguity(op, lhsInfo, rhsInfo, d));
+      constancy.push_back(getConstancy(op, lhsInfo, rhsInfo, d));
+      divisibility.push_back(getDivisibility(op, lhsInfo, rhsInfo, d));
     }
     return AxisInfo(contiguity, divisibility, constancy, constantValue);
   }
@@ -125,9 +128,8 @@ class BinaryOpVisitorImpl : public AxisInfoVisitorImpl<OpTy> {
 
   virtual int64_t getConstancy(OpTy op, const AxisInfo &lhs,
                                const AxisInfo &rhs, int dim) {
-    return 1;
+    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
   }
-
   virtual std::optional<int64_t> getConstantValue(OpTy op, const AxisInfo &lhs,
                                                   const AxisInfo &rhs) {
     return {};
@@ -328,11 +330,6 @@ class AddSubOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
     return gcd(lhs.getDivisibility(dim), rhsDivisibility);
   }
 
-  int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
-                       int dim) override {
-    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-  }
-
   std::optional<int64_t> getConstantValue(OpTy op, const AxisInfo &lhs,
                                           const AxisInfo &rhs) override {
     if (lhs.getConstantValue().has_value() &&
@@ -375,11 +372,6 @@ class MulIOpAxisInfoVisitor final : public BinaryOpVisitorImpl<arith::MulIOp> {
     return std::max(lhsContiguity, rhsContiguity);
   }
 
-  int64_t getConstancy(arith::MulIOp op, const AxisInfo &lhs,
-                       const AxisInfo &rhs, int dim) override {
-    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-  }
-
   int64_t getDivisibility(arith::MulIOp op, const AxisInfo &lhs,
                           const AxisInfo &rhs, int dim) override {
     auto lhsDivisibility = lhs.getDivisibility(dim);
@@ -399,9 +391,13 @@ class MulIOpAxisInfoVisitor final : public BinaryOpVisitorImpl<arith::MulIOp> {
 
   std::optional<int64_t> getConstantValue(arith::MulIOp op, const AxisInfo &lhs,
                                           const AxisInfo &rhs) override {
-    if (lhs.getConstantValue().has_value() &&
-        rhs.getConstantValue().has_value())
-      return {lhs.getConstantValue().value() * rhs.getConstantValue().value()};
+    auto lhsConst = lhs.getConstantValue();
+    auto rhsConst = rhs.getConstantValue();
+    if (lhsConst.has_value() && rhsConst.has_value())
+      return {lhsConst.value() * rhsConst.value()};
+    if ((lhsConst.has_value() && lhsConst.value() == 0) ||
+        (rhsConst.has_value() && rhsConst.value() == 0))
+      return 0;
     return {};
   }
 };
@@ -424,12 +420,11 @@ class DivOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
   int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
                        int dim) override {
     auto resTy = dyn_cast<RankedTensorType>(op.getType());
+    auto constancy = BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
     if (!resTy)
-      return BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
+      return constancy;
     auto shape = resTy.getShape();
-    // Case 1: both lhs and rhs are constants.
-    auto constancy = gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-    // Case 2: lhs contiguous, rhs constant.
+    // Case: lhs contiguous, rhs constant.
     // lhs: d_lhs * k, d_lhs * k + 1, ..., d_lhs * k + n
     // rhs: d_rhs * p, d_rhs * p, ..., d_rhs * p
     // lhs / rhs = d_lhs * k / (d_rhs * p), (d_lhs * k + 1) / (d_rhs * p),
@@ -526,15 +521,15 @@ class RemOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
 
   int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
                        int dim) override {
+    auto constancy = BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
     auto resTy = dyn_cast<RankedTensorType>(op.getType());
     if (!resTy)
-      return BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
-    auto shape = resTy.getShape();
-    // lhs % 1 = 0
-    return rhs.getConstantValue().has_value() &&
-                   rhs.getConstantValue().value() == 1
-               ? shape[dim]
-               : gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
+      return constancy;
+    // Case: lhs % 1 = 0
+    if (rhs.getConstantValue().has_value() &&
+        rhs.getConstantValue().value() == 1)
+      return resTy.getDimSize(dim);
+    return constancy;
   }
 
   std::optional<int64_t> getConstantValue(OpTy op, const AxisInfo &lhs,
@@ -689,7 +684,7 @@ class CmpOpAxisInfoVisitor final : public AxisInfoVisitorImpl<OpTy> {
       int64_t constHint = 1;
       if (lhsInfo.getConstantValue().has_value() &&
          rhsInfo.getConstantValue().has_value()) {
-        constHint = lhsInfo.getConstancy(d);
+        constHint = shape[d];
         constantValue =
             compare(getPredicate(op), lhsInfo.getConstantValue().value(),
                     rhsInfo.getConstantValue().value())
@@ -848,6 +843,13 @@ class SelectOpAxisInfoVisitor final : public AxisInfoVisitorImpl<OpTy> {
           rhsInfo.getConstantValue().has_value() &&
          lhsInfo.getConstantValue() == rhsInfo.getConstantValue())
         constantValue = lhsInfo.getConstantValue();
+
+      if (constantValue.has_value()) {
+        auto resTy = dyn_cast<RankedTensorType>(op.getType());
+        assert(resTy || rank == 1);
+        constancy =
+            resTy ? to_vector(resTy.getShape()) : AxisInfo::DimVectorT(rank, 1);
+      }
     }
 
     return AxisInfo(contiguity, divisibility, constancy, constantValue);
@@ -860,11 +862,6 @@ class LogicalOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
   using BinaryOpVisitorImpl<OpTy>::BinaryOpVisitorImpl;
 
 private:
-  int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
-                       int dim) override {
-    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-  }
-
   std::optional<int64_t> getConstantValue(OpTy op, const AxisInfo &lhs,
                                           const AxisInfo &rhs) override {
     if (lhs.getConstantValue().has_value() &&
@@ -910,11 +907,6 @@ class ShLIOpAxisInfoVisitor final : public BinaryOpVisitorImpl<arith::ShLIOp> {
     return multiplyDivisor(lhsDivisibility, 1ll << shift);
   }
 
-  int64_t getConstancy(arith::ShLIOp op, const AxisInfo &lhs,
-                       const AxisInfo &rhs, int dim) override {
-    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-  }
-
   std::optional<int64_t> getConstantValue(arith::ShLIOp op, const AxisInfo &lhs,
                                           const AxisInfo &rhs) override {
     if (lhs.getConstantValue().has_value() &&
@@ -952,11 +944,6 @@ class ShROpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
     return std::max<int64_t>(1, lhsDivisibility / (int64_t(1) << shift));
   }
 
-  int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
-                       int dim) override {
-    return gcd(lhs.getConstancy(dim), rhs.getConstancy(dim));
-  }
-
   std::optional<int64_t> getConstantValue(OpTy op, const AxisInfo &lhs,
                                           const AxisInfo &rhs) override {
     if (lhs.getConstantValue().has_value() &&
@@ -989,9 +976,15 @@ class MaxMinOpAxisInfoVisitor final : public AxisInfoVisitorImpl<OpTy> {
         constantValue = {std::min(lhsInfo.getConstantValue().value(),
                                   rhsInfo.getConstantValue().value())};
       }
+      auto resTy = dyn_cast<RankedTensorType>(op.getType());
+      assert(resTy || rank == 1);
+      AxisInfo::DimVectorT constancy =
+          resTy ? to_vector(resTy.getShape()) : AxisInfo::DimVectorT(rank, 1);
+      AxisInfo::DimVectorT divisibility(
+          rank, highestPowOf2Divisor<int64_t>(constantValue.value()));
       return AxisInfo(/*knownContiguity=*/AxisInfo::DimVectorT(rank, 1),
-                      /*knownDivisibility=*/AxisInfo::DimVectorT(rank, 1),
-                      /*knownConstancy=*/AxisInfo::DimVectorT(rank, 1),
+                      /*knownDivisibility=*/divisibility,
+                      /*knownConstancy=*/constancy,
                       /*constantValue=*/constantValue);
     } else {
       AxisInfo::DimVectorT contiguity, divisibility, constancy;
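A quick worked example of the default that now lives in BinaryOpVisitorImpl::getConstancy: it returns gcd(lhs.getConstancy(dim), rhs.getConstancy(dim)), so if the lhs is constant over aligned blocks of 8 elements along a dimension and the rhs over blocks of 4, both are constant over every aligned block of gcd(8, 4) = 4 elements, and the result of an elementwise op is therefore constant over blocks of at least 4. The per-op getConstancy overrides removed above (add/sub, mul, logical, shift) were each duplicating exactly this computation.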

test/Analysis/test-alignment.mlir

Lines changed: 47 additions & 1 deletion
@@ -458,7 +458,7 @@ tt.func @max_min() {
   %4 = arith.constant dense<8> : tensor<128xi32>
   // expected-remark @below {{contiguity = [1], divisibility = [4], constancy = [128], constant_value = 4}}
   %5 = arith.constant dense<4> : tensor<128xi32>
-  // expected-remark @below {{contiguity = [1], divisibility = [1], constancy = [1], constant_value = 8}}
+  // expected-remark @below {{contiguity = [1], divisibility = [8], constancy = [128], constant_value = 8}}
   %6 = arith.maxsi %4, %5 : tensor<128xi32>
   tt.return
 }
@@ -1011,3 +1011,49 @@ tt.func @caller() {
   tt.call @callee(%1) : (tensor<128x1xi32>) -> ()
   tt.return
 }
+
+// -----
+
+tt.func @mul_zero_constancy() {
+  %range = tt.make_range {end = 128 : i32, start = 0 : i32} : tensor<128xi32>
+  %zeros = arith.constant dense<0> : tensor<128xi32>
+  // expected-remark @below {{constancy = [128]}}
+  %product = arith.muli %zeros, %range : tensor<128xi32>
+  tt.return
+}
+
+// -----
+
+tt.func @max_constancy() {
+  %c5 = arith.constant dense<5> : tensor<4xi32>
+  %c7 = arith.constant dense<7> : tensor<4xi32>
+  // expected-remark @below {{constancy = [4], constant_value = 7}}
+  %max = arith.maxsi %c5, %c7 : tensor<4xi32>
+  tt.return
+}
+
+// -----
+
+tt.func @select_same_value_constancy() {
+  %range = tt.make_range {end = 4 : i32, start = 0 : i32} : tensor<4xi32>
+  %two = arith.constant dense<2> : tensor<4xi32>
+  %mod = arith.remsi %range, %two : tensor<4xi32>
+  %zero = arith.constant dense<0> : tensor<4xi32>
+  %cond = arith.cmpi ne, %mod, %zero : tensor<4xi32>
+  %lhs = arith.constant dense<42> : tensor<4xi32>
+  %rhs = arith.constant dense<42> : tensor<4xi32>
+  // expected-remark @below {{constancy = [4], constant_value = 42}}
+  %sel = arith.select %cond, %lhs, %rhs : tensor<4xi1>, tensor<4xi32>
+  tt.return
+}
+
+// -----
+
+tt.func @cmp_after_max_constancy() {
+  %c5 = arith.constant dense<5> : tensor<4xi32>
+  %c7 = arith.constant dense<7> : tensor<4xi32>
+  %max = arith.maxsi %c5, %c7 : tensor<4xi32>
+  // expected-remark @below {{constancy = [4], constant_value = 1}}
+  %cmp = arith.cmpi sgt, %max, %c5 : tensor<4xi32>
+  tt.return
+}
