Skip to content

Commit 7fc0d2b

Browse files
authored
Improve axis analysis to handle tt.make_tensor_ptr (#2448)
The upstream axis analysis doesn't handle blocked pointers. This PR creates an Intel version of the analysis and adds support for the `tt.make_tensor_ptr` and `tt.advance` operations, along with an additional unit test. Signed-off-by: Tiotto, Ettore <[email protected]>
1 parent ca7e8ba commit 7fc0d2b

File tree

3 files changed

+80
-8
lines changed

3 files changed

+80
-8
lines changed

test/Analysis/intel/test-alignment.mlir renamed to test/Analysis/intel/test-axis-info.mlir

Lines changed: 15 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -876,3 +876,18 @@ module {
876876
tt.return %int_min : i64
877877
}
878878
}
879+
880+
// -----
881+
882+
// CHECK-LABEL: @make_tensor_ptr
// Verify axis-info propagation through tt.make_tensor_ptr:
//  - %0: both strides are statically 1, so both dims are contiguous up to the
//    shape operands (128, 32); divisibility collapses to [1, 1] because the
//    unit strides bound it.
//  - %1: only dim 0 has a unit stride, so only dim 0 is contiguous (32);
//    its divisibility is min(base-ptr divisibility = 32, %arg2 divisibility = 16).
tt.func public @make_tensor_ptr(%arg0: !tt.ptr<f16>, %arg1: !tt.ptr<f8E5M2> {tt.divisibility = 32 : i32}, %arg2: i64 {tt.divisibility = 16 : i32}) {
  %c0_i32 = arith.constant 0 : i32
  %c1_i64 = arith.constant 1 : i64
  %c32_i64 = arith.constant 32 : i64
  %c128_i64 = arith.constant 128 : i64
  // CHECK: %0 = tt.make_tensor_ptr %arg0, {{.*}} => contiguity = [128, 32], divisibility = [1, 1], constancy = [1, 1], constant_value = <none>
  %0 = tt.make_tensor_ptr %arg0, [%c128_i64, %c32_i64], [%c1_i64, %c1_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : !tt.ptr<tensor<128x32xf16>>
  // CHECK: %1 = tt.make_tensor_ptr %arg1, {{.*}} => contiguity = [32, 1], divisibility = [16, 1], constancy = [1, 1], constant_value = <none>
  %1 = tt.make_tensor_ptr %arg1, [%c32_i64, %c32_i64], [%c1_i64, %arg2], [%c0_i32, %c0_i32] {order = array<i32: 0, 1>} : <tensor<64x16xf8E5M2>>
  tt.return
}

test/lib/Analysis/intel/TestAxisInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -13,7 +13,7 @@ struct TestAxisInfoPass
1313

1414
StringRef getArgument() const final { return "test-print-axis-info"; }
1515
StringRef getDescription() const final {
16-
return "print the result of the alignment analysis pass";
16+
return "print the result of the axis analysis pass";
1717
}
1818

1919
void runOnOperation() override {

third_party/intel/lib/Analysis/AxisInfo.cpp

Lines changed: 64 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -50,6 +50,12 @@ int64_t multiplyDivisor(int64_t lhs, int64_t rhs) {
5050
return lhs * rhs;
5151
}
5252

53+
/// Return the ranked tensor type underlying \p ptrTy.
/// For a tensor-pointer type (!tt.ptr<tensor<...>>) this is the pointee
/// tensor type; otherwise \p ptrTy itself is dyn_cast'ed, yielding null when
/// it is not a ranked tensor.
RankedTensorType getRankedTensorType(Type ptrTy) {
  if (isTensorPointerType(ptrTy)) {
    auto pointee = cast<PointerType>(ptrTy).getPointeeType();
    return cast<RankedTensorType>(pointee);
  }
  return dyn_cast<RankedTensorType>(ptrTy);
}
58+
5359
class AxisInfoVisitor {
5460
public:
5561
AxisInfoVisitor() = default;
@@ -409,7 +415,7 @@ class DivOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
409415

410416
int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
411417
int dim) override {
412-
auto resTy = dyn_cast<RankedTensorType>(op.getType());
418+
auto resTy = getRankedTensorType(op.getType());
413419
if (!resTy)
414420
return BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
415421
auto shape = resTy.getShape();
@@ -464,7 +470,7 @@ class RemOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
464470
private:
465471
int64_t getContiguity(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
466472
int dim) override {
467-
auto resTy = dyn_cast<RankedTensorType>(op.getType());
473+
auto resTy = getRankedTensorType(op.getType());
468474
if (!resTy)
469475
return BinaryOpVisitorImpl<OpTy>::getContiguity(op, lhs, rhs, dim);
470476
auto shape = resTy.getShape();
@@ -498,7 +504,7 @@ class RemOpAxisInfoVisitor final : public BinaryOpVisitorImpl<OpTy> {
498504

499505
int64_t getConstancy(OpTy op, const AxisInfo &lhs, const AxisInfo &rhs,
500506
int dim) override {
501-
auto resTy = dyn_cast<RankedTensorType>(op.getType());
507+
auto resTy = getRankedTensorType(op.getType());
502508
if (!resTy)
503509
return BinaryOpVisitorImpl<OpTy>::getConstancy(op, lhs, rhs, dim);
504510
auto shape = resTy.getShape();
@@ -647,7 +653,7 @@ class CmpOpAxisInfoVisitor final : public AxisInfoVisitorImpl<OpTy> {
647653
AxisInfo
648654
getAxisInfo(OpTy op,
649655
ArrayRef<const dataflow::Lattice<AxisInfo> *> operands) override {
650-
auto resTy = dyn_cast<RankedTensorType>(op.getType());
656+
auto resTy = getRankedTensorType(op.getType());
651657
if (!resTy)
652658
return AxisInfo();
653659
auto shape = resTy.getShape();
@@ -995,6 +1001,55 @@ class MaxMinOpAxisInfoVisitor final : public AxisInfoVisitorImpl<OpTy> {
9951001
}
9961002
};
9971003

1004+
class MakeTensorPtrOpAxisInfoVisitor final
1005+
: public AxisInfoVisitorImpl<triton::MakeTensorPtrOp> {
1006+
public:
1007+
using AxisInfoVisitorImpl<triton::MakeTensorPtrOp>::AxisInfoVisitorImpl;
1008+
1009+
AxisInfo
1010+
getAxisInfo(triton::MakeTensorPtrOp op,
1011+
ArrayRef<const dataflow::Lattice<AxisInfo> *> operands) override {
1012+
LDBG("MakeTensorPtrOpAxisInfoVisitor: " << *op);
1013+
assert(op.getShape().size() == 2 && operands.size() == 7 &&
1014+
"MakeTensorPtrOp should have 2D shape");
1015+
1016+
AxisInfo ptrInfo = operands[0]->getValue();
1017+
AxisInfo shapeInfo0 = operands[1]->getValue();
1018+
AxisInfo shapeInfo1 = operands[2]->getValue();
1019+
AxisInfo strideInfo0 = operands[3]->getValue();
1020+
AxisInfo strideInfo1 = operands[4]->getValue();
1021+
1022+
std::optional<int64_t> shape0 = shapeInfo0.getConstantValue();
1023+
std::optional<int64_t> shape1 = shapeInfo1.getConstantValue();
1024+
std::optional<int64_t> stride0 = strideInfo0.getConstantValue();
1025+
std::optional<int64_t> stride1 = strideInfo1.getConstantValue();
1026+
1027+
AxisInfo::DimVectorT contiguity{
1028+
shape0.has_value() && (stride0 == 1) ? shape0.value() : 1,
1029+
shape1.has_value() && (stride1 == 1) ? shape1.value() : 1};
1030+
1031+
int64_t ptrDivisibility = ptrInfo.getDivisibility()[0];
1032+
int64_t strideDivisibility0 = strideInfo0.getDivisibility()[0];
1033+
int64_t strideDivisibility1 = strideInfo1.getDivisibility()[0];
1034+
1035+
LDBG("ptrDivisibility: " << ptrDivisibility);
1036+
LDBG("strideDivisibility0: " << strideDivisibility0);
1037+
LDBG("strideDivisibility1: " << strideDivisibility1);
1038+
1039+
AxisInfo::DimVectorT divisibility{1, 1};
1040+
if (ptrDivisibility > 1) {
1041+
if (contiguity[0] > 1)
1042+
divisibility[0] = std::min(ptrDivisibility, strideDivisibility1);
1043+
if (contiguity[1] > 1)
1044+
divisibility[1] = std::min(ptrDivisibility, strideDivisibility0);
1045+
}
1046+
1047+
AxisInfo::DimVectorT constancy{1, 1};
1048+
1049+
return AxisInfo(contiguity, divisibility, constancy);
1050+
}
1051+
};
1052+
9981053
//===----------------------------------------------------------------------===//
9991054
// AxisInfoAnalysis
10001055
//===----------------------------------------------------------------------===//
@@ -1042,11 +1097,13 @@ AxisInfoAnalysis::AxisInfoAnalysis(DataFlowSolver &solver)
10421097
MaxMinOpAxisInfoVisitor<arith::MinSIOp>,
10431098
MaxMinOpAxisInfoVisitor<arith::MinUIOp>>();
10441099
visitors.append<LoadOpAxisInfoVisitor>();
1100+
visitors.append<MakeTensorPtrOpAxisInfoVisitor>();
10451101
}
10461102

10471103
LogicalResult AxisInfoAnalysis::visitOperation(
10481104
Operation *op, ArrayRef<const dataflow::Lattice<AxisInfo> *> operands,
10491105
ArrayRef<dataflow::Lattice<AxisInfo> *> results) {
1106+
LDBG("visitOperation: << " << *op);
10501107
// TODO: For sure not the right way to do this
10511108
// but why is scf.if not initialized otherwise?
10521109
for (auto op : operands)
@@ -1204,7 +1261,7 @@ void AxisInfo::initPessimisticStateFromFunc(int argNumber, T funcOp,
12041261
}
12051262

12061263
unsigned ModuleAxisInfoAnalysis::getPtrContiguity(Value ptr) {
1207-
auto tensorTy = dyn_cast<RankedTensorType>(ptr.getType());
1264+
auto tensorTy = getRankedTensorType(ptr.getType());
12081265
if (!tensorTy)
12091266
return 1;
12101267
auto layout = tensorTy.getEncoding();
@@ -1226,7 +1283,7 @@ unsigned ModuleAxisInfoAnalysis::getPtrContiguity(Value ptr) {
12261283
}
12271284

12281285
unsigned ModuleAxisInfoAnalysis::getPtrAlignment(Value ptr) {
1229-
auto tensorTy = dyn_cast<RankedTensorType>(ptr.getType());
1286+
auto tensorTy = getRankedTensorType(ptr.getType());
12301287
if (!tensorTy)
12311288
return 1;
12321289
auto *axisInfo = getAxisInfo(ptr);
@@ -1254,7 +1311,7 @@ unsigned ModuleAxisInfoAnalysis::getPtrAlignment(Value ptr) {
12541311
}
12551312

12561313
unsigned ModuleAxisInfoAnalysis::getMaskAlignment(Value mask) {
1257-
auto tensorTy = dyn_cast<RankedTensorType>(mask.getType());
1314+
auto tensorTy = getRankedTensorType(mask.getType());
12581315
if (!tensorTy)
12591316
return 1;
12601317
auto *axisInfo = getAxisInfo(mask);

0 commit comments

Comments
 (0)