Commit 403a8a1
add collapse shape pass for linalg{broadcast,transpose,fill,reduce} (#302)
The pass collapses runs of adjacent tensor dimensions that an op treats uniformly into a single dimension, runs the op at the lower rank, and expands the result back to the original shape afterwards.

linalg.generic with broadcast

before

```mlir
%79 = linalg.generic {indexing_maps = [#map7, #map4], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%76 : tensor<1x1x1x1x1x1x1x1x2x2xi32>) outs(%77 : tensor<2x2x2x2x2x2x2x2x2x2xi32>) attrs = {broadcastDims = array<i64: 0, 1, 2, 3, 4, 5, 6, 7>} {
^bb0(%in: i32, %out: i32):
  linalg.yield %in : i32
} -> tensor<2x2x2x2x2x2x2x2x2x2xi32>
```

after

```mlir
%collapsed_30 = tensor.collapse_shape %89 [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9]] : tensor<1x1x1x1x1x1x1x1x2x2xi32> into tensor<1x4xi32>
%92 = tensor.collapse_shape %77 [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9]] : tensor<2x2x2x2x2x2x2x2x2x2xi32> into tensor<256x4xi32>
%93 = linalg.generic {indexing_maps = [#map2, #map1], iterator_types = ["parallel", "parallel"]} ins(%collapsed_30 : tensor<1x4xi32>) outs(%92 : tensor<256x4xi32>) attrs = {broadcastDims = array<i64: 0>} {
^bb0(%in: i32, %out: i32):
  linalg.yield %in : i32
} -> tensor<256x4xi32>
%expanded_31 = tensor.expand_shape %93 [[0, 1, 2, 3, 4, 5, 6, 7], [8, 9]] output_shape [2, 2, 2, 2, 2, 2, 2, 2, 2, 2] : tensor<256x4xi32> into tensor<2x2x2x2x2x2x2x2x2x2xi32>
```

linalg.transpose

before

```mlir
%transposed = linalg.transpose ins(%expanded_2 : tensor<2x2x2x2x2x2x2x2x2x2xi64>) outs(%66 : tensor<2x2x2x2x2x2x2x2x2x2xi64>) permutation = [0, 1, 2, 3, 4, 5, 6, 7, 9, 8]
```

after

```mlir
%collapsed = tensor.collapse_shape %expanded_17 [[0, 1, 2, 3, 4, 5, 6, 7], [8], [9]] : tensor<2x2x2x2x2x2x2x2x2x2xi64> into tensor<256x2x2xi64>
%77 = tensor.collapse_shape %66 ...
%transposed = linalg.transpose ins(%collapsed : tensor<256x2x2xi64>) outs(%77 : tensor<256x2x2xi64>) permutation = [0, 2, 1]
%expanded_22 = tensor.expand_shape %transposed ...
```

linalg.fill

before

```mlir
%13 = linalg.fill ins(%c1_i32 : i32) outs(%12 : tensor<1x1x1x1x1x2x1xi32>) -> tensor<1x1x1x1x1x2x1xi32>
```

after

```mlir
%17 = tensor.collapse_shape %12 ...
%18 = linalg.fill ins(%c1_i32 : i32) outs(%17 : tensor<2xi32>) -> tensor<2xi32>
%expanded_6 = tensor.expand_shape %18 [[0, 1, 2, 3, 4, 5, 6]] output_shape [1, 1, 1, 1, 1, 2, 1] : tensor<2xi32> into tensor<1x1x1x1x1x2x1xi32>
```

linalg.reduce

before

```mlir
%reduced = linalg.reduce ins(%transposed : tensor<2x2x2x2x2x2x2x2x2x2xi64>) outs(%68 : tensor<2x2x2x2x2x2x2x2x2xi64>) dimensions = [8]
  (%in: i64, %init: i64) {
    %311 = arith.xori %in, %init : i64
    linalg.yield %311 : i64
  }
```

after

```mlir
%collapsed_20 = tensor.collapse_shape %expanded_19 [[0, 1, 2, 3, 4, 5, 6, 7], [8]] : tensor<2x2x2x2x2x2x2x2x2xi64> into tensor<256x2xi64>
%reduced = linalg.reduce ins(%transposed : tensor<256x2x2xi64>) outs(%collapsed_20 : tensor<256x2xi64>) dimensions = [1]
  (%in: i64, %init: i64) {
    %377 = arith.xori %in, %init : i64
    linalg.yield %377 : i64
  }
%expanded_21 = tensor.expand_shape %reduced [[0, 1, 2, 3, 4, 5, 6, 7], [8]] output_shape [2, 2, 2, 2, 2, 2, 2, 2, 2] : tensor<256x2xi64> into tensor<2x2x2x2x2x2x2x2x2xi64>
```
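All four rewrites follow the same collapse → op → expand idiom. A minimal sketch of how such a rewrite can be assembled with the upstream tensor dialect, assuming static shapes; this is illustrative only, not the pass's actual implementation, and `rewriteAtLowerRank`/`buildOp` are made-up names:

```cpp
#include "mlir/Dialect/Tensor/IR/Tensor.h"

using namespace mlir;

// Collapse `src` along the given reassociation groups, rebuild the op on the
// lower-rank value via `buildOp`, then expand the result back to the original
// shape. E.g. reassoc = {{0, 1, 2, 3, 4, 5, 6, 7}, {8, 9}} turns
// tensor<2x2x2x2x2x2x2x2x2x2xi32> into tensor<256x4xi32>, as above.
static Value rewriteAtLowerRank(OpBuilder &b, Location loc, Value src,
                                ArrayRef<ReassociationIndices> reassoc,
                                function_ref<Value(Value)> buildOp) {
  Value collapsed = b.create<tensor::CollapseShapeOp>(loc, src, reassoc);
  Value lowered = buildOp(collapsed);
  // With static shapes, expand_shape's output_shape can be inferred from
  // the original result type.
  return b.create<tensor::ExpandShapeOp>(loc, src.getType(), lowered, reassoc);
}
```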
1 parent e7a375a commit 403a8a1

File tree

10 files changed: +724 -51 lines changed


include/triton-shared/Conversion/TritonArithToLinalg/ConversionPatterns.hpp

Lines changed: 1 addition & 47 deletions
```diff
@@ -13,6 +13,7 @@
 #include "triton-shared/Analysis/PtrAnalysis.h"
 #include "triton-shared/Dialect/TritonTilingExt/IR/TritonTilingExtDialect.h"
 #include "triton-shared/Utils/Utils.h"
+#include "triton-shared/Conversion/TritonArithToLinalg/ConversionTools.h"
 
 #include "triton/Dialect/Triton/IR/Dialect.h"
 
@@ -109,10 +110,6 @@ static Value getScalarValue(Value operand, Location loc,
   return nullptr;
 }
 
-static SmallVector<utils::IteratorType> getNParallelLoopsAttrs(unsigned n) {
-  return SmallVector<utils::IteratorType>(n, utils::IteratorType::parallel);
-}
-
 // if order is empty, transpose the last two dimensions
 // otherwise, use the provided order.
 // The order must be a permutation of the source rank.
@@ -656,49 +653,6 @@ struct BroadcastConverter : public OpConversionPattern<triton::BroadcastOp> {
 private:
   using OpConversionPattern<triton::BroadcastOp>::OpConversionPattern;
 
-  SmallVector<int64_t> getBroadcastDims(RankedTensorType src,
-                                        RankedTensorType dst) const {
-    SmallVector<int64_t> broadcastDims;
-    auto srcShape = src.getShape();
-    auto dstShape = dst.getShape();
-
-    for (size_t i = 0; i < srcShape.size(); i++) {
-      if (dstShape[i] != srcShape[i]) {
-        assert(srcShape[i] == 1);
-        broadcastDims.push_back(i);
-      }
-    }
-    assert(!broadcastDims.empty() && "cannot identify broadcast dimension");
-    return broadcastDims;
-  }
-
-  // Broadcasts input tensor based on TosaToLinalg's broadcastToShape
-  AffineMap getBroadcastAffineMap(MLIRContext *context,
-                                  ArrayRef<int64_t> inputShape,
-                                  ArrayRef<int64_t> broadcastToShape) const {
-
-    assert(broadcastToShape.size() >= inputShape.size());
-
-    // Create affine map and shapes for tensor initialization.
-    SmallVector<AffineExpr> outExpr;
-
-    size_t diff = broadcastToShape.size() - inputShape.size();
-    for (size_t i = 0; i < broadcastToShape.size(); i++) {
-      if (i < diff) {
-        continue;
-      }
-      size_t j = i - diff;
-      if (inputShape[j] == 1) {
-        // Broadcast singleton dimension
-        outExpr.push_back(mlir::getAffineConstantExpr(0, context));
-        continue;
-      }
-      // Non-broadcast case
-      outExpr.push_back(mlir::getAffineDimExpr(i, context));
-    }
-    return AffineMap::get(broadcastToShape.size(), 0, outExpr, context);
-  }
-
 public:
   LogicalResult
   matchAndRewrite(triton::BroadcastOp op, OpAdaptor adaptor,
```

include/triton-shared/Conversion/TritonArithToLinalg/ConversionTools.h

Lines changed: 59 additions & 0 deletions (new file)

```cpp
#ifndef TRITON_CONVERSION_TRITONARITHTOLINALG_CONVERSIONTOOLS_H
#define TRITON_CONVERSION_TRITONARITHTOLINALG_CONVERSIONTOOLS_H

#include "mlir/Dialect/Affine/IR/AffineOps.h"

namespace mlir {
namespace triton {

static inline SmallVector<utils::IteratorType> getNParallelLoopsAttrs(unsigned n) {
  return SmallVector<utils::IteratorType>(n, utils::IteratorType::parallel);
}

static inline SmallVector<int64_t> getBroadcastDims(RankedTensorType src,
                                                    RankedTensorType dst) {
  SmallVector<int64_t> broadcastDims;
  auto srcShape = src.getShape();
  auto dstShape = dst.getShape();

  for (size_t i = 0; i < srcShape.size(); i++) {
    if (dstShape[i] != srcShape[i]) {
      assert(srcShape[i] == 1);
      broadcastDims.push_back(i);
    }
  }
  assert(!broadcastDims.empty() && "cannot identify broadcast dimension");
  return broadcastDims;
}

// Broadcasts input tensor based on TosaToLinalg's broadcastToShape
static inline AffineMap
getBroadcastAffineMap(MLIRContext *context, ArrayRef<int64_t> inputShape,
                      ArrayRef<int64_t> broadcastToShape) {

  assert(broadcastToShape.size() >= inputShape.size());

  // Create affine map and shapes for tensor initialization.
  SmallVector<AffineExpr> outExpr;

  size_t diff = broadcastToShape.size() - inputShape.size();
  for (size_t i = 0; i < broadcastToShape.size(); i++) {
    if (i < diff) {
      continue;
    }
    size_t j = i - diff;
    if (inputShape[j] == 1) {
      // Broadcast singleton dimension
      outExpr.push_back(mlir::getAffineConstantExpr(0, context));
      continue;
    }
    // Non-broadcast case
    outExpr.push_back(mlir::getAffineDimExpr(i, context));
  }
  return AffineMap::get(broadcastToShape.size(), 0, outExpr, context);
}

} // namespace triton
} // namespace mlir

#endif // TRITON_CONVERSION_TRITONARITHTOLINALG_CONVERSIONTOOLS_H
```
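
For reference, a hypothetical standalone illustration (not part of the commit) of what `getBroadcastAffineMap` computes for a concrete shape pair, assuming the header's transitive includes resolve in your build:

```cpp
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/MLIRContext.h"
#include "triton-shared/Conversion/TritonArithToLinalg/ConversionTools.h"

int main() {
  mlir::MLIRContext context;
  // inputShape = [1, 4] broadcast to [8, 2, 4]: the leading size-8 dim is
  // newly added (diff = 1), the size-1 dim broadcasts, size 4 carries over.
  mlir::AffineMap map = mlir::triton::getBroadcastAffineMap(
      &context, /*inputShape=*/{1, 4}, /*broadcastToShape=*/{8, 2, 4});
  // Prints (d0, d1, d2) -> (0, d2): the unit dim always reads element 0,
  // and the new leading dim does not index the input at all.
  map.dump();
}
```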

include/triton-shared/Conversion/TritonToLinalgExperimental/CollapseShape.h

Lines changed: 22 additions & 0 deletions (new file)

```cpp
//===----------------------------------------------------------------------===//
//
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
//
//===----------------------------------------------------------------------===//

#ifndef TRITON_CONVERSION_TRITONTOLINALG_CollapseShape_H
#define TRITON_CONVERSION_TRITONTOLINALG_CollapseShape_H

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"

namespace mlir {
namespace triton {

std::unique_ptr<OperationPass<ModuleOp>> createCollapseShapePass();

} // namespace triton
} // namespace mlir

#endif // TRITON_CONVERSION_TRITONTOLINALG_CollapseShape_H
```
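
For orientation, scheduling the declared pass from C++ could look like the sketch below (standard MLIR `PassManager` API; the helper function name is made up, not from the commit):

```cpp
#include "mlir/Pass/PassManager.h"
#include "triton-shared/Conversion/TritonToLinalgExperimental/CollapseShape.h"

// Add the collapse-shape pass to a module-level pipeline, e.g. after the
// program has been lowered to linalg.
void addCollapseShape(mlir::PassManager &pm) {
  pm.addPass(mlir::triton::createCollapseShapePass());
}
```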

include/triton-shared/Conversion/TritonToLinalgExperimental/Passes.h

Lines changed: 1 addition & 0 deletions

```diff
@@ -11,6 +11,7 @@
 #include "triton-shared/Conversion/TritonToLinalgExperimental/TritonToLinalgExperimental.h"
 #include "triton-shared/Conversion/TritonToLinalgExperimental/ReconcilePtrCasts.h"
 #include "triton-shared/Conversion/TritonToLinalgExperimental/TritonToPtr.h"
+#include "triton-shared/Conversion/TritonToLinalgExperimental/CollapseShape.h"
 
 namespace mlir {
 namespace triton {
```

include/triton-shared/Conversion/TritonToLinalgExperimental/Passes.td

Lines changed: 7 additions & 1 deletion

```diff
@@ -15,7 +15,9 @@ def TritonToLinalgExperimental : Pass<"triton-to-linalg-experimental", "mlir::Mo
   let constructor = "triton::createTritonToLinalgExperimentalPass()";
   let options = [
     Option<"enableMakeGatherScatterTensorPtr", "enable-make-gather-scatter", "bool", /*default*/"true",
-           "Enable make_gather_scatter_tptr support">
+           "Enable make_gather_scatter_tptr support">,
+    Option<"enableCollapseShape", "enable-collapse-shape", "bool", /*default*/"false",
+           "Enable collapse shape pass">,
   ];
 }
 
@@ -29,4 +31,8 @@ def TritonToPtr : Pass<"triton-to-ptr", "mlir::ModuleOp"> {
   let constructor = "triton::createTritonToPtrPass()";
 }
 
+def CollapseShape : Pass</*cli-arg*/"collapse-shape", /*Op*/"mlir::ModuleOp"> {
+  let summary = "Compress tensor dimensions to improve linalg{broadcast,transpose,fill,reduce} efficiency";
+  let constructor = "triton::createCollapseShapePass()";
+}
 #endif
```
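
With these Td entries the pass should be reachable from an opt-style driver. A hedged example: the flags come straight from the definitions above, while the `triton-shared-opt` binary name is an assumption about the repo's usual test driver:

```sh
# Run the standalone pass directly:
triton-shared-opt --collapse-shape input.mlir

# Or enable it inside the experimental lowering pipeline:
triton-shared-opt --triton-to-linalg-experimental="enable-collapse-shape=true" input.mlir
```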

lib/Conversion/TritonToLinalgExperimental/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -8,6 +8,7 @@ add_triton_library(TritonToLinalgExperimental
   TritonToLinalgExperimentalPass.cpp
   ReconcilePtrCastsPass.cpp
   TritonToPtrPass.cpp
+  CollapseShape.cpp
 
   DEPENDS
   TritonToLinalgExperimentalConversionPassIncGen
```
