
Commit 1ae19b9

Apply rocMLIR patches for external
1 parent d7237d5 commit 1ae19b9

5 files changed (+151 -98 lines)

external/llvm-project/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h

Lines changed: 5 additions & 0 deletions
@@ -66,6 +66,11 @@ LogicalResult bufferizeBlockSignature(Block *block, RewriterBase &rewriter,
                                       const BufferizationOptions &options,
                                       BufferizationState &state);

+/// Return `BufferizationOptions` such that the `bufferizeOp` behaves like the
+/// old (deprecated) partial, dialect conversion-based bufferization passes. A
+/// copy will be inserted before every buffer write.
+BufferizationOptions getPartialBufferizationOptions();
+
 } // namespace bufferization
 } // namespace mlir
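For orientation, here is a minimal usage sketch of the re-added helper. It is not part of the commit, and it assumes the `bufferizeOp` entry point in this tree takes `(Operation *, const BufferizationOptions &, BufferizationState &)`, consistent with the state-threaded `bufferizeBlockSignature` signature shown above:

// Minimal sketch (assumed API, see lead-in): run partial bufferization
// over a root op using the restored options factory.
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"

using namespace mlir;

LogicalResult runPartialBufferization(Operation *root) {
  bufferization::BufferizationOptions options =
      bufferization::getPartialBufferizationOptions();
  bufferization::BufferizationState state;
  // Ops outside the options' op filter are left untouched, and a copy is
  // inserted before every buffer write, mirroring the old partial passes.
  return bufferization::bufferizeOp(root, options, state);
}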

external/llvm-project/mlir/include/mlir/Interfaces/CallInterfaces.td

Lines changed: 2 additions & 1 deletion
@@ -107,7 +107,8 @@ def CallOpInterface : OpInterface<"CallOpInterface"> {
       reference to a symbol, via SymbolRefAttr, or a reference to a defined
       SSA value. If the reference is an SSA value, the SSA value corresponds
       to a region of a lambda-like operation.
-    }], "::mlir::CallInterfaceCallable", "getCallableForCallee"
+    }],
+    "::mlir::CallInterfaceCallable", "getCallableForCallee"
     >,
     InterfaceMethod<[{
       Sets the callee of this call-like operation. A `callee` is either a
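The change above is purely a line-break fix in the TableGen interface description. For reference, a small sketch of how `getCallableForCallee` is typically consumed from C++ (standard MLIR API, not introduced by this commit):

// CallInterfaceCallable is a PointerUnion<SymbolRefAttr, Value>: a direct
// call references a symbol, an indirect call references an SSA value.
#include "mlir/Interfaces/CallInterfaces.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;

void inspectCallee(CallOpInterface call) {
  CallInterfaceCallable callee = call.getCallableForCallee();
  if (auto symbol = dyn_cast<SymbolRefAttr>(callee))
    llvm::outs() << "direct call to " << symbol << "\n";
  else
    llvm::outs() << "indirect call through an SSA value\n";
}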

external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 20 additions & 3 deletions
@@ -27,6 +27,7 @@
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
 #include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
+#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -37,6 +38,7 @@
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -253,7 +255,8 @@ struct LowerGpuOpsToROCDLOpsPass final
   LowerGpuOpsToROCDLOpsPass() = default;
   LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
                             bool useBarePtrCallConv,
-                            gpu::amd::Runtime runtime) {
+                            gpu::amd::Runtime runtime,
+                            std::optional<llvm::SmallDenseSet<StringRef>> allowedDialects) {
     if (this->chipset.getNumOccurrences() == 0)
       this->chipset = chipset;
     if (this->indexBitwidth.getNumOccurrences() == 0)
@@ -262,6 +265,11 @@ struct LowerGpuOpsToROCDLOpsPass final
       this->useBarePtrCallConv = useBarePtrCallConv;
     if (this->runtime.getNumOccurrences() == 0)
       this->runtime = runtime;
+    if (this->allowedDialects.getNumOccurrences() == 0 && allowedDialects.has_value()) {
+      for (auto &str : allowedDialects.value()) {
+        this->allowedDialects.push_back(str.str());
+      }
+    }
   }

   void getDependentDialects(DialectRegistry &registry) const override {
@@ -369,6 +377,11 @@ struct LowerGpuOpsToROCDLOpsPass final

     populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
                                             *maybeChipset);
+    // TODO (rocmlir): remove hardcoded passes
+    // related PR: https://github.com/llvm/llvm-project/pull/124439
+    mlir::vector::populateVectorInsertExtractStridedSliceTransforms(
+        llvmPatterns);
+    // TODO: ends here
     populateGpuToROCDLConversionPatterns(converter, llvmPatterns, runtime,
                                          *maybeChipset);
     configureGpuToROCDLConversionLegality(target);
@@ -405,6 +418,9 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
   target.addLegalDialect<::mlir::LLVM::LLVMDialect>();
   target.addLegalDialect<ROCDL::ROCDLDialect>();
   target.addIllegalDialect<gpu::GPUDialect>();
+  // TODO (rocmlir): remove vector::VectorDialect
+  // related PR: https://github.com/llvm/llvm-project/pull/124439
+  target.addIllegalDialect<gpu::GPUDialect, vector::VectorDialect>();
   target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
                       LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
                       LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
@@ -466,7 +482,8 @@ std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
 mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
                                       unsigned indexBitwidth,
                                       bool useBarePtrCallConv,
-                                      gpu::amd::Runtime runtime) {
+                                      gpu::amd::Runtime runtime,
+                                      const std::optional<llvm::SmallDenseSet<StringRef>>& allowedDialects) {
   return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
-      chipset, indexBitwidth, useBarePtrCallConv, runtime);
+      chipset, indexBitwidth, useBarePtrCallConv, runtime, allowedDialects);
 }
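Taken together, a sketch of what a caller of the patched factory might look like. The chipset string and dialect names are illustrative, not from the commit, and what the pass ultimately does with the `allowedDialects` option is defined elsewhere in the patch set:

// Hypothetical call site for the patched factory; assumes the declaration
// in GPUToROCDLPass.h was updated to match the definition above.
#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
#include "llvm/ADT/DenseSet.h"

using namespace mlir;

std::unique_ptr<OperationPass<gpu::GPUModuleOp>> makeROCDLLoweringPass() {
  // Dialect namespaces forwarded into the pass option (illustrative values).
  llvm::SmallDenseSet<StringRef> allowedDialects{"arith", "cf"};
  return createLowerGpuOpsToROCDLOpsPass(
      /*chipset=*/"gfx90a", /*indexBitwidth=*/32,
      /*useBarePtrCallConv=*/false, gpu::amd::Runtime::HIP, allowedDialects);
}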

external/llvm-project/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp

Lines changed: 13 additions & 0 deletions
@@ -482,3 +482,16 @@ bufferization::bufferizeBlockSignature(Block *block, RewriterBase &rewriter,

   return success();
 }
+
+BufferizationOptions bufferization::getPartialBufferizationOptions() {
+  BufferizationOptions options;
+  options.allowUnknownOps = true;
+  options.copyBeforeWrite = true;
+  options.unknownTypeConverterFn = [](Value value, Attribute memorySpace,
+                                      const BufferizationOptions &options) {
+    return getMemRefTypeWithStaticIdentityLayout(
+        cast<TensorType>(value.getType()), memorySpace);
+  };
+  options.opFilter.allowDialect<BufferizationDialect>();
+  return options;
+}
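Because this returns a plain `BufferizationOptions` value, callers can widen the op filter before use; only the bufferization dialect is allowed by default. A hedged sketch of hypothetical caller code (the tosa dialect here is only an example, not something this commit does):

// Sketch: admitting an additional dialect into the partial bufferization.
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"

using namespace mlir;

bufferization::BufferizationOptions makeWidenedOptions() {
  bufferization::BufferizationOptions options =
      bufferization::getPartialBufferizationOptions();
  // Ops from the tosa dialect now pass the filter as well; everything
  // else still falls under allowUnknownOps / copyBeforeWrite behavior.
  options.opFilter.allowDialect<tosa::TosaDialect>();
  return options;
}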

external/llvm-project/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp

Lines changed: 111 additions & 94 deletions
@@ -501,100 +501,103 @@ static LogicalResult verifyConvOpErrorIf(T op) {
     // Skip following checks if output is not ranked
     return success();

-  const RankedTensorType inputType =
-      llvm::dyn_cast<RankedTensorType>(op.getInput().getType());
-  const RankedTensorType weightType =
-      llvm::dyn_cast<RankedTensorType>(op.getWeight().getType());
-
-  if (inputType && weightType) {
-    const auto verifyOutputSize =
-        [&op](const int64_t inputSize, const int64_t kernelSize,
-              const int64_t outputSize, const int64_t padBefore,
-              const int64_t padAfter, const int64_t stride,
-              const int64_t dilation, const llvm::StringRef dimName,
-              const llvm::StringRef dimAxis,
-              const llvm::StringRef padBeforeName,
-              const llvm::StringRef padAfterName) -> LogicalResult {
-      if (inputSize == ShapedType::kDynamic ||
-          kernelSize == ShapedType::kDynamic)
-        return success();
-
-      // ERROR_IF: O != idiv_check(I - 1 + pa + pb - (K - 1) * d, s) + 1
-
-      const std::optional<int64_t> calculatedOutSizeMinusOne = idivCheck(
-          inputSize - 1 + padBefore + padAfter - (kernelSize - 1) * dilation,
-          stride);
-      if (!calculatedOutSizeMinusOne.has_value())
-        return op.emitOpError("expected input_")
-               << dimName << " - 1 + pad_" << padBeforeName << " + pad_"
-               << padAfterName << " - (kernel_" << dimName
-               << " - 1) * dilation_" << dimAxis
-               << " to be wholly divisible by stride_" << dimAxis << ", got ("
-               << inputSize << " - 1 + " << padBefore << " + " << padAfter
-               << " - (" << kernelSize << " - 1) * " << dilation << ") / "
-               << stride;
-
-      const int64_t calculatedOutSize = calculatedOutSizeMinusOne.value() + 1;
-      if (outputSize != ShapedType::kDynamic && calculatedOutSize != outputSize)
-        return op.emitOpError("calculated output ")
-               << dimName << " did not match expected: "
-               << "calculated=" << calculatedOutSize
-               << ", expected=" << outputSize;
-
-      return success();
-    };
-
-    // input = [_,IH,IW,_], weight = [_,KH,KW,_], output = [_,OH,OW,_]
-    if constexpr (std::is_same<T, tosa::Conv2DOp>::value) {
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(1), weightType.getDimSize(1),
-              outputType.getDimSize(1), padding[0], padding[1], strides[0],
-              dilations[0], "height", "y", "top", "bottom")))
-        return failure();
-
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(2), weightType.getDimSize(2),
-              outputType.getDimSize(2), padding[2], padding[3], strides[1],
-              dilations[1], "width", "x", "left", "right")))
-        return failure();
-    }
-
-    // input = [_,IH,IW,_], weight = [KH,KW,_,_], output = [_,OH,OW,_]
-    if constexpr (std::is_same<T, tosa::DepthwiseConv2DOp>::value) {
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(1), weightType.getDimSize(0),
-              outputType.getDimSize(1), padding[0], padding[1], strides[0],
-              dilations[0], "height", "y", "top", "bottom")))
-        return failure();
-
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(2), weightType.getDimSize(1),
-              outputType.getDimSize(2), padding[2], padding[3], strides[1],
-              dilations[1], "width", "x", "left", "right")))
-        return failure();
-    }
-
-    // input = [_,ID,IH,IW,_], weight = [_,KD,KH,KW,_], output = [_,OD,OH,OW,_]
-    if constexpr (std::is_same<T, tosa::Conv3DOp>::value) {
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(1), weightType.getDimSize(1),
-              outputType.getDimSize(1), padding[0], padding[1], strides[0],
-              dilations[0], "depth", "d", "front", "back")))
-        return failure();
-
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(2), weightType.getDimSize(2),
-              outputType.getDimSize(2), padding[2], padding[3], strides[1],
-              dilations[1], "height", "y", "top", "bottom")))
-        return failure();
-
-      if (failed(verifyOutputSize(
-              inputType.getDimSize(3), weightType.getDimSize(3),
-              outputType.getDimSize(3), padding[4], padding[5], strides[2],
-              dilations[2], "width", "x", "left", "right")))
-        return failure();
-    }
-  }
+  // TODO: fix and create upstream PR
+  // const RankedTensorType inputType =
+  //     llvm::dyn_cast<RankedTensorType>(op.getInput().getType());
+  // const RankedTensorType weightType =
+  //     llvm::dyn_cast<RankedTensorType>(op.getWeight().getType());
+
+  // if (inputType && weightType) {
+  //   const auto verifyOutputSize =
+  //       [&op](const int64_t inputSize, const int64_t kernelSize,
+  //             const int64_t outputSize, const int64_t padBefore,
+  //             const int64_t padAfter, const int64_t stride,
+  //             const int64_t dilation, const llvm::StringRef dimName,
+  //             const llvm::StringRef dimAxis,
+  //             const llvm::StringRef padBeforeName,
+  //             const llvm::StringRef padAfterName) -> LogicalResult {
+  //     if (inputSize == ShapedType::kDynamic ||
+  //         kernelSize == ShapedType::kDynamic)
+  //       return success();
+
+  //     // ERROR_IF: O != idiv_check(I - 1 + pa + pb - (K - 1) * d, s) + 1
+
+  //     const std::optional<int64_t> calculatedOutSizeMinusOne = idivCheck(
+  //         inputSize - 1 + padBefore + padAfter - (kernelSize - 1) * dilation,
+  //         stride);
+  //     if (!calculatedOutSizeMinusOne.has_value())
+  //       return op.emitOpError("expected input_")
+  //              << dimName << " - 1 + pad_" << padBeforeName << " + pad_"
+  //              << padAfterName << " - (kernel_" << dimName
+  //              << " - 1) * dilation_" << dimAxis
+  //              << " to be wholly divisible by stride_" << dimAxis << ", got
+  //              ("
+  //              << inputSize << " - 1 + " << padBefore << " + " << padAfter
+  //              << " - (" << kernelSize << " - 1) * " << dilation << ") / "
+  //              << stride;
+
+  //     const int64_t calculatedOutSize = calculatedOutSizeMinusOne.value() +
+  //     1; if (outputSize != ShapedType::kDynamic && calculatedOutSize !=
+  //     outputSize)
+  //       return op.emitOpError("calculated output ")
+  //              << dimName << " did not match expected: "
+  //              << "calculated=" << calculatedOutSize
+  //              << ", expected=" << outputSize;
+
+  //     return success();
+  //   };
+
+  //   // input = [_,IH,IW,_], weight = [_,KH,KW,_], output = [_,OH,OW,_]
+  //   if constexpr (std::is_same<T, tosa::Conv2DOp>::value) {
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(1), weightType.getDimSize(1),
+  //             outputType.getDimSize(1), padding[0], padding[1], strides[0],
+  //             dilations[0], "height", "y", "top", "bottom")))
+  //       return failure();
+
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(2), weightType.getDimSize(2),
+  //             outputType.getDimSize(2), padding[2], padding[3], strides[1],
+  //             dilations[1], "width", "x", "left", "right")))
+  //       return failure();
+  //   }
+
+  //   // input = [_,IH,IW,_], weight = [KH,KW,_,_], output = [_,OH,OW,_]
+  //   if constexpr (std::is_same<T, tosa::DepthwiseConv2DOp>::value) {
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(1), weightType.getDimSize(0),
+  //             outputType.getDimSize(1), padding[0], padding[1], strides[0],
+  //             dilations[0], "height", "y", "top", "bottom")))
+  //       return failure();
+
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(2), weightType.getDimSize(1),
+  //             outputType.getDimSize(2), padding[2], padding[3], strides[1],
+  //             dilations[1], "width", "x", "left", "right")))
+  //       return failure();
+  //   }
+
+  //   // input = [_,ID,IH,IW,_], weight = [_,KD,KH,KW,_], output =
+  //   [_,OD,OH,OW,_] if constexpr (std::is_same<T, tosa::Conv3DOp>::value) {
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(1), weightType.getDimSize(1),
+  //             outputType.getDimSize(1), padding[0], padding[1], strides[0],
+  //             dilations[0], "depth", "d", "front", "back")))
+  //       return failure();
+
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(2), weightType.getDimSize(2),
+  //             outputType.getDimSize(2), padding[2], padding[3], strides[1],
+  //             dilations[1], "height", "y", "top", "bottom")))
+  //       return failure();
+
+  //     if (failed(verifyOutputSize(
+  //             inputType.getDimSize(3), weightType.getDimSize(3),
+  //             outputType.getDimSize(3), padding[4], padding[5], strides[2],
+  //             dilations[2], "width", "x", "left", "right")))
+  //       return failure();
+  //   }
+  // }

   const RankedTensorType biasType =
       llvm::dyn_cast<RankedTensorType>(op.getBias().getType());
@@ -1007,6 +1010,20 @@ static void buildConvOpWithQuantInfo(OpBuilder &builder, OperationState &result,
   result.addTypes(finalOutputType);
 }

+// Handles grouped convolution
+static void buildConvOpWithQuantInfo(OpBuilder &builder, OperationState &result,
+                                     Type outputType, Value input, Value weight,
+                                     Value bias, DenseI64ArrayAttr pad,
+                                     DenseI64ArrayAttr stride,
+                                     DenseI64ArrayAttr dilation,
+                                     mlir::IntegerAttr group) {
+  TypeAttr accType;
+  buildConvOpWithQuantInfo(builder, result, outputType, input, weight, bias,
+                           pad, stride, dilation, accType);
+  if (group)
+    result.addAttribute("group", group);
+}
+
 /// Handles tosa.transpose_conv2d which has outpad and output shape
 /// attributes.
 static void
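These static builders are normally wired up through ODS `builders` clauses rather than called directly, and the new overload is only visible inside TosaOps.cpp. A hypothetical invocation from that translation unit, with illustrative attribute values (none of the values below come from the commit):

// Sketch of a call site for the new grouped-convolution builder; the
// surrounding function, `outputType`, `input`, `weight`, and `bias` are
// assumed to be supplied by ODS-generated builder code.
using namespace mlir;

static void buildGroupedConvExample(OpBuilder &builder, OperationState &state,
                                    Type outputType, Value input, Value weight,
                                    Value bias) {
  DenseI64ArrayAttr pad = builder.getDenseI64ArrayAttr({0, 0, 0, 0});
  DenseI64ArrayAttr stride = builder.getDenseI64ArrayAttr({1, 1});
  DenseI64ArrayAttr dilation = builder.getDenseI64ArrayAttr({1, 1});
  IntegerAttr group = builder.getI64IntegerAttr(2); // two filter groups
  // Dispatches to the overload above, which forwards to the non-grouped
  // builder and then attaches the "group" attribute.
  buildConvOpWithQuantInfo(builder, state, outputType, input, weight, bias,
                           pad, stride, dilation, group);
}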
