-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[mlir]introduce UnrollScopeInterface and apply it to funcOp and gpu.launch Op. #123904
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir]introduce UnrollScopeInterface and apply it to funcOp and gpu.launch Op. #123904
Conversation
|
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir-affine Author: lonely eagle (linuxlonelyeagle). Changes: When using [… text lost in extraction …]. This PR fixes this issue. Feel free to comment below, thank you. Full diff: https://github.com/llvm/llvm-project/pull/123904.diff 12 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
index 5e10a9f50b774e..3f5566a28546d1 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
@@ -20,6 +20,7 @@
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
namespace mlir {
class PatternRewriter;
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
index 4da0efcb13ddf5..5c9f8c6a59f8f6 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
@@ -17,6 +17,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
def Func_Dialect : Dialect {
let name = "func";
@@ -225,8 +226,8 @@ def ConstantOp : Func_Op<"constant",
//===----------------------------------------------------------------------===//
def FuncOp : Func_Op<"func", [
- AffineScope, AutomaticAllocationScope,
- FunctionOpInterface, IsolatedFromAbove, OpAsmOpInterface
+ AffineScope, AutomaticAllocationScope, FunctionOpInterface,
+ IsolatedFromAbove, OpAsmOpInterface, UnrollScopeInterface
]> {
let summary = "An operation with a name containing a single `SSACFG` region";
let description = [{
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 7b53594a1c8e28..0cf2d0c77383f1 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -29,6 +29,7 @@
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "llvm/ADT/STLExtras.h"
namespace mlir {
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 3adfd5f4f2c436..8279bb9985ea3e 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -30,6 +30,7 @@ include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
//===----------------------------------------------------------------------===//
// GPU Dialect operations.
@@ -796,7 +797,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
def GPU_LaunchOp : GPU_Op<"launch", [
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- RecursiveMemoryEffects]>,
+ RecursiveMemoryEffects, UnrollScopeInterface]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index d81298bb4daf01..cd6cc084dd2801 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_interface(TilingInterface)
add_mlir_interface(ValueBoundsOpInterface)
add_mlir_interface(VectorInterfaces)
add_mlir_interface(ViewLikeInterface)
+add_mlir_interface(UnrollScopeInterface)
set(LLVM_TARGET_DEFINITIONS MemorySlotInterfaces.td)
mlir_tablegen(MemorySlotOpInterfaces.h.inc -gen-op-interface-decls)
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.h b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
new file mode 100644
index 00000000000000..f7d71b6f9be654
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
@@ -0,0 +1,21 @@
+//===- UnrollScopeInterface.h - unroll region interface -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the operation interface for unroll scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+
+#include "mlir/IR/OpDefinition.h"
+
+/// Include the generated interface declarations.
+#include "mlir/Interfaces/UnrollScopeInterface.h.inc"
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.td b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
new file mode 100644
index 00000000000000..5ad5e5b44cfe14
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
@@ -0,0 +1,36 @@
+//===- UnrollScopeInterface.td - unroll scope interface ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for unroll region.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def UnrollScopeInterface : OpInterface<"UnrollScopeInterface"> {
+ let description = [{
+ This interface controls the scope of the loop unroll. It ensures
+ that SSA values generated outside the loop when unrolling are
+ in the nearest `UnrollScopeInterface` region.
+ }];
+ let cppNamespace = "::mlir";
+ let methods = [
+ InterfaceMethod<[{
+ return the `UnrollScopeInterface` region.
+ }],
+ "::mlir::Region&", "getUnrollBody", (ins),
+ /*methodBody=*/[{}], /*defaultImplementation=*/[{
+ return $_op->getRegion(0);
+ }]>,
+ ];
+}
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index ef6e0dbf45d3a9..d2993a424060f8 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -16,4 +16,5 @@ add_mlir_dialect_library(MLIRAffineUtils
MLIRMemRefDialect
MLIRTransformUtils
MLIRViewLikeInterface
+ MLIRUnrollScopeInterface
)
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 4e02559a089493..c697d3b0127a83 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -21,6 +21,7 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/IntegerSet.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -129,10 +130,10 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
auto *parentBlock = forOp->getBlock();
if (!iv.use_empty()) {
if (forOp.hasConstantLowerBound()) {
- auto func = forOp->getParentOfType<FunctionOpInterface>();
+ auto unrollScope = forOp->getParentOfType<UnrollScopeInterface>();
OpBuilder builder(forOp->getContext());
- if (func)
- builder.setInsertionPointToStart(&func.getFunctionBody().front());
+ if (unrollScope)
+ builder.setInsertionPointToStart(&unrollScope.getUnrollBody().front());
else
builder.setInsertionPoint(forOp);
auto constOp = builder.create<arith::ConstantIndexOp>(
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index d3b7bf65ad3e73..bd0b79aaecab0c 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -21,6 +21,7 @@ set(LLVM_OPTIONAL_SOURCES
ValueBoundsOpInterface.cpp
VectorInterfaces.cpp
ViewLikeInterface.cpp
+ UnrollScopeInterface.cpp
)
function(add_mlir_interface_library name)
@@ -46,6 +47,7 @@ add_mlir_interface_library(CopyOpInterface)
add_mlir_interface_library(DataLayoutInterfaces)
add_mlir_interface_library(DerivedAttributeOpInterface)
add_mlir_interface_library(DestinationStyleOpInterface)
+add_mlir_interface_library(UnrollScopeInterface)
add_mlir_library(MLIRFunctionInterfaces
FunctionInterfaces.cpp
diff --git a/mlir/lib/Interfaces/UnrollScopeInterface.cpp b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
new file mode 100644
index 00000000000000..b500f5ad01f2ad
--- /dev/null
+++ b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
@@ -0,0 +1,18 @@
+//===- UnrollScopeInterface.cpp - unroll scope interface in MLIR ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/UnrollScopeInterface.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// UnrollScopeInterface Interface
+//===----------------------------------------------------------------------===//
+
+/// Include the definitions of the unroll scope interface.
+#include "mlir/Interfaces/UnrollScopeInterface.cpp.inc"
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011dd..4fdaa1a7405d1a 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,66 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+
+// UNROLL-FULL-LABEL: func @gpu_launch_unroll() {
+
+func.func @gpu_launch_unroll() {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
+ %cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+ %cst_0 = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+ %cst_1 = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+ %0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) {
+ %1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) {
+ %2 = vector.extract %cst_0[%arg12] : vector<4x2xf16> from vector<2x4x2xf16>
+ %3 = vector.extract %cst_1[%arg14] : vector<2x2xf16> from vector<4x2x2xf16>
+ %4 = vector.extract %arg15[%arg12, %arg14] : vector<2x2xf16> from vector<2x4x2x2xf16>
+ %cst_2 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+ %5 = vector.insert %cst_2, %arg13 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16>
+ affine.yield %5 : vector<2x4x2x2xf16>
+ }
+ affine.yield %1 : vector<2x4x2x2xf16>
+ }
+ gpu.terminator
+ }
+ return
+}
+
+// UNROLL-FULL: %[[VAL_0:.*]] = arith.constant 1 : index
+// UNROLL-FULL: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
+// UNROLL-FULL: %[[VAL_13:.*]] = arith.constant 0 : index
+// UNROLL-FULL: %[[VAL_14:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_15:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_16:.*]] = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_17:.*]] = affine.for %[[VAL_18:.*]] = 0 to 2 iter_args(%[[VAL_19:.*]] = %[[VAL_14]]) -> (vector<2x4x2x2xf16>) {
+// UNROLL-FULL: %[[VAL_20:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_21:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_13]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_22:.*]] = vector.extract %[[VAL_19]]{{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_23:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_24:.*]] = vector.insert %[[VAL_23]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_25:.*]] = affine.apply [[$MAP0]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_26:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_27:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_25]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_28:.*]] = vector.extract %[[VAL_24]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_29:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_30:.*]] = vector.insert %[[VAL_29]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_31:.*]] = affine.apply [[$MAP1]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_32:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_33:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_31]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_34:.*]] = vector.extract %[[VAL_30]]{{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_35:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_36:.*]] = vector.insert %[[VAL_35]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_37:.*]] = affine.apply [[$MAP2]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_38:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_39:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_37]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_40:.*]] = vector.extract %[[VAL_36]]{{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_41:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_42:.*]] = vector.insert %[[VAL_41]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: affine.yield %[[VAL_42]] : vector<2x4x2x2xf16>
+// UNROLL-FULL: }
+// UNROLL-FULL: gpu.terminator
+
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
|
|
@llvm/pr-subscribers-mlir-func Author: lonely eagle (linuxlonelyeagle). Changes: When using [… text lost in extraction …]. This PR fixes this issue. Feel free to comment below, thank you. Full diff: https://github.com/llvm/llvm-project/pull/123904.diff 12 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
index 5e10a9f50b774e..3f5566a28546d1 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
@@ -20,6 +20,7 @@
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
namespace mlir {
class PatternRewriter;
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
index 4da0efcb13ddf5..5c9f8c6a59f8f6 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
@@ -17,6 +17,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
def Func_Dialect : Dialect {
let name = "func";
@@ -225,8 +226,8 @@ def ConstantOp : Func_Op<"constant",
//===----------------------------------------------------------------------===//
def FuncOp : Func_Op<"func", [
- AffineScope, AutomaticAllocationScope,
- FunctionOpInterface, IsolatedFromAbove, OpAsmOpInterface
+ AffineScope, AutomaticAllocationScope, FunctionOpInterface,
+ IsolatedFromAbove, OpAsmOpInterface, UnrollScopeInterface
]> {
let summary = "An operation with a name containing a single `SSACFG` region";
let description = [{
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 7b53594a1c8e28..0cf2d0c77383f1 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -29,6 +29,7 @@
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "llvm/ADT/STLExtras.h"
namespace mlir {
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 3adfd5f4f2c436..8279bb9985ea3e 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -30,6 +30,7 @@ include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
//===----------------------------------------------------------------------===//
// GPU Dialect operations.
@@ -796,7 +797,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
def GPU_LaunchOp : GPU_Op<"launch", [
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- RecursiveMemoryEffects]>,
+ RecursiveMemoryEffects, UnrollScopeInterface]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index d81298bb4daf01..cd6cc084dd2801 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_interface(TilingInterface)
add_mlir_interface(ValueBoundsOpInterface)
add_mlir_interface(VectorInterfaces)
add_mlir_interface(ViewLikeInterface)
+add_mlir_interface(UnrollScopeInterface)
set(LLVM_TARGET_DEFINITIONS MemorySlotInterfaces.td)
mlir_tablegen(MemorySlotOpInterfaces.h.inc -gen-op-interface-decls)
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.h b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
new file mode 100644
index 00000000000000..f7d71b6f9be654
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
@@ -0,0 +1,21 @@
+//===- UnrollScopeInterface.h - unroll region interface -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the operation interface for unroll scopes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+
+#include "mlir/IR/OpDefinition.h"
+
+/// Include the generated interface declarations.
+#include "mlir/Interfaces/UnrollScopeInterface.h.inc"
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.td b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
new file mode 100644
index 00000000000000..5ad5e5b44cfe14
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
@@ -0,0 +1,36 @@
+//===- UnrollScopeInterface.td - unroll scope interface ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for unroll region.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def UnrollScopeInterface : OpInterface<"UnrollScopeInterface"> {
+ let description = [{
+ This interface controls the scope of the loop unroll. It ensures
+ that SSA values generated outside the loop when unrolling are
+ in the nearest `UnrollScopeInterface` region.
+ }];
+ let cppNamespace = "::mlir";
+ let methods = [
+ InterfaceMethod<[{
+ return the `UnrollScopeInterface` region.
+ }],
+ "::mlir::Region&", "getUnrollBody", (ins),
+ /*methodBody=*/[{}], /*defaultImplementation=*/[{
+ return $_op->getRegion(0);
+ }]>,
+ ];
+}
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index ef6e0dbf45d3a9..d2993a424060f8 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -16,4 +16,5 @@ add_mlir_dialect_library(MLIRAffineUtils
MLIRMemRefDialect
MLIRTransformUtils
MLIRViewLikeInterface
+ MLIRUnrollScopeInterface
)
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 4e02559a089493..c697d3b0127a83 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -21,6 +21,7 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/IntegerSet.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -129,10 +130,10 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
auto *parentBlock = forOp->getBlock();
if (!iv.use_empty()) {
if (forOp.hasConstantLowerBound()) {
- auto func = forOp->getParentOfType<FunctionOpInterface>();
+ auto unrollScope = forOp->getParentOfType<UnrollScopeInterface>();
OpBuilder builder(forOp->getContext());
- if (func)
- builder.setInsertionPointToStart(&func.getFunctionBody().front());
+ if (unrollScope)
+ builder.setInsertionPointToStart(&unrollScope.getUnrollBody().front());
else
builder.setInsertionPoint(forOp);
auto constOp = builder.create<arith::ConstantIndexOp>(
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index d3b7bf65ad3e73..bd0b79aaecab0c 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -21,6 +21,7 @@ set(LLVM_OPTIONAL_SOURCES
ValueBoundsOpInterface.cpp
VectorInterfaces.cpp
ViewLikeInterface.cpp
+ UnrollScopeInterface.cpp
)
function(add_mlir_interface_library name)
@@ -46,6 +47,7 @@ add_mlir_interface_library(CopyOpInterface)
add_mlir_interface_library(DataLayoutInterfaces)
add_mlir_interface_library(DerivedAttributeOpInterface)
add_mlir_interface_library(DestinationStyleOpInterface)
+add_mlir_interface_library(UnrollScopeInterface)
add_mlir_library(MLIRFunctionInterfaces
FunctionInterfaces.cpp
diff --git a/mlir/lib/Interfaces/UnrollScopeInterface.cpp b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
new file mode 100644
index 00000000000000..b500f5ad01f2ad
--- /dev/null
+++ b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
@@ -0,0 +1,18 @@
+//===- UnrollScopeInterface.cpp - unroll scope interface in MLIR ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/UnrollScopeInterface.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// UnrollScopeInterface Interface
+//===----------------------------------------------------------------------===//
+
+/// Include the definitions of the unroll scope interface.
+#include "mlir/Interfaces/UnrollScopeInterface.cpp.inc"
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011dd..4fdaa1a7405d1a 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,66 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+
+// UNROLL-FULL-LABEL: func @gpu_launch_unroll() {
+
+func.func @gpu_launch_unroll() {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
+ %cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+ %cst_0 = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+ %cst_1 = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+ %0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) {
+ %1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) {
+ %2 = vector.extract %cst_0[%arg12] : vector<4x2xf16> from vector<2x4x2xf16>
+ %3 = vector.extract %cst_1[%arg14] : vector<2x2xf16> from vector<4x2x2xf16>
+ %4 = vector.extract %arg15[%arg12, %arg14] : vector<2x2xf16> from vector<2x4x2x2xf16>
+ %cst_2 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+ %5 = vector.insert %cst_2, %arg13 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16>
+ affine.yield %5 : vector<2x4x2x2xf16>
+ }
+ affine.yield %1 : vector<2x4x2x2xf16>
+ }
+ gpu.terminator
+ }
+ return
+}
+
+// UNROLL-FULL: %[[VAL_0:.*]] = arith.constant 1 : index
+// UNROLL-FULL: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
+// UNROLL-FULL: %[[VAL_13:.*]] = arith.constant 0 : index
+// UNROLL-FULL: %[[VAL_14:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_15:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_16:.*]] = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_17:.*]] = affine.for %[[VAL_18:.*]] = 0 to 2 iter_args(%[[VAL_19:.*]] = %[[VAL_14]]) -> (vector<2x4x2x2xf16>) {
+// UNROLL-FULL: %[[VAL_20:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_21:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_13]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_22:.*]] = vector.extract %[[VAL_19]]{{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_23:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_24:.*]] = vector.insert %[[VAL_23]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_25:.*]] = affine.apply [[$MAP0]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_26:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_27:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_25]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_28:.*]] = vector.extract %[[VAL_24]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_29:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_30:.*]] = vector.insert %[[VAL_29]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_31:.*]] = affine.apply [[$MAP1]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_32:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_33:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_31]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_34:.*]] = vector.extract %[[VAL_30]]{{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_35:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_36:.*]] = vector.insert %[[VAL_35]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_37:.*]] = affine.apply [[$MAP2]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_38:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_39:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_37]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_40:.*]] = vector.extract %[[VAL_36]]{{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_41:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_42:.*]] = vector.insert %[[VAL_41]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: affine.yield %[[VAL_42]] : vector<2x4x2x2xf16>
+// UNROLL-FULL: }
+// UNROLL-FULL: gpu.terminator
+
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
|
|
@llvm/pr-subscribers-mlir Author: lonely eagle (linuxlonelyeagle). Changes: When using [… text lost in extraction …]. This PR fixes this issue. Feel free to comment below, thank you. Full diff: https://github.com/llvm/llvm-project/pull/123904.diff 12 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
index 5e10a9f50b774e..3f5566a28546d1 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.h
@@ -20,6 +20,7 @@
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
namespace mlir {
class PatternRewriter;
diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
index 4da0efcb13ddf5..5c9f8c6a59f8f6 100644
--- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
+++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td
@@ -17,6 +17,7 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
def Func_Dialect : Dialect {
let name = "func";
@@ -225,8 +226,8 @@ def ConstantOp : Func_Op<"constant",
//===----------------------------------------------------------------------===//
def FuncOp : Func_Op<"func", [
- AffineScope, AutomaticAllocationScope,
- FunctionOpInterface, IsolatedFromAbove, OpAsmOpInterface
+ AffineScope, AutomaticAllocationScope, FunctionOpInterface,
+ IsolatedFromAbove, OpAsmOpInterface, UnrollScopeInterface
]> {
let summary = "An operation with a name containing a single `SSACFG` region";
let description = [{
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
index 7b53594a1c8e28..0cf2d0c77383f1 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDialect.h
@@ -29,6 +29,7 @@
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "llvm/ADT/STLExtras.h"
namespace mlir {
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
index 3adfd5f4f2c436..8279bb9985ea3e 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -30,6 +30,7 @@ include "mlir/Interfaces/FunctionInterfaces.td"
include "mlir/Interfaces/InferIntRangeInterface.td"
include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/UnrollScopeInterface.td"
//===----------------------------------------------------------------------===//
// GPU Dialect operations.
@@ -796,7 +797,7 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
def GPU_LaunchOp : GPU_Op<"launch", [
AutomaticAllocationScope, AttrSizedOperandSegments, GPU_AsyncOpInterface,
DeclareOpInterfaceMethods<InferIntRangeInterface, ["inferResultRanges"]>,
- RecursiveMemoryEffects]>,
+ RecursiveMemoryEffects, UnrollScopeInterface]>,
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
diff --git a/mlir/include/mlir/Interfaces/CMakeLists.txt b/mlir/include/mlir/Interfaces/CMakeLists.txt
index d81298bb4daf01..cd6cc084dd2801 100644
--- a/mlir/include/mlir/Interfaces/CMakeLists.txt
+++ b/mlir/include/mlir/Interfaces/CMakeLists.txt
@@ -17,6 +17,7 @@ add_mlir_interface(TilingInterface)
add_mlir_interface(ValueBoundsOpInterface)
add_mlir_interface(VectorInterfaces)
add_mlir_interface(ViewLikeInterface)
+add_mlir_interface(UnrollScopeInterface)
set(LLVM_TARGET_DEFINITIONS MemorySlotInterfaces.td)
mlir_tablegen(MemorySlotOpInterfaces.h.inc -gen-op-interface-decls)
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.h b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
new file mode 100644
index 00000000000000..f7d71b6f9be654
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.h
@@ -0,0 +1,21 @@
+//===- UnrollScopeInterface.h - unroll region interface -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operation interface for unroll region
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
+
+#include "mlir/IR/OpDefinition.h"
+
+/// Include the generated interface declarations.
+#include "mlir/Interfaces/UnrollScopeInterface.h.inc"
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE_H_
diff --git a/mlir/include/mlir/Interfaces/UnrollScopeInterface.td b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
new file mode 100644
index 00000000000000..5ad5e5b44cfe14
--- /dev/null
+++ b/mlir/include/mlir/Interfaces/UnrollScopeInterface.td
@@ -0,0 +1,36 @@
+//===- UnrollScopeInterface.td - unroll scope interface ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for unroll region.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+#define MLIR_INTERFACES_UNROLLSCOPEINTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def UnrollScopeInterface : OpInterface<"UnrollScopeInterface"> {
+ let description = [{
+ This interface controls the scope of the loop unroll.It ensures
+ that SSA values generated outside the loop when unrolling are
+ in the nearest `UnrollScopeInterface` region.
+ }];
+ let cppNamespace = "::mlir";
+ let methods = [
+ InterfaceMethod<[{
+ return the `UnrollScopeInterface` region.
+ }],
+ "::mlir::Region&", "getUnrollBody", (ins),
+ /*methodBody=*/[{}], /*defaultImplementation=*/[{
+ return $_op->getRegion(0);
+ }]>,
+ ];
+}
+
+#endif // MLIR_INTERFACES_UNROLLSCOPEINTERFACE
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index ef6e0dbf45d3a9..d2993a424060f8 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -16,4 +16,5 @@ add_mlir_dialect_library(MLIRAffineUtils
MLIRMemRefDialect
MLIRTransformUtils
MLIRViewLikeInterface
+ MLIRUnrollScopeInterface
)
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 4e02559a089493..c697d3b0127a83 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -21,6 +21,7 @@
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/IntegerSet.h"
+#include "mlir/Interfaces/UnrollScopeInterface.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -129,10 +130,10 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
auto *parentBlock = forOp->getBlock();
if (!iv.use_empty()) {
if (forOp.hasConstantLowerBound()) {
- auto func = forOp->getParentOfType<FunctionOpInterface>();
+ auto unrollScope = forOp->getParentOfType<UnrollScopeInterface>();
OpBuilder builder(forOp->getContext());
- if (func)
- builder.setInsertionPointToStart(&func.getFunctionBody().front());
+ if (unrollScope)
+ builder.setInsertionPointToStart(&unrollScope.getUnrollBody().front());
else
builder.setInsertionPoint(forOp);
auto constOp = builder.create<arith::ConstantIndexOp>(
diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index d3b7bf65ad3e73..bd0b79aaecab0c 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -21,6 +21,7 @@ set(LLVM_OPTIONAL_SOURCES
ValueBoundsOpInterface.cpp
VectorInterfaces.cpp
ViewLikeInterface.cpp
+ UnrollScopeInterface.cpp
)
function(add_mlir_interface_library name)
@@ -46,6 +47,7 @@ add_mlir_interface_library(CopyOpInterface)
add_mlir_interface_library(DataLayoutInterfaces)
add_mlir_interface_library(DerivedAttributeOpInterface)
add_mlir_interface_library(DestinationStyleOpInterface)
+add_mlir_interface_library(UnrollScopeInterface)
add_mlir_library(MLIRFunctionInterfaces
FunctionInterfaces.cpp
diff --git a/mlir/lib/Interfaces/UnrollScopeInterface.cpp b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
new file mode 100644
index 00000000000000..b500f5ad01f2ad
--- /dev/null
+++ b/mlir/lib/Interfaces/UnrollScopeInterface.cpp
@@ -0,0 +1,18 @@
+//===- UnrollScopeInterface.cpp - unroll scope interface in MLIR ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Interfaces/UnrollScopeInterface.h"
+
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// UnrollScopeInterface Interface
+//===----------------------------------------------------------------------===//
+
+/// Include the definitions of the unroll scope interface.
+#include "mlir/Interfaces/UnrollScopeInterface.cpp.inc"
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011dd..4fdaa1a7405d1a 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,66 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+
+// UNROLL-FULL-LABEL: func @gpu_launch_unroll() {
+
+func.func @gpu_launch_unroll() {
+ %c1 = arith.constant 1 : index
+ gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
+ %cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+ %cst_0 = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+ %cst_1 = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+ %0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) {
+ %1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) {
+ %2 = vector.extract %cst_0[%arg12] : vector<4x2xf16> from vector<2x4x2xf16>
+ %3 = vector.extract %cst_1[%arg14] : vector<2x2xf16> from vector<4x2x2xf16>
+ %4 = vector.extract %arg15[%arg12, %arg14] : vector<2x2xf16> from vector<2x4x2x2xf16>
+ %cst_2 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+ %5 = vector.insert %cst_2, %arg13 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16>
+ affine.yield %5 : vector<2x4x2x2xf16>
+ }
+ affine.yield %1 : vector<2x4x2x2xf16>
+ }
+ gpu.terminator
+ }
+ return
+}
+
+// UNROLL-FULL: %[[VAL_0:.*]] = arith.constant 1 : index
+// UNROLL-FULL: gpu.launch blocks(%[[VAL_1:.*]], %[[VAL_2:.*]], %[[VAL_3:.*]]) in (%[[VAL_4:.*]] = %[[VAL_0]], %[[VAL_5:.*]] = %[[VAL_0]], %[[VAL_6:.*]] = %[[VAL_0]]) threads(%[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]]) in (%[[VAL_10:.*]] = %[[VAL_0]], %[[VAL_11:.*]] = %[[VAL_0]], %[[VAL_12:.*]] = %[[VAL_0]]) {
+// UNROLL-FULL: %[[VAL_13:.*]] = arith.constant 0 : index
+// UNROLL-FULL: %[[VAL_14:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_15:.*]] = arith.constant dense<0.000000e+00> : vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_16:.*]] = arith.constant dense<0.000000e+00> : vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_17:.*]] = affine.for %[[VAL_18:.*]] = 0 to 2 iter_args(%[[VAL_19:.*]] = %[[VAL_14]]) -> (vector<2x4x2x2xf16>) {
+// UNROLL-FULL: %[[VAL_20:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_21:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_13]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_22:.*]] = vector.extract %[[VAL_19]]{{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_23:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_24:.*]] = vector.insert %[[VAL_23]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_13]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_25:.*]] = affine.apply [[$MAP0]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_26:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_27:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_25]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_28:.*]] = vector.extract %[[VAL_24]]{{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_29:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_30:.*]] = vector.insert %[[VAL_29]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_25]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_31:.*]] = affine.apply [[$MAP1]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_32:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_33:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_31]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_34:.*]] = vector.extract %[[VAL_30]]{{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_35:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_36:.*]] = vector.insert %[[VAL_35]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_31]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_37:.*]] = affine.apply [[$MAP2]](%[[VAL_13]])
+// UNROLL-FULL: %[[VAL_38:.*]] = vector.extract %[[VAL_15]]{{\[}}%[[VAL_18]]] : vector<4x2xf16> from vector<2x4x2xf16>
+// UNROLL-FULL: %[[VAL_39:.*]] = vector.extract %[[VAL_16]]{{\[}}%[[VAL_37]]] : vector<2x2xf16> from vector<4x2x2xf16>
+// UNROLL-FULL: %[[VAL_40:.*]] = vector.extract %[[VAL_36]]{{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> from vector<2x4x2x2xf16>
+// UNROLL-FULL: %[[VAL_41:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf16>
+// UNROLL-FULL: %[[VAL_42:.*]] = vector.insert %[[VAL_41]], %[[VAL_19]] {{\[}}%[[VAL_18]], %[[VAL_37]]] : vector<2x2xf16> into vector<2x4x2x2xf16>
+// UNROLL-FULL: affine.yield %[[VAL_42]] : vector<2x4x2x2xf16>
+// UNROLL-FULL: }
+// UNROLL-FULL: gpu.terminator
+
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
|
0432d91 to
9233d63
Compare
|
Have you tried using Try writing a complete IR with OP that has side effect, can be store op, and use |
|
|
Ping @bondhugula. I think this PR also needs your review, thank you. |
|
Do you, perhaps, want to schedule your affine loop unrolling pass onto |
Yes, because affine-loop-unroll runs on funcOp, not on the whole MLIR moduleOp. |
|
I claim that the PR you actually want is to change I can't think why the affine loop unroller needs to run on functions specifically |
[https://arxiv.org/pdf/2108.13191](https://arxiv.org/pdf/2108.13191) I borrowed my ideas from this paper, but with a difference. |
|
I think this is something that needs to be looked at in depth, making it possible to unroll on the whole moduleOp is something I've thought about, but I think it would lead to bad things. |
But one thing is that it doesn't matter whether unroll is done in a funcOp or on the whole moduleOp: the IR of gpu.launch is indeed broken by the unroll pass, so this PR is needed. I'd be happy to implement an unroll follow-up on the whole moduleOp, if you think it makes sense. |
|
... this isn't implementing unroll on the module op. It's specifically telling the loop unroller not to move things out of the root of the pass - whatever you schedule it on - which, in your case, would be the |
|
It might also be the case that you want to scope the canonicalize |
My English is a bit poor, can you say it more carefully, I can only get your point roughly. |
|
Ok, no, I worked out part of the issue. The reason |
I know what you're saying again, I've written this example before and still have the same problems that appear in this PR. |
|
Ok, so, I cleaned up your example a bit to make it work - you had some typos and a GPU kernel with no side effects. // example
func.func @gpu_launch_unroll() {
%buf = gpu.alloc() : memref<2x4x2x2xf16, #gpu.address_space<global>>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %[0/1881]
ads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
%cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16> %0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) { %1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) { %cst_0 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
%2 = vector.insert %cst_0, %arg15 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16> affine.yield %2 : vector<2x4x2x2xf16>
} affine.yield %1 : vector<2x4x2x2xf16> } vector.transfer_write %0, %buf[%c0, %c0, %c0, %c0] {inbounds = [true, true, true, true]} : vector<2x4x2x2xf16>, memref<2x4x2x2xf16, #gpu.address_space<global>>
gpu.terminator
}
gpu.dealloc %buf : memref<2x4x2x2xf16, #gpu.address_space<global>>
return
}which, when run through (If you only want to unroll the inner loop, just get rid of the second |
There's something wrong with the IR you pasted, |
|
.. Yeah, I pasted the example wrong, give me a moment |
// example
func.func @gpu_launch_unroll() {
%buf = gpu.alloc() : memref<2x4x2x2xf16, #gpu.address_space<global>>
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
%cst = arith.constant dense<0.000000e+00> : vector<2x4x2x2xf16>
%0 = affine.for %arg12 = 0 to 2 iter_args(%arg13 = %cst) -> (vector<2x4x2x2xf16>) {
%1 = affine.for %arg14 = 0 to 4 iter_args(%arg15 = %arg13) -> (vector<2x4x2x2xf16>) {
%cst_0 = arith.constant dense<0.000000e+00> : vector<2x2xf16>
%2 = vector.insert %cst_0, %arg15 [%arg12, %arg14] : vector<2x2xf16> into vector<2x4x2x2xf16>
affine.yield %2 : vector<2x4x2x2xf16>
}
affine.yield %1 : vector<2x4x2x2xf16>
}
vector.transfer_write %0, %buf[%c0, %c0, %c0, %c0] {inbounds = [true, true, true, true]} : vector<2x4x2x2xf16>, memref<2x4x2x2xf16, #gpu.address_space<global>>
gpu.terminator
}
gpu.dealloc %buf : memref<2x4x2x2xf16, #gpu.address_space<global>>
return
}If I run this through |
|
The |
Such a change would still be welcome. The loop unroll pass was written before we turned functions into operations, that's why it is historically a function pass, but there is no reason for that apart from history. Then it can run either on |
Give me more time, I still have some work to do, I will implement it. |
|
The problem with But I think at the very least moving to |
|
Can you add a commit summary for the interface being introduced (with a couple of lines on the rationale)? You have it in the comment at #123904 (comment), but the commit summary is empty. I ran the example in the first comment - the output: Which redundant values are you referring to? loop-unroll/full was actually meant to be a test pass - it doesn't have a concrete heuristic. (In fact, it was the first pass of MLIR!) One would expect the utilities it exposes, On a minor note, separately, it makes sense to make it a |
In your example. |
When using `affine-loop-unroll` with `gpu.launch`, redundant SSA values will be introduced in `gpu.launch`. This PR fixes this issue. Feel free to comment below, thank you.