Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mlir/include/mlir/Dialect/Affine/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

namespace mlir {

class ModuleOp;
namespace func {
class FuncOp;
} // namespace func
Expand Down Expand Up @@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll::kDefaultUnrollFactor).
std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
std::unique_ptr<OperationPass<mlir::ModuleOp>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
Expand Down
2 changes: 1 addition & 1 deletion mlir/include/mlir/Dialect/Affine/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def AffineLoopTiling : Pass<"affine-loop-tile", "func::FuncOp"> {
];
}

def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
def AffineLoopUnroll : Pass<"affine-loop-unroll", "ModuleOp"> {
let summary = "Unroll affine loops";
let constructor = "mlir::affine::createLoopUnrollPass()";
let options = [
Expand Down
75 changes: 40 additions & 35 deletions mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
Expand Down Expand Up @@ -82,7 +83,7 @@ static bool isInnermostAffineForOp(AffineForOp op) {
}

/// Gathers loops that have no affine.for's nested within.
static void gatherInnermostLoops(func::FuncOp f,
static void gatherInnermostLoops(FunctionOpInterface f,
SmallVectorImpl<AffineForOp> &loops) {
f.walk([&](AffineForOp forOp) {
if (isInnermostAffineForOp(forOp))
Expand All @@ -91,40 +92,44 @@ static void gatherInnermostLoops(func::FuncOp f,
}

void LoopUnroll::runOnOperation() {
func::FuncOp func = getOperation();
if (func.isExternal())
return;

if (unrollFull && unrollFullThreshold.hasValue()) {
// Store short loops as we walk.
mlir::ModuleOp module = getOperation();
SmallVector<FunctionOpInterface> funcOps;
module.walk([&](FunctionOpInterface func) { funcOps.push_back(func); });
for (auto func : funcOps) {
if (func.isExternal())
return;

if (unrollFull && unrollFullThreshold.hasValue()) {
// Store short loops as we walk.
SmallVector<AffineForOp, 4> loops;

// Gathers all loops with trip count <= minTripCount. Do a post order walk
// so that loops are gathered from innermost to outermost (or else
// unrolling an outer one may delete gathered inner ones).
getOperation().walk([&](AffineForOp forOp) {
std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
if (tripCount && *tripCount <= unrollFullThreshold)
loops.push_back(forOp);
});
for (auto forOp : loops)
(void)loopUnrollFull(forOp);
return;
}

// If the call back is provided, we will recurse until no loops are found.
SmallVector<AffineForOp, 4> loops;

// Gathers all loops with trip count <= minTripCount. Do a post order walk
// so that loops are gathered from innermost to outermost (or else unrolling
// an outer one may delete gathered inner ones).
getOperation().walk([&](AffineForOp forOp) {
std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
if (tripCount && *tripCount <= unrollFullThreshold)
loops.push_back(forOp);
});
for (auto forOp : loops)
(void)loopUnrollFull(forOp);
return;
}

// If the call back is provided, we will recurse until no loops are found.
SmallVector<AffineForOp, 4> loops;
for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
loops.clear();
gatherInnermostLoops(func, loops);
if (loops.empty())
break;
bool unrolled = false;
for (auto forOp : loops)
unrolled |= succeeded(runOnAffineForOp(forOp));
if (!unrolled)
// Break out if nothing was unrolled.
break;
for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
loops.clear();
gatherInnermostLoops(func, loops);
if (loops.empty())
break;
bool unrolled = false;
for (auto forOp : loops)
unrolled |= succeeded(runOnAffineForOp(forOp));
if (!unrolled)
// Break out if nothing was unrolled.
break;
}
}
}

Expand All @@ -145,7 +150,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}

std::unique_ptr<OperationPass<func::FuncOp>> mlir::affine::createLoopUnrollPass(
std::unique_ptr<OperationPass<ModuleOp>> mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
Expand Down
100 changes: 100 additions & 0 deletions mlir/test/Dialect/Affine/unroll.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }

// Loop nest placed inside a gpu.module / gpu.func instead of a func.func.
// The checks below expect the outer loop (0 to 100 step 2) to remain and the
// inner loop (0 to 4) to be replaced by four copies of its arith.constant
// body.
gpu.module @unroll_full {
// UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
gpu.func @gpu_loop_nest_simplest() {
// UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
// UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
// UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
affine.for %j = 0 to 4 {
%x = arith.constant 1 : i32
}
} // UNROLL-FULL: }
gpu.return // UNROLL-FULL: return
}
}

// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
Expand All @@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
return // SHORT: return
} // SHORT }

// Two-level loop nest inside a gpu.module / gpu.func. The checks below expect
// the outer loop (0 to 2) to disappear and its body, the inner loop over
// 0 to 4, to appear twice in a row.
gpu.module @short {
// SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
gpu.func @gpu_loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
// SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// SHORT-NEXT: }
// SHORT-NEXT: affine.for %arg0 = 0 to 4 {
// SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
// SHORT-NEXT: }
affine.for %i = 0 to 2 {
affine.for %j = 0 to 4 {
%x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
(index) -> (index)
%y = "addi32"(%x, %x) : (index, index) -> index
}
}
gpu.return // SHORT: gpu.return
} // SHORT }
}

// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold set to 2.
Expand Down Expand Up @@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
return
}

// Unit-stride loops inside a gpu.module / gpu.func. The checks below expect
// the outer loop (0 to 100) to remain, the inner loop over 0 to 8 to become a
// step-4 loop holding four copies of its two-op body, and the empty loop over
// 0 to 8 to be printed with its original bounds.
gpu.module @unroll_by_4{
// UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
gpu.func @gpu_unroll_unit_stride_no_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
affine.for %i = 0 to 100 {
// UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
// UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
// UNROLL-BY-4-NEXT: }
affine.for %j = 0 to 8 {
%x = "addi32"(%j, %j) : (index, index) -> i32
%y = "addi32"(%x, %x) : (i32, i32) -> i32
}
// empty loop
// UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
affine.for %k = 0 to 8 {
}
}
gpu.return
}
}

// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
Expand Down Expand Up @@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
// UNROLL-BY-1-NEXT: return
}

// Single-iteration loop (0 to 1) inside a gpu.module / gpu.func. The checks
// below expect no loop in the output: the body is expected directly after the
// function header, using a constant 0 index in place of the induction
// variable, immediately followed by gpu.return.
gpu.module @unroll_by_1 {
// UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
affine.for %i = 0 to 1 {
%x = "foo"(%i) : (index) -> i32
}
gpu.return
// UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
// UNROLL-BY-1-NEXT: gpu.return
}
}

// Test unrolling with affine.for iter_args.

// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
Expand Down Expand Up @@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
// UNROLL-CLEANUP-LOOP-NEXT: return
}

// Three-iteration loop (0 to 3) inside a gpu.module / gpu.func. The checks
// below expect no loop in the output: three consecutive "foo" calls, the first
// on a constant 0 index and the other two on affine.apply results, immediately
// followed by gpu.return.
// NOTE(review): per the test name the unroll factor presumably exceeds the
// trip count; the RUN line is not visible here -- confirm.
gpu.module @unroll_cleanup_loop {
// UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
affine.for %i = 0 to 3 {
%x = "foo"(%i) : (index) -> i32
}
gpu.return
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
// UNROLL-CLEANUP-LOOP-NEXT: gpu.return
}
}

// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
Expand Down