Skip to content

Commit 4383806

Browse files
update c++ impl and update test.
1 parent 0861464 commit 4383806

File tree

3 files changed

+18
-98
lines changed

3 files changed

+18
-98
lines changed

mlir/include/mlir/Dialect/Affine/Passes.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
#ifndef MLIR_DIALECT_AFFINE_PASSES_H
1515
#define MLIR_DIALECT_AFFINE_PASSES_H
1616

17+
#include "mlir/Interfaces/FunctionInterfaces.h"
1718
#include "mlir/Pass/Pass.h"
1819
#include <limits>
1920

@@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
9394
/// factors supplied through other means. If -1 is passed as the unrollFactor
9495
/// and no callback is provided, anything passed from the command-line (if at
9596
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
96-
std::unique_ptr<Pass> createLoopUnrollPass(
97+
std::unique_ptr<InterfacePass<FunctionOpInterface>> createLoopUnrollPass(
9798
int unrollFactor = -1, bool unrollUpToFactor = false,
9899
bool unrollFull = false,
99100
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);

mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,7 +145,8 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
145145
cleanUpUnroll);
146146
}
147147

148-
std::unique_ptr<Pass> mlir::affine::createLoopUnrollPass(
148+
std::unique_ptr<InterfacePass<FunctionOpInterface>>
149+
mlir::affine::createLoopUnrollPass(
149150
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
150151
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
151152
return std::make_unique<LoopUnroll>(

mlir/test/Dialect/Affine/unroll.mlir

Lines changed: 14 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix UNROLL-FULL
2-
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2})))" | FileCheck %s --check-prefix SHORT
3-
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=4})))" | FileCheck %s --check-prefix UNROLL-BY-4
4-
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=1})))" | FileCheck %s --check-prefix UNROLL-BY-1
5-
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true})))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
1+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}))" | FileCheck %s --check-prefix UNROLL-FULL
2+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}))" | FileCheck %s --check-prefix SHORT
3+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}))" | FileCheck %s --check-prefix UNROLL-BY-4
4+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}))" | FileCheck %s --check-prefix UNROLL-BY-1
5+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
6+
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix GPU-UNROLL-FULL
67

78
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
89
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
@@ -241,19 +242,19 @@ func.func @loop_nest_unroll_full() {
241242
} // UNROLL-FULL }
242243

243244
gpu.module @unroll_full {
244-
// UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
245+
// GPU-UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
245246
gpu.func @gpu_loop_nest_simplest() {
246-
// UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
247+
// GPU-UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
247248
affine.for %i = 0 to 100 step 2 {
248-
// UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
249-
// UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
250-
// UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
251-
// UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
249+
// GPU-UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
250+
// GPU-UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
251+
// GPU-UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
252+
// GPU-UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
252253
affine.for %j = 0 to 4 {
253254
%x = arith.constant 1 : i32
254255
}
255-
} // UNROLL-FULL: }
256-
gpu.return // UNROLL-FULL: return
256+
} // GPU-UNROLL-FULL: }
257+
gpu.return // GPU-UNROLL-FULL: return
257258
}
258259
}
259260

@@ -277,28 +278,6 @@ func.func @loop_nest_outer_unroll() {
277278
return // SHORT: return
278279
} // SHORT }
279280

280-
gpu.module @short {
281-
// SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
282-
gpu.func @gpu_loop_nest_outer_unroll() {
283-
// SHORT: affine.for %arg0 = 0 to 4 {
284-
// SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
285-
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
286-
// SHORT-NEXT: }
287-
// SHORT-NEXT: affine.for %arg0 = 0 to 4 {
288-
// SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
289-
// SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
290-
// SHORT-NEXT: }
291-
affine.for %i = 0 to 2 {
292-
affine.for %j = 0 to 4 {
293-
%x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
294-
(index) -> (index)
295-
%y = "addi32"(%x, %x) : (index, index) -> index
296-
}
297-
}
298-
gpu.return // SHORT: gpu.return
299-
} // SHORT }
300-
}
301-
302281
// We are doing a minimal FileCheck here. We just need this test case to
303282
// successfully run. Both %x and %y will get unrolled here as the min trip
304283
// count threshold set to 2.
@@ -384,37 +363,6 @@ func.func @unroll_unit_stride_no_cleanup() {
384363
return
385364
}
386365

387-
gpu.module @unroll_by_4{
388-
// UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
389-
gpu.func @gpu_unroll_unit_stride_no_cleanup() {
390-
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
391-
affine.for %i = 0 to 100 {
392-
// UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
393-
// UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
394-
// UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
395-
// UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
396-
// UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
397-
// UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
398-
// UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
399-
// UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
400-
// UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
401-
// UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
402-
// UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
403-
// UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
404-
// UNROLL-BY-4-NEXT: }
405-
affine.for %j = 0 to 8 {
406-
%x = "addi32"(%j, %j) : (index, index) -> i32
407-
%y = "addi32"(%x, %x) : (i32, i32) -> i32
408-
}
409-
// empty loop
410-
// UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
411-
affine.for %k = 0 to 8 {
412-
}
413-
}
414-
gpu.return
415-
}
416-
}
417-
418366
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
419367
func.func @unroll_unit_stride_cleanup() {
420368
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -702,19 +650,6 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
702650
// UNROLL-BY-1-NEXT: return
703651
}
704652

705-
gpu.module @unroll_by_1 {
706-
// UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
707-
gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
708-
affine.for %i = 0 to 1 {
709-
%x = "foo"(%i) : (index) -> i32
710-
}
711-
gpu.return
712-
// UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
713-
// UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
714-
// UNROLL-BY-1-NEXT: gpu.return
715-
}
716-
}
717-
718653
// Test unrolling with affine.for iter_args.
719654

720655
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -789,23 +724,6 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
789724
// UNROLL-CLEANUP-LOOP-NEXT: return
790725
}
791726

792-
gpu.module @unroll_cleanup_loop {
793-
// UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
794-
gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
795-
affine.for %i = 0 to 3 {
796-
%x = "foo"(%i) : (index) -> i32
797-
}
798-
gpu.return
799-
// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
800-
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
801-
// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
802-
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
803-
// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
804-
// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
805-
// UNROLL-CLEANUP-LOOP-NEXT: gpu.return
806-
}
807-
}
808-
809727
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
810728
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
811729
affine.for %i = 0 to 7 {

0 commit comments

Comments
 (0)