Skip to content

Commit 252efeb

Browse files
authored
[flang] Change traversal order for OptimizeArrayRepackingPass. (#153136)
A long chain of fir.pack_array operations might require multiple iterations of the greedy rewriter if we use bottom-up traversal, and the rewriter may then fail to converge within the default limit of 10 iterations. Failing to converge is not an error, but it was reported as one. This patch changes the traversal to top-down and also disables the hard error that was emitted when the rewriter does not converge soon enough.
1 parent bd1b1a5 commit 252efeb

File tree

2 files changed

+145
-4
lines changed

2 files changed

+145
-4
lines changed

flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ namespace fir {
2626
#include "flang/Optimizer/Transforms/Passes.h.inc"
2727
} // namespace fir
2828

29+
#define DEBUG_TYPE "optimize-array-repacking"
30+
2931
namespace {
3032
class OptimizeArrayRepackingPass
3133
: public fir::impl::OptimizeArrayRepackingBase<OptimizeArrayRepackingPass> {
@@ -78,13 +80,19 @@ void OptimizeArrayRepackingPass::runOnOperation() {
7880
mlir::MLIRContext *context = &getContext();
7981
mlir::RewritePatternSet patterns(context);
8082
mlir::GreedyRewriteConfig config;
81-
config.setRegionSimplificationLevel(
82-
mlir::GreedySimplifyRegionLevel::Disabled);
83+
config
84+
.setRegionSimplificationLevel(mlir::GreedySimplifyRegionLevel::Disabled)
85+
// Traverse the operations top-down, so that fir.pack_array
86+
// operations are optimized before the fir.pack_array operations
87+
// that use them. This way the rewrite may converge faster.
88+
.setUseTopDownTraversal();
8389
patterns.insert<PackingOfContiguous>(context);
8490
patterns.insert<NoopUnpacking>(context);
8591
if (mlir::failed(
8692
mlir::applyPatternsGreedily(funcOp, std::move(patterns), config))) {
87-
mlir::emitError(funcOp.getLoc(), "failure in array repacking optimization");
88-
signalPassFailure();
93+
// Failure may happen if the rewriter does not converge soon enough.
94+
// That is not an error, so just report a diagnostic under debug.
95+
LLVM_DEBUG(mlir::emitError(funcOp.getLoc(),
96+
"failure in array repacking optimization"));
8997
}
9098
}

flang/test/Transforms/optimize-array-repacking.fir

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,3 +658,136 @@ func.func @_QPneg_test_pointer(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf3
658658
fir.unpack_array %9 to %7 heap : !fir.box<!fir.array<?xf32>>
659659
return
660660
}
661+
662+
// Test a long chain of fir.pack_array operations.
663+
// The rewriter used to use a bottom-up traversal that optimized
664+
// fir.pack_array operations starting from the innermost one.
665+
// The rewriter did not converge in 10 (default) iterations
666+
// causing the pass to report a failure.
667+
// A top-down traversal should fix this and allow optimizing
668+
// all the repackings.
669+
// CHECK-LABEL: func.func @test_long_chain(
670+
// CHECK-NOT: fir.pack_array
671+
// CHECK-NOT: fir.unpack_array
672+
func.func @test_long_chain(%pred: i1) {
673+
%c10 = arith.constant 10 : index
674+
%3 = fir.dummy_scope : !fir.dscope
675+
%4 = fir.address_of(@aaa) : !fir.ref<!fir.array<10x10xi32>>
676+
%5 = fir.shape %c10, %c10 : (index, index) -> !fir.shape<2>
677+
%6 = fir.declare %4(%5) {uniq_name = "aaa"} : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.ref<!fir.array<10x10xi32>>
678+
%9 = fir.embox %6(%5) : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10xi32>>
679+
%10 = fir.convert %9 : (!fir.box<!fir.array<10x10xi32>>) -> !fir.box<!fir.array<?x?xi32>>
680+
%11 = fir.dummy_scope : !fir.dscope
681+
%12 = fir.pack_array %10 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
682+
%13 = fir.declare %12 dummy_scope %11 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
683+
%14 = fir.rebox %13 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
684+
cf.cond_br %pred, ^bb17, ^bb1
685+
^bb1: // pred: ^bb0
686+
%20 = fir.dummy_scope : !fir.dscope
687+
%21 = fir.pack_array %14 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
688+
%22 = fir.declare %21 dummy_scope %20 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
689+
%23 = fir.rebox %22 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
690+
%28 = fir.dummy_scope : !fir.dscope
691+
%29 = fir.pack_array %23 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
692+
%30 = fir.declare %29 dummy_scope %28 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
693+
%31 = fir.rebox %30 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
694+
cf.cond_br %pred, ^bb16, ^bb2
695+
^bb2: // pred: ^bb1
696+
%37 = fir.dummy_scope : !fir.dscope
697+
%38 = fir.pack_array %31 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
698+
%39 = fir.declare %38 dummy_scope %37 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
699+
%40 = fir.rebox %39 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
700+
%45 = fir.dummy_scope : !fir.dscope
701+
%46 = fir.pack_array %40 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
702+
%47 = fir.declare %46 dummy_scope %45 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
703+
%48 = fir.rebox %47 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
704+
cf.cond_br %pred, ^bb15, ^bb3
705+
^bb3: // pred: ^bb2
706+
%54 = fir.dummy_scope : !fir.dscope
707+
%55 = fir.pack_array %48 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
708+
%56 = fir.declare %55 dummy_scope %54 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
709+
%57 = fir.rebox %56 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
710+
%62 = fir.dummy_scope : !fir.dscope
711+
%63 = fir.pack_array %57 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
712+
%64 = fir.declare %63 dummy_scope %62 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
713+
%65 = fir.rebox %64 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
714+
cf.cond_br %pred, ^bb14, ^bb4
715+
^bb4: // pred: ^bb3
716+
%71 = fir.dummy_scope : !fir.dscope
717+
%72 = fir.pack_array %65 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
718+
%73 = fir.declare %72 dummy_scope %71 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
719+
%74 = fir.rebox %73 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
720+
%79 = fir.dummy_scope : !fir.dscope
721+
%80 = fir.pack_array %74 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
722+
%81 = fir.declare %80 dummy_scope %79 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
723+
%82 = fir.rebox %81 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
724+
cf.cond_br %pred, ^bb13, ^bb5
725+
^bb5: // pred: ^bb4
726+
%88 = fir.dummy_scope : !fir.dscope
727+
%89 = fir.pack_array %82 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
728+
%90 = fir.declare %89 dummy_scope %88 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
729+
%91 = fir.rebox %90 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
730+
%96 = fir.dummy_scope : !fir.dscope
731+
%97 = fir.pack_array %91 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
732+
%98 = fir.declare %97 dummy_scope %96 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
733+
%99 = fir.rebox %98 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
734+
cf.cond_br %pred, ^bb12, ^bb6
735+
^bb6: // pred: ^bb5
736+
%105 = fir.dummy_scope : !fir.dscope
737+
%106 = fir.pack_array %99 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
738+
%107 = fir.declare %106 dummy_scope %105 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
739+
%108 = fir.rebox %107 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
740+
%113 = fir.dummy_scope : !fir.dscope
741+
%114 = fir.pack_array %108 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
742+
%115 = fir.declare %114 dummy_scope %113 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
743+
%116 = fir.rebox %115 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
744+
cf.cond_br %pred, ^bb11, ^bb7
745+
^bb7: // pred: ^bb6
746+
%122 = fir.dummy_scope : !fir.dscope
747+
%123 = fir.pack_array %116 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
748+
%124 = fir.declare %123 dummy_scope %122 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
749+
%125 = fir.rebox %124 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
750+
%130 = fir.dummy_scope : !fir.dscope
751+
%131 = fir.pack_array %125 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
752+
%132 = fir.declare %131 dummy_scope %130 {uniq_name = "aaa"} : (!fir.box<!fir.array<?x?xi32>>, !fir.dscope) -> !fir.box<!fir.array<?x?xi32>>
753+
%133 = fir.rebox %132 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
754+
cf.cond_br %pred, ^bb9, ^bb8
755+
^bb8: // pred: ^bb7
756+
%139 = fir.dummy_scope : !fir.dscope
757+
%140 = fir.pack_array %133 heap innermost : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<!fir.array<?x?xi32>>
758+
fir.unpack_array %140 to %133 heap : !fir.box<!fir.array<?x?xi32>>
759+
cf.br ^bb9
760+
^bb9: // 2 preds: ^bb7, ^bb8
761+
fir.unpack_array %131 to %125 heap : !fir.box<!fir.array<?x?xi32>>
762+
cf.br ^bb10
763+
^bb10: // pred: ^bb9
764+
fir.unpack_array %123 to %116 heap : !fir.box<!fir.array<?x?xi32>>
765+
cf.br ^bb11
766+
^bb11: // 2 preds: ^bb6, ^bb10
767+
fir.unpack_array %114 to %108 heap : !fir.box<!fir.array<?x?xi32>>
768+
fir.unpack_array %106 to %99 heap : !fir.box<!fir.array<?x?xi32>>
769+
cf.br ^bb12
770+
^bb12: // 2 preds: ^bb5, ^bb11
771+
fir.unpack_array %97 to %91 heap : !fir.box<!fir.array<?x?xi32>>
772+
fir.unpack_array %89 to %82 heap : !fir.box<!fir.array<?x?xi32>>
773+
cf.br ^bb13
774+
^bb13: // 2 preds: ^bb4, ^bb12
775+
fir.unpack_array %80 to %74 heap : !fir.box<!fir.array<?x?xi32>>
776+
fir.unpack_array %72 to %65 heap : !fir.box<!fir.array<?x?xi32>>
777+
cf.br ^bb14
778+
^bb14: // 2 preds: ^bb3, ^bb13
779+
fir.unpack_array %63 to %57 heap : !fir.box<!fir.array<?x?xi32>>
780+
fir.unpack_array %55 to %48 heap : !fir.box<!fir.array<?x?xi32>>
781+
cf.br ^bb15
782+
^bb15: // 2 preds: ^bb2, ^bb14
783+
fir.unpack_array %46 to %40 heap : !fir.box<!fir.array<?x?xi32>>
784+
fir.unpack_array %38 to %31 heap : !fir.box<!fir.array<?x?xi32>>
785+
cf.br ^bb16
786+
^bb16: // 2 preds: ^bb1, ^bb15
787+
fir.unpack_array %29 to %23 heap : !fir.box<!fir.array<?x?xi32>>
788+
fir.unpack_array %21 to %14 heap : !fir.box<!fir.array<?x?xi32>>
789+
cf.br ^bb17
790+
^bb17: // 2 preds: ^bb0, ^bb16
791+
fir.unpack_array %12 to %10 heap : !fir.box<!fir.array<?x?xi32>>
792+
return
793+
}

0 commit comments

Comments
 (0)