Skip to content

Commit e5aabbd

Browse files
author
Marek Sedlacek
committed
This patch adds loop rotation to runtime loop unrolling when rotating makes
the loop countable, which in turn may enable additional unrolling of the loop.
1 parent ab6316e commit e5aabbd

File tree

5 files changed

+334
-103
lines changed

5 files changed

+334
-103
lines changed

llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
1414
#define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
1515

16+
#include "llvm/ADT/STLExtras.h"
1617
#include "llvm/Support/Compiler.h"
1718

1819
namespace llvm {
@@ -32,12 +33,13 @@ class TargetTransformInfo;
3233
/// header. If the loop header's size exceeds the threshold, the loop rotation
3334
/// will give up. The flag IsUtilMode controls the heuristic used in the
3435
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
35-
LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
36-
const TargetTransformInfo *TTI, AssumptionCache *AC,
37-
DominatorTree *DT, ScalarEvolution *SE,
38-
MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
39-
bool RotationOnly, unsigned Threshold,
40-
bool IsUtilMode, bool PrepareForLTO = false);
36+
LLVM_ABI bool LoopRotation(
37+
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
38+
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
39+
const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
40+
bool IsUtilMode, bool PrepareForLTO = false,
41+
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck =
42+
[](Loop *, ScalarEvolution *) { return false; });
4143

4244
} // namespace llvm
4345

llvm/lib/Transforms/Utils/LoopRotationUtils.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,16 +69,19 @@ class LoopRotate {
6969
bool RotationOnly;
7070
bool IsUtilMode;
7171
bool PrepareForLTO;
72+
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck;
7273

7374
public:
7475
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
7576
const TargetTransformInfo *TTI, AssumptionCache *AC,
7677
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
7778
const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
78-
bool PrepareForLTO)
79+
bool PrepareForLTO,
80+
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck)
7981
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
8082
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
81-
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
83+
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
84+
profitabilityCheck(profitabilityCheck) {}
8285
bool processLoop(Loop *L);
8386

8487
private:
@@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
440443

441444
// Rotate if either the loop latch does *not* exit the loop, or if the loop
442445
// latch was just simplified. Or if we think it will be profitable.
443-
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
444-
!profitableToRotateLoopExitingLatch(L) &&
445-
!canRotateDeoptimizingLatchExit(L))
446+
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
447+
IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
448+
!canRotateDeoptimizingLatchExit(L) && !profitabilityCheck(L, SE))
446449
return Rotated;
447450

448451
// Check size of original header and reject loop if it is very big or we can't
@@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {
10531056

10541057

10551058
/// The utility to convert a loop into a loop with bottom test.
1056-
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
1057-
AssumptionCache *AC, DominatorTree *DT,
1058-
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
1059-
const SimplifyQuery &SQ, bool RotationOnly = true,
1060-
unsigned Threshold = unsigned(-1),
1061-
bool IsUtilMode = true, bool PrepareForLTO) {
1059+
bool llvm::LoopRotation(
1060+
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
1061+
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
1062+
const SimplifyQuery &SQ, bool RotationOnly = true,
1063+
unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
1064+
bool PrepareForLTO,
1065+
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck) {
10621066
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
1063-
IsUtilMode, PrepareForLTO);
1067+
IsUtilMode, PrepareForLTO, profitabilityCheck);
10641068
return LR.processLoop(L);
10651069
}

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
5959
#include "llvm/Transforms/Utils/Cloning.h"
6060
#include "llvm/Transforms/Utils/Local.h"
61+
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
6162
#include "llvm/Transforms/Utils/LoopSimplify.h"
6263
#include "llvm/Transforms/Utils/LoopUtils.h"
6364
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
@@ -484,8 +485,27 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
484485

485486
assert(ULO.Count > 0);
486487

487-
// All these values should be taken only after peeling because they might have
488-
// changed.
488+
if (ULO.Runtime && SE) {
489+
BasicBlock *OrigHeader = L->getHeader();
490+
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
491+
// Rotate loop if it makes it countable (for later unrolling)
492+
if (BI && !BI->isUnconditional() &&
493+
isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
494+
!isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
495+
LLVM_DEBUG(dbgs() << " Rotating loop to make the loop countable.\n");
496+
SimplifyQuery SQ{OrigHeader->getDataLayout()};
497+
SQ.TLI = nullptr;
498+
SQ.DT = DT;
499+
SQ.AC = AC;
500+
llvm::LoopRotation(L, LI, TTI, AC, DT, SE, nullptr /*MemorySSAUpdater*/,
501+
SQ, false /*RotationOnly*/, 16 /*Threshold*/,
502+
false /*IsUtilMode*/, false /*PrepareForLTO*/,
503+
[](Loop *, ScalarEvolution *) { return true; });
504+
}
505+
}
506+
507+
// All these values should be taken only after peeling or loop rotation
508+
// because they might have changed.
489509
BasicBlock *Preheader = L->getLoopPreheader();
490510
BasicBlock *Header = L->getHeader();
491511
BasicBlock *LatchBlock = L->getLoopLatch();
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
define void @test(i64 %0) #0 {
6+
; CHECK-LABEL: define void @test(
7+
; CHECK-SAME: i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
10+
; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
11+
; CHECK: [[BODY_LR_PH]]:
12+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, [[TMP0]]
13+
; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP1]]
14+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
15+
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
16+
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
17+
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
18+
; CHECK: [[BODY_PROL_PREHEADER]]:
19+
; CHECK-NEXT: br label %[[BODY_PROL:.*]]
20+
; CHECK: [[BODY_PROL]]:
21+
; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
22+
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
23+
; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
24+
; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr addrspace(1) null, align 4
25+
; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
26+
; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
27+
; CHECK: [[HEADER_PROL]]:
28+
; CHECK-NEXT: [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
29+
; CHECK-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
30+
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
31+
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
32+
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
33+
; CHECK: [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
34+
; CHECK-NEXT: [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
35+
; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]]
36+
; CHECK: [[BODY_PROL_LOOPEXIT]]:
37+
; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
38+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
39+
; CHECK-NEXT: br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
40+
; CHECK: [[BODY_LR_PH_NEW]]:
41+
; CHECK-NEXT: br label %[[BODY:.*]]
42+
; CHECK: [[HEADER:.*]]:
43+
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
44+
; CHECK: [[HEADER_1]]:
45+
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
46+
; CHECK: [[HEADER_2]]:
47+
; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 4
48+
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
49+
; CHECK: [[HEADER_3]]:
50+
; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
51+
; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
52+
; CHECK: [[BODY]]:
53+
; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
54+
; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) null, align 4
55+
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
56+
; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
57+
; CHECK: [[END_LOOPEXIT]]:
58+
; CHECK-NEXT: br label %[[END:.*]]
59+
; CHECK: [[END_LOOPEXIT3]]:
60+
; CHECK-NEXT: br label %[[END]]
61+
; CHECK: [[END]]:
62+
; CHECK-NEXT: ret void
63+
; CHECK: [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
64+
; CHECK-NEXT: br label %[[HEADER_AFTER_CRIT_EDGE]]
65+
; CHECK: [[HEADER_AFTER_CRIT_EDGE]]:
66+
; CHECK-NEXT: br label %[[AFTER]]
67+
; CHECK: [[AFTER]]:
68+
; CHECK-NEXT: call void @foo(i32 0)
69+
; CHECK-NEXT: ret void
70+
;
71+
entry:
72+
br label %header
73+
74+
header:
75+
%a = phi i64 [ %0, %entry ], [ %c, %body ]
76+
%b = icmp eq i64 %a, 0
77+
br i1 %b, label %after, label %body
78+
79+
body:
80+
%c = add i64 %a, 1
81+
%d = load i32, ptr addrspace(1) null, align 4
82+
%e = icmp eq i32 %d, 0
83+
br i1 %e, label %end, label %header
84+
85+
end:
86+
ret void
87+
88+
after:
89+
call void @foo(i32 0)
90+
ret void
91+
}
92+
93+
declare void @foo(i32)
94+
95+
attributes #0 = { "tune-cpu"="generic" }
96+
;.
97+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
98+
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
99+
;.

0 commit comments

Comments
 (0)