Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H
#define LLVM_TRANSFORMS_UTILS_LOOPROTATIONUTILS_H

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Compiler.h"

namespace llvm {
Expand All @@ -32,12 +33,13 @@ class TargetTransformInfo;
/// header. If the loop header's size exceeds the threshold, the loop rotation
/// will give up. The flag IsUtilMode controls the heuristic used in the
/// LoopRotation. If it is true, the profitability heuristic will be ignored.
LLVM_ABI bool LoopRotation(Loop *L, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE,
MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ,
bool RotationOnly, unsigned Threshold,
bool IsUtilMode, bool PrepareForLTO = false);
LLVM_ABI bool LoopRotation(
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly, unsigned Threshold,
bool IsUtilMode, bool PrepareForLTO = false,
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck =
[](Loop *, ScalarEvolution *) { return false; });

} // namespace llvm

Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Transforms/Utils/UnrollLoop.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ enum class LoopUnrollResult {
/// The loop was not modified.
Unmodified,

/// The loop was modified, but not unrolled.
Modified,

/// The loop was partially unrolled -- we still have a loop, but with a
/// smaller trip count. We may also have emitted epilogue loop if the loop
/// had a non-constant trip count.
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1357,8 +1357,9 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
ULO.RuntimeUnrollMultiExit = UP.RuntimeUnrollMultiExit;
LoopUnrollResult UnrollResult = UnrollLoop(
L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA);
if (UnrollResult == LoopUnrollResult::Unmodified)
return LoopUnrollResult::Unmodified;
if (UnrollResult == LoopUnrollResult::Unmodified ||
UnrollResult == LoopUnrollResult::Modified)
return UnrollResult;

if (RemainderLoop) {
std::optional<MDNode *> RemainderLoopID =
Expand Down
28 changes: 16 additions & 12 deletions llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,19 @@ class LoopRotate {
bool RotationOnly;
bool IsUtilMode;
bool PrepareForLTO;
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck;

public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode,
bool PrepareForLTO)
bool PrepareForLTO,
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck)
: MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {}
IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO),
profitabilityCheck(profitabilityCheck) {}
bool processLoop(Loop *L);

private:
Expand Down Expand Up @@ -440,9 +443,9 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {

// Rotate if either the loop latch does *not* exit the loop, or if the loop
// latch was just simplified. Or if we think it will be profitable.
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
!profitableToRotateLoopExitingLatch(L) &&
!canRotateDeoptimizingLatchExit(L))
if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch &&
IsUtilMode == false && !profitableToRotateLoopExitingLatch(L) &&
!canRotateDeoptimizingLatchExit(L) && !profitabilityCheck(L, SE))
return Rotated;

// Check size of original header and reject loop if it is very big or we can't
Expand Down Expand Up @@ -1053,13 +1056,14 @@ bool LoopRotate::processLoop(Loop *L) {


/// The utility to convert a loop into a loop with bottom test.
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
AssumptionCache *AC, DominatorTree *DT,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1),
bool IsUtilMode = true, bool PrepareForLTO) {
bool llvm::LoopRotation(
Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC,
DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
const SimplifyQuery &SQ, bool RotationOnly = true,
unsigned Threshold = unsigned(-1), bool IsUtilMode = true,
bool PrepareForLTO,
function_ref<bool(Loop *, ScalarEvolution *)> profitabilityCheck) {
LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
IsUtilMode, PrepareForLTO);
IsUtilMode, PrepareForLTO, profitabilityCheck);
return LR.processLoop(L);
}
34 changes: 30 additions & 4 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopRotationUtils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
Expand Down Expand Up @@ -484,8 +485,33 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,

assert(ULO.Count > 0);

// All these values should be taken only after peeling because they might have
// changed.
LoopUnrollResult Result = LoopUnrollResult::Unmodified;

if (ULO.Runtime && SE) {
BasicBlock *OrigHeader = L->getHeader();
Copy link

Copilot AI Jul 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Before attempting rotation, check that the loop has a preheader and a latch block to avoid potential null-pointer assertions inside LoopRotation.

Suggested change
BasicBlock *OrigHeader = L->getHeader();
BasicBlock *OrigHeader = L->getHeader();
if (!L->getLoopPreheader()) {
LLVM_DEBUG(dbgs() << " Can't rotate loop; missing preheader.\n");
return LoopUnrollResult::Unmodified;
}

Copilot uses AI. Check for mistakes.

BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
// Rotate loop if it makes the exit count from the latch computable (for
// later unrolling).
if (BI && !BI->isUnconditional() &&
isa<SCEVCouldNotCompute>(SE->getExitCount(L, L->getLoopLatch())) &&
!isa<SCEVCouldNotCompute>(SE->getExitCount(L, OrigHeader))) {
LLVM_DEBUG(
dbgs() << " Rotating loop to make the exit count computable.\n");
SimplifyQuery SQ{OrigHeader->getDataLayout()};
SQ.TLI = nullptr;
SQ.DT = DT;
SQ.AC = AC;
if (llvm::LoopRotation(L, LI, TTI, AC, DT, SE,
nullptr /*MemorySSAUpdater*/, SQ,
false /*RotationOnly*/, 16 /*Threshold*/,
false /*IsUtilMode*/, false /*PrepareForLTO*/,
[](Loop *, ScalarEvolution *) { return true; }))
Result = LoopUnrollResult::Modified;
}
}

// All these values should be taken only after peeling or loop rotation
// because they might have changed.
BasicBlock *Preheader = L->getLoopPreheader();
BasicBlock *Header = L->getHeader();
BasicBlock *LatchBlock = L->getLoopLatch();
Expand Down Expand Up @@ -577,7 +603,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) {
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
return Result;
}

assert((!ULO.Runtime || canHaveUnrollRemainder(L)) &&
Expand All @@ -598,7 +624,7 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
else {
LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be "
"generated when assuming runtime trip count\n");
return LoopUnrollResult::Unmodified;
return Result;
}
}

Expand Down
99 changes: 99 additions & 0 deletions llvm/test/Transforms/LoopUnroll/X86/runtime-unroll-after-rotate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt --passes=loop-unroll -unroll-runtime-other-exit-predictable=1 -S %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"

; Regression input for loop-unroll: a single loop with two exits. The header
; leaves the loop when the i64 counter equals zero; the body, which branches
; back to the header, leaves the loop when a value loaded through %1 is zero.
define void @test(i64 %0, ptr %1) #0 {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i64 [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[B1:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: br i1 [[B1]], label %[[AFTER:.*]], label %[[BODY_LR_PH:.*]]
; CHECK: [[BODY_LR_PH]]:
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 0, [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = freeze i64 [[TMP5]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3
; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[BODY_PROL_PREHEADER:.*]], label %[[BODY_PROL_LOOPEXIT:.*]]
; CHECK: [[BODY_PROL_PREHEADER]]:
; CHECK-NEXT: br label %[[BODY_PROL:.*]]
; CHECK: [[BODY_PROL]]:
; CHECK-NEXT: [[A2_PROL:%.*]] = phi i64 [ [[TMP0]], %[[BODY_PROL_PREHEADER]] ], [ [[A_PROL:%.*]], %[[HEADER_PROL:.*]] ]
; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[HEADER_PROL]] ]
; CHECK-NEXT: [[C_PROL:%.*]] = add i64 [[A2_PROL]], 1
; CHECK-NEXT: [[D_PROL:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[E_PROL:%.*]] = icmp eq i32 [[D_PROL]], 0
; CHECK-NEXT: br i1 [[E_PROL]], label %[[END_LOOPEXIT3:.*]], label %[[HEADER_PROL]]
; CHECK: [[HEADER_PROL]]:
; CHECK-NEXT: [[A_PROL]] = phi i64 [ [[C_PROL]], %[[BODY_PROL]] ]
; CHECK-NEXT: [[B_PROL:%.*]] = icmp eq i64 [[A_PROL]], 0
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[BODY_PROL]], label %[[BODY_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[BODY_PROL_LOOPEXIT_UNR_LCSSA]]:
; CHECK-NEXT: [[A2_UNR_PH:%.*]] = phi i64 [ [[A_PROL]], %[[HEADER_PROL]] ]
; CHECK-NEXT: br label %[[BODY_PROL_LOOPEXIT]]
; CHECK: [[BODY_PROL_LOOPEXIT]]:
; CHECK-NEXT: [[A2_UNR:%.*]] = phi i64 [ [[TMP0]], %[[BODY_LR_PH]] ], [ [[A2_UNR_PH]], %[[BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 3
; CHECK-NEXT: br i1 [[TMP4]], label %[[HEADER_AFTER_CRIT_EDGE:.*]], label %[[BODY_LR_PH_NEW:.*]]
; CHECK: [[BODY_LR_PH_NEW]]:
; CHECK-NEXT: br label %[[BODY:.*]]
; CHECK: [[HEADER:.*]]:
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT:.*]], label %[[HEADER_1:.*]]
; CHECK: [[HEADER_1]]:
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_2:.*]]
; CHECK: [[HEADER_2]]:
; CHECK-NEXT: [[C_7:%.*]] = add i64 [[A2:%.*]], 4
; CHECK-NEXT: br i1 false, label %[[END_LOOPEXIT]], label %[[HEADER_3:.*]]
; CHECK: [[HEADER_3]]:
; CHECK-NEXT: [[B_7:%.*]] = icmp eq i64 [[C_7]], 0
; CHECK-NEXT: br i1 [[B_7]], label %[[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA:.*]], label %[[BODY]]
; CHECK: [[BODY]]:
; CHECK-NEXT: [[A2]] = phi i64 [ [[A2_UNR]], %[[BODY_LR_PH_NEW]] ], [ [[C_7]], %[[HEADER_3]] ]
; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[E:%.*]] = icmp eq i32 [[D]], 0
; CHECK-NEXT: br i1 [[E]], label %[[END_LOOPEXIT]], label %[[HEADER]]
; CHECK: [[END_LOOPEXIT]]:
; CHECK-NEXT: br label %[[END:.*]]
; CHECK: [[END_LOOPEXIT3]]:
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
; CHECK: [[HEADER_AFTER_CRIT_EDGE_UNR_LCSSA]]:
; CHECK-NEXT: br label %[[HEADER_AFTER_CRIT_EDGE]]
; CHECK: [[HEADER_AFTER_CRIT_EDGE]]:
; CHECK-NEXT: br label %[[AFTER]]
; CHECK: [[AFTER]]:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: ret void
;
entry:
br label %header

; Loop header: %a starts at %0 and takes %c (%a + 1) on every trip that comes
; back from %body. The loop is left for %after once %a equals zero.
header:
%a = phi i64 [ %0, %entry ], [ %c, %body ]
%b = icmp eq i64 %a, 0
br i1 %b, label %after, label %body

; Loop body and back edge: the exit taken here is decided by the i32 loaded
; through %1, not by the counter, so it is a second, independent loop exit.
body:
%c = add i64 %a, 1
%d = load i32, ptr %1, align 4
%e = icmp eq i32 %d, 0
br i1 %e, label %end, label %header

; Target of the load-controlled exit in %body.
end:
ret void

; Target of the counter-controlled exit in %header; the only path that calls @foo.
after:
call void @foo()
ret void
}

declare void @foo()

attributes #0 = { "tune-cpu"="generic" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This `tune-cpu` attribute should not be needed, should it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When it is not set, the CPU chosen on my machine is "i586", and from what I can see in UnrollingPreferences it has different values from the "generic" one, so the runtime unrolling code is not even run. I suppose that is because the Runtime flag is disabled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try adding -unroll-runtime=true -unroll-runtime-multi-exit=true.

Also, it would be nice to precommit this change in a separate MR with all the options (including the other-exit-predictable one), so that we can see that the patch with rotation helps to runtime-unroll the loop.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@annamthomas Thank you, adding the -unroll-runtime flag helped.

;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
;.
Loading