Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
Expand Down Expand Up @@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero' idiom");

namespace llvm {
bool DisableLIRP::All;
static cl::opt<bool, true>
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
Expand Down Expand Up @@ -163,6 +165,10 @@ static cl::opt<bool> ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
cl::Hidden);

extern cl::opt<bool> ProfcheckDisableMetadataFixes;

} // namespace llvm

namespace {

class LoopIdiomRecognize {
Expand Down Expand Up @@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
SmallVector<uint32_t> BranchWeights;
const bool HasBranchWeights =
!ProfcheckDisableMetadataFixes &&
extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);

auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
if (HasBranchWeights) {
if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
std::swap(BranchWeights[0], BranchWeights[1]);
// We're not changing the loop profile, so we can reuse the original loop's
// profile.
setBranchWeights(*BI, BranchWeights,
/*IsExpected=*/false);
}

LoopHeaderBB->getTerminator()->eraseFromParent();

// Populate the IV PHI.
Expand Down Expand Up @@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
/// br label %for.cond
/// br label %loop
///
/// loop:
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
Expand Down Expand Up @@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {

// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
SmallVector<uint32_t> BranchWeights;
const bool HasBranchWeights =
!ProfcheckDisableMetadataFixes &&
extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);

auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
if (HasBranchWeights) {
if (InvertedCond)
std::swap(BranchWeights[0], BranchWeights[1]);
// We're not changing the loop profile, so we can reuse the original loop's
// profile.
setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
}
LoopHeaderBB->getTerminator()->eraseFromParent();

// Populate the IV PHI.
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; RUN: opt -passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)" -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck --check-prefix=PROFILE %s

declare void @escape_inner(i8, i8, i8, i1, i8)
declare void @escape_outer(i8, i8, i8, i1, i8)

declare i8 @gen.i8()

; Most basic pattern; note that if the shift amount is offset, said offsetting
; must not cause an overflow, but `add nsw` is fine.
;
; The loop increments %iv and exits once %val, arithmetically shifted right by
; (%iv + %extraoffset), becomes zero. The loop's conditional branch carries
; !prof branch weights of 1 for the exit edge (%end, taken when the condition
; is true) and 1000 for the back edge (%loop). The PROFILE lines in this file
; expect a block frequency of 1001.0 for `loop` in both block-freq printouts,
; i.e. before and after loop-idiom runs.
define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
entry:
br label %loop

loop:
; Induction variable: starts at %start and is incremented by 1 per iteration.
%iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
; Shift amount is the IV plus an offset; `nsw` marks the add as non-overflowing.
%nbits = add nsw i8 %iv, %extraoffset
%val.shifted = ashr i8 %val, %nbits
; Exit condition: the shifted value is zero.
%val.shifted.iszero = icmp eq i8 %val.shifted, 0
%iv.next = add i8 %iv, 1

; Hand every value computed in the loop to an external function.
; NOTE(review): presumably this keeps all of them observably used inside the
; loop -- confirm against the pass's requirements.
call void @escape_inner(i8 %iv, i8 %nbits, i8 %val.shifted, i1 %val.shifted.iszero, i8 %iv.next)

; True successor is the exit (weight 1); false successor is the back edge
; (weight 1000).
br i1 %val.shifted.iszero, label %end, label %loop, !prof !{!"branch_weights", i32 1, i32 1000 }

end:
; Single-incoming phis that carry each loop value out of the loop.
%iv.res = phi i8 [ %iv, %loop ]
%nbits.res = phi i8 [ %nbits, %loop ]
%val.shifted.res = phi i8 [ %val.shifted, %loop ]
%val.shifted.iszero.res = phi i1 [ %val.shifted.iszero, %loop ]
%iv.next.res = phi i8 [ %iv.next, %loop ]

; Hand the loop's final values to an external function as well.
call void @escape_outer(i8 %iv.res, i8 %nbits.res, i8 %val.shifted.res, i1 %val.shifted.iszero.res, i8 %iv.next.res)

ret i8 %iv.res
}

; Shift-left-until-bit-set pattern: %x.curr is shifted left by one each
; iteration for as long as the bit selected by %bitmask (1 << %bit) is unset.
; The value returned is %x.curr from the final iteration.
;
; Unlike @p0, the loop's conditional branch here has the back edge (%loop) as
; its true successor and the exit (%end) as its false successor, with !prof
; branch weights 500 (back edge) and 1 (exit). The PROFILE lines in this file
; expect a block frequency of 501.0 for `loop` in both block-freq printouts,
; i.e. before and after loop-idiom runs.
define i32 @p1(i32 %x, i32 %bit) {
entry:
; Mask with only bit number %bit set.
%bitmask = shl i32 1, %bit
br label %loop

loop:
%x.curr = phi i32 [ %x, %entry ], [ %x.next, %loop ]
; Isolate the tested bit of the current value.
%x.curr.bitmasked = and i32 %x.curr, %bitmask
; Continue-looping condition: the tested bit is still zero.
%x.curr.isbitunset = icmp eq i32 %x.curr.bitmasked, 0
%x.next = shl i32 %x.curr, 1
; True successor is the back edge (weight 500); false successor is the exit
; (weight 1).
br i1 %x.curr.isbitunset, label %loop, label %end, !prof !{!"branch_weights", i32 500, i32 1 }

end:
ret i32 %x.curr
}

;
; PROFILE: Printing analysis results of BFI for function 'p0':
; PROFILE: block-frequency-info: p0
; PROFILE: - entry: float = 1.0,
; PROFILE: - loop: float = 1001.0,
; PROFILE: - end: float = 1.0,
; PROFILE: block-frequency-info: p1
; PROFILE: - entry: float = 1.0,
; PROFILE: - loop: float = 501.0,
; PROFILE: - end: float = 1.0,
; PROFILE: block-frequency-info: p0
; PROFILE: - entry: float = 1.0,
; PROFILE: - loop: float = 1001.0,
; PROFILE: - end: float = 1.0,
; PROFILE: block-frequency-info: p1
; PROFILE: - entry: float = 1.0,
; PROFILE: - loop: float = 501.0,
; PROFILE: - end: float = 1.0,