133 changes: 80 additions & 53 deletions llvm/lib/Transforms/Scalar/LICM.cpp
@@ -186,6 +186,9 @@ static bool isSafeToExecuteUnconditionally(
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
AssumptionCache *AC, bool AllowSpeculation);
static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
AAResults *AA, Loop *CurLoop,
SinkAndHoistLICMFlags &Flags);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags,
@@ -1234,8 +1237,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
/*InvariantGroup=*/false);
}

// FIXME: This should use mod/ref information to see if we can hoist or
// sink the call.
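// For example, the test callee from llvm/test/Transforms/LICM/call-hoisting.ll,
//   declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
// reports onlyWritesMemory() and is handled by the path below.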
if (Behavior.onlyWritesMemory()) {
// We can hoist or sink the call if there are no conflicting reads or
// writes to the memory location it writes to.
return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
}

return false;
} else if (auto *FI = dyn_cast<FenceInst>(&I)) {
@@ -1253,57 +1259,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// arbitrary number of reads in the loop.
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
// If there are more accesses than the Promotion cap, then give up as we're
// not walking a list that long.
if (Flags.tooManyMemoryAccesses())
return false;

auto *SIMD = MSSA->getMemoryAccess(SI);
BatchAAResults BAA(*AA);
auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
// Make sure there are no clobbers inside the loop.
if (!MSSA->isLiveOnEntryDef(Source) &&
CurLoop->contains(Source->getBlock()))
return false;

// If there are interfering Uses (i.e. their defining access is in the
// loop), or ordered loads (stored as Defs!), don't move this store.
// Could do better here, but this is conservatively correct.
// TODO: Cache set of Uses on the first walk in runOnLoop, update when
// moving accesses. Can also extend to dominating uses.
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses)
if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
const_cast<MemoryUse *>(MU));
if (!MSSA->isLiveOnEntryDef(MD) &&
CurLoop->contains(MD->getBlock()))
return false;
// Disable hoisting past potentially interfering loads. Optimized
// Uses may point to an access outside the loop, as getClobbering
// checks the previous iteration when walking the backedge.
// FIXME: More precise: no Uses that alias SI.
if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; // Silence warning.
assert(!LI->isUnordered() && "Expected unordered load");
return false;
}
// Any call, while it may not be clobbering SI, it may be a use.
if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
// Check if the call may read from the memory location written
// to by SI. Check CI's attributes and arguments; the number of
// such checks performed is limited above by NoOfMemAccTooLarge.
ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
if (isModOrRefSet(MRI))
return false;
}
}
}
return true;
return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
}

assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -2330,6 +2286,77 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}

// For a given store instruction or writeonly call instruction, this function
// checks that there are no reads or writes that conflict with the memory
// access performed by the instruction.
static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
AAResults *AA, Loop *CurLoop,
SinkAndHoistLICMFlags &Flags) {
assert(isa<CallInst>(*I) || isa<StoreInst>(*I));
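// Both kinds of instruction only write memory; the checks below verify that
// no other access in the loop may read from or write to the locations the
// instruction writes.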
// If there are more accesses than the Promotion cap, then give up as we're
// not walking a list that long.
if (Flags.tooManyMemoryAccesses())
return false;

auto *IMD = MSSA->getMemoryAccess(I);
BatchAAResults BAA(*AA);
auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
// Make sure there are no clobbers inside the loop.
if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
return false;

// If there are interfering Uses (i.e. their defining access is in the
// loop), or ordered loads (stored as Defs!), don't move this instruction.
// Could do better here, but this is conservatively correct.
// TODO: Cache set of Uses on the first walk in runOnLoop, update when
// moving accesses. Can also extend to dominating uses.
for (auto *BB : CurLoop->getBlocks()) {
auto *Accesses = MSSA->getBlockAccesses(BB);
if (!Accesses)
continue;
for (const auto &MA : *Accesses)
if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
const_cast<MemoryUse *>(MU));
if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
return false;
// Disable hoisting past potentially interfering loads. Optimized
// Uses may point to an access outside the loop, as getClobbering
// checks the previous iteration when walking the backedge.
// FIXME: More precise: no Uses that alias I.
if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
return false;
} else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
(void)LI; // Silence warning.
assert(!LI->isUnordered() && "Expected unordered load");
return false;
}
// Any call, while it may not be clobbering I, may still be a use.
if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
// Check if the call may read from the memory location written
// to by I. Check CI's attributes and arguments; the number of
// such checks performed is limited above by NoOfMemAccTooLarge.
if (auto *SI = dyn_cast<StoreInst>(I)) {
ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
if (isModOrRefSet(MRI))
return false;
} else {
auto *SCI = cast<CallInst>(I);
// If the instruction we want to hoist is itself a call, we need not
// check mod/ref info against itself.
if (SCI == CI)
continue;
ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
if (isModOrRefSet(MRI))
return false;
}
}
}
}
return true;
}

static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
SinkAndHoistLICMFlags &Flags,
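In effect, LICM now treats a loop-invariant call to a writeonly function like a loop-invariant store: the call may be hoisted (or sunk) when no other access in the loop can read or write the memory it touches. A minimal before/after sketch of the new behavior, assuming the @store callee declared in the tests below and a plain opt -passes=licm run:

; Before LICM: the loop-invariant writeonly call sits in the loop body.
loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  call void @store(i32 0, ptr %loc)
  %iv.next = add i32 %iv, 1
  %cmp = icmp slt i32 %iv, 200
  br i1 %cmp, label %loop, label %exit

; After LICM: the call has been hoisted into the preheader.
entry:
  call void @store(i32 0, ptr %loc)
  br label %loop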
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -64,14 +64,14 @@ define void @doesnt_need_and(i32 %arg) {
; GCN-LABEL: doesnt_need_and:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: .LBB1_1: ; %loop
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_add_i32 s6, s6, 1
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT: s_cbranch_execnz .LBB1_1
; GCN-NEXT: ; %bb.2: ; %loopexit
134 changes: 130 additions & 4 deletions llvm/test/Transforms/LICM/call-hoisting.ll
@@ -118,14 +118,16 @@ exit:

declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind

; loop-invariant calls to writeonly functions such as the one above
; should be hoisted
define void @test(ptr %loc) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[LOC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -150,10 +152,10 @@ define void @test_multiexit(ptr %loc, i1 %earlycnd) {
; CHECK-LABEL: define void @test_multiexit(
; CHECK-SAME: ptr [[LOC:%.*]], i1 [[EARLYCND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
; CHECK: [[BACKEDGE]]:
@@ -183,6 +185,97 @@ exit2:
ret void
}

; cannot be hoisted because the two pointers can alias one another
define void @neg_two_pointer(ptr %loc, ptr %otherloc) {
; CHECK-LABEL: define void @neg_two_pointer(
; CHECK-SAME: ptr [[LOC:%.*]], ptr [[OTHERLOC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i32 [0, %entry], [%iv.next, %loop]
call void @store(i32 0, ptr %loc)
call void @store(i32 1, ptr %otherloc)
%iv.next = add i32 %iv, 1
%cmp = icmp slt i32 %iv, 200
br i1 %cmp, label %loop, label %exit
exit:
ret void
}

; hoisted due to pointers not aliasing
define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
; CHECK-LABEL: define void @two_pointer_noalias(
; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i32 [0, %entry], [%iv.next, %loop]
call void @store(i32 0, ptr %loc)
call void @store(i32 1, ptr %otherloc)
%iv.next = add i32 %iv, 1
%cmp = icmp slt i32 %iv, 200
br i1 %cmp, label %loop, label %exit
exit:
ret void
}

; when there's a conflicting read, the store call should not be hoisted
define void @neg_conflicting_read(ptr noalias %loc, ptr noalias %otherloc) {
; CHECK-LABEL: define void @neg_conflicting_read(
; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: call void @load(i32 0, ptr [[LOC]])
; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
call void @store(i32 0, ptr %loc)
br label %loop
loop:
%iv = phi i32 [0, %entry], [%iv.next, %loop]
call void @load(i32 0, ptr %loc)
call void @store(i32 0, ptr %loc)
%iv.next = add i32 %iv, 1
%cmp = icmp slt i32 %iv, 200
br i1 %cmp, label %loop, label %exit
exit:
ret void
}

define void @neg_lv_value(ptr %loc) {
; CHECK-LABEL: define void @neg_lv_value(
; CHECK-SAME: ptr [[LOC:%.*]]) {
@@ -406,14 +499,47 @@ exit:
ret void
}

define void @neg_not_argmemonly(ptr %loc) {
; when the call is not argmemonly and is not the only memory access, we
; do not hoist it
define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
; CHECK-LABEL: define void @neg_not_argmemonly(
; CHECK-SAME: ptr [[LOC:%.*]]) {
; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
; CHECK-NEXT: call void @load(i32 0, ptr [[LOC2]])
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop

loop:
%iv = phi i32 [0, %entry], [%iv.next, %loop]
call void @not_argmemonly(i32 0, ptr %loc)
call void @load(i32 0, ptr %loc2)
%iv.next = add i32 %iv, 1
%cmp = icmp slt i32 %iv, 200
br i1 %cmp, label %loop, label %exit

exit:
ret void
}

; when the call is not argmemonly but is the only memory access, we hoist it
define void @not_argmemonly_hoisted(ptr %loc, ptr %loc2) {
; CHECK-LABEL: define void @not_argmemonly_hoisted(
; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]