diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index a6bb8b8a21b04..cf84366c4200b 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -186,6 +186,9 @@ static bool isSafeToExecuteUnconditionally(
     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
     AssumptionCache *AC, bool AllowSpeculation);
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags);
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,
@@ -1234,8 +1237,11 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                                               /*InvariantGroup=*/false);
     }
 
-    // FIXME: This should use mod/ref information to see if we can hoist or
-    // sink the call.
+    if (Behavior.onlyWritesMemory()) {
+      // Can hoist or sink the call if there are no conflicting reads/writes
+      // to the memory location it writes to.
+      return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
+    }
     return false;
   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
@@ -1253,57 +1259,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     // arbitrary number of reads in the loop.
     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
       return true;
-    // If there are more accesses than the Promotion cap, then give up as we're
-    // not walking a list that long.
-    if (Flags.tooManyMemoryAccesses())
-      return false;
-
-    auto *SIMD = MSSA->getMemoryAccess(SI);
-    BatchAAResults BAA(*AA);
-    auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
-    // Make sure there are no clobbers inside the loop.
-    if (!MSSA->isLiveOnEntryDef(Source) &&
-        CurLoop->contains(Source->getBlock()))
-      return false;
-
-    // If there are interfering Uses (i.e. their defining access is in the
-    // loop), or ordered loads (stored as Defs!), don't move this store.
-    // Could do better here, but this is conservatively correct.
-    // TODO: Cache set of Uses on the first walk in runOnLoop, update when
-    // moving accesses. Can also extend to dominating uses.
-    for (auto *BB : CurLoop->getBlocks())
-      if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
-        for (const auto &MA : *Accesses)
-          if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
-            auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
-                                                 const_cast<MemoryUse *>(MU));
-            if (!MSSA->isLiveOnEntryDef(MD) &&
-                CurLoop->contains(MD->getBlock()))
-              return false;
-            // Disable hoisting past potentially interfering loads. Optimized
-            // Uses may point to an access outside the loop, as getClobbering
-            // checks the previous iteration when walking the backedge.
-            // FIXME: More precise: no Uses that alias SI.
-            if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
-              return false;
-          } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
-            if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
-              (void)LI; // Silence warning.
-              assert(!LI->isUnordered() && "Expected unordered load");
-              return false;
-            }
-            // Any call, while it may not be clobbering SI, it may be a use.
-            if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
-              // Check if the call may read from the memory location written
-              // to by SI. Check CI's attributes and arguments; the number of
-              // such checks performed is limited above by NoOfMemAccTooLarge.
-              ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
-              if (isModOrRefSet(MRI))
-                return false;
-            }
-          }
-      }
-    return true;
+    return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
   }
 
   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -2330,6 +2286,77 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
   return Result;
 }
 
+// For a given store instruction or writeonly call instruction, this function
+// checks that there are no reads or writes that conflict with the memory
+// access performed by the instruction.
+static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
+                                    AAResults *AA, Loop *CurLoop,
+                                    SinkAndHoistLICMFlags &Flags) {
+  assert(isa<CallInst>(*I) || isa<StoreInst>(*I));
+  // If there are more accesses than the Promotion cap, then give up as we're
+  // not walking a list that long.
+  if (Flags.tooManyMemoryAccesses())
+    return false;
+
+  auto *IMD = MSSA->getMemoryAccess(I);
+  BatchAAResults BAA(*AA);
+  auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
+  // Make sure there are no clobbers inside the loop.
+  if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
+    return false;
+
+  // If there are interfering Uses (i.e. their defining access is in the
+  // loop), or ordered loads (stored as Defs!), don't move this store.
+  // Could do better here, but this is conservatively correct.
+  // TODO: Cache set of Uses on the first walk in runOnLoop, update when
+  // moving accesses. Can also extend to dominating uses.
+  for (auto *BB : CurLoop->getBlocks()) {
+    auto *Accesses = MSSA->getBlockAccesses(BB);
+    if (!Accesses)
+      continue;
+    for (const auto &MA : *Accesses)
+      if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
+        auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
+                                             const_cast<MemoryUse *>(MU));
+        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
+          return false;
+        // Disable hoisting past potentially interfering loads. Optimized
+        // Uses may point to an access outside the loop, as getClobbering
+        // checks the previous iteration when walking the backedge.
+        // FIXME: More precise: no Uses that alias I.
+        if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
+          return false;
+      } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
+        if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
+          (void)LI; // Silence warning.
+          assert(!LI->isUnordered() && "Expected unordered load");
+          return false;
+        }
+        // Any call, while it may not be clobbering I, it may be a use.
+        if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
+          // Check if the call may read from the memory location written
+          // to by I. Check CI's attributes and arguments; the number of
+          // such checks performed is limited above by NoOfMemAccTooLarge.
+          if (auto *SI = dyn_cast<StoreInst>(I)) {
+            ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
+            if (isModOrRefSet(MRI))
+              return false;
+          } else {
+            auto *SCI = cast<CallInst>(I);
+            // If the instruction we want to hoist is this call instruction
+            // itself, there is no need to check mod/ref info with itself.
+            if (SCI == CI)
+              continue;
+            ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
+            if (isModOrRefSet(MRI))
+              return false;
+          }
+        }
+      }
+  }
+  return true;
+}
+
 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                      Loop *CurLoop, Instruction &I,
                                      SinkAndHoistLICMFlags &Flags,
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index e37dcf60506be..2864e0554a27e 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -64,6 +64,7 @@ define void @doesnt_need_and(i32 %arg) {
 ; GCN-LABEL: doesnt_need_and:
 ; GCN: ; %bb.0: ; %entry
 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT: s_mov_b32 s6, 0
 ; GCN-NEXT: s_mov_b64 s[4:5], 0
 ; GCN-NEXT: .LBB1_1: ; %loop
@@ -71,7 +72,6 @@ define void @doesnt_need_and(i32 %arg) {
 ; GCN-NEXT: s_add_i32 s6, s6, 1
 ; GCN-NEXT: v_cmp_le_u32_e32 vcc, s6, v0
 ; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GCN-NEXT: buffer_store_dword v0, off, s[4:7], s4
 ; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
 ; GCN-NEXT: s_cbranch_execnz .LBB1_1
 ; GCN-NEXT: ; %bb.2: ; %loopexit
diff --git a/llvm/test/Transforms/LICM/call-hoisting.ll b/llvm/test/Transforms/LICM/call-hoisting.ll
index 7124b4e445eb9..aa8c8bbed550e 100644
--- a/llvm/test/Transforms/LICM/call-hoisting.ll
+++ b/llvm/test/Transforms/LICM/call-hoisting.ll
@@ -118,14 +118,16 @@ exit:
 
 declare void @store(i32 %val, ptr %p) argmemonly writeonly nounwind
 
+; Loop-invariant calls to writeonly functions such as the one above should
+; be hoisted.
 define void @test(ptr %loc) {
 ; CHECK-LABEL: define void @test(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
@@ -150,10 +152,10 @@ define void @test_multiexit(ptr %loc, i1 %earlycnd) {
 ; CHECK-LABEL: define void @test_multiexit(
 ; CHECK-SAME: ptr [[LOC:%.*]], i1 [[EARLYCND:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
-; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
 ; CHECK: [[BACKEDGE]]:
@@ -183,6 +185,97 @@ exit2:
   ret void
 }
 
+; Cannot be hoisted because the two pointers may alias one another.
+define void @neg_two_pointer(ptr %loc, ptr %otherloc) {
+; CHECK-LABEL: define void @neg_two_pointer(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; Hoisted because the two pointers do not alias.
+define void @two_pointer_noalias(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @two_pointer_noalias(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 1, ptr [[OTHERLOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @store(i32 0, ptr %loc)
+  call void @store(i32 1, ptr %otherloc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
+; When there is a conflicting read, the store call should not be hoisted.
+define void @neg_conflicting_read(ptr noalias %loc, ptr noalias %otherloc) {
+; CHECK-LABEL: define void @neg_conflicting_read(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[OTHERLOC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @store(i32 0, ptr [[LOC]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  call void @store(i32 0, ptr %loc)
+  br label %loop
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @load(i32 0, ptr %loc)
+  call void @store(i32 0, ptr %loc)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+exit:
+  ret void
+}
+
 define void @neg_lv_value(ptr %loc) {
 ; CHECK-LABEL: define void @neg_lv_value(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
@@ -406,14 +499,47 @@ exit:
   ret void
 }
 
-define void @neg_not_argmemonly(ptr %loc) {
+; When the call is not argmemonly and is not the only memory access in the
+; loop, we do not hoist it.
+define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
 ; CHECK-LABEL: define void @neg_not_argmemonly(
-; CHECK-SAME: ptr [[LOC:%.*]]) {
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
 ; CHECK-NEXT: [[ENTRY:.*]]:
 ; CHECK-NEXT: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT: call void @load(i32 0, ptr [[LOC2]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [0, %entry], [%iv.next, %loop]
+  call void @not_argmemonly(i32 0, ptr %loc)
+  call void @load(i32 0, ptr %loc2)
+  %iv.next = add i32 %iv, 1
+  %cmp = icmp slt i32 %iv, 200
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+; When the call is not argmemonly but is the only memory access in the loop,
+; we hoist it.
+define void @not_argmemonly_hoisted(ptr %loc, ptr %loc2) {
+; CHECK-LABEL: define void @not_argmemonly_hoisted(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: call void @not_argmemonly(i32 0, ptr [[LOC]])
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 200
 ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]