diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 6e91c4fa6e230..6b4fc88cd3a72 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -164,7 +164,6 @@ struct LoopVersioningLICM { bool legalLoopInstructions(); bool legalLoopMemoryAccesses(); bool isLoopAlreadyVisited(); - void setNoAliasToLoop(Loop *VerLoop); bool instructionSafeForVersioning(Instruction *I); }; @@ -344,6 +343,13 @@ bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { } LoadAndStoreCounter++; Value *Ptr = St->getPointerOperand(); + // Don't allow stores that we don't have runtime checks for, as we won't be + // able to mark them noalias meaning they would prevent any code motion. + auto &Pointers = LAI->getRuntimePointerChecking()->Pointers; + if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) { + LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n"); + return false; + } // Check loop invariant. if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) InvariantCounter++; @@ -361,6 +367,13 @@ bool LoopVersioningLICM::legalLoopInstructions() { InvariantCounter = 0; IsReadOnlyLoop = true; using namespace ore; + // Get LoopAccessInfo from current loop via the proxy. + LAI = &LAIs.getInfo(*CurLoop); + // Check LoopAccessInfo for need of runtime check. + if (LAI->getRuntimePointerChecking()->getChecks().empty()) { + LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); + return false; + } // Iterate over loop blocks and instructions of each block and check // instruction safety. for (auto *Block : CurLoop->getBlocks()) @@ -374,13 +387,6 @@ bool LoopVersioningLICM::legalLoopInstructions() { return false; } } - // Get LoopAccessInfo from current loop via the proxy. - LAI = &LAIs.getInfo(*CurLoop); - // Check LoopAccessInfo for need of runtime check. - if (LAI->getRuntimePointerChecking()->getChecks().empty()) { - LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); - return false; - } // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold if (LAI->getNumRuntimePointerChecks() > VectorizerParams::RuntimeMemoryCheckThreshold) { @@ -501,41 +507,6 @@ bool LoopVersioningLICM::isLegalForVersioning() { return true; } -/// Update loop with aggressive aliasing assumptions. -/// It marks no-alias to any pairs of memory operations by assuming -/// loop should not have any must-alias memory accesses pairs. -/// During LoopVersioningLICM legality we ignore loops having must -/// aliasing memory accesses. -void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { - // Get latch terminator instruction. - Instruction *I = VerLoop->getLoopLatch()->getTerminator(); - // Create alias scope domain. - MDBuilder MDB(I->getContext()); - MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); - StringRef Name = "LVAliasScope"; - MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - SmallVector Scopes{NewScope}, NoAliases{NewScope}; - // Iterate over each instruction of loop. - // set no-alias for all load & store instructions. - for (auto *Block : CurLoop->getBlocks()) { - for (auto &Inst : *Block) { - // Only interested in instruction that may modify or read memory. - if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) - continue; - // Set no-alias for current instruction. - Inst.setMetadata( - LLVMContext::MD_noalias, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), - MDNode::get(Inst.getContext(), NoAliases))); - // set alias-scope for current instruction. - Inst.setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Inst.getContext(), Scopes))); - } - } -} - bool LoopVersioningLICM::run(DominatorTree *DT) { // Do not do the transformation if disabled by metadata. if (hasLICMVersioningTransformation(CurLoop) & TM_Disable) @@ -563,7 +534,7 @@ bool LoopVersioningLICM::run(DominatorTree *DT) { addStringMetadataToLoop(LVer.getVersionedLoop(), "llvm.mem.parallel_loop_access"); // Update version loop with aggressive aliasing assumption. - setNoAliasToLoop(LVer.getVersionedLoop()); + LVer.annotateLoopWithNoAlias(); Changed = true; } return Changed; diff --git a/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll new file mode 100644 index 0000000000000..e9b2954039198 --- /dev/null +++ b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt < %s -S -passes='function(loop-versioning-licm,loop-mssa(licm))' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + +; In these tests we have a loop where we can calculate the bounds of some memory +; accesses but not others. + +; Load from a gep whose bounds can't be calculated as the offset is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; As above but with a store to the loaded address. This should prevent the loop +; from being versioned, as we wouldn't be able to do any code motion. +define void @gep_loaded_offset_with_store(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset_with_store( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + store i32 0, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep whose bounds can't be calculated as the pointer is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_base( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load ptr, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[RVAL]], i64 0 +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load ptr, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %rval, i64 0 + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep with an offset that scalar evolution can't describe +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of qval +define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_strange_offset( +; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], %[[WHILE_BODY]] ], [ [[P]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[Q]], align 4 +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[DEC]], 2 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[REM]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[IDXPROM]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VAL]], [[QVAL]] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds nuw i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %qval = load i32, ptr %q, align 4 + %rem = srem i32 %dec, 2 + %idxprom = sext i32 %rem to i64 + %arrayidx = getelementptr inbounds i32, ptr %r, i64 %idxprom + %val = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %val, %qval + %incdec.ptr = getelementptr inbounds nuw i8, ptr %p.addr, i64 4 + store i32 %add, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the source address is loaded from a pointer +; FIXME: We should be able to hoist the load of the source address pointer +define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], %[[WHILE_BODY]] ], [ [[DST]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dst_val = phi ptr [ %dst_val.next, %while.body ], [ %dst, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the destination address is loaded from a pointer +; FIXME: We could hoist the load of the destination address, but doing the +; bounds check of the store through that pointer itself requires using the +; hoisted load. +define void @memcpy_load_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_dst( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[SRC_VAL:%.*]] = phi ptr [ [[SRC_VAL_NEXT:%.*]], %[[WHILE_BODY]] ], [ [[SRC]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %src_val = phi ptr [ %src_val.next, %while.body ], [ %src, %entry ] + %dec = add nsw i32 %n_val, -1 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where both the source and destination pointers are loaded from pointers +; FIXME: We could hoist the loads of both addresses, but doing the bounds check +; of the store through the destination address itself requires using the hoisted +; load. +define void @memcpy_load_src_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src_dst( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[WHILE_BODY:.*]] +; CHECK: [[WHILE_BODY]]: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], %[[WHILE_BODY]] ], [ [[N]], %[[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds nuw i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[WHILE_END:.*]], label %[[WHILE_BODY]] +; CHECK: [[WHILE_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds nuw i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds nuw i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll index 3f9bd9333805f..b4f8abbf83a53 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -57,23 +57,23 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture ; CHECK-NEXT: [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[INC_LVER_ORIG]], [[ITR]] ; CHECK-NEXT: br i1 [[CMP2_LVER_ORIG]], label [[FOR_BODY3_LVER_ORIG]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.body3.ph: -; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2:![0-9]+]], !noalias [[META2]] +; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[ADD86:%.*]] = phi i32 [ [[ARRAYIDX7_PROMOTED]], [[FOR_BODY3_PH]] ], [ [[ADD8:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[J_113:%.*]] = phi i32 [ [[J_016]], [[FOR_BODY3_PH]] ], [ [[INC:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[J_113]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1]], i64 [[IDXPROM]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope [[META2]], !noalias [[META2]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META2]] ; CHECK-NEXT: [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J_113]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]] -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.inc11.loopexit.loopexit: ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT:%.*]] ; CHECK: for.inc11.loopexit.loopexit5: ; CHECK-NEXT: [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ] -; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2]], !noalias [[META2]] +; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope [[META2]] ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT]] ; CHECK: for.inc11.loopexit: ; CHECK-NEXT: br label [[FOR_INC11]] diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll index 22ca534be7ae0..a31da2a212ea5 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -9,7 +9,7 @@ ; ; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us ; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ] -; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !0, !noalias !0 +; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !3 ; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us ; define i32 @foo(ptr nocapture %var2, ptr nocapture readonly %var3, i32 %itr) #0 {