diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h index bbe2741f44fc3..c9d3874e7dd96 100644 --- a/llvm/include/llvm/Analysis/PtrUseVisitor.h +++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h @@ -64,6 +64,9 @@ class PtrUseVisitorBase { /// Is the pointer escaped at some point? bool isEscaped() const { return EscapedInfo != nullptr; } + /// Is the pointer escaped into a read-only nocapture call at some point? + bool isEscapedReadOnly() const { return EscapedReadOnly != nullptr; } + /// Get the instruction causing the visit to abort. /// \returns a pointer to the instruction causing the abort if one is /// available; otherwise returns null. @@ -74,6 +77,10 @@ class PtrUseVisitorBase { /// is available; otherwise returns null. Instruction *getEscapingInst() const { return EscapedInfo; } + /// Get the instruction causing the pointer to escape which is a read-only + /// nocapture call. + Instruction *getEscapedReadOnlyInst() const { return EscapedReadOnly; } + /// Mark the visit as aborted. Intended for use in a void return. /// \param I The instruction which caused the visit to abort, if available. void setAborted(Instruction *I) { @@ -88,6 +95,12 @@ class PtrUseVisitorBase { EscapedInfo = I; } + /// Mark the pointer as escaped into a readonly-nocapture call. + void setEscapedReadOnly(Instruction *I) { + assert(I && "Expected a valid pointer in setEscapedReadOnly"); + EscapedReadOnly = I; + } + /// Mark the pointer as escaped, and the visit as aborted. Intended /// for use in a void return. /// \param I The instruction which both escapes the pointer and aborts the @@ -100,6 +113,7 @@ class PtrUseVisitorBase { private: Instruction *AbortedInfo = nullptr; Instruction *EscapedInfo = nullptr; + Instruction *EscapedReadOnly = nullptr; }; protected: diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 73649766a9538..989cf0b2d0e7b 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -188,6 +188,13 @@ class LoadAndStorePromoter { /// Return false if a sub-class wants to keep one of the loads/stores /// after the SSA construction. virtual bool shouldDelete(Instruction *I) const { return true; } + + /// Return the value to use for the point in the code that the alloca is + /// positioned. This will only be used if an Alloca is included in Insts, + /// otherwise the value of a uninitialized load will be assumed to be poison. + virtual Value *getValueToUseForAlloca(Instruction *AI) const { + return nullptr; + } }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index d80af26451ac7..5b78b9c80a92a 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -83,6 +84,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include #include #include @@ -246,6 +248,7 @@ class SROA { bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P); bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); + bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS); std::pair runOnAlloca(AllocaInst &AI); void clobberUse(Use &U); bool deleteDeadInstructions(SmallPtrSetImpl &DeletedAllocas); @@ -598,6 +601,7 @@ class AllocaSlices { /// If this is true, the slices are never fully built and should be /// ignored. bool isEscaped() const { return PointerEscapingInstr; } + bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; } /// Support for iterating over the slices. /// @{ @@ -680,6 +684,7 @@ class AllocaSlices { /// store a pointer to that here and abort trying to form slices of the /// alloca. This will be null if the alloca slices are analyzed successfully. Instruction *PointerEscapingInstr; + Instruction *PointerEscapingInstrReadOnly; /// The slices of the alloca. /// @@ -1390,6 +1395,18 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor { /// Disable SROA entirely if there are unhandled users of the alloca. void visitInstruction(Instruction &I) { PI.setAborted(&I); } + + void visitCallBase(CallBase &CB) { + // If the call operand is NoCapture ReadOnly, then we mark it as + // EscapedReadOnly. + if (CB.doesNotCapture(U->getOperandNo()) && + CB.onlyReadsMemory(U->getOperandNo())) { + PI.setEscapedReadOnly(&CB); + return; + } + + Base::visitCallBase(CB); + } }; AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) @@ -1397,7 +1414,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(nullptr) { + PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -1408,6 +1425,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) assert(PointerEscapingInstr && "Did not track a bad instruction"); return; } + PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst(); llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); @@ -1445,6 +1463,9 @@ void AllocaSlices::print(raw_ostream &OS) const { return; } + if (PointerEscapingInstrReadOnly) + OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n"; + OS << "Slices of alloca: " << AI << "\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) print(OS, I); @@ -5454,6 +5475,86 @@ void SROA::clobberUse(Use &U) { } } +/// A basic LoadAndStorePromoter that does not remove store nodes. +class BasicLoadAndStorePromoter : public LoadAndStorePromoter { +public: + BasicLoadAndStorePromoter(ArrayRef Insts, SSAUpdater &S, + Type *ZeroType) + : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {} + bool shouldDelete(Instruction *I) const override { + return !isa(I) && !isa(I); + } + + Value *getValueToUseForAlloca(Instruction *I) const override { + return UndefValue::get(ZeroType); + } + +private: + Type *ZeroType; +}; + +bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) { + // Look through each "partition", looking for slices with the same start/end + // that do not overlap with any before them. The slices are sorted by + // increasing beginOffset. We don't use AS.partitions(), as it will use a more + // sophisticated algorithm that takes splittable slices into account. + auto PartitionBegin = AS.begin(); + auto PartitionEnd = PartitionBegin; + uint64_t BeginOffset = PartitionBegin->beginOffset(); + uint64_t EndOffset = PartitionBegin->endOffset(); + while (PartitionBegin != AS.end()) { + bool AllSameAndValid = true; + SmallVector Insts; + Type *PartitionType = nullptr; + while (PartitionEnd != AS.end() && + (PartitionEnd->beginOffset() < EndOffset || + PartitionEnd->endOffset() <= EndOffset)) { + if (AllSameAndValid) { + AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset && + PartitionEnd->endOffset() == EndOffset; + Instruction *User = + cast(PartitionEnd->getUse()->getUser()); + if (auto *LI = dyn_cast(User)) { + Type *UserTy = LI->getType(); + // LoadAndStorePromoter requires all the types to be the same. + if (!LI->isSimple() || (PartitionType && UserTy != PartitionType)) + AllSameAndValid = false; + PartitionType = UserTy; + Insts.push_back(User); + } else if (auto *SI = dyn_cast(User)) { + Type *UserTy = SI->getValueOperand()->getType(); + if (!SI->isSimple() || PartitionType && UserTy != PartitionType) + AllSameAndValid = false; + PartitionType = UserTy; + Insts.push_back(User); + } else if (!isAssumeLikeIntrinsic(User)) { + AllSameAndValid = false; + } + } + EndOffset = std::max(EndOffset, PartitionEnd->endOffset()); + ++PartitionEnd; + } + + // So long as all the slices start and end offsets matched, update loads to + // the values stored in the partition. + if (AllSameAndValid && !Insts.empty()) { + LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", " + << EndOffset << ")\n"); + SmallVector NewPHIs; + SSAUpdater SSA(&NewPHIs); + Insts.push_back(&AI); + BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType); + Promoter.run(Insts); + } + + // Step on to the next partition. + PartitionBegin = PartitionEnd; + BeginOffset = PartitionBegin->beginOffset(); + EndOffset = PartitionBegin->endOffset(); + } + return true; +} + /// Analyze an alloca for SROA. /// /// This analyzes the alloca to ensure we can reason about it, builds @@ -5494,6 +5595,11 @@ SROA::runOnAlloca(AllocaInst &AI) { if (AS.isEscaped()) return {Changed, CFGChanged}; + if (AS.isEscapedReadOnly()) { + Changed |= propagateStoredValuesToLoads(AI, AS); + return {Changed, CFGChanged}; + } + // Delete all the dead users of this alloca before splitting and rewriting it. for (Instruction *DeadUser : AS.getDeadUsers()) { // Free up everything used by this instruction. diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index 597d470f18ff3..4bf4acd6330f5 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -412,9 +412,13 @@ void LoadAndStorePromoter::run(const SmallVectorImpl &Insts) { if (StoreInst *SI = dyn_cast(User)) { updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); - } else + } else if (auto *AI = dyn_cast(User)) { + // We treat AllocaInst as a store of an getValueToUseForAlloca value. + SSA.AddAvailableValue(BB, getValueToUseForAlloca(AI)); + } else { // Otherwise it is a load, queue it to rewrite as a live-in load. LiveInLoads.push_back(cast(User)); + } BlockUses.clear(); continue; } @@ -422,7 +426,7 @@ void LoadAndStorePromoter::run(const SmallVectorImpl &Insts) { // Otherwise, check to see if this block is all loads. bool HasStore = false; for (Instruction *I : BlockUses) { - if (isa(I)) { + if (isa(I) || isa(I)) { HasStore = true; break; } @@ -468,6 +472,12 @@ void LoadAndStorePromoter::run(const SmallVectorImpl &Insts) { // Remember that this is the active value in the block. StoredValue = SI->getOperand(0); + } else if (auto *AI = dyn_cast(&I)) { + // Check if this an alloca, in which case we treat it as a store of + // getValueToUseForAlloca. + if (!isInstInList(AI, Insts)) + continue; + StoredValue = getValueToUseForAlloca(AI); } } diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll index 87862b929a751..cc57abe391aa8 100644 --- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll +++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll @@ -9,19 +9,18 @@ define i32 @alloca_used_in_call(ptr %data, i64 %n) { ; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]]) -; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I1]] +; CHECK-NEXT: ret i32 [[RDX_INC]] ; entry: %retval = alloca i32, align 4 @@ -138,19 +137,18 @@ define i32 @alloca_not_captured_and_readonly_as_per_operand_attr(ptr %data, i64 ; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @capture_of_alloca(ptr nocapture readonly [[RETVAL]]) -; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I1]] +; CHECK-NEXT: ret i32 [[RDX_INC]] ; entry: %retval = alloca i32, align 4 @@ -267,19 +265,18 @@ define i32 @alloca_with_gep_used_in_call(ptr %data, i64 %n) { ; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]]) -; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I1]] +; CHECK-NEXT: ret i32 [[RDX_INC]] ; entry: %retval = alloca i32, align 4 @@ -353,11 +350,11 @@ define i32 @alloca_used_in_maybe_throwing_call(ptr %data, i64 %n) personality pt ; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -372,8 +369,7 @@ define i32 @alloca_used_in_maybe_throwing_call(ptr %data, i64 %n) personality pt ; CHECK-NEXT: catch ptr null ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I2]] +; CHECK-NEXT: ret i32 [[RDX_INC]] ; entry: %retval = alloca i32, align 4 @@ -413,11 +409,11 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(ptr %data, i64 %n ; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -430,8 +426,7 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(ptr %data, i64 %n ; CHECK-NEXT: catch ptr null ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I2]] +; CHECK-NEXT: ret i32 [[RDX_INC]] ; entry: %retval = alloca i32, align 4 @@ -472,11 +467,11 @@ define [2 x i32] @part_of_alloca_used_in_call(ptr %data, i64 %n) { ; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -484,11 +479,9 @@ define [2 x i32] @part_of_alloca_used_in_call(ptr %data, i64 %n) { ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr [[RETVAL]]) ; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0 ; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -525,11 +518,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(ptr %data, ; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -537,11 +530,9 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(ptr %data, ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL_FULL]]) ; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0 ; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -688,11 +679,11 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(ptr %data, i64 ; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -700,11 +691,9 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(ptr %data, i64 ; CHECK: exit: ; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL]], ptr [[RETVAL]]) ; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I1_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0 ; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RDX_INC]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -742,11 +731,11 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(ptr %data ; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RDX]], [[LD]] ; CHECK-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] @@ -756,11 +745,9 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(ptr %data ; CHECK-NEXT: [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr [[RETVAL_FULL]], ptr [[RETVAL]]) ; CHECK-NEXT: [[I2:%.*]] = call i32 @capture_of_alloca(ptr [[SOME_ANOTHER_ALLOCA_FULL]]) ; CHECK-NEXT: [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I3_FCA_0_LOAD:%.*]] = load i32, ptr [[I3_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 [[I3_FCA_0_LOAD]], 0 +; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] poison, i32 0, 0 ; CHECK-NEXT: [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I3_FCA_1_LOAD:%.*]] = load i32, ptr [[I3_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[RDX_INC]], 1 ; CHECK-NEXT: ret [2 x i32] [[I3_FCA_1_INSERT]] ; entry: @@ -851,8 +838,7 @@ define i8 @dont_transform_load_only() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 ; CHECK-NEXT: call void @byte_user_of_alloca(ptr [[A]]) -; CHECK-NEXT: [[R:%.*]] = load i8, ptr [[A]], align 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 undef ; entry: %a = alloca i8 @@ -866,8 +852,7 @@ define i8 @transform_load_and_store() { ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 ; CHECK-NEXT: store i8 0, ptr [[A]], align 1 ; CHECK-NEXT: call void @byte_user_of_alloca(ptr [[A]]) -; CHECK-NEXT: [[R:%.*]] = load i8, ptr [[A]], align 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; entry: %a = alloca i8 diff --git a/llvm/test/Transforms/SROA/readonlynocapture.ll b/llvm/test/Transforms/SROA/readonlynocapture.ll index 2d02996d806ed..2284a00126678 100644 --- a/llvm/test/Transforms/SROA/readonlynocapture.ll +++ b/llvm/test/Transforms/SROA/readonlynocapture.ll @@ -8,8 +8,7 @@ define i32 @simple() { ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 0, ptr [[A]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[L1]] +; CHECK-NEXT: ret i32 0 ; %a = alloca i32 store i32 0, ptr %a @@ -40,9 +39,7 @@ define i32 @twoalloc() { ; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1 ; CHECK-NEXT: store i32 1, ptr [[B]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]] +; CHECK-NEXT: [[R:%.*]] = add i32 0, 1 ; CHECK-NEXT: ret i32 [[R]] ; %a = alloca {i32, i32} @@ -62,8 +59,7 @@ define i32 @twostore() { ; CHECK-NEXT: store i32 1, ptr [[A]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) ; CHECK-NEXT: store i32 2, ptr [[A]], align 4 -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[L]] +; CHECK-NEXT: ret i32 2 ; %a = alloca i32 store i32 1, ptr %a @@ -116,10 +112,8 @@ define i32 @twocalls() { ; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1 ; CHECK-NEXT: store i32 1, ptr [[B]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]] +; CHECK-NEXT: [[R:%.*]] = add i32 0, 1 ; CHECK-NEXT: ret i32 [[R]] ; %a = alloca {i32, i32} @@ -165,8 +159,7 @@ define i32 @atomic() { ; CHECK-NEXT: store i32 1, ptr [[B]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) ; CHECK-NEXT: [[L1:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4 -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]] +; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], 1 ; CHECK-NEXT: ret i32 [[R]] ; %a = alloca {i32, i32} @@ -184,12 +177,10 @@ define i32 @notdominating() { ; CHECK-LABEL: @notdominating( ; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8 ; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1 -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: store i32 0, ptr [[A]], align 4 ; CHECK-NEXT: store i32 1, ptr [[B]], align 4 ; CHECK-NEXT: call void @callee(ptr [[A]]) -; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]] +; CHECK-NEXT: [[R:%.*]] = add i32 undef, undef ; CHECK-NEXT: ret i32 [[R]] ; %a = alloca {i32, i32} @@ -235,9 +226,7 @@ define i32 @multiuse() { ; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1 ; CHECK-NEXT: store i32 1, ptr [[B]], align 4 ; CHECK-NEXT: call void @callee_multiuse(ptr [[A]], ptr [[A]]) -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4 -; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]] +; CHECK-NEXT: [[R:%.*]] = add i32 0, 1 ; CHECK-NEXT: ret i32 [[R]] ; %a = alloca {i32, i32} @@ -296,8 +285,7 @@ define void @incompletestruct(i1 %b, i1 %c) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LII:%.*]] = alloca [[STRUCT_LOADIMMEDIATEINFO:%.*]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[LII]]) -; CHECK-NEXT: [[BF_LOAD:%.*]] = load i32, ptr [[LII]], align 4 -; CHECK-NEXT: [[BF_CLEAR4:%.*]] = and i32 [[BF_LOAD]], -262144 +; CHECK-NEXT: [[BF_CLEAR4:%.*]] = and i32 undef, -262144 ; CHECK-NEXT: [[BF_SET5:%.*]] = select i1 [[B:%.*]], i32 196608, i32 131072 ; CHECK-NEXT: [[BF_SET12:%.*]] = or disjoint i32 [[BF_SET5]], [[BF_CLEAR4]] ; CHECK-NEXT: store i32 [[BF_SET12]], ptr [[LII]], align 4 @@ -325,8 +313,7 @@ define void @incompletestruct_bb(i1 %b, i1 %c) { ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[LII]]) -; CHECK-NEXT: [[BF_LOAD:%.*]] = load i32, ptr [[LII]], align 4 -; CHECK-NEXT: [[BF_CLEAR4:%.*]] = and i32 [[BF_LOAD]], -262144 +; CHECK-NEXT: [[BF_CLEAR4:%.*]] = and i32 undef, -262144 ; CHECK-NEXT: [[BF_SET5:%.*]] = select i1 [[B:%.*]], i32 196608, i32 131072 ; CHECK-NEXT: [[BF_SET12:%.*]] = or disjoint i32 [[BF_SET5]], [[BF_CLEAR4]] ; CHECK-NEXT: store i32 [[BF_SET12]], ptr [[LII]], align 4