diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7a4abe9ee5082..599ad3afd008c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1977,6 +1977,11 @@ class TargetTransformInfo {
   /// target.
   LLVM_ABI bool allowVectorElementIndexingUsingGEP() const;
 
+  /// \returns True if the target does not support struct allocas and therefore
+  /// requires struct alloca instructions to be scalarized / decomposed into
+  /// their components.
+  LLVM_ABI bool shouldDecomposeStructAllocas() const;
+
 private:
   std::unique_ptr<const TargetTransformInfoImplBase> TTIImpl;
 };
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 566e1cf51631a..6b8fc753580ac 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1163,6 +1163,8 @@ class TargetTransformInfoImplBase {
 
   virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
 
+  virtual bool shouldDecomposeStructAllocas() const { return false; }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 8e68b6a57e51f..20c05687e1b4c 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -44,7 +44,8 @@ LLVM_ABI FunctionPass *createDeadStoreEliminationPass();
 //
 // SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
 //
-LLVM_ABI FunctionPass *createSROAPass(bool PreserveCFG = true);
+LLVM_ABI FunctionPass *createSROAPass(bool PreserveCFG = true,
+                                      bool DecomposeStructs = false);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
index 8bb65bf7225e0..1de37b749f847 100644
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -21,15 +21,31 @@ namespace llvm {
 
 class Function;
 
-enum class SROAOptions : bool { ModifyCFG, PreserveCFG };
+struct SROAOptions {
+  enum PreserveCFGOption : bool { ModifyCFG, PreserveCFG };
+  enum DecomposeStructsOption : bool { NoDecomposeStructs, DecomposeStructs };
+  PreserveCFGOption PCFGOption;
+  DecomposeStructsOption DSOption;
+  SROAOptions(PreserveCFGOption PCFGOption)
+      : PCFGOption(PCFGOption), DSOption(NoDecomposeStructs) {}
+  SROAOptions(PreserveCFGOption PCFGOption, DecomposeStructsOption DSOption)
+      : PCFGOption(PCFGOption), DSOption(DSOption) {}
+};
 
 class SROAPass : public PassInfoMixin<SROAPass> {
-  const SROAOptions PreserveCFG;
+  const SROAOptions Options;
 
 public:
   /// If \p PreserveCFG is set, then the pass is not allowed to modify CFG
   /// in any way, even if it would update CFG analyses.
-  SROAPass(SROAOptions PreserveCFG);
+  SROAPass(SROAOptions::PreserveCFGOption PreserveCFG);
+
+  /// If \p Options.PreserveCFG is set, then the pass is not allowed to modify
+  /// CFG in any way, even if it would update CFG analyses.
+  /// If \p Options.DecomposeStructs is set, then the pass will decompose
+  /// struct allocas into their constituent components regardless of whether or
+  /// not pointer offsets into them are known at compile time.
+  SROAPass(const SROAOptions &Options);
 
   /// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index bf62623099a97..dee1dd7b2a710 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1506,6 +1506,10 @@ bool TargetTransformInfo::allowVectorElementIndexingUsingGEP() const { return TTIImpl->allowVectorElementIndexingUsingGEP(); } +bool TargetTransformInfo::shouldDecomposeStructAllocas() const { + return TTIImpl->shouldDecomposeStructAllocas(); +} + TargetTransformInfoImplBase::~TargetTransformInfoImplBase() = default; TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index c234623caecf9..4f918a33f4dc3 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1353,16 +1353,29 @@ Expected parseScalarizerOptions(StringRef Params) { } Expected parseSROAOptions(StringRef Params) { - if (Params.empty() || Params == "modify-cfg") - return SROAOptions::ModifyCFG; - if (Params == "preserve-cfg") - return SROAOptions::PreserveCFG; - return make_error( - formatv("invalid SROA pass parameter '{}' (either preserve-cfg or " - "modify-cfg can be specified)", - Params) - .str(), - inconvertibleErrorCode()); + SROAOptions::PreserveCFGOption PreserveCFG = SROAOptions::ModifyCFG; + SROAOptions::DecomposeStructsOption DecomposeStructs = + SROAOptions::NoDecomposeStructs; + + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName.consume_front("preserve-cfg")) + PreserveCFG = SROAOptions::PreserveCFG; + else if (ParamName.consume_front("modify-cfg")) + PreserveCFG = SROAOptions::ModifyCFG; + else if (ParamName.consume_front("no-decompose-structs")) + DecomposeStructs = SROAOptions::NoDecomposeStructs; + else if (ParamName.consume_front("decompose-structs")) + DecomposeStructs = SROAOptions::DecomposeStructs; + else + return make_error( + formatv("invalid SROA pass option '{}'", ParamName).str(), + inconvertibleErrorCode()); + } + + return SROAOptions(PreserveCFG, DecomposeStructs); } Expected diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index bcf84403b2c0d..29df12b24850e 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include @@ -107,6 +108,10 @@ class DirectXPassConfig : public TargetPassConfig { FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; } void addCodeGenPrepare() override { + // Clang does not apply SROA with -O0, but it is required for DXIL. So we + // add SROA here when -O0 is given. 
+ if (getOptLevel() == CodeGenOptLevel::None) + addPass(createSROAPass(/*PreserveCFG=*/true, /*DecomposeStructs=*/true)); addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createGlobalDCEPass()); addPass(createDXILResourceAccessLegacyPass()); diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 68fd3e0bc74c7..8193b5c40acc4 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -65,3 +65,5 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( return false; } } + +bool DirectXTTIImpl::shouldDecomposeStructAllocas() const { return true; } diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h index e2dd4354a8167..5a15d0a4f8510 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h @@ -39,6 +39,7 @@ class DirectXTTIImpl final : public BasicTTIImplBase { unsigned ScalarOpdIdx) const override; bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const override; + bool shouldDecomposeStructAllocas() const override; }; } // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 45d3d493a9e68..1e21f90367e8c 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" @@ -56,6 +57,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GEPNoWrapFlags.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -172,8 +174,10 @@ using RewriteableMemOps = SmallVector; class SROA { LLVMContext *const C; DomTreeUpdater *const DTU; + TargetTransformInfo *const TTI; AssumptionCache *const AC; const bool PreserveCFG; + const bool DecomposeStructs; /// Worklist of alloca instructions to simplify. /// @@ -235,10 +239,11 @@ class SROA { isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG); public: - SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC, - SROAOptions PreserveCFG_) - : C(C), DTU(DTU), AC(AC), - PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {} + SROA(LLVMContext *C, DomTreeUpdater *DTU, TargetTransformInfo *TTI, + AssumptionCache *AC, const SROAOptions &Options) + : C(C), DTU(DTU), TTI(TTI), AC(AC), + PreserveCFG(Options.PCFGOption == SROAOptions::PreserveCFG), + DecomposeStructs(Options.DSOption == SROAOptions::DecomposeStructs) {} /// Main run method used by both the SROAPass and by the legacy pass. std::pair runSROA(Function &F); @@ -246,6 +251,7 @@ class SROA { private: friend class AllocaSliceRewriter; + bool decomposeStructAlloca(AllocaInst &AI); bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P); bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); @@ -4511,6 +4517,299 @@ class AggLoadStoreRewriter : public InstVisitor { } // end anonymous namespace +/// Returns the pointee type of a given pointer value. 
+///
+/// This function inspects the provided `Value *Ptr`, which must be a pointer
+/// type, and attempts to determine the type of the object it points to. It
+/// handles several common LLVM IR constructs:
+///
+/// - `AllocaInst`: Returns the allocated type.
+/// - `GlobalValue`: Returns the value type of the global.
+/// - `GetElementPtrInst`: Returns the result element type.
+/// - `Argument`: If marked with `byval` or `byref`, returns the corresponding
+///   parameter type.
+///
+/// \param Ptr a pointer-typed Value.
+/// \returns the pointee `Type *` if it can be determined, or `nullptr`
+/// otherwise.
+static Type *getPointeeType(Value *Ptr) {
+  assert(Ptr->getType()->isPointerTy());
+  Type *Ty = nullptr;
+  if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Ptr))
+    Ty = Alloca->getAllocatedType();
+  else if (GlobalValue *GV = dyn_cast<GlobalValue>(Ptr))
+    Ty = GV->getValueType();
+  else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+    Ty = GEP->getResultElementType();
+  else if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+    if (Arg->hasByValAttr())
+      Ty = Arg->getParamByValType();
+    else if (Arg->hasByRefAttr())
+      Ty = Arg->getParamByRefType();
+  }
+  return Ty;
+}
+
+namespace {
+
+/// A visitor that determines whether or not a struct-based alloca can be
+/// decomposed into separate allocas for each of its individual members.
+///
+/// The analysis walks through the uses of the alloca, validating each
+/// instruction to ensure it conforms to expected patterns (e.g., constant GEP
+/// indices, correct struct types). If any unsupported or ambiguous access is
+/// encountered, the visitor is aborted.
+///
+/// This visitor provides iteration support over valid accesses to be replaced,
+/// tracks dead users after struct alloca decomposition, and exposes the
+/// first instruction that caused an abort if the visit determines that the
+/// struct alloca can not be decomposed.
+class StructDecompositionAnalysis
+    : public InstVisitor<StructDecompositionAnalysis> {
+public:
+  StructDecompositionAnalysis(AllocaInst &AI) {
+    this->AI = &AI;
+
+    // Ensure the allocated type is a struct or (multi-dimensional) array of
+    // structs.
+    Type *Ty = AI.getAllocatedType();
+    while (isa<ArrayType>(Ty))
+      Ty = Ty->getArrayElementType();
+    StructTy = dyn_cast<StructType>(Ty);
+    if (!StructTy) {
+      AbortedInfo = &AI;
+      return;
+    }
+    const DataLayout &DL = AI.getDataLayout();
+    assert(DL.getTypeAllocSize(StructTy).isFixed() &&
+           "The struct must have a fixed size!");
+    StructSizeInBytes = DL.getTypeAllocSize(StructTy).getFixedValue();
+
+    enqueueUses(AI);
+
+    // Visit all the uses off the worklist until it is empty or we abort.
+    while (!Worklist.empty() && !isAborted()) {
+      Use *U = Worklist.pop_back_val();
+      Instruction *User = cast<Instruction>(U->getUser());
+      visit(User);
+    }
+  }
+
+  /// Support for iterating over the accesses to the struct alloca.
+  /// @{
+  using iterator = SmallVector<Instruction *, 8>::iterator;
+  using range = iterator_range<iterator>;
+
+  iterator begin() { return Accesses.begin(); }
+  iterator end() { return Accesses.end(); }
+
+  using const_iterator = SmallVector<Instruction *, 8>::const_iterator;
+  using const_range = iterator_range<const_iterator>;
+
+  const_iterator begin() const { return Accesses.begin(); }
+  const_iterator end() const { return Accesses.end(); }
+  /// @}
+
+  /// If there are instructions that are not handled by the struct decomposer,
+  /// then abort decomposing the struct.
+  bool isAborted() { return AbortedInfo != nullptr; }
+
+  /// Get the instruction causing the visit to abort.
+  /// \returns a pointer to the instruction causing the abort if one is
+  /// available; otherwise returns null.
+  Instruction *getAbortingInst() const { return AbortedInfo; }
+
+  /// Access the dead users for this alloca after struct decomposition.
+  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
+  void print(raw_ostream &OS) const;
+  void dump(const_iterator I) const;
+  void dump() const;
+#endif
+
+private:
+  friend InstVisitor<StructDecompositionAnalysis>;
+
+  SmallVector<Instruction *, 8> Accesses;
+
+  /// The AllocaInst being visited, its size, and its corresponding StructType.
+  AllocaInst *AI;
+  uint64_t StructSizeInBytes;
+  StructType *StructTy;
+
+  /// The worklist of to-visit uses.
+  SmallVector<Use *, 8> Worklist;
+
+  /// If the struct is invalid to be decomposed, this analysis will be aborted.
+  Instruction *AbortedInfo = nullptr;
+
+  /// Users of the Alloca which will be considered dead if the Alloca is
+  /// decomposed.
+  SmallVector<Instruction *, 8> DeadUsers;
+
+  /// A set of visited uses to break cycles in unreachable code.
+  SmallPtrSet<Use *, 8> VisitedUses;
+
+  /// Set to de-duplicate dead instructions found in the use walk.
+  SmallPtrSet<Instruction *, 8> VisitedDeadInsts;
+
+  void enqueueUses(Value &I) {
+    for (Use &U : I.uses())
+      if (VisitedUses.insert(&U).second)
+        Worklist.push_back(&U);
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    // The GEPs visited must have a source element type of the struct or a
+    // (multi-dimensional) array of structs. Otherwise the intended access chain
+    // for the struct can be ambiguous.
+    unsigned StructMemberOperandIdx = 2;
+    Type *Ty = GEPI.getSourceElementType();
+    while (Ty->isArrayTy()) {
+      Ty = Ty->getArrayElementType();
+      StructMemberOperandIdx++;
+    }
+    if (Ty != StructTy) {
+      AbortedInfo = &GEPI;
+      return;
+    }
+
+    // If this GEP does not have the struct member index, then visit its uses.
+    if (GEPI.getNumOperands() < StructMemberOperandIdx + 1) {
+      markAsDead(GEPI);
+      enqueueUses(GEPI);
+      return;
+    }
+
+    // Ensure the struct member index is constant.
+    Value *StructMemberIdx = GEPI.getOperand(StructMemberOperandIdx);
+    if (!isa<ConstantInt>(StructMemberIdx)) {
+      AbortedInfo = &GEPI;
+      return;
+    }
+
+    Accesses.push_back(&GEPI);
+  }
+
+  void visitMemSetInst(MemSetInst &MSI) {
+    // Ensure the number of bytes set is a multiple of the struct size in
+    // bytes.
+    if (!MSI.getLengthInBytes()) {
+      AbortedInfo = &MSI;
+      return;
+    }
+    APInt Length = *MSI.getLengthInBytes();
+    if (Length.getZExtValue() % StructSizeInBytes != 0) {
+      AbortedInfo = &MSI;
+      return;
+    }
+
+    // Ensure we are setting the bytes of the correct type of struct.
+    Value *Dest = MSI.getDest();
+    if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Dest))
+      assert(Alloca == AI &&
+             "It should be impossible to visit the allocas of other structs!");
+    else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Dest)) {
+      [[maybe_unused]] Type *Ty = GEP->getResultElementType();
+      while (Ty->isArrayTy())
+        Ty = Ty->getArrayElementType();
+      assert(Ty == StructTy &&
+             "GEP must have a result element type of the expected struct or a "
+             "(multi-dimensional) array of it!");
+    } else {
+      AbortedInfo = &MSI;
+      return;
+    }
+    Accesses.push_back(&MSI);
+  }
+
+  void visitMemTransferInst(MemTransferInst &MTI) {
+    // Ensure the number of bytes transferred is a multiple of the struct size
+    // in bytes.
+    if (!MTI.getLengthInBytes()) {
+      AbortedInfo = &MTI;
+      return;
+    }
+    APInt Length = *MTI.getLengthInBytes();
+    if (Length.getZExtValue() % StructSizeInBytes != 0) {
+      AbortedInfo = &MTI;
+      return;
+    }
+
+    // Ensure we are transferring the bytes of the correct type of struct.
+    auto IsStructTy = [&](Type *Ty) -> bool {
+      while (Ty->isArrayTy())
+        Ty = Ty->getArrayElementType();
+      return Ty == StructTy;
+    };
+
+    Value *Dest = MTI.getRawDest();
+    Type *DestPtrTy = getPointeeType(Dest);
+    Value *Src = MTI.getRawSource();
+    Type *SrcPtrTy = getPointeeType(Src);
+    if (!DestPtrTy || !SrcPtrTy || DestPtrTy != SrcPtrTy ||
+        !IsStructTy(DestPtrTy)) {
+      AbortedInfo = &MTI;
+      return;
+    }
+
+    Accesses.push_back(&MTI);
+  }
+
+  void visitInstruction(Instruction &I) { AbortedInfo = &I; }
+
+  void visitIntrinsicInst(IntrinsicInst &II) {
+    switch (II.getIntrinsicID()) {
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      Accesses.push_back(&II);
+      break;
+    default:
+      AbortedInfo = &II;
+    }
+  }
+
+  void markAsDead(Instruction &I) {
+    if (VisitedDeadInsts.insert(&I).second)
+      DeadUsers.push_back(&I);
+  }
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+void StructDecompositionAnalysis::print(raw_ostream &OS, const_iterator I,
+                                        StringRef Indent) const {
+  OS << Indent << **I << "\n";
+}
+
+void StructDecompositionAnalysis::print(raw_ostream &OS) const {
+  if (AbortedInfo) {
+    OS << "Can't decompose struct alloca: " << *AI << "\n"
+       << "  An access to this alloca is not supported:\n"
+       << "  " << *AbortedInfo << "\n";
+    return;
+  }
+
+  OS << "Instructions to rewrite for this alloca: " << *AI << "\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    print(OS, I);
+}
+
+LLVM_DUMP_METHOD void
+StructDecompositionAnalysis::dump(const_iterator I) const {
+  print(dbgs(), I);
+}
+
+LLVM_DUMP_METHOD void StructDecompositionAnalysis::dump() const {
+  print(dbgs());
+}
+
+#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+} // end anonymous namespace
+
 /// Strip aggregate type wrapping.
 ///
 /// This removes no-op aggregate types wrapping an underlying type. It will
@@ -4662,6 +4961,330 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
   return SubTy;
 }
 
+/// Recursively rebuild a multi-dimensional array type, swapping the leaf
+/// element type of `ArrTy` to `NewLeaf`. If `ArrTy` is not an
+/// `ArrayType` the function simply returns `ArrTy` unchanged.
+///
+/// - `ArrTy`: The original (possibly multi-dimensional) array type
+/// - `NewLeaf`: The desired new leaf element type (must be a non-array type)
+///
+/// \returns the new (multi-dimensional) array type using the new leaf element
+/// type.
+static Type *replaceArrayLeafType(Type *ArrTy, Type *NewLeaf) {
+  if (!ArrTy->isArrayTy())
+    return NewLeaf;
+
+  // Peel off the outermost ArrayType, recurse on its element type, then
+  // re-wrap the same number of elements.
+  auto *OuterArr = cast<ArrayType>(ArrTy);
+  uint64_t NumElems = OuterArr->getNumElements();
+
+  // Recurse to get the transformed inner type.
+  Type *InnerTransformed =
+      replaceArrayLeafType(OuterArr->getElementType(), NewLeaf);
+
+  return ArrayType::get(InnerTransformed, NumElems);
+}
+
+/// Retrieves or constructs a pointer for a specific member of a decomposed
+/// struct.
+///
+/// This helper recursively constructs a `GetElementPtrInst` (GEP) used to
+/// access a given member (`MemberId`) of a struct pointed to by `Ptr`. It uses
+/// a cache (`PtrMap`) to avoid redundant GEP creation for the same
+/// pointer-member pair and to resolve the base case where `Ptr` is the
+/// original struct alloca instruction.
+///
+/// If `Ptr` is not an alloca instruction, the function assumes that `Ptr` is a
+/// GEP instruction and walks back through the pointer operand chain to build a
+/// new GEP with the updated source element type (`MemberTy`). The resulting GEP
+/// is inserted at the same location as the original `Ptr` and cached in
+/// `PtrMap`.
+///
+/// If `Ptr` is an alloca instruction, the function assumes `PtrMap` contains
+/// the corresponding alloca instruction for the specific member of the
+/// decomposed struct alloca.
+///
+/// \param Builder the IRBuilder used to insert new instructions.
+/// \param Ptr a pointer value expected to be a `GetElementPtrInst` or
+/// `AllocaInst`
+/// \param PtrMap a mapping from (Ptr, MemberId) pairs to previously computed
+/// member pointers.
+/// \param MemberTy the type of the struct member being accessed.
+/// \param MemberId the index of the member within the struct.
+/// \returns a `Value *` pointing to memory for the specified struct member.
+static Value *getPtrForStructMemberAccess(
+    IRBuilder<> &Builder, Value *Ptr,
+    SmallMapVector<std::pair<Value *, uint64_t>, Value *, 8> PtrMap,
+    Type *MemberTy, uint64_t MemberId) {
+
+  if (PtrMap.contains({Ptr, MemberId}))
+    return PtrMap[{Ptr, MemberId}];
+
+  BasicBlock::iterator InsertPoint = Builder.GetInsertPoint();
+
+  assert(isa<GetElementPtrInst>(Ptr) &&
+         "Expected pointer operand to be a GEP!");
+
+  GetElementPtrInst *PtrGEP = cast<GetElementPtrInst>(Ptr);
+  Type *NewSrcElemTy =
+      replaceArrayLeafType(PtrGEP->getSourceElementType(), MemberTy);
+  Value *NewPtr = getPtrForStructMemberAccess(
+      Builder, PtrGEP->getPointerOperand(), PtrMap, MemberTy, MemberId);
+  SmallVector<Value *> Indices(PtrGEP->idx_begin(), PtrGEP->idx_end());
+  std::string Name = PtrGEP->getName().str() + "." + std::to_string(MemberId);
+
+  Builder.SetInsertPoint(PtrGEP->getIterator());
+  Value *NewPtrGEP = Builder.CreateGEP(NewSrcElemTy, NewPtr, Indices, Name,
+                                       PtrGEP->getNoWrapFlags());
+
+  PtrMap.insert({{Ptr, MemberId}, NewPtrGEP});
+
+  Builder.SetInsertPoint(InsertPoint);
+
+  return NewPtrGEP;
+}
+
+/// Attempt to decompose a struct-based alloca into separate member allocas.
+///
+/// If applicable, the function replaces the original struct alloca with
+/// individual allocas for each member of the struct. It then rewrites all
+/// relevant instructions (e.g., `memset`, `memcpy`, `GEP`, lifetime intrinsics)
+/// to operate on the member allocas instead. Unsupported or ambiguous accesses
+/// will cause the decomposition to abort, and no changes to the module will be
+/// made.
+///
+/// \warning This transformation is unsafe in languages that allow dynamic
+/// indexing across struct members (e.g., treating a pointer to one member as a
+/// base for accessing others via computed offsets). Such behavior can violate
+/// the assumptions of this decomposition, which expects each member to be
+/// accessed independently and explicitly.
+///
+/// \param AI the `AllocaInst` representing the struct or (multi-dimensional)
+/// array of structs to decompose.
+/// \returns `true` if the struct alloca was successfully decomposed; `false`
+/// otherwise.
+bool SROA::decomposeStructAlloca(AllocaInst &AI) {
+  // Ensure this is an allocation of a struct or (multi-dimensional) array of
+  // structs.
+ Type *Ty = AI.getAllocatedType(); + while (Ty->isArrayTy()) + Ty = Ty->getArrayElementType(); + if (!Ty->isStructTy()) + return false; + StructType *StructTy = cast(Ty); + + const DataLayout &DL = AI.getDataLayout(); + assert(DL.getTypeAllocSize(StructTy).isFixed() && + "The size of the struct must be fixed!"); + uint64_t StructSize = DL.getTypeAllocSize(StructTy).getFixedValue(); + + // Determine whether or not the (array of) struct(s) can be transformed. + LLVM_DEBUG(dbgs() << "Decomposing struct alloca: " << AI << "\n"); + StructDecompositionAnalysis SDA(AI); + LLVM_DEBUG(SDA.print(dbgs())); + + if (SDA.isAborted()) + return false; + + IRBuilder<> Builder(&AI); + + // A map to keep track of allocas and GEPs created for each struct member. + SmallMapVector, Value *, 8> StructMemberPtrMap; + + // Create allocas for each struct member using the same array dimensions. + for (uint64_t I = 0; I < StructTy->getNumElements(); ++I) { + std::string Name = AI.getName().str() + "." + std::to_string(I); + Type *MemberAllocaType = replaceArrayLeafType( + AI.getAllocatedType(), StructTy->getContainedType(I)); + AllocaInst *MemberAlloca = + Builder.CreateAlloca(MemberAllocaType, nullptr, Name); + StructMemberPtrMap.insert({{&AI, I}, MemberAlloca}); + } + + // Update struct accesses to point to the member allocations. + for (Instruction *StructAccess : SDA) { + Builder.SetInsertPoint(StructAccess); + + if (MemSetInst *MS = dyn_cast(StructAccess)) { + // A memset over a struct or array of structs will be replaced with M + // memsets, where M is the number of struct members. + + Value *Dest = MS->getRawDest(); + for (unsigned M = 0; M < StructTy->getNumContainedTypes(); ++M) { + Type *StructMemberTy = StructTy->getContainedType(M); + + Value *NewDest = getPtrForStructMemberAccess( + Builder, Dest, StructMemberPtrMap, StructMemberTy, M); + + assert(DL.getTypeAllocSize(StructMemberTy).isFixed() && + "Struct member types must have a fixed size!"); + uint64_t StructMemberSize = + DL.getTypeAllocSize(StructMemberTy).getFixedValue(); + assert(MS->getLengthInBytes().has_value() && + "The number of bytes to set must be known!"); + uint64_t Length = MS->getLengthInBytes()->getZExtValue(); + Value *NewLength = + Builder.getInt64(StructMemberSize * (Length / StructSize)); + + Builder.CreateMemSet(NewDest, MS->getValue(), NewLength, std::nullopt, + MS->isVolatile()); + } + + } else if (MemTransferInst *MT = dyn_cast(StructAccess)) { + // A memory transfer instruction to copy a struct or (multi-dimensional) + // array of structs from one pointer to another is replaced with N * M + // memory transfer instructions, where N is the number of structs and M is + // the number of struct members. + + Value *Dest = MT->getRawDest(); + Value *Src = MT->getRawSource(); + + // This function returns true if Ptr points into the memory of the given + // alloca instruction Alloca using only GEPs. 
+      auto IsPtrIntoAlloca = [](Value *Ptr, AllocaInst *Alloca) -> bool {
+        while (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+          Ptr = GEP->getPointerOperand();
+        return Ptr == Alloca;
+      };
+      bool DestIsFromAlloca = IsPtrIntoAlloca(Dest, &AI);
+      bool SrcIsFromAlloca = IsPtrIntoAlloca(Src, &AI);
+
+      for (unsigned M = 0; M < StructTy->getNumContainedTypes(); ++M) {
+        Type *StructMemberTy = StructTy->getContainedType(M);
+
+        Value *NewDestBasePtr = Dest;
+        if (DestIsFromAlloca)
+          NewDestBasePtr = getPtrForStructMemberAccess(
+              Builder, Dest, StructMemberPtrMap, StructMemberTy, M);
+
+        Value *NewSrcBasePtr = Src;
+        if (SrcIsFromAlloca)
+          NewSrcBasePtr = getPtrForStructMemberAccess(
+              Builder, Src, StructMemberPtrMap, StructMemberTy, M);
+
+        assert(DL.getTypeAllocSize(StructMemberTy).isFixed() &&
+               "Struct member types must have a fixed size!");
+        uint64_t StructMemberSize =
+            DL.getTypeAllocSize(StructMemberTy).getFixedValue();
+        assert(MT->getLengthInBytes().has_value() &&
+               "The number of bytes to transfer must be known!");
+        uint64_t Length = MT->getLengthInBytes()->getZExtValue();
+        uint64_t NumElems = Length / StructSize;
+        Value *NewLength = Builder.getInt64(StructMemberSize * NumElems);
+
+        assert(NumElems <= UINT32_MAX &&
+               "Number of elements to transfer must fit within a 32-bit "
+               "unsigned integer!");
+        for (uint32_t N = 0; N < NumElems; ++N) {
+          auto CreateGEPForIndexN = [&](Value *Ptr,
+                                        bool IsStructAccess) -> Value * {
+            Type *SourceElemTy = StructMemberTy;
+            SmallVector<Value *> Indices = {Builder.getInt32(N)};
+            std::string Name = Ptr->getName().str() + "." + std::to_string(N);
+            if (IsStructAccess) {
+              SourceElemTy = StructTy;
+              Indices.push_back(Builder.getInt32(M));
+              Name += "." + std::to_string(M);
+            }
+            GEPNoWrapFlags NWF = GEPNoWrapFlags::inBounds();
+            if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+              NWF = GEP->getNoWrapFlags();
+            Value *V = Builder.CreateGEP(SourceElemTy, Ptr, Indices, Name, NWF);
+            return V;
+          };
+
+          Value *NewDest =
+              CreateGEPForIndexN(NewDestBasePtr, !DestIsFromAlloca);
+          Value *NewSrc = CreateGEPForIndexN(NewSrcBasePtr, !SrcIsFromAlloca);
+          Builder.CreateMemTransferInst(MT->getIntrinsicID(), NewDest,
+                                        std::nullopt, NewSrc, std::nullopt,
+                                        NewLength, MT->isVolatile());
+        }
+      }
+
+    } else if (GetElementPtrInst *GEP =
+                   dyn_cast<GetElementPtrInst>(StructAccess)) {
+      // Struct- or (multi-dimensional-)array-of-struct-typed GEPs will be
+      // replaced with a similar GEP with its source element type modified to
+      // use the struct member type instead of the struct type. The indices of
+      // the new GEP will have the struct member index removed from the GEP
+      // indices.
+
+      // Get the iterator to the index specifying the struct member to access.
+ unsigned StructMemberOperandIdx = 2; + Type *Ty = GEP->getSourceElementType(); + while (Ty->isArrayTy()) { + Ty = Ty->getArrayElementType(); + StructMemberOperandIdx++; + } + GetElementPtrInst::op_iterator StructMemberIdxIter = + GEP->idx_begin() + StructMemberOperandIdx - 1; + assert(isa(*StructMemberIdxIter) && + "Index to struct member must be constant!"); + + uint64_t StructMemberId = + cast(*StructMemberIdxIter)->getZExtValue(); + Type *StructMemberTy = StructTy->getContainedType(StructMemberId); + + Type *NewSrcElemTy = + replaceArrayLeafType(GEP->getSourceElementType(), StructMemberTy); + Value *NewPtr = getPtrForStructMemberAccess( + Builder, GEP->getPointerOperand(), StructMemberPtrMap, StructMemberTy, + StructMemberId); + + SmallVector NewIndices(GEP->idx_begin(), StructMemberIdxIter); + NewIndices.insert(NewIndices.end(), std::next(StructMemberIdxIter), + GEP->idx_end()); + std::string NewName = + GEP->getName().str() + "." + std::to_string(StructMemberId); + + Value *NewGEP = Builder.CreateGEP(NewSrcElemTy, NewPtr, NewIndices, + NewName, GEP->getNoWrapFlags()); + GEP->replaceAllUsesWith(NewGEP); + + } else if (IntrinsicInst *II = dyn_cast(StructAccess)) { + assert(II->isLifetimeStartOrEnd() && + "Expected intrinsic instruction to be a lifetime start or end!"); + // Replace lifetime start and end intrinsic instructions with M lifetime + // start and end intrinsic instructions, where M is the number of struct + // members. + + Value *Ptr = II->getArgOperand(0); + assert(Ptr == &AI && "Expected pointer operand of lifetime start or end " + "to be the struct alloca!"); + for (unsigned M = 0; M < StructTy->getNumContainedTypes(); ++M) { + assert(StructMemberPtrMap.contains({Ptr, M}) && + "The struct member pointer map must contain Ptr!"); + Value *MemberAlloca = StructMemberPtrMap[{Ptr, M}]; + assert(isa(MemberAlloca) && + "The struct member pointer map must contain an alloca " + "instruction for this member!"); + if (II->getIntrinsicID() == Intrinsic::lifetime_start) + Builder.CreateLifetimeStart(MemberAlloca); + else if (II->getIntrinsicID() == Intrinsic::lifetime_end) + Builder.CreateLifetimeEnd(MemberAlloca); + } + } else + llvm_unreachable( + "Invalid instruction encountered during struct decomposition!"); + + DeadInsts.push_back(StructAccess); + } + + ArrayRef DeadUsers = SDA.getDeadUsers(); + for (Instruction *I : DeadUsers) + DeadInsts.push_back(I); + DeadInsts.push_back(&AI); + + // Process the allocas of each struct member alloca in case there are further + // SROA or struct decomposition opportunities. + for (unsigned I = 0; I < StructTy->getNumElements(); ++I) + Worklist.insert(cast(StructMemberPtrMap[{&AI, I}])); + + return true; +} + /// Pre-split loads and stores to simplify rewriting. /// /// We want to break up the splittable load+store pairs as much as @@ -5854,6 +6477,17 @@ SROA::runOnAlloca(AllocaInst &AI) { Size.getFixedValue() == 0) return {Changed, CFGChanged}; + // Decompose allocas for structs and (multi-dimensional) arrays of structs. + if (DecomposeStructs || TTI->shouldDecomposeStructAllocas()) { + Type *Ty = AT; + while (Ty->isArrayTy()) + Ty = Ty->getArrayElementType(); + if (Ty->isStructTy()) { + Changed = decomposeStructAlloca(AI); + return {Changed, CFGChanged}; + } + } + // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. 
   IRBuilderTy IRB(&AI);
@@ -6038,9 +6672,10 @@ std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
 PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
   auto [Changed, CFGChanged] =
-      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
+      SROA(&F.getContext(), &DTU, &TTI, &AC, Options).runSROA(F);
   if (!Changed)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
@@ -6054,28 +6689,35 @@ void SROAPass::printPipeline(
     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
   static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
       OS, MapClassName2PassName);
-  OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>" : "<modify-cfg>");
+  OS << (Options.PCFGOption == SROAOptions::PreserveCFG ? "<preserve-cfg>" : "<modify-cfg>");
 }
 
-SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
+SROAPass::SROAPass(SROAOptions::PreserveCFGOption PreserveCFG)
+    : Options({PreserveCFG, SROAOptions::NoDecomposeStructs}) {}
+SROAPass::SROAPass(const SROAOptions &Options) : Options(Options) {}
 
 namespace {
 
 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
 class SROALegacyPass : public FunctionPass {
-  SROAOptions PreserveCFG;
+  SROAOptions Options;
 
 public:
   static char ID;
 
-  SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
-      : FunctionPass(ID), PreserveCFG(PreserveCFG) {
+  SROALegacyPass(const SROAOptions &Options =
+                     {SROAOptions::PreserveCFGOption::PreserveCFG,
+                      SROAOptions::DecomposeStructsOption::NoDecomposeStructs})
+      : FunctionPass(ID), Options(Options) {
    initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnFunction(Function &F) override {
-    if (skipFunction(F))
+    TargetTransformInfo &TTI =
+        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+    if (skipFunction(F) && !TTI.shouldDecomposeStructAllocas())
      return false;
 
     DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -6083,13 +6725,14 @@ class SROALegacyPass : public FunctionPass {
         getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
     auto [Changed, _] =
-        SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
+        SROA(&F.getContext(), &DTU, &TTI, &AC, Options).runSROA(F);
     return Changed;
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
   }
@@ -6101,9 +6744,11 @@ class SROALegacyPass : public FunctionPass {
 
 char SROALegacyPass::ID = 0;
 
-FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
-  return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
-                                        : SROAOptions::ModifyCFG);
+FunctionPass *llvm::createSROAPass(bool PreserveCFG, bool DecomposeStructs) {
+  return new SROALegacyPass(
+      {PreserveCFG ? SROAOptions::PreserveCFG : SROAOptions::ModifyCFG,
+       DecomposeStructs ? SROAOptions::DecomposeStructs
+                        : SROAOptions::NoDecomposeStructs});
 }
 
 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
diff --git a/llvm/test/Transforms/SROA/struct-decomposition.ll b/llvm/test/Transforms/SROA/struct-decomposition.ll
new file mode 100644
index 0000000000000..df617fa4ca9cf
--- /dev/null
+++ b/llvm/test/Transforms/SROA/struct-decomposition.ll
@@ -0,0 +1,277 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -passes='sroa<preserve-cfg;decompose-structs>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt -S -passes='sroa<modify-cfg;decompose-structs>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+%struct.basic = type { i32, [2 x i32] }
+
+define void @basic(i32 %i, i32 %v) {
+; CHECK-LABEL: define void @basic(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA_1:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]])
+; CHECK-NEXT:    [[ARR_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 0
+; CHECK-NEXT:    [[ELEM:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR_1]], i32 0, i32 [[I]]
+; CHECK-NEXT:    store i32 [[V]], ptr [[ELEM]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca %struct.basic
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca)
+  %arr = getelementptr inbounds %struct.basic, ptr %alloca, i32 0, i32 1
+  %elem = getelementptr inbounds [2 x i32], ptr %arr, i32 0, i32 %i
+  store i32 %v, ptr %elem
+  call void @llvm.lifetime.end.p0(ptr nonnull %alloca)
+  ret void
+}
+
+%struct.nested = type { [3 x double], %struct.basic }
+
+define void @nested(i32 %i, i32 %v) {
+; CHECK-LABEL: define void @nested(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA_1_1:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1_1]])
+; CHECK-NEXT:    [[BASIC_1_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1_1]], i32 0
+; CHECK-NEXT:    [[ARRAY_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[BASIC_1_1]], i32 0
+; CHECK-NEXT:    [[ELEM:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAY_1]], i32 0, i32 [[I]]
+; CHECK-NEXT:    store i32 [[V]], ptr [[ELEM]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1_1]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca %struct.nested
+  call void @llvm.lifetime.start.p0(ptr nonnull %alloca)
+  %basic = getelementptr inbounds %struct.nested, ptr %alloca, i32 0, i32 1
+  %array = getelementptr inbounds %struct.basic, ptr %basic, i32 0, i32 1
+  %elem = getelementptr inbounds [2 x i32], ptr %array, i32 0, i32 %i
+  store i32 %v, ptr %elem
+  call void @llvm.lifetime.end.p0(ptr nonnull %alloca)
+  ret void
+}
+
+define void @array1d(i32 %s, i32 %i) {
+;
+; CHECK-LABEL: define void @array1d(
+; CHECK-SAME: i32 [[S:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[ALLOCA_1:%.*]] = alloca [10 x [2 x i32]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]])
+; CHECK-NEXT:    [[STRUCT_1:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[ALLOCA_1]], i32 0, i32 [[S]]
+; CHECK-NEXT:    [[ARRAY_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[STRUCT_1]], i32 0
+; CHECK-NEXT:    [[ELEM:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAY_1]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[ELEM]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]])
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [10 x %struct.basic]
+  call void @llvm.lifetime.start.p0(ptr 
nonnull %alloca) + %struct = getelementptr inbounds [10 x %struct.basic], ptr %alloca, i32 0, i32 %s + %array = getelementptr inbounds %struct.basic, ptr %struct, i32 0, i32 1 + %elem = getelementptr inbounds [2 x i32], ptr %array, i32 0, i32 %i + %load = load i32, ptr %elem + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +define void @array1d_2(i32 %s, i32 %i) { +; +; CHECK-LABEL: define void @array1d_2( +; CHECK-SAME: i32 [[S:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [10 x [2 x i32]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: [[ARRAY_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 [[S]] +; CHECK-NEXT: [[ELEM:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAY_1]], i32 0, i32 [[I]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ELEM]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [10 x %struct.basic] + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + %array = getelementptr inbounds %struct.basic, ptr %alloca, i32 %s, i32 1 + %elem = getelementptr inbounds [2 x i32], ptr %array, i32 0, i32 %i + %load = load i32, ptr %elem + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +define void @array2d(i32 %si, i32 %sj, i32 %i) { +; +; CHECK-LABEL: define void @array2d( +; CHECK-SAME: i32 [[SI:%.*]], i32 [[SJ:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [3 x [2 x [2 x i32]]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: [[STRUCT_1:%.*]] = getelementptr inbounds [3 x [2 x [2 x i32]]], ptr [[ALLOCA_1]], i32 0, i32 [[SI]], i32 [[SJ]] +; CHECK-NEXT: [[ARRAY_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[STRUCT_1]], i32 0 +; CHECK-NEXT: [[ELEM:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAY_1]], i32 0, i32 [[I]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ELEM]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [3 x [2 x %struct.basic]] + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + %struct = getelementptr inbounds [3 x [2 x %struct.basic]], ptr %alloca, i32 0, i32 %si, i32 %sj + %array = getelementptr inbounds %struct.basic, ptr %struct, i32 0, i32 1 + %elem = getelementptr inbounds [2 x i32], ptr %array, i32 0, i32 %i + %load = load i32, ptr %elem + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +define void @array2d_2(i32 %si, i32 %sj, i32 %i) { +; +; CHECK-LABEL: define void @array2d_2( +; CHECK-SAME: i32 [[SI:%.*]], i32 [[SJ:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [3 x [2 x [2 x i32]]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: [[ELEM_1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[ALLOCA_1]], i32 [[SI]], i32 [[SJ]], i32 [[I]] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[ELEM_1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [3 x [2 x %struct.basic]] + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + %elem = getelementptr inbounds [2 x %struct.basic], ptr %alloca, i32 %si, i32 %sj, i32 1, i32 %i + %load = load i32, ptr %elem + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +%struct.S = type { [2 x %struct.X], [2 x %struct.Y] } +%struct.X = type { i32, float } +%struct.Y = type { i32, i32 } + +define void 
@nested_structs_arrays(i32 %i) { +; CHECK-LABEL: define void @nested_structs_arrays( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: [[S_0_1:%.*]] = alloca [10 x [2 x float]], align 4 +; CHECK-NEXT: [[S_1_1:%.*]] = alloca [10 x [2 x i32]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[S_0_1]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[S_1_1]]) +; CHECK-NEXT: [[SI_0_1:%.*]] = getelementptr inbounds [2 x float], ptr [[S_0_1]], i32 [[I]] +; CHECK-NEXT: [[SI_1_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[S_1_1]], i32 [[I]] +; CHECK-NEXT: [[X_0_1:%.*]] = getelementptr inbounds [2 x float], ptr [[SI_0_1]], i32 0 +; CHECK-NEXT: [[B_1:%.*]] = getelementptr inbounds float, ptr [[X_0_1]], i32 [[I]] +; CHECK-NEXT: [[LB:%.*]] = load float, ptr [[B_1]], align 1 +; CHECK-NEXT: [[Y_1_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[SI_1_1]], i32 0 +; CHECK-NEXT: [[D_1:%.*]] = getelementptr inbounds i32, ptr [[Y_1_1]], i32 [[I]] +; CHECK-NEXT: [[LD:%.*]] = load i32, ptr [[D_1]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[S_0_1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[S_1_1]]) +; CHECK-NEXT: ret void +; + %s = alloca [10 x %struct.S] + call void @llvm.lifetime.start.p0(ptr nonnull %s) + %si = getelementptr inbounds %struct.S, ptr %s, i32 %i + %x = getelementptr inbounds %struct.S, ptr %si, i32 0, i32 0 + %b = getelementptr inbounds %struct.X, ptr %x, i32 %i, i32 1 + %lb = load float, ptr %b, align 1 + %y = getelementptr inbounds %struct.S, ptr %si, i32 0, i32 1 + %d = getelementptr inbounds %struct.Y, ptr %y, i32 %i, i32 1 + %ld = load i32, ptr %d, align 1 + call void @llvm.lifetime.end.p0(ptr nonnull %s) + ret void +} + +define void @memset_single_struct() { +; CHECK-LABEL: define void @memset_single_struct() { +; CHECK-NEXT: [[ALLOCA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA_0]], i8 0, i64 4, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA_1]], i8 0, i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca %struct.basic + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + call void @llvm.memset.p0.i32(ptr %alloca, i8 0, i32 12, i1 false) + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +define void @memset_array_of_structs() { +; +; CHECK-LABEL: define void @memset_array_of_structs() { +; CHECK-NEXT: [[ALLOCA_0:%.*]] = alloca [2 x [2 x i32]], align 4 +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [2 x [2 x [2 x i32]]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA_0]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA_1]], i8 0, i64 32, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [2 x [2 x %struct.basic]] + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + call void @llvm.memset.p0.i32(ptr %alloca, i8 0, i32 48, i1 false) + call void @llvm.lifetime.end.p0(ptr nonnull 
%alloca) + ret void +} + +define void @memcpy_single_struct(ptr byval(%struct.basic) %arg) { +; +; CHECK-LABEL: define void @memcpy_single_struct( +; CHECK-SAME: ptr byval([[STRUCT_BASIC:%.*]]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: [[ARG_0_0:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 0, i32 0 +; CHECK-NEXT: [[ALLOCA_0_0_COPYLOAD:%.*]] = load i32, ptr [[ARG_0_0]], align 1 +; CHECK-NEXT: [[ALLOCA_1_0:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 0 +; CHECK-NEXT: [[ARG_0_1:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 0, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_1_0]], ptr [[ARG_0_1]], i64 8, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca %struct.basic + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + call void @llvm.memcpy.p0.p0.i32(ptr %alloca, ptr %arg, i32 12, i1 false) + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +define void @memcpy_array_of_structs(ptr byval([2 x [2 x %struct.basic]]) %arg) { +; CHECK-LABEL: define void @memcpy_array_of_structs( +; CHECK-SAME: ptr byval([2 x [2 x %struct.basic]]) [[ARG:%.*]]) { +; CHECK-NEXT: [[ALLOCA_0:%.*]] = alloca [2 x [2 x i32]], align 4 +; CHECK-NEXT: [[ALLOCA_1:%.*]] = alloca [2 x [2 x [2 x i32]]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: [[ALLOCA_0_0:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA_0]], i32 0 +; CHECK-NEXT: [[ARG_0_0:%.*]] = getelementptr inbounds [[STRUCT_BASIC:%.*]], ptr [[ARG]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_0_0]], ptr [[ARG_0_0]], i64 16, i1 false) +; CHECK-NEXT: [[ALLOCA_0_1:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA_0]], i32 1 +; CHECK-NEXT: [[ARG_1_0:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 1, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_0_1]], ptr [[ARG_1_0]], i64 16, i1 false) +; CHECK-NEXT: [[ALLOCA_0_2:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA_0]], i32 2 +; CHECK-NEXT: [[ARG_2_0:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 2, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_0_2]], ptr [[ARG_2_0]], i64 16, i1 false) +; CHECK-NEXT: [[ALLOCA_0_3:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA_0]], i32 3 +; CHECK-NEXT: [[ARG_3_0:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 3, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_0_3]], ptr [[ARG_3_0]], i64 16, i1 false) +; CHECK-NEXT: [[ALLOCA_1_0:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 0 +; CHECK-NEXT: [[ARG_0_1:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 0, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_1_0]], ptr [[ARG_0_1]], i64 32, i1 false) +; CHECK-NEXT: [[ALLOCA_1_1:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 1 +; CHECK-NEXT: [[ARG_1_1:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 1, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_1_1]], ptr [[ARG_1_1]], i64 32, i1 false) +; CHECK-NEXT: [[ALLOCA_1_2:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 2 +; CHECK-NEXT: [[ARG_2_1:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr 
[[ARG]], i32 2, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_1_2]], ptr [[ARG_2_1]], i64 32, i1 false) +; CHECK-NEXT: [[ALLOCA_1_3:%.*]] = getelementptr inbounds [2 x i32], ptr [[ALLOCA_1]], i32 3 +; CHECK-NEXT: [[ARG_3_1:%.*]] = getelementptr inbounds [[STRUCT_BASIC]], ptr [[ARG]], i32 3, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOCA_1_3]], ptr [[ARG_3_1]], i64 32, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_0]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ALLOCA_1]]) +; CHECK-NEXT: ret void +; + %alloca = alloca [2 x [2 x %struct.basic]] + call void @llvm.lifetime.start.p0(ptr nonnull %alloca) + call void @llvm.memcpy.p0.p0.i32(ptr %alloca, ptr %arg, i32 48, i1 false) + call void @llvm.lifetime.end.p0(ptr nonnull %alloca) + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-MODIFY-CFG: {{.*}} +; CHECK-PRESERVE-CFG: {{.*}}
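;
; Illustrative sketch (not part of the patch): outside of the DirectX backend,
; which opts in through DirectXTTIImpl::shouldDecomposeStructAllocas(), the
; decomposition can also be requested explicitly with the pass parameter added
; to parseSROAOptions above, e.g. opt -S -passes='sroa<modify-cfg;decompose-structs>'.
; Under that option an alloca of %struct.basic = type { i32, [2 x i32] } such as
;
;   %alloca = alloca %struct.basic
;   call void @llvm.memset.p0.i32(ptr %alloca, i8 0, i32 12, i1 false)
;
; is rewritten into one alloca and one memset per struct member, as the
; memset_single_struct test above checks:
;
;   %alloca.0 = alloca i32
;   %alloca.1 = alloca [2 x i32]
;   call void @llvm.memset.p0.i64(ptr align 4 %alloca.0, i8 0, i64 4, i1 false)
;   call void @llvm.memset.p0.i64(ptr align 4 %alloca.1, i8 0, i64 8, i1 false)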