diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 06708cec00cec..286fd0c5bb97b 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -27,6 +27,22 @@ static const int MaxVecSize = 4;
 
 using namespace llvm;
 
+// Recursively creates an array-like version of a given vector type.
+// The element count is only available for fixed-width vectors, so cast<> is
+// used to assert that instead of dereferencing an unchecked dyn_cast result.
+static Type *equivalentArrayTypeFromVector(Type *T) {
+  if (auto *VecTy = dyn_cast<VectorType>(T))
+    return ArrayType::get(VecTy->getElementType(),
+                          cast<FixedVectorType>(VecTy)->getNumElements());
+  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
+    Type *NewElementType =
+        equivalentArrayTypeFromVector(ArrayTy->getElementType());
+    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
+  }
+  // If it's not a vector or array, return the original type.
+  return T;
+}
+
 class DXILDataScalarizationLegacy : public ModulePass {
 
 public:
@@ -54,8 +70,8 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI) { return false; }
   bool visitBitCastInst(BitCastInst &BCI) { return false; }
-  bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
-  bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
+  bool visitInsertElementInst(InsertElementInst &IEI);
+  bool visitExtractElementInst(ExtractElementInst &EEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
   bool visitPHINode(PHINode &PHI) { return false; }
   bool visitLoadInst(LoadInst &LI);
@@ -65,6 +81,16 @@ class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
   friend bool findAndReplaceVectors(llvm::Module &M);
 
 private:
+  typedef std::pair<AllocaInst *, SmallVector<Value *, 4>> AllocaAndGEPs;
+  typedef SmallDenseMap<Value *, AllocaAndGEPs>
+      VectorToArrayMap; // A map from a vector-typed Value to its corresponding
+                        // AllocaInst and GEPs to each element of an array
+  VectorToArrayMap VectorAllocaMap;
+  AllocaAndGEPs createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
+                                      const Twine &Name);
+  bool replaceDynamicInsertElementInst(InsertElementInst &IEI);
+  bool replaceDynamicExtractElementInst(ExtractElementInst &EEI);
+
   GlobalVariable *lookupReplacementGlobal(Value *CurrOperand);
   DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
 };
@@ -76,6 +102,7 @@ bool DataScalarizerVisitor::visit(Function &F) {
     for (Instruction &I : make_early_inc_range(*BB))
       MadeChange |= InstVisitor::visit(I);
   }
+  VectorAllocaMap.clear();
   return MadeChange;
 }
 
@@ -90,20 +117,6 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
   return nullptr; // Not found
 }
 
-// Recursively creates an array version of the given vector type.
-static Type *replaceVectorWithArray(Type *T, LLVMContext &Ctx) {
-  if (auto *VecTy = dyn_cast<VectorType>(T))
-    return ArrayType::get(VecTy->getElementType(),
-                          dyn_cast<FixedVectorType>(VecTy)->getNumElements());
-  if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
-    Type *NewElementType =
-        replaceVectorWithArray(ArrayTy->getElementType(), Ctx);
-    return ArrayType::get(NewElementType, ArrayTy->getNumElements());
-  }
-  // If it's not a vector or array, return the original type.
-  return T;
-}
-
 static bool isArrayOfVectors(Type *T) {
   if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
     return isa<VectorType>(ArrType->getElementType());
@@ -116,8 +129,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
 
   ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
   IRBuilder<> Builder(&AI);
-  LLVMContext &Ctx = AI.getContext();
-  Type *NewType = replaceVectorWithArray(ArrType, Ctx);
+  Type *NewType = equivalentArrayTypeFromVector(ArrType);
   AllocaInst *ArrAlloca =
       Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
   ArrAlloca->setAlignment(AI.getAlign());
@@ -173,6 +185,141 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
   return false;
 }
 
+/// Spills the vector \p Vec into a stack array so that it can be indexed with
+/// a non-constant index. Returns the array's alloca together with one
+/// constant-index GEP per element. Results are cached in VectorAllocaMap so
+/// that repeated requests for the same \p Vec share a single alloca. The
+/// insertion point of \p Builder is restored before returning.
+DataScalarizerVisitor::AllocaAndGEPs
+DataScalarizerVisitor::createArrayFromVector(IRBuilder<> &Builder, Value *Vec,
+                                             const Twine &Name = "") {
+  // If there is already an alloca for this vector, return it
+  if (auto It = VectorAllocaMap.find(Vec); It != VectorAllocaMap.end())
+    return It->second;
+
+  auto InsertPoint = Builder.GetInsertPoint();
+
+  // Allocate the array to hold the vector elements
+  Builder.SetInsertPointPastAllocas(Builder.GetInsertBlock()->getParent());
+  Type *ArrTy = equivalentArrayTypeFromVector(Vec->getType());
+  AllocaInst *ArrAlloca =
+      Builder.CreateAlloca(ArrTy, nullptr, Name + ".alloca");
+  const uint64_t ArrNumElems = ArrTy->getArrayNumElements();
+
+  // Create extracts and stores to populate the array immediately after the
+  // original vector's defining instruction if available, else immediately after
+  // the alloca. The position must be a legal insertion point: after a PHI node
+  // it has to follow every PHI of the block, and after an invoke it lies in
+  // the normal destination. getInsertionPointAfterDef() accounts for both.
+  if (auto *Instr = dyn_cast<Instruction>(Vec)) {
+    auto AfterDef = Instr->getInsertionPointAfterDef();
+    assert(AfterDef && "No legal insertion point after vector def");
+    Builder.SetInsertPoint(*AfterDef);
+  }
+  SmallVector<Value *, 4> GEPs(ArrNumElems);
+  for (unsigned I = 0; I < ArrNumElems; ++I) {
+    Value *EE = Builder.CreateExtractElement(Vec, I, Name + ".extract");
+    GEPs[I] = Builder.CreateInBoundsGEP(
+        ArrTy, ArrAlloca, {Builder.getInt32(0), Builder.getInt32(I)},
+        Name + ".index");
+    Builder.CreateStore(EE, GEPs[I]);
+  }
+
+  VectorAllocaMap.insert({Vec, {ArrAlloca, GEPs}});
+  Builder.SetInsertPoint(InsertPoint);
+  return {ArrAlloca, GEPs};
+}
+
+/// Returns a pair of Value* with the first being a GEP into ArrAlloca using
+/// indices {0, Index}, and the second Value* being a Load of the GEP
+static std::pair<Value *, Value *>
+dynamicallyLoadArray(IRBuilder<> &Builder, AllocaInst *ArrAlloca, Value *Index,
+                     const Twine &Name = "") {
+  Type *ArrTy = ArrAlloca->getAllocatedType();
+  Value *GEP = Builder.CreateInBoundsGEP(
+      ArrTy, ArrAlloca, {Builder.getInt32(0), Index}, Name + ".index");
+  Value *Load =
+      Builder.CreateLoad(ArrTy->getArrayElementType(), GEP, Name + ".load");
+  return std::make_pair(GEP, Load);
+}
+
+/// Lowers an insertelement with a non-constant index: the source vector is
+/// spilled to an array, the new value is stored at the dynamic index, and the
+/// result vector is rebuilt from the array with constant-index insertelements.
+bool DataScalarizerVisitor::replaceDynamicInsertElementInst(
+    InsertElementInst &IEI) {
+  IRBuilder<> Builder(&IEI);
+
+  Value *Vec = IEI.getOperand(0);
+  Value *Val = IEI.getOperand(1);
+  Value *Index = IEI.getOperand(2);
+
+  AllocaAndGEPs ArrAllocaAndGEPs =
+      createArrayFromVector(Builder, Vec, IEI.getName());
+  AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
+  Type *ArrTy = ArrAlloca->getAllocatedType();
+  SmallVector<Value *, 4> &ArrGEPs = ArrAllocaAndGEPs.second;
+
+  // Remember the element currently stored at the dynamic index so that it can
+  // be restored once the result vector has been rebuilt
+  auto GEPAndLoad =
+      dynamicallyLoadArray(Builder, ArrAlloca, Index, IEI.getName());
+  Value *GEP = GEPAndLoad.first;
+  Value *OrigElem = GEPAndLoad.second;
+
+  Builder.CreateStore(Val, GEP);
+  Value *NewIEI = PoisonValue::get(Vec->getType());
+  for (unsigned I = 0; I < ArrTy->getArrayNumElements(); ++I) {
+    Value *Elem = Builder.CreateLoad(ArrTy->getArrayElementType(), ArrGEPs[I],
+                                     IEI.getName() + ".load");
+    NewIEI = Builder.CreateInsertElement(NewIEI, Elem, Builder.getInt32(I),
+                                         IEI.getName() + ".insert");
+  }
+
+  // Store back the original value so the Alloca can be reused for subsequent
+  // insertelement instructions on the same vector
+  Builder.CreateStore(OrigElem, GEP);
+
+  IEI.replaceAllUsesWith(NewIEI);
+  IEI.eraseFromParent();
+  return true;
+}
+
+bool DataScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+  // If the index is a constant then we don't need to scalarize it
+  Value *Index = IEI.getOperand(2);
+  if (isa<ConstantInt>(Index))
+    return false;
+  return replaceDynamicInsertElementInst(IEI);
+}
+
+/// Lowers an extractelement with a non-constant index into a load from the
+/// array form of its vector operand.
+bool DataScalarizerVisitor::replaceDynamicExtractElementInst(
+    ExtractElementInst &EEI) {
+  IRBuilder<> Builder(&EEI);
+
+  AllocaAndGEPs ArrAllocaAndGEPs =
+      createArrayFromVector(Builder, EEI.getVectorOperand(), EEI.getName());
+  AllocaInst *ArrAlloca = ArrAllocaAndGEPs.first;
+
+  auto GEPAndLoad = dynamicallyLoadArray(Builder, ArrAlloca,
+                                         EEI.getIndexOperand(), EEI.getName());
+  Value *Load = GEPAndLoad.second;
+
+  EEI.replaceAllUsesWith(Load);
+  EEI.eraseFromParent();
+  return true;
+}
+
+bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
+  // If the index is a constant then we don't need to scalarize it
+  Value *Index = EEI.getIndexOperand();
+  if (isa<ConstantInt>(Index))
+    return false;
+  return replaceDynamicExtractElementInst(EEI);
+}
+
 bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
 
   unsigned NumOperands = GEPI.getNumOperands();
@@ -197,8 +344,8 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   return true;
 }
 
-Constant *transformInitializer(Constant *Init, Type *OrigType, Type *NewType,
-                               LLVMContext &Ctx) {
+static Constant *transformInitializer(Constant *Init, Type *OrigType,
+                                      Type *NewType, LLVMContext &Ctx) {
   // Handle ConstantAggregateZero (zero-initialized constants)
   if (isa<ConstantAggregateZero>(Init)) {
     return ConstantAggregateZero::get(NewType);
@@ -257,7 +404,7 @@ static bool findAndReplaceVectors(Module &M) {
   for (GlobalVariable &G : M.globals()) {
     Type *OrigType = G.getValueType();
 
-    Type *NewType = replaceVectorWithArray(OrigType, Ctx);
+    Type *NewType = equivalentArrayTypeFromVector(OrigType);
     if (OrigType != NewType) {
       // Create a new global variable with the updated type
       // Note: Initializer is set via transformInitializer
diff --git a/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
new file mode 100644
index 0000000000000..0eb65bd4fc751
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-dynamic-vector-index.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Dynamic insertelements on the same vector (%v0) in different branches share one alloca, so each must
+; restore the slot it overwrote to avoid side effects on the other; the phi (%v) gets its own alloca
+define void @test_multiple_insert(i32 %c, i32 %i, i32 %j) {
+; CHECK-LABEL: define void @test_multiple_insert(
+; CHECK-SAME: i32 [[C:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
+; CHECK-NEXT:    [[V0_ALLOCA:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT:    [[V_ALLOCA:%.*]] = alloca [2 x i32], align 4
+; CHECK-NEXT:    [[V0_0:%.*]] = insertelement <2 x i32> poison, i32 0, i32 0
+; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i32> [[V0_0]], i32 0, i32 1
+; CHECK-NEXT:    [[V0_EXTRACT0:%.*]] = extractelement <2 x i32> [[V0]], i64 0
+; CHECK-NEXT:    [[V0_INDEX0:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[V0_EXTRACT0]], ptr [[V0_INDEX0]], align 4
+; CHECK-NEXT:    [[V0_EXTRACT1:%.*]] = extractelement <2 x i32> [[V0]], i64 1
+; CHECK-NEXT:    [[V0_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[V0_EXTRACT1]], ptr [[V0_INDEX1]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[C]], 1
+; CHECK-NEXT:    br i1 [[COND]], label %[[IF:.*]], label %[[ELSE:.*]]
+; CHECK:       [[IF]]:
+; CHECK-NEXT:    [[V1_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[V1_LOAD:%.*]] = load i32, ptr [[V1_INDEX]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[V1_INDEX]], align 4
+; CHECK-NEXT:    [[V1_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
+; CHECK-NEXT:    [[V1_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V1_LOAD0]], i32 0
+; CHECK-NEXT:    [[V1_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
+; CHECK-NEXT:    [[V1_INSERT1:%.*]] = insertelement <2 x i32> [[V1_INSERT0]], i32 [[V1_LOAD1]], i32 1
+; CHECK-NEXT:    store i32 [[V1_LOAD]], ptr [[V1_INDEX]], align 4
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[V2_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[V2_LOAD:%.*]] = load i32, ptr [[V2_INDEX]], align 4
+; CHECK-NEXT:    store i32 2, ptr [[V2_INDEX]], align 4
+; CHECK-NEXT:    [[V2_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
+; CHECK-NEXT:    [[V2_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V2_LOAD0]], i32 0
+; CHECK-NEXT:    [[V2_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
+; CHECK-NEXT:    [[V2_INSERT1:%.*]] = insertelement <2 x i32> [[V2_INSERT0]], i32 [[V2_LOAD1]], i32 1
+; CHECK-NEXT:    store i32 [[V2_LOAD]], ptr [[V2_INDEX]], align 4
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[V:%.*]] = phi <2 x i32> [ [[V1_INSERT1]], %[[IF]] ], [ [[V2_INSERT1]], %[[ELSE]] ]
+; CHECK-NEXT:    [[V_EXTRACT:%.*]] = extractelement <2 x i32> [[V]], i64 0
+; CHECK-NEXT:    [[V_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[V_EXTRACT]], ptr [[V_INDEX]], align 4
+; CHECK-NEXT:    [[V_EXTRACT10:%.*]] = extractelement <2 x i32> [[V]], i64 1
+; CHECK-NEXT:    [[V_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[V_EXTRACT10]], ptr [[V_INDEX1]], align 4
+; CHECK-NEXT:    [[V3_INDEXJ:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 [[J]]
+; CHECK-NEXT:    [[V3_LOAD:%.*]] = load i32, ptr [[V3_INDEXJ]], align 4
+; CHECK-NEXT:    store i32 3, ptr [[V3_INDEXJ]], align 4
+; CHECK-NEXT:    [[V3_LOAD0:%.*]] = load i32, ptr [[V_INDEX]], align 4
+; CHECK-NEXT:    [[V3_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V3_LOAD0]], i32 0
+; CHECK-NEXT:    [[V3_LOAD1:%.*]] = load i32, ptr [[V_INDEX1]], align 4
+; CHECK-NEXT:    [[V3_INSERT1:%.*]] = insertelement <2 x i32> [[V3_INSERT0]], i32 [[V3_LOAD1]], i32 1
+; CHECK-NEXT:    store i32 [[V3_LOAD]], ptr [[V3_INDEXJ]], align 4
+; CHECK-NEXT:    ret void
+;
+  %v0_0 = insertelement <2 x i32> poison, i32 0, i32 0
+  %v0 = insertelement <2 x i32> %v0_0, i32 0, i32 1
+  %cond = icmp eq i32 %c, 1
+  br i1 %cond, label %if, label %else
+if:
+  %v1 = insertelement <2 x i32> %v0, i32 1, i32 %i
+  br label %exit
+else:
+  %v2 = insertelement <2 x i32> %v0, i32 2, i32 %i
+  br label %exit
+exit:
+  %v = phi <2 x i32> [ %v1, %if ], [ %v2, %else ]
+  %v3 = insertelement <2 x i32> %v, i32 3, i32 %j
+  ret void
+}
+
+; A dynamic extractelement and a dynamic insertelement on the same vector (%v) reuse a single alloca and its element GEPs
+define void @test_alloca_reuse(<3 x i32> %v, i32 %a, i32 %i) {
+; CHECK-LABEL: define void @test_alloca_reuse(
+; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
+; CHECK-NEXT:    [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
+; CHECK-NEXT:    [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
+; CHECK-NEXT:    [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
+; CHECK-NEXT:    [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
+; CHECK-NEXT:    [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
+; CHECK-NEXT:    store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
+; CHECK-NEXT:    [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
+; CHECK-NEXT:    [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[IE1_LOAD1:%.*]] = load i32, ptr [[IE1_DYNINDEX]], align 4
+; CHECK-NEXT:    store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
+; CHECK-NEXT:    [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
+; CHECK-NEXT:    [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
+; CHECK-NEXT:    [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
+; CHECK-NEXT:    [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
+; CHECK-NEXT:    [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
+; CHECK-NEXT:    [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
+; CHECK-NEXT:    store i32 [[IE1_LOAD1]], ptr [[IE1_DYNINDEX]], align 4
+; CHECK-NEXT:    ret void
+;
+  %ee1 = extractelement <3 x i32> %v, i32 %i
+  %ie1 = insertelement <3 x i32> %v, i32 %a, i32 %i
+  ret void
+}
+
+define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
+; CHECK-LABEL: define float @extract_float_vec_dynamic(
+; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[EE_ALLOCA:%.*]] = alloca [4 x float], align 4
+; CHECK-NEXT:    [[EE_EXTRACT:%.*]] = extractelement <4 x float> [[V]], i64 0
+; CHECK-NEXT:    [[EE_INDEX:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 0
+; CHECK-NEXT:    store float [[EE_EXTRACT]], ptr [[EE_INDEX]], align 4
+; CHECK-NEXT:    [[EE_EXTRACT1:%.*]] = extractelement <4 x float> [[V]], i64 1
+; CHECK-NEXT:    [[EE_INDEX2:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 1
+; CHECK-NEXT:    store float [[EE_EXTRACT1]], ptr [[EE_INDEX2]], align 4
+; CHECK-NEXT:    [[EE_EXTRACT3:%.*]] = extractelement <4 x float> [[V]], i64 2
+; CHECK-NEXT:    [[EE_INDEX4:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 2
+; CHECK-NEXT:    store float [[EE_EXTRACT3]], ptr [[EE_INDEX4]], align 4
+; CHECK-NEXT:    [[EE_EXTRACT5:%.*]] = extractelement <4 x float> [[V]], i64 3
+; CHECK-NEXT:    [[EE_INDEX6:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 3
+; CHECK-NEXT:    store float [[EE_EXTRACT5]], ptr [[EE_INDEX6]], align 4
+; CHECK-NEXT:    [[EE_INDEX7:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[EE_LOAD:%.*]] = load float, ptr [[EE_INDEX7]], align 4
+; CHECK-NEXT:    ret float [[EE_LOAD]]
+;
+  %ee = extractelement <4 x float> %v, i32 %i
+  ret float %ee
+}
+
+define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
+; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
+; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:    [[IE_ALLOCA:%.*]] = alloca [3 x i32], align 4
+; CHECK-NEXT:    [[IE_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
+; CHECK-NEXT:    [[IE_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 0
+; CHECK-NEXT:    store i32 [[IE_EXTRACT]], ptr [[IE_INDEX]], align 4
+; CHECK-NEXT:    [[IE_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
+; CHECK-NEXT:    [[IE_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[IE_EXTRACT1]], ptr [[IE_INDEX2]], align 4
+; CHECK-NEXT:    [[IE_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
+; CHECK-NEXT:    [[IE_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 2
+; CHECK-NEXT:    store i32 [[IE_EXTRACT3]], ptr [[IE_INDEX4]], align 4
+; CHECK-NEXT:    [[IE_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 [[I]]
+; CHECK-NEXT:    [[IE_LOAD1:%.*]] = load i32, ptr [[IE_DYNINDEX]], align 4
+; CHECK-NEXT:    store i32 [[A]], ptr [[IE_DYNINDEX]], align 4
+; CHECK-NEXT:    [[IE_LOAD:%.*]] = load i32, ptr [[IE_INDEX]], align 4
+; CHECK-NEXT:    [[IE_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD]], i32 0
+; CHECK-NEXT:    [[IE_LOAD5:%.*]] = load i32, ptr [[IE_INDEX2]], align 4
+; CHECK-NEXT:    [[IE_INSERT6:%.*]] = insertelement <3 x i32> [[IE_INSERT]], i32 [[IE_LOAD5]], i32 1
+; CHECK-NEXT:    [[IE_LOAD7:%.*]] = load i32, ptr [[IE_INDEX4]], align 4
+; CHECK-NEXT:    [[IE_INSERT8:%.*]] = insertelement <3 x i32> [[IE_INSERT6]], i32 [[IE_LOAD7]], i32 2
+; CHECK-NEXT:    store i32 [[IE_LOAD1]], ptr [[IE_DYNINDEX]], align 4
+; CHECK-NEXT:    ret <3 x i32> [[IE_INSERT8]]
+;
+  %ie = insertelement <3 x i32> %v, i32 %a, i32 %i
+  ret <3 x i32> %ie
+}
+
+; An extractelement with a constant index must be left untouched: no alloca or array conversion is emitted
+define i16 @extract_i16_vec_constant(<4 x i16> %v) {
+; CHECK-LABEL: define i16 @extract_i16_vec_constant(
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT:    [[EE:%.*]] = extractelement <4 x i16> [[V]], i32 1
+; CHECK-NEXT:    ret i16 [[EE]]
+;
+  %ee = extractelement <4 x i16> %v, i32 1
+  ret i16 %ee
+}
+
+; An insertelement with a constant index must be left untouched: no alloca or array conversion is emitted
+define <2 x half> @insert_half_vec_constant(<2 x half> %v, half %a) {
+; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
+; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
+; CHECK-NEXT:    [[IE:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
+; CHECK-NEXT:    ret <2 x half> [[IE]]
+;
+  %ie = insertelement <2 x half> %v, half %a, i32 1
+  ret <2 x half> %ie
+}
+