diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index 5d1dc50fdb0dd..a726071e0dcec 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(DirectXCodeGen
   DXContainerGlobals.cpp
   DXILDataScalarization.cpp
   DXILFinalizeLinkage.cpp
+  DXILFlattenArrays.cpp
   DXILIntrinsicExpansion.cpp
   DXILOpBuilder.cpp
   DXILOpLowering.cpp
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
new file mode 100644
index 0000000000000..dec3a9b4a8264
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -0,0 +1,443 @@
+//===- DXILFlattenArrays.cpp - Flattens DXIL Arrays ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file This file contains a pass to flatten arrays for the DirectX Backend.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DXILFlattenArrays.h"
+#include "DirectX.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DXILResource.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/ReplaceConstant.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+#define DEBUG_TYPE "dxil-flatten-arrays"
+
+using namespace llvm;
+namespace {
+
+class DXILFlattenArraysLegacy : public ModulePass {
+
+public:
+  bool runOnModule(Module &M) override;
+  DXILFlattenArraysLegacy() : ModulePass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  static char ID; // Pass identification.
+};
+
+struct GEPData {
+  ArrayType *ParentArrayType;
+  Value *ParentOperand;
+  SmallVector<Value *> Indices;
+  SmallVector<uint64_t> Dims;
+  bool AllIndicesAreConstInt;
+};
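+
+// Editorial note (illustrative, not part of the original patch): the visitor
+// below rewrites a chain of GEPs over an N-dimensional array into a single
+// GEP over the flattened one-dimensional array, using the usual row-major
+// rule. For example, indexing a [2 x [3 x [4 x i32]]] with (Row, Col, Elt)
+// produces the flat index
+//   Elt * 1 + Col * 4 + Row * (4 * 3) = Elt + 4 * Col + 12 * Row
+// into a [24 x i32], which matches the mul/add chains checked in
+// flatten-array.ll below.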
+class DXILFlattenArraysVisitor
+    : public InstVisitor<DXILFlattenArraysVisitor, bool> {
+public:
+  DXILFlattenArraysVisitor() {}
+  bool visit(Function &F);
+  // InstVisitor methods. They return true if the instruction was flattened,
+  // false if nothing changed.
+  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
+  bool visitAllocaInst(AllocaInst &AI);
+  bool visitInstruction(Instruction &I) { return false; }
+  bool visitSelectInst(SelectInst &SI) { return false; }
+  bool visitICmpInst(ICmpInst &ICI) { return false; }
+  bool visitFCmpInst(FCmpInst &FCI) { return false; }
+  bool visitUnaryOperator(UnaryOperator &UO) { return false; }
+  bool visitBinaryOperator(BinaryOperator &BO) { return false; }
+  bool visitCastInst(CastInst &CI) { return false; }
+  bool visitBitCastInst(BitCastInst &BCI) { return false; }
+  bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
+  bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
+  bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
+  bool visitPHINode(PHINode &PHI) { return false; }
+  bool visitLoadInst(LoadInst &LI);
+  bool visitStoreInst(StoreInst &SI);
+  bool visitCallInst(CallInst &ICI) { return false; }
+  bool visitFreezeInst(FreezeInst &FI) { return false; }
+  static bool isMultiDimensionalArray(Type *T);
+  static std::pair<unsigned, Type *> getElementCountAndType(Type *ArrayTy);
+
+private:
+  SmallVector<WeakTrackingVH> PotentiallyDeadInstrs;
+  DenseMap<GetElementPtrInst *, GEPData> GEPChainMap;
+  bool finish();
+  ConstantInt *genConstFlattenIndices(ArrayRef<Value *> Indices,
+                                      ArrayRef<uint64_t> Dims,
+                                      IRBuilder<> &Builder);
+  Value *genInstructionFlattenIndices(ArrayRef<Value *> Indices,
+                                      ArrayRef<uint64_t> Dims,
+                                      IRBuilder<> &Builder);
+  void recursivelyCollectGEPs(
+      GetElementPtrInst &CurrGEP, ArrayType *FlattenedArrayType,
+      Value *PtrOperand, unsigned &GEPChainUseCount,
+      SmallVector<Value *> Indices = SmallVector<Value *>(),
+      SmallVector<uint64_t> Dims = SmallVector<uint64_t>(),
+      bool AllIndicesAreConstInt = true);
+  bool visitGetElementPtrInstInGEPChain(GetElementPtrInst &GEP);
+  bool visitGetElementPtrInstInGEPChainBase(GEPData &GEPInfo,
+                                            GetElementPtrInst &GEP);
+};
+} // namespace
+
+bool DXILFlattenArraysVisitor::finish() {
+  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
+  return true;
+}
+
+bool DXILFlattenArraysVisitor::isMultiDimensionalArray(Type *T) {
+  if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
+    return isa<ArrayType>(ArrType->getElementType());
+  return false;
+}
+
+std::pair<unsigned, Type *>
+DXILFlattenArraysVisitor::getElementCountAndType(Type *ArrayTy) {
+  unsigned TotalElements = 1;
+  Type *CurrArrayTy = ArrayTy;
+  while (auto *InnerArrayTy = dyn_cast<ArrayType>(CurrArrayTy)) {
+    TotalElements *= InnerArrayTy->getNumElements();
+    CurrArrayTy = InnerArrayTy->getElementType();
+  }
+  return std::make_pair(TotalElements, CurrArrayTy);
+}
+
+ConstantInt *DXILFlattenArraysVisitor::genConstFlattenIndices(
+    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
+  assert(Indices.size() == Dims.size() &&
+         "Indices and dimensions should be the same");
+  unsigned FlatIndex = 0;
+  unsigned Multiplier = 1;
+
+  for (int I = Indices.size() - 1; I >= 0; --I) {
+    unsigned DimSize = Dims[I];
+    ConstantInt *CIndex = dyn_cast<ConstantInt>(Indices[I]);
+    assert(CIndex && "This function expects all indices to be ConstantInt");
+    FlatIndex += CIndex->getZExtValue() * Multiplier;
+    Multiplier *= DimSize;
+  }
+  return Builder.getInt32(FlatIndex);
+}
+
+Value *DXILFlattenArraysVisitor::genInstructionFlattenIndices(
+    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
+  if (Indices.size() == 1)
+    return Indices[0];
+
+  Value *FlatIndex = Builder.getInt32(0);
+  unsigned Multiplier = 1;
+
+  for (int I = Indices.size() - 1; I >= 0; --I) {
+    unsigned DimSize = Dims[I];
+    Value *VMultiplier = Builder.getInt32(Multiplier);
+    Value *ScaledIndex = Builder.CreateMul(Indices[I], VMultiplier);
+    FlatIndex = Builder.CreateAdd(FlatIndex, ScaledIndex);
+    Multiplier *= DimSize;
+  }
+  return FlatIndex;
+}
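+
+// Editorial note (not part of the original patch): loads and stores may still
+// reference a flattened global through a constant GEP expression. The two
+// visitors below expand such constant expressions into ordinary GEP
+// instructions via convertUsersOfConstantsToInstructions, so that
+// visitGetElementPtrInst can later collapse them; the load/store itself is
+// left untouched, hence they return false.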
+bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) {
+  unsigned NumOperands = LI.getNumOperands();
+  for (unsigned I = 0; I < NumOperands; ++I) {
+    Value *CurrOperand = LI.getOperand(I);
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOperand);
+    if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+      convertUsersOfConstantsToInstructions(CE,
+                                            /*RestrictToFunc=*/nullptr,
+                                            /*RemoveDeadConstants=*/false,
+                                            /*IncludeSelf=*/true);
+      return false;
+    }
+  }
+  return false;
+}
+
+bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) {
+  unsigned NumOperands = SI.getNumOperands();
+  for (unsigned I = 0; I < NumOperands; ++I) {
+    Value *CurrOperand = SI.getOperand(I);
+    ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOperand);
+    if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+      convertUsersOfConstantsToInstructions(CE,
+                                            /*RestrictToFunc=*/nullptr,
+                                            /*RemoveDeadConstants=*/false,
+                                            /*IncludeSelf=*/true);
+      return false;
+    }
+  }
+  return false;
+}
+
+bool DXILFlattenArraysVisitor::visitAllocaInst(AllocaInst &AI) {
+  if (!isMultiDimensionalArray(AI.getAllocatedType()))
+    return false;
+
+  ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
+  IRBuilder<> Builder(&AI);
+  auto [TotalElements, BaseType] = getElementCountAndType(ArrType);
+
+  ArrayType *FlattenedArrayType = ArrayType::get(BaseType, TotalElements);
+  AllocaInst *FlatAlloca = Builder.CreateAlloca(FlattenedArrayType, nullptr,
+                                                AI.getName() + ".flat");
+  FlatAlloca->setAlignment(AI.getAlign());
+  AI.replaceAllUsesWith(FlatAlloca);
+  AI.eraseFromParent();
+  return true;
+}
+
+void DXILFlattenArraysVisitor::recursivelyCollectGEPs(
+    GetElementPtrInst &CurrGEP, ArrayType *FlattenedArrayType,
+    Value *PtrOperand, unsigned &GEPChainUseCount,
+    SmallVector<Value *> Indices, SmallVector<uint64_t> Dims,
+    bool AllIndicesAreConstInt) {
+  Value *LastIndex = CurrGEP.getOperand(CurrGEP.getNumOperands() - 1);
+  AllIndicesAreConstInt &= isa<ConstantInt>(LastIndex);
+  Indices.push_back(LastIndex);
+  assert(isa<ArrayType>(CurrGEP.getSourceElementType()));
+  Dims.push_back(
+      cast<ArrayType>(CurrGEP.getSourceElementType())->getNumElements());
+  bool IsMultiDimArr = isMultiDimensionalArray(CurrGEP.getSourceElementType());
+  if (!IsMultiDimArr) {
+    assert(GEPChainUseCount < FlattenedArrayType->getNumElements());
+    GEPChainMap.insert(
+        {&CurrGEP,
+         {std::move(FlattenedArrayType), PtrOperand, std::move(Indices),
+          std::move(Dims), AllIndicesAreConstInt}});
+    return;
+  }
+  bool GepUses = false;
+  for (auto *User : CurrGEP.users()) {
+    if (GetElementPtrInst *NestedGEP = dyn_cast<GetElementPtrInst>(User)) {
+      recursivelyCollectGEPs(*NestedGEP, FlattenedArrayType, PtrOperand,
+                             ++GEPChainUseCount, Indices, Dims,
+                             AllIndicesAreConstInt);
+      GepUses = true;
+    }
+  }
+  // This case handles a GEP chain that does not end in a 1-d array.
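+  // Editorial example (illustrative, not part of the original patch): for
+  // @a = [2 x [3 x [4 x i32]]], a chain that stops one level early, e.g.
+  //   %1 = getelementptr [2 x [3 x [4 x i32]]], ptr @a, i32 0, i32 %row
+  //   %2 = getelementptr [3 x [4 x i32]], ptr %1, i32 0, i32 %col
+  //   %3 = load i32, ptr %2
+  // still has to be collapsed here, as exercised by
+  // @global_incomplete_gep_chain in flatten-array.ll.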
+  if (IsMultiDimArr && GEPChainUseCount > 0 && !GepUses) {
+    GEPChainMap.insert(
+        {&CurrGEP,
+         {std::move(FlattenedArrayType), PtrOperand, std::move(Indices),
+          std::move(Dims), AllIndicesAreConstInt}});
+  }
+}
+
+bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChain(
+    GetElementPtrInst &GEP) {
+  GEPData GEPInfo = GEPChainMap.at(&GEP);
+  return visitGetElementPtrInstInGEPChainBase(GEPInfo, GEP);
+}
+bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChainBase(
+    GEPData &GEPInfo, GetElementPtrInst &GEP) {
+  IRBuilder<> Builder(&GEP);
+  Value *FlatIndex;
+  if (GEPInfo.AllIndicesAreConstInt)
+    FlatIndex = genConstFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder);
+  else
+    FlatIndex =
+        genInstructionFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder);
+
+  ArrayType *FlattenedArrayType = GEPInfo.ParentArrayType;
+  Value *FlatGEP =
+      Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParentOperand, FlatIndex,
+                        GEP.getName() + ".flat", GEP.isInBounds());
+
+  GEP.replaceAllUsesWith(FlatGEP);
+  GEP.eraseFromParent();
+  return true;
+}
+
+bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
+  auto It = GEPChainMap.find(&GEP);
+  if (It != GEPChainMap.end())
+    return visitGetElementPtrInstInGEPChain(GEP);
+  if (!isMultiDimensionalArray(GEP.getSourceElementType()))
+    return false;
+
+  ArrayType *ArrType = cast<ArrayType>(GEP.getSourceElementType());
+  IRBuilder<> Builder(&GEP);
+  auto [TotalElements, BaseType] = getElementCountAndType(ArrType);
+  ArrayType *FlattenedArrayType = ArrayType::get(BaseType, TotalElements);
+
+  Value *PtrOperand = GEP.getPointerOperand();
+
+  unsigned GEPChainUseCount = 0;
+  recursivelyCollectGEPs(GEP, FlattenedArrayType, PtrOperand, GEPChainUseCount);
+
+  // NOTE: hasNUses(0) is not the same as GEPChainUseCount == 0.
+  // Here recursion is used to get the length of the GEP chain.
+  // Handle zero uses here because there won't be an update via
+  // a child in the chain later.
+  if (GEPChainUseCount == 0) {
+    SmallVector<Value *> Indices({GEP.getOperand(GEP.getNumOperands() - 1)});
+    SmallVector<uint64_t> Dims({ArrType->getNumElements()});
+    bool AllIndicesAreConstInt = isa<ConstantInt>(Indices[0]);
+    GEPData GEPInfo{std::move(FlattenedArrayType), PtrOperand,
+                    std::move(Indices), std::move(Dims), AllIndicesAreConstInt};
+    return visitGetElementPtrInstInGEPChainBase(GEPInfo, GEP);
+  }
+
+  PotentiallyDeadInstrs.emplace_back(&GEP);
+  return false;
+}
+
+bool DXILFlattenArraysVisitor::visit(Function &F) {
+  bool MadeChange = false;
+  ReversePostOrderTraversal<Function *> RPOT(&F);
+  for (BasicBlock *BB : make_early_inc_range(RPOT)) {
+    for (Instruction &I : make_early_inc_range(*BB))
+      MadeChange |= InstVisitor::visit(I);
+  }
+  finish();
+  return MadeChange;
+}
+
+static void collectElements(Constant *Init,
+                            SmallVectorImpl<Constant *> &Elements) {
+  // Base case: If Init is not an array, add it directly to the vector.
+  if (!isa<ArrayType>(Init->getType())) {
+    Elements.push_back(Init);
+    return;
+  }
+
+  // Recursive case: Process each element in the array.
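+  // Editorial note (illustrative, not part of the original patch): nested
+  // initializers may arrive either as a ConstantArray of sub-arrays or as a
+  // packed ConstantDataArray; both are walked the same way. For example,
+  //   [2 x [2 x i32]] [[2 x i32] [i32 1, i32 2], [2 x i32] [i32 3, i32 4]]
+  // collects to i32 1, 2, 3, 4.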
+  if (auto *ArrayConstant = dyn_cast<ConstantArray>(Init)) {
+    for (unsigned I = 0; I < ArrayConstant->getNumOperands(); ++I) {
+      collectElements(ArrayConstant->getOperand(I), Elements);
+    }
+  } else if (auto *DataArrayConstant = dyn_cast<ConstantDataArray>(Init)) {
+    for (unsigned I = 0; I < DataArrayConstant->getNumElements(); ++I) {
+      collectElements(DataArrayConstant->getElementAsConstant(I), Elements);
+    }
+  } else {
+    llvm_unreachable(
+        "Expected a ConstantArray or ConstantDataArray for array initializer!");
+  }
+}
+
+static Constant *transformInitializer(Constant *Init, Type *OrigType,
+                                      ArrayType *FlattenedType,
+                                      LLVMContext &Ctx) {
+  // Handle ConstantAggregateZero (zero-initialized constants)
+  if (isa<ConstantAggregateZero>(Init))
+    return ConstantAggregateZero::get(FlattenedType);
+
+  // Handle UndefValue (undefined constants)
+  if (isa<UndefValue>(Init))
+    return UndefValue::get(FlattenedType);
+
+  if (!isa<ArrayType>(OrigType))
+    return Init;
+
+  SmallVector<Constant *> FlattenedElements;
+  collectElements(Init, FlattenedElements);
+  assert(FlattenedType->getNumElements() == FlattenedElements.size() &&
+         "The number of collected elements should match the FlattenedType");
+  return ConstantArray::get(FlattenedType, FlattenedElements);
+}
+
+static void
+flattenGlobalArrays(Module &M,
+                    DenseMap<GlobalVariable *, GlobalVariable *> &GlobalMap) {
+  LLVMContext &Ctx = M.getContext();
+  for (GlobalVariable &G : M.globals()) {
+    Type *OrigType = G.getValueType();
+    if (!DXILFlattenArraysVisitor::isMultiDimensionalArray(OrigType))
+      continue;
+
+    ArrayType *ArrType = cast<ArrayType>(OrigType);
+    auto [TotalElements, BaseType] =
+        DXILFlattenArraysVisitor::getElementCountAndType(ArrType);
+    ArrayType *FlattenedArrayType = ArrayType::get(BaseType, TotalElements);
+
+    // Create a new global variable with the updated type
+    // Note: Initializer is set via transformInitializer
+    GlobalVariable *NewGlobal = new GlobalVariable(
+        M, FlattenedArrayType, G.isConstant(), G.getLinkage(),
+        /*Initializer=*/nullptr, G.getName() + ".1dim", &G,
+        G.getThreadLocalMode(), G.getAddressSpace(),
+        G.isExternallyInitialized());
+
+    // Copy relevant attributes
+    NewGlobal->setUnnamedAddr(G.getUnnamedAddr());
+    if (G.getAlignment() > 0) {
+      NewGlobal->setAlignment(G.getAlign());
+    }
+
+    if (G.hasInitializer()) {
+      Constant *Init = G.getInitializer();
+      Constant *NewInit =
+          transformInitializer(Init, OrigType, FlattenedArrayType, Ctx);
+      NewGlobal->setInitializer(NewInit);
+    }
+    GlobalMap[&G] = NewGlobal;
+  }
+}
+
+static bool flattenArrays(Module &M) {
+  bool MadeChange = false;
+  DXILFlattenArraysVisitor Impl;
+  DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
+  flattenGlobalArrays(M, GlobalMap);
+  for (auto &F : make_early_inc_range(M.functions())) {
+    if (F.isIntrinsic())
+      continue;
+    MadeChange |= Impl.visit(F);
+  }
+  for (auto &[Old, New] : GlobalMap) {
+    Old->replaceAllUsesWith(New);
+    Old->eraseFromParent();
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
+PreservedAnalyses DXILFlattenArrays::run(Module &M, ModuleAnalysisManager &) {
+  bool MadeChanges = flattenArrays(M);
+  if (!MadeChanges)
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<DXILResourceAnalysis>();
+  return PA;
+}
+
+bool DXILFlattenArraysLegacy::runOnModule(Module &M) {
+  return flattenArrays(M);
+}
+
+void DXILFlattenArraysLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<DXILResourceWrapperPass>();
+}
+
+char DXILFlattenArraysLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DXILFlattenArraysLegacy, DEBUG_TYPE,
+                      "DXIL Array Flattener", false, false)
+INITIALIZE_PASS_END(DXILFlattenArraysLegacy, DEBUG_TYPE, "DXIL Array Flattener",
+                    false, false)
+
+ModulePass *llvm::createDXILFlattenArraysLegacyPass() {
+  return new DXILFlattenArraysLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.h b/llvm/lib/Target/DirectX/DXILFlattenArrays.h
new file mode 100644
index 0000000000000..aae68496af620
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.h
@@ -0,0 +1,23 @@
+//===- DXILFlattenArrays.h - Perform flattening of DXIL Arrays -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
+#define LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// A pass that transforms multidimensional arrays into one-dimensional arrays.
+class DXILFlattenArrays : public PassInfoMixin<DXILFlattenArrays> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index 3221779be2f31..3454f16ecd595 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -40,6 +40,12 @@ void initializeDXILDataScalarizationLegacyPass(PassRegistry &);
 /// Pass to scalarize llvm global data into a DXIL legal form
 ModulePass *createDXILDataScalarizationLegacyPass();
 
+/// Initializer for the DXIL Array Flattening pass
+void initializeDXILFlattenArraysLegacyPass(PassRegistry &);
+
+/// Pass to flatten arrays into a one-dimensional DXIL-legal form
+ModulePass *createDXILFlattenArraysLegacyPass();
+
 /// Initializer for DXILOpLowering
 void initializeDXILOpLoweringLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/DirectX/DirectXPassRegistry.def b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
index ae729a1082b86..a0f864ed39375 100644
--- a/llvm/lib/Target/DirectX/DirectXPassRegistry.def
+++ b/llvm/lib/Target/DirectX/DirectXPassRegistry.def
@@ -24,6 +24,7 @@ MODULE_ANALYSIS("dxil-resource-md", DXILResourceMDAnalysis())
 #define MODULE_PASS(NAME, CREATE_PASS)
 #endif
 MODULE_PASS("dxil-data-scalarization", DXILDataScalarization())
+MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays())
 MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion())
 MODULE_PASS("dxil-op-lower", DXILOpLowering())
 MODULE_PASS("dxil-pretty-printer", DXILPrettyPrinterPass(dbgs()))
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 18251ea3bd01d..59dbf053d6c22 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -13,6 +13,7 @@
 #include "DirectXTargetMachine.h"
 #include "DXILDataScalarization.h"
+#include "DXILFlattenArrays.h"
 #include "DXILIntrinsicExpansion.h"
 #include "DXILOpLowering.h"
 #include "DXILPrettyPrinter.h"
@@ -48,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   auto *PR = PassRegistry::getPassRegistry();
   initializeDXILIntrinsicExpansionLegacyPass(*PR);
   initializeDXILDataScalarizationLegacyPass(*PR);
+  initializeDXILFlattenArraysLegacyPass(*PR);
   initializeScalarizerLegacyPassPass(*PR);
   initializeDXILPrepareModulePass(*PR);
   initializeEmbedDXILPassPass(*PR);
@@ -91,6 +93,7 @@ class DirectXPassConfig : public TargetPassConfig {
     addPass(createDXILDataScalarizationLegacyPass());
     ScalarizerPassOptions DxilScalarOptions;
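+    // Editorial note (not part of the original patch): the array-flattening
+    // pass is added below, after DXIL data scalarization and before the
+    // scalarizer; llc-pipeline.ll checks the resulting pass order.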
DxilScalarOptions.ScalarizeLoadStore = true; + addPass(createDXILFlattenArraysLegacyPass()); addPass(createScalarizerPass(DxilScalarOptions)); addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILFinalizeLinkageLegacyPass()); diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll new file mode 100644 index 0000000000000..fd894e0104c4e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/flatten-array.ll @@ -0,0 +1,188 @@ + +; RUN: opt -S -dxil-flatten-arrays %s | FileCheck %s + +; CHECK-LABEL: alloca_2d_test +define void @alloca_2d_test () { +; CHECK-NEXT: alloca [9 x i32], align 4 +; CHECK-NEXT: ret void +; + %1 = alloca [3 x [3 x i32]], align 4 + ret void +} + +; CHECK-LABEL: alloca_3d_test +define void @alloca_3d_test () { +; CHECK-NEXT: alloca [8 x i32], align 4 +; CHECK-NEXT: ret void +; + %1 = alloca [2 x[2 x [2 x i32]]], align 4 + ret void +} + +; CHECK-LABEL: alloca_4d_test +define void @alloca_4d_test () { +; CHECK-NEXT: alloca [16 x i32], align 4 +; CHECK-NEXT: ret void +; + %1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4 + ret void +} + +; CHECK-LABEL: gep_2d_test +define void @gep_2d_test () { + ; CHECK: [[a:%.*]] = alloca [9 x i32], align 4 + ; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 {{[0-8]}} + ; CHECK-NEXT: ret void + %1 = alloca [3 x [3 x i32]], align 4 + %g2d0 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 0 + %g1d_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 0 + %g1d_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 1 + %g1d_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 2 + %g2d1 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 1 + %g1d1_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 0 + %g1d1_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 1 + %g1d1_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 2 + %g2d2 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 2 + %g1d2_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 0 + %g1d2_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 1 + %g1d2_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 2 + + ret void +} + +; CHECK-LABEL: gep_3d_test +define void @gep_3d_test () { + ; CHECK: [[a:%.*]] = alloca [8 x i32], align 4 + ; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 {{[0-7]}} + ; CHECK-NEXT: ret void + %1 = alloca [2 x[2 x [2 x i32]]], align 4 + %g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 0 + %g2d0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0 + %g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 0 + %g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 1 + %g2d1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1 + %g1d1_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 0 + %g1d1_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 1 + %g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 1 + %g2d2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0 + %g1d2_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 0 + %g1d2_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 1 + %g2d3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* 
%g3d1, i32 0, i32 1 + %g1d3_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 0 + %g1d3_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 1 + ret void +} + +; CHECK-LABEL: gep_4d_test +define void @gep_4d_test () { + ; CHECK: [[a:%.*]] = alloca [16 x i32], align 4 + ; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 {{[0-9]|1[0-5]}} + ; CHECK-NEXT: ret void + %1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4 + %g4d0 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 0 + %g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 0 + %g2d0_0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0 + %g1d_0 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 0 + %g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 1 + %g2d0_1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1 + %g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 0 + %g1d_3 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 1 + %g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 1 + %g2d0_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0 + %g1d_4 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 0 + %g1d_5 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 1 + %g2d1_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 1 + %g1d_6 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 0 + %g1d_7 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 1 + %g4d1 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 1 + %g3d0_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 0 + %g2d0_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 0 + %g1d_8 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 0 + %g1d_9 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 1 + %g2d0_4 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 1 + %g1d_10 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 0 + %g1d_11 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 1 + %g3d1_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 1 + %g2d0_5 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 0 + %g1d_12 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 0 + %g1d_13 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 1 + %g2d1_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 1 + %g1d_14 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 0 + %g1d_15 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 1 + ret void +} + + +@a = internal global [2 x [3 x [4 x i32]]] [[3 x [4 x i32]] [[4 x i32] [i32 0, i32 1, i32 2, i32 3], + [4 x i32] [i32 4, i32 5, i32 6, i32 7], + [4 x i32] [i32 8, i32 9, i32 10, i32 11]], + [3 x [4 x i32]] [[4 x i32] [i32 12, i32 13, i32 14, i32 15], + [4 x i32] [i32 16, i32 17, i32 18, i32 19], + [4 x i32] [i32 20, i32 21, i32 22, i32 23]]], align 4 + +@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16 + +define void @global_gep_load() { + ; CHECK: [[GEP_PTR:%.*]] = getelementptr 
inbounds [24 x i32], ptr @a.1dim, i32 6 + ; CHECK: load i32, ptr [[GEP_PTR]], align 4 + ; CHECK-NEXT: ret void + %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 0 + %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1 + %3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 2 + %4 = load i32, i32* %3, align 4 + ret void +} + +define void @global_gep_load_index(i32 %row, i32 %col, i32 %timeIndex) { +; CHECK-LABEL: define void @global_gep_load_index( +; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]], i32 [[TIMEINDEX:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TIMEINDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[COL]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[ROW]], 12 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP6]] +; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}} +; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}} +; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}} +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLAT]], align 4 +; CHECK-NEXT: ret void +; + %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row + %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col + %3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 %timeIndex + %4 = load i32, i32* %3, align 4 + ret void +} + +define void @global_incomplete_gep_chain(i32 %row, i32 %col) { +; CHECK-LABEL: define void @global_incomplete_gep_chain( +; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[COL]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[ROW]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP4]] +; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}} +; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}} +; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}} +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFLAT]], align 4 +; CHECK-NEXT: ret void +; + %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row + %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col + %4 = load i32, i32* %2, align 4 + ret void +} + +define void @global_gep_store() { + ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 13 + ; CHECK: store i32 1, ptr [[GEP_PTR]], align 4 + ; CHECK-NEXT: ret void + %1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @b, i32 0, i32 1 + %2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 0 + %3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 1 + store i32 1, i32* %3, align 4 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index 224037cfe7fbe..f0950df08eff5 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -9,6 +9,7 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Intrinsic Expansion ; CHECK-NEXT: DXIL Data Scalarization +; CHECK-NEXT: DXIL Array Flattener ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Scalarize vector 
operations
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
new file mode 100644
index 0000000000000..5972520383650
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool llc --version 5
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+; Make sure we can load groupshared, static vectors and arrays of vectors
+
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
+@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
+
+; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
+; CHECK: @groushared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
+
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @arrayofVecData.scalarized
+; CHECK-NOT: @vecData
+; CHECK-NOT: @staticArrayOfVecData
+; CHECK-NOT: @staticArrayOfVecData.scalarized
+; CHECK-NOT: @groushared2dArrayofVectors
+; CHECK-NOT: @groushared2dArrayofVectors.scalarized
+
+
+define <4 x i32> @load_array_vec_test() #0 {
+; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 2) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
+; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
+; CHECK-NEXT: 
[[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3 +; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4 +; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]] +; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]] +; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]] +; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]] +; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0 +; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1 +; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3 +; CHECK-NEXT: ret <4 x i32> [[TMP16]] +; + %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 + %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4 + %3 = add <4 x i32> %1, %2 + ret <4 x i32> %3 +} + + +define <4 x i32> @load_vec_test() #0 { +; CHECK-LABEL: define <4 x i32> @load_vec_test( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @vecData.scalarized to ptr addrspace(3) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1) to ptr addrspace(3) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2) to ptr addrspace(3) +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3) +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i64 1 +; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i64 2 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i64 3 +; CHECK-NEXT: ret <4 x i32> [[TMP9]] +; + %1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4 + ret <4 x i32> %1 +} + +define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { +; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test( +; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr +; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[TMP3]], i32 1 +; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast ptr [[DOTFLAT]] to ptr +; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[TMP4]], i32 2 +; CHECK-NEXT: 
[[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr +; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3 +; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4 +; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i64 1 +; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i64 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i64 3 +; CHECK-NEXT: ret <4 x i32> [[TMP6]] +; + %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index + %4 = load <4 x i32>, <4 x i32>* %3, align 4 + ret <4 x i32> %4 +} + +define <4 x i32> @multid_load_test() #0 { +; CHECK-LABEL: define <4 x i32> @multid_load_test( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3) +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2) to ptr addrspace(3) +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3) +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3) +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1 +; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2 +; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3 +; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4 +; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]] +; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]] +; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]] +; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]] +; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0 +; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1 +; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2 +; 
CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
+; CHECK-NEXT: ret <4 x i32> [[TMP16]]
+;
+  %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
+  %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
+  %3 = add <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+
+attributes #0 = { convergent norecurse nounwind "hlsl.export"}
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
new file mode 100644
index 0000000000000..e5ec62cee759e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -passes='dxil-data-scalarization,scalarizer,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure we can store groupshared, static vectors and arrays of vectors
+
+@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@"vecData" = external addrspace(3) global <4 x i32>, align 4
+
+; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
+; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
+; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @vecData
+
+; CHECK-LABEL: store_array_vec_test
+define void @store_array_vec_test () local_unnamed_addr #0 {
+  ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
+  ; CHECK-NEXT: ret void
+  store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
+  store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
+  ret void
+  }
+
+; CHECK-LABEL: store_vec_test
+define void @store_vec_test(<4 x i32> %inputVec) #0 {
+  ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
+  ; CHECK-NEXT: ret void
+  store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
+  ret void
+}
+
+attributes #0 = { convergent norecurse nounwind "hlsl.export"}
diff --git a/llvm/test/CodeGen/DirectX/scalar-data.ll b/llvm/test/CodeGen/DirectX/scalar-data.ll
index c436f1eae4425..4861a0890f136 100644
--- a/llvm/test/CodeGen/DirectX/scalar-data.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-data.ll
@@ -1,12 +1,15 @@
-; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 ; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
 
-; Make sure we don't touch arrays without vectors and that can recurse multiple-dimension arrays of vectors
+; Make sure we don't touch arrays without vectors and that we can recurse into and flatten multi-dimensional arrays of vectors
 
 @staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
 @"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16
 
 ; CHECK @staticArray
 ; CHECK-NOT: @staticArray.scalarized
-; CHECK: @groushared3dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x [4 x i32]]]] zeroinitializer, 
align 16 +; CHECK-NOT: @staticArray.scalarized.1dim +; CHECK-NOT: @staticArray.1dim + +; CHECK: @groushared3dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [108 x i32] zeroinitializer, align 16 +; CHECK-NOT: @groushared3dArrayofVectors.scalarized ; CHECK-NOT: @groushared3dArrayofVectors diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll index b911a8f7855bb..a32db8b8e3995 100644 --- a/llvm/test/CodeGen/DirectX/scalar-load.ll +++ b/llvm/test/CodeGen/DirectX/scalar-load.ll @@ -1,5 +1,4 @@ ; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s -; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s ; Make sure we can load groupshared, static vectors and arrays of vectors @@ -22,7 +21,6 @@ ; CHECK-LABEL: load_array_vec_test define <4 x i32> @load_array_vec_test() #0 { ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4 - ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4 %3 = add <4 x i32> %1, %2 @@ -32,7 +30,6 @@ define <4 x i32> @load_array_vec_test() #0 { ; CHECK-LABEL: load_vec_test define <4 x i32> @load_vec_test() #0 { ; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}} - ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4 ret <4 x i32> %1 } @@ -41,7 +38,6 @@ define <4 x i32> @load_vec_test() #0 { define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4 - ; CHECK-NOT: load i32, ptr {{.*}}, align 4 %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index %4 = load <4 x i32>, <4 x i32>* %3, align 4 ret <4 x i32> %4 @@ -50,7 +46,6 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { ; CHECK-LABEL: multid_load_test define <4 x i32> @multid_load_test() #0 { ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4 - ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4 %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4 %3 = add <4 x i32> %1, %2 diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll index c45481e8cae14..7e9fe0e330661 100644 --- a/llvm/test/CodeGen/DirectX/scalar-store.ll +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -1,4 +1,3 @@ -; RUN: opt -S -passes='dxil-data-scalarization,scalarizer,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s ; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | 
FileCheck %s
 
 ; Make sure we can store groupshared, static vectors and arrays of vectors
 
@@ -6,15 +5,17 @@
 @"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
 @"vecData" = external addrspace(3) global <4 x i32>, align 4
 
-; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
+
+; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
 ; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
 ; CHECK-NOT: @arrayofVecData
+; CHECK-NOT: @arrayofVecData.scalarized
 ; CHECK-NOT: @vecData
 
 ; CHECK-LABEL: store_array_vec_test
 define void @store_array_vec_test () local_unnamed_addr #0 {
-  ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
-  ; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
+  ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.1dim.*|%.*)}}, align {{4|8|16}}
+  ; CHECK-NEXT: ret void
   store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
   store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
   ret void
@@ -23,7 +24,7 @@ define void @store_array_vec_test () local_unnamed_addr #0 {
 ; CHECK-LABEL: store_vec_test
 define void @store_vec_test(<4 x i32> %inputVec) #0 {
   ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
-  ; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3)
+  ; CHECK-NEXT: ret void
   store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
   ret void
 }