diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index a837f00732748..f2451b16e78be 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4641,11 +4641,17 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, LHS.getBaseInfo(), TBAAAccessInfo()); } - // The HLSL runtime handle the subscript expression on global resource arrays. - if (getLangOpts().HLSL && (E->getType()->isHLSLResourceRecord() || - E->getType()->isHLSLResourceRecordArray())) { - std::optional LV = - CGM.getHLSLRuntime().emitResourceArraySubscriptExpr(E, *this); + // The HLSL runtime handles subscript expressions on global resource arrays + // and objects with HLSL buffer layouts. + if (getLangOpts().HLSL) { + std::optional LV; + if (E->getType()->isHLSLResourceRecord() || + E->getType()->isHLSLResourceRecordArray()) { + LV = CGM.getHLSLRuntime().emitResourceArraySubscriptExpr(E, *this); + } else if (E->getType().getAddressSpace() == LangAS::hlsl_constant) { + LV = CGM.getHLSLRuntime().emitBufferArraySubscriptExpr(E, *this, + EmitIdxAfterBase); + } if (LV.has_value()) return *LV; } @@ -5110,6 +5116,11 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { EmitIgnoredExpr(E->getBase()); return EmitDeclRefLValue(DRE); } + if (getLangOpts().HLSL && + E->getType().getAddressSpace() == LangAS::hlsl_constant) { + // We have an HLSL buffer - emit using HLSL's layout rules. + return CGM.getHLSLRuntime().emitBufferMemberExpr(*this, E); + } Expr *BaseExpr = E->getBase(); // Check whether the underlying base pointer is a constant null. 
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index eee397f1f3d19..bd7d30b10d4d0 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -2279,6 +2279,10 @@ void CodeGenFunction::EmitAggregateCopy(LValue Dest, LValue Src, QualType Ty, } } + if (getLangOpts().HLSL && Ty.getAddressSpace() == LangAS::hlsl_constant) + if (CGM.getHLSLRuntime().emitBufferCopy(*this, DestPtr, SrcPtr, Ty)) + return; + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 4bdba9b3da502..ed076a2a6fbcf 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -13,10 +13,11 @@ //===----------------------------------------------------------------------===// #include "CGHLSLRuntime.h" -#include "Address.h" #include "CGDebugInfo.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "HLSLBufferLayoutBuilder.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" @@ -26,6 +27,7 @@ #include "clang/AST/Type.h" #include "clang/Basic/TargetOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Frontend/HLSL/RootSignatureMetadata.h" @@ -278,23 +280,18 @@ llvm::Triple::ArchType CGHLSLRuntime::getArch() { // Emits constant global variables for buffer constants declarations // and creates metadata linking the constant globals with the buffer global. 
-void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, - llvm::GlobalVariable *BufGV) { +void CGHLSLRuntime::emitBufferGlobalsAndMetadata( + const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *BufGV, + const CGHLSLOffsetInfo &OffsetInfo) { LLVMContext &Ctx = CGM.getLLVMContext(); // get the layout struct from constant buffer target type llvm::Type *BufType = BufGV->getValueType(); - llvm::Type *BufLayoutType = - cast(BufType)->getTypeParameter(0); llvm::StructType *LayoutStruct = cast( - cast(BufLayoutType)->getTypeParameter(0)); + cast(BufType)->getTypeParameter(0)); - // Start metadata list associating the buffer global variable with its - // constatns - SmallVector BufGlobals; - BufGlobals.push_back(ValueAsMetadata::get(BufGV)); - - const auto *ElemIt = LayoutStruct->element_begin(); + SmallVector> DeclsWithOffset; + size_t OffsetIdx = 0; for (Decl *D : BufDecl->buffer_decls()) { if (isa(D)) // Nothing to do for this declaration. @@ -326,14 +323,28 @@ void CGHLSLRuntime::emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, continue; } + DeclsWithOffset.emplace_back(VD, OffsetInfo[OffsetIdx++]); + } + + if (!OffsetInfo.empty()) + llvm::stable_sort(DeclsWithOffset, [](const auto &LHS, const auto &RHS) { + return CGHLSLOffsetInfo::compareOffsets(LHS.second, RHS.second); + }); + + // Associate the buffer global variable with its constants + SmallVector BufGlobals; + BufGlobals.reserve(DeclsWithOffset.size() + 1); + BufGlobals.push_back(ValueAsMetadata::get(BufGV)); + + auto ElemIt = LayoutStruct->element_begin(); + for (auto &[VD, _] : DeclsWithOffset) { + if (CGM.getTargetCodeGenInfo().isHLSLPadding(*ElemIt)) + ++ElemIt; + assert(ElemIt != LayoutStruct->element_end() && "number of elements in layout struct does not match"); llvm::Type *LayoutType = *ElemIt++; - // FIXME: handle resources inside user defined structs - // (llvm/wg-hlsl#175) - - // create global variable for the constant and to metadata list GlobalVariable *ElemGV = 
cast(CGM.GetAddrOfGlobalVar(VD, LayoutType)); BufGlobals.push_back(ValueAsMetadata::get(ElemGV)); @@ -410,18 +421,17 @@ void CGHLSLRuntime::addBuffer(const HLSLBufferDecl *BufDecl) { // create global variable for the constant buffer CGHLSLOffsetInfo OffsetInfo = CGHLSLOffsetInfo::fromDecl(*BufDecl); - llvm::TargetExtType *TargetTy = cast( - convertHLSLSpecificType(ResHandleTy, OffsetInfo)); + llvm::Type *LayoutTy = convertHLSLSpecificType(ResHandleTy, OffsetInfo); llvm::GlobalVariable *BufGV = new GlobalVariable( - TargetTy, /*isConstant*/ false, - GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(TargetTy), + LayoutTy, /*isConstant*/ false, + GlobalValue::LinkageTypes::ExternalLinkage, PoisonValue::get(LayoutTy), llvm::formatv("{0}{1}", BufDecl->getName(), BufDecl->isCBuffer() ? ".cb" : ".tb"), GlobalValue::NotThreadLocal); CGM.getModule().insertGlobalVariable(BufGV); // Add globals for constant buffer elements and create metadata nodes - emitBufferGlobalsAndMetadata(BufDecl, BufGV); + emitBufferGlobalsAndMetadata(BufDecl, BufGV, OffsetInfo); // Initialize cbuffer from binding (implicit or explicit) initializeBufferFromBinding(BufDecl, BufGV); @@ -440,7 +450,7 @@ void CGHLSLRuntime::addRootSignature( SignatureDecl->getRootElements(), nullptr, M); } -llvm::TargetExtType * +llvm::StructType * CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { const auto Entry = LayoutTypes.find(StructType); if (Entry != LayoutTypes.end()) @@ -449,7 +459,7 @@ CGHLSLRuntime::getHLSLBufferLayoutType(const RecordType *StructType) { } void CGHLSLRuntime::addHLSLBufferLayoutType(const RecordType *StructType, - llvm::TargetExtType *LayoutTy) { + llvm::StructType *LayoutTy) { assert(getHLSLBufferLayoutType(StructType) == nullptr && "layout type for this struct already exist"); LayoutTypes[StructType] = LayoutTy; @@ -1101,3 +1111,236 @@ std::optional CGHLSLRuntime::emitResourceArraySubscriptExpr( } return CGF.MakeAddrLValue(TmpVar, ResultTy, 
AlignmentSource::Decl); } + +std::optional CGHLSLRuntime::emitBufferArraySubscriptExpr( + const ArraySubscriptExpr *E, CodeGenFunction &CGF, + llvm::function_ref EmitIdxAfterBase) { + // Find the element type to index by first padding the element type per HLSL + // buffer rules, and then padding out to a 16-byte register boundary if + // necessary. + llvm::Type *LayoutTy = + HLSLBufferLayoutBuilder(CGF.CGM).layOutType(E->getType()); + uint64_t LayoutSizeInBits = + CGM.getDataLayout().getTypeSizeInBits(LayoutTy).getFixedValue(); + CharUnits ElementSize = CharUnits::fromQuantity(LayoutSizeInBits / 8); + CharUnits RowAlignedSize = ElementSize.alignTo(CharUnits::fromQuantity(16)); + if (RowAlignedSize > ElementSize) { + llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding( + CGM, RowAlignedSize - ElementSize); + assert(Padding && "No padding type for target?"); + LayoutTy = llvm::StructType::get(CGF.getLLVMContext(), {LayoutTy, Padding}, + /*isPacked=*/true); + } + + // If the layout type doesn't introduce any padding, we don't need to do + // anything special. + llvm::Type *OrigTy = CGF.CGM.getTypes().ConvertTypeForMem(E->getType()); + if (LayoutTy == OrigTy) + return std::nullopt; + + LValueBaseInfo EltBaseInfo; + TBAAAccessInfo EltTBAAInfo; + Address Addr = + CGF.EmitPointerWithAlignment(E->getBase(), &EltBaseInfo, &EltTBAAInfo); + llvm::Value *Idx = EmitIdxAfterBase(/*Promote*/ true); + + // Index into the object as-if we have an array of the padded element type, + // and then dereference the element itself to avoid reading padding that may + // be past the end of the in-memory object. 
+ SmallVector Indices; + Indices.push_back(Idx); + Indices.push_back(llvm::ConstantInt::get(Idx->getType(), 0)); + + llvm::Value *GEP = CGF.Builder.CreateGEP(LayoutTy, Addr.emitRawPointer(CGF), + Indices, "cbufferidx"); + Addr = Address(GEP, Addr.getElementType(), RowAlignedSize, KnownNonNull); + + return CGF.MakeAddrLValue(Addr, E->getType(), EltBaseInfo, EltTBAAInfo); +} + +namespace { +/// Utility for emitting copies following the HLSL buffer layout rules (ie, +/// copying out of a cbuffer). +class HLSLBufferCopyEmitter { + CodeGenFunction &CGF; + Address DestPtr; + Address SrcPtr; + llvm::Type *LayoutTy = nullptr; + + SmallVector CurStoreIndices; + SmallVector CurLoadIndices; + + void emitCopyAtIndices(llvm::Type *FieldTy, llvm::ConstantInt *StoreIndex, + llvm::ConstantInt *LoadIndex) { + CurStoreIndices.push_back(StoreIndex); + CurLoadIndices.push_back(LoadIndex); + auto RestoreIndices = llvm::make_scope_exit([&]() { + CurStoreIndices.pop_back(); + CurLoadIndices.pop_back(); + }); + + // First, see if this is some kind of aggregate and recurse. + if (processArray(FieldTy)) + return; + if (processBufferLayoutArray(FieldTy)) + return; + if (processStruct(FieldTy)) + return; + + // When we have a scalar or vector element we can emit the copy. 
+ CharUnits Align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getABITypeAlign(FieldTy)); + Address SrcGEP = RawAddress( + CGF.Builder.CreateInBoundsGEP(LayoutTy, SrcPtr.getBasePointer(), + CurLoadIndices, "cbuf.src"), + FieldTy, Align, SrcPtr.isKnownNonNull()); + Address DestGEP = CGF.Builder.CreateInBoundsGEP( + DestPtr, CurStoreIndices, FieldTy, Align, "cbuf.dest"); + llvm::Value *Load = CGF.Builder.CreateLoad(SrcGEP, "cbuf.load"); + CGF.Builder.CreateStore(Load, DestGEP); + } + + bool processArray(llvm::Type *FieldTy) { + auto *AT = dyn_cast(FieldTy); + if (!AT) + return false; + + // If we have an llvm::ArrayType this is just a regular array with no top + // level padding, so all we need to do is copy each member. + for (unsigned I = 0, E = AT->getNumElements(); I < E; ++I) + emitCopyAtIndices(AT->getElementType(), + llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.SizeTy, I)); + return true; + } + + bool processBufferLayoutArray(llvm::Type *FieldTy) { + // A buffer layout array is a struct with two elements: the padded array, + // and the last element. That is, it should look something like this: + // + // { [%n x { %type, %padding }], %type } + // + auto *ST = dyn_cast(FieldTy); + if (!ST || ST->getNumElements() != 2) + return false; + + auto *PaddedEltsTy = dyn_cast(ST->getElementType(0)); + if (!PaddedEltsTy) + return false; + + auto *PaddedTy = dyn_cast(PaddedEltsTy->getElementType()); + if (!PaddedTy || PaddedTy->getNumElements() != 2) + return false; + + if (!CGF.CGM.getTargetCodeGenInfo().isHLSLPadding( + PaddedTy->getElementType(1))) + return false; + + llvm::Type *ElementTy = ST->getElementType(1); + if (PaddedTy->getElementType(0) != ElementTy) + return false; + + // All but the last of the logical array elements are in the padded array. + unsigned NumElts = PaddedEltsTy->getNumElements() + 1; + + // Add an extra indirection to the load for the struct and walk the + // array prefix. 
+ CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 0)); + for (unsigned I = 0; I < NumElts - 1; ++I) { + // We need to copy the element itself, without the padding. + CurLoadIndices.push_back(llvm::ConstantInt::get(CGF.SizeTy, I)); + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.SizeTy, I), + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + CurLoadIndices.pop_back(); + } + CurLoadIndices.pop_back(); + + // Now copy the last element. + emitCopyAtIndices(ElementTy, + llvm::ConstantInt::get(CGF.SizeTy, NumElts - 1), + llvm::ConstantInt::get(CGF.Int32Ty, 1)); + + return true; + } + + bool processStruct(llvm::Type *FieldTy) { + auto *ST = dyn_cast(FieldTy); + if (!ST) + return false; + + // Copy the struct field by field, but skip any explicit padding. + unsigned Skipped = 0; + for (unsigned I = 0, E = ST->getNumElements(); I < E; ++I) { + llvm::Type *ElementTy = ST->getElementType(I); + if (CGF.CGM.getTargetCodeGenInfo().isHLSLPadding(ElementTy)) + ++Skipped; + else + emitCopyAtIndices(ElementTy, llvm::ConstantInt::get(CGF.Int32Ty, I), + llvm::ConstantInt::get(CGF.Int32Ty, I + Skipped)); + } + return true; + } + +public: + HLSLBufferCopyEmitter(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr) + : CGF(CGF), DestPtr(DestPtr), SrcPtr(SrcPtr) {} + + bool emitCopy(QualType CType) { + LayoutTy = HLSLBufferLayoutBuilder(CGF.CGM).layOutType(CType); + + // TODO: We should be able to fall back to a regular memcpy if the layout + // type doesn't have any padding, but that runs into issues in the backend + // currently. 
+ // + // See https://github.com/llvm/wg-hlsl/issues/351 + emitCopyAtIndices(LayoutTy, llvm::ConstantInt::get(CGF.SizeTy, 0), + llvm::ConstantInt::get(CGF.SizeTy, 0)); + return true; + } +}; +} // namespace + +bool CGHLSLRuntime::emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, + Address SrcPtr, QualType CType) { + return HLSLBufferCopyEmitter(CGF, DestPtr, SrcPtr).emitCopy(CType); +} + +LValue CGHLSLRuntime::emitBufferMemberExpr(CodeGenFunction &CGF, + const MemberExpr *E) { + LValue Base = + CGF.EmitCheckedLValue(E->getBase(), CodeGenFunction::TCK_MemberAccess); + auto *Field = dyn_cast(E->getMemberDecl()); + assert(Field && "Unexpected access into HLSL buffer"); + + // Get the field index for the struct. + const RecordDecl *Rec = Field->getParent(); + unsigned FieldIdx = + CGM.getTypes().getCGRecordLayout(Rec).getLLVMFieldNo(Field); + + // Work out the buffer layout type to index into. + QualType RecType = CGM.getContext().getCanonicalTagType(Rec); + assert(RecType->isStructureOrClassType() && "Invalid type in HLSL buffer"); + // Since this is a member of an object in the buffer and not the buffer's + // struct/class itself, we shouldn't have any offsets on the members we need + // to contend with. + CGHLSLOffsetInfo EmptyOffsets; + llvm::StructType *LayoutTy = HLSLBufferLayoutBuilder(CGM).layOutStruct( + RecType->getAsCanonical(), EmptyOffsets); + + // Now index into the struct, making sure that the type we return is the + // buffer layout type rather than the original type in the AST. 
+ QualType FieldType = Field->getType(); + llvm::Type *FieldLLVMTy = CGM.getTypes().ConvertTypeForMem(FieldType); + CharUnits Align = CharUnits::fromQuantity( + CGF.CGM.getDataLayout().getABITypeAlign(FieldLLVMTy)); + Address Addr(CGF.Builder.CreateStructGEP(LayoutTy, Base.getPointer(CGF), + FieldIdx, Field->getName()), + FieldLLVMTy, Align, KnownNonNull); + + LValue LV = LValue::MakeAddr(Addr, FieldType, CGM.getContext(), + LValueBaseInfo(AlignmentSource::Type), + CGM.getTBAAAccessInfo(FieldType)); + LV.getQuals().addCVRQualifiers(Base.getVRQualifiers()); + + return LV; +} diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 488a322ca7569..e42273226f94f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -15,20 +15,19 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/IntrinsicsDirectX.h" -#include "llvm/IR/IntrinsicsSPIRV.h" - +#include "Address.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/HLSLRuntime.h" - +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Frontend/HLSL/HLSLResource.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/IntrinsicsSPIRV.h" #include #include @@ -100,12 +99,19 @@ class CGHLSLOffsetInfo { /// of the HLSL buffer after all of the elements with specified offset. static CGHLSLOffsetInfo fromDecl(const HLSLBufferDecl &BufDecl); + /// Comparison function for offsets received from `operator[]` suitable for + /// use in a `stable_sort`. This will order implicit bindings after explicit + /// offsets. 
+ static bool compareOffsets(uint32_t LHS, uint32_t RHS) { return LHS < RHS; } + /// Get the given offset, or `~0U` if there is no offset for the member. uint32_t operator[](size_t I) const { if (Offsets.empty()) return Unspecified; return Offsets[I]; } + + bool empty() const { return Offsets.empty(); } }; class CGHLSLRuntime { @@ -214,19 +220,28 @@ class CGHLSLRuntime { llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB); - llvm::TargetExtType * - getHLSLBufferLayoutType(const RecordType *LayoutStructTy); + llvm::StructType *getHLSLBufferLayoutType(const RecordType *LayoutStructTy); void addHLSLBufferLayoutType(const RecordType *LayoutStructTy, - llvm::TargetExtType *LayoutTy); + llvm::StructType *LayoutTy); void emitInitListOpaqueValues(CodeGenFunction &CGF, InitListExpr *E); std::optional emitResourceArraySubscriptExpr(const ArraySubscriptExpr *E, CodeGenFunction &CGF); + std::optional emitBufferArraySubscriptExpr( + const ArraySubscriptExpr *E, CodeGenFunction &CGF, + llvm::function_ref EmitIdxAfterBase); + + bool emitBufferCopy(CodeGenFunction &CGF, Address DestPtr, Address SrcPtr, + QualType CType); + + LValue emitBufferMemberExpr(CodeGenFunction &CGF, const MemberExpr *E); + private: void emitBufferGlobalsAndMetadata(const HLSLBufferDecl *BufDecl, - llvm::GlobalVariable *BufGV); + llvm::GlobalVariable *BufGV, + const CGHLSLOffsetInfo &OffsetInfo); void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl, llvm::GlobalVariable *GV); void initializeBufferFromBinding(const HLSLBufferDecl *BufDecl, @@ -246,7 +261,7 @@ class CGHLSLRuntime { llvm::Triple::ArchType getArch(); - llvm::DenseMap LayoutTypes; + llvm::DenseMap LayoutTypes; unsigned SPIRVLastAssignedInputSemanticLocation = 0; }; diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp index 4bc6d565fd41f..07cc738882b50 100644 --- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.cpp @@ -9,6 +9,7 
@@ #include "HLSLBufferLayoutBuilder.h" #include "CGHLSLRuntime.h" #include "CodeGenModule.h" +#include "TargetInfo.h" #include "clang/AST/Type.h" #include @@ -19,72 +20,22 @@ using namespace clang; using namespace clang::CodeGen; -using llvm::hlsl::CBufferRowSizeInBytes; -namespace { - -// Creates a new array type with the same dimentions but with the new -// element type. -static llvm::Type * -createArrayWithNewElementType(CodeGenModule &CGM, - const ConstantArrayType *ArrayType, - llvm::Type *NewElemType) { - const clang::Type *ArrayElemType = ArrayType->getArrayElementTypeNoTypeQual(); - if (ArrayElemType->isConstantArrayType()) - NewElemType = createArrayWithNewElementType( - CGM, cast(ArrayElemType), NewElemType); - return llvm::ArrayType::get(NewElemType, ArrayType->getSExtSize()); -} - -// Returns the size of a scalar or vector in bytes -static unsigned getScalarOrVectorSizeInBytes(llvm::Type *Ty) { - assert(Ty->isVectorTy() || Ty->isIntegerTy() || Ty->isFloatingPointTy()); - if (Ty->isVectorTy()) { - llvm::FixedVectorType *FVT = cast(Ty); - return FVT->getNumElements() * - (FVT->getElementType()->getScalarSizeInBits() / 8); - } - return Ty->getScalarSizeInBits() / 8; -} - -} // namespace +static const CharUnits CBufferRowSize = + CharUnits::fromQuantity(llvm::hlsl::CBufferRowSizeInBytes); namespace clang { namespace CodeGen { -// Creates a layout type for given struct or class with HLSL constant buffer -// layout taking into account PackOffsets, if provided. -// Previously created layout types are cached by CGHLSLRuntime. -// -// The function iterates over all fields of the record type (including base -// classes) and calls layoutField to converts each field to its corresponding -// LLVM type and to calculate its HLSL constant buffer layout. Any embedded -// structs (or arrays of structs) are converted to target layout types as well. -// -// When PackOffsets are specified the elements will be placed based on the -// user-specified offsets. 
Not all elements must have a packoffset/register(c#) -// annotation though. For those that don't, the PackOffsets array will contain -// -1 value instead. These elements must be placed at the end of the layout -// after all of the elements with specific offset. -llvm::TargetExtType * -HLSLBufferLayoutBuilder::createLayoutType(const RecordType *RT, - const CGHLSLOffsetInfo &OffsetInfo) { +llvm::StructType * +HLSLBufferLayoutBuilder::layOutStruct(const RecordType *RT, + const CGHLSLOffsetInfo &OffsetInfo) { // check if we already have the layout type for this struct - if (llvm::TargetExtType *Ty = - CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT)) + // TODO: Do we need to check for matching OffsetInfo? + if (llvm::StructType *Ty = CGM.getHLSLRuntime().getHLSLBufferLayoutType(RT)) return Ty; - SmallVector Layout; - SmallVector LayoutElements; - unsigned Index = 0; // packoffset index - unsigned EndOffset = 0; - - SmallVector> DelayLayoutFields; - - // reserve first spot in the layout vector for buffer size - Layout.push_back(0); - // iterate over all fields of the record, including fields on base classes llvm::SmallVector RecordDecls; RecordDecls.push_back(RT->castAsCXXRecordDecl()); @@ -95,187 +46,97 @@ HLSLBufferLayoutBuilder::createLayoutType(const RecordType *RT, RecordDecls.push_back(D->bases_begin()->getType()->castAsCXXRecordDecl()); } - unsigned FieldOffset; - llvm::Type *FieldType; - - while (!RecordDecls.empty()) { - const CXXRecordDecl *RD = RecordDecls.pop_back_val(); - - for (const auto *FD : RD->fields()) { - // No PackOffset info at all, or have a valid packoffset/register(c#) - // annotations value -> layout the field. 
- const uint32_t PO = OffsetInfo[Index++]; - if (PO != CGHLSLOffsetInfo::Unspecified) { - if (!layoutField(FD, EndOffset, FieldOffset, FieldType, PO)) - return nullptr; - Layout.push_back(FieldOffset); - LayoutElements.push_back(FieldType); - continue; - } - // Have PackOffset info, but there is no packoffset/register(cX) - // annotation on this field. Delay the layout until after all of the - // other elements with packoffsets/register(cX) are processed. - DelayLayoutFields.emplace_back(FD, LayoutElements.size()); - // reserve space for this field in the layout vector and elements list - Layout.push_back(UINT_MAX); - LayoutElements.push_back(nullptr); + SmallVector> FieldsWithOffset; + unsigned OffsetIdx = 0; + for (const CXXRecordDecl *RD : llvm::reverse(RecordDecls)) + for (const auto *FD : RD->fields()) + FieldsWithOffset.emplace_back(FD, OffsetInfo[OffsetIdx++]); + + if (!OffsetInfo.empty()) + llvm::stable_sort(FieldsWithOffset, [](const auto &LHS, const auto &RHS) { + return CGHLSLOffsetInfo::compareOffsets(LHS.second, RHS.second); + }); + + SmallVector Layout; + CharUnits CurrentOffset = CharUnits::Zero(); + for (auto &[FD, Offset] : FieldsWithOffset) { + llvm::Type *LayoutType = layOutType(FD->getType()); + + const llvm::DataLayout &DL = CGM.getDataLayout(); + CharUnits Size = + CharUnits::fromQuantity(DL.getTypeSizeInBits(LayoutType) / 8); + CharUnits Align = CharUnits::fromQuantity(DL.getABITypeAlign(LayoutType)); + + if (LayoutType->isAggregateType() || + (CurrentOffset % CBufferRowSize) + Size > CBufferRowSize) + Align = Align.alignTo(CBufferRowSize); + + CharUnits NextOffset = CurrentOffset.alignTo(Align); + + if (Offset != CGHLSLOffsetInfo::Unspecified) { + CharUnits PackOffset = CharUnits::fromQuantity(Offset); + assert(PackOffset >= NextOffset && + "Offset is invalid - would overlap with previous object"); + NextOffset = PackOffset; } - } - - // process delayed layouts - for (auto I : DelayLayoutFields) { - const FieldDecl *FD = I.first; - const 
unsigned IndexInLayoutElements = I.second; - // the first item in layout vector is size, so we need to offset the index - // by 1 - const unsigned IndexInLayout = IndexInLayoutElements + 1; - assert(Layout[IndexInLayout] == UINT_MAX && - LayoutElements[IndexInLayoutElements] == nullptr); - if (!layoutField(FD, EndOffset, FieldOffset, FieldType)) - return nullptr; - Layout[IndexInLayout] = FieldOffset; - LayoutElements[IndexInLayoutElements] = FieldType; + if (NextOffset > CurrentOffset) { + llvm::Type *Padding = CGM.getTargetCodeGenInfo().getHLSLPadding( + CGM, NextOffset - CurrentOffset); + assert(Padding && "No padding type for target?"); + Layout.emplace_back(Padding); + CurrentOffset = NextOffset; + } + Layout.emplace_back(LayoutType); + CurrentOffset += Size; } - // set the size of the buffer - Layout[0] = EndOffset; - - // create the layout struct type; anonymous struct have empty name but + // Create the layout struct type; anonymous structs have empty name but // non-empty qualified name const auto *Decl = RT->castAsCXXRecordDecl(); std::string Name = Decl->getName().empty() ? "anon" : Decl->getQualifiedNameAsString(); - llvm::StructType *StructTy = - llvm::StructType::create(LayoutElements, Name, true); - // create target layout type - llvm::TargetExtType *NewLayoutTy = llvm::TargetExtType::get( - CGM.getLLVMContext(), LayoutTypeName, {StructTy}, Layout); - if (NewLayoutTy) - CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewLayoutTy); - return NewLayoutTy; + llvm::StructType *NewTy = llvm::StructType::create(Layout, Name, + /*isPacked=*/true); + CGM.getHLSLRuntime().addHLSLBufferLayoutType(RT, NewTy); + return NewTy; } -// The function converts a single field of HLSL Buffer to its corresponding -// LLVM type and calculates it's layout. Any embedded structs (or -// arrays of structs) are converted to target layout types as well. -// The converted type is set to the FieldType parameter, the element -// offset is set to the FieldOffset parameter. 
The EndOffset (=size of the -// buffer) is also updated accordingly to the offset just after the placed -// element, unless the incoming EndOffset already larger (may happen in case -// of unsorted packoffset annotations). -// Returns true if the conversion was successful. -// The packoffset parameter contains the field's layout offset provided by the -// user or -1 if there was no packoffset (or register(cX)) annotation. -bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD, - unsigned &EndOffset, - unsigned &FieldOffset, - llvm::Type *&FieldType, - uint32_t Packoffset) { - - // Size of element; for arrays this is a size of a single element in the - // array. Total array size of calculated as (ArrayCount-1) * ArrayStride + - // ElemSize. - unsigned ElemSize = 0; - unsigned ElemOffset = 0; - unsigned ArrayCount = 1; - unsigned ArrayStride = 0; - - unsigned NextRowOffset = llvm::alignTo(EndOffset, CBufferRowSizeInBytes); - - llvm::Type *ElemLayoutTy = nullptr; - QualType FieldTy = FD->getType(); +llvm::Type *HLSLBufferLayoutBuilder::layOutArray(const ConstantArrayType *AT) { + llvm::Type *EltTy = layOutType(AT->getElementType()); + uint64_t Count = AT->getZExtSize(); + + CharUnits EltSize = + CharUnits::fromQuantity(CGM.getDataLayout().getTypeSizeInBits(EltTy) / 8); + CharUnits Padding = EltSize.alignTo(CBufferRowSize) - EltSize; + + // If we don't have any padding between elements then we just need the array + // itself. 
+ if (Count < 2 || Padding.isZero()) + return llvm::ArrayType::get(EltTy, Count); + + llvm::LLVMContext &Context = CGM.getLLVMContext(); + llvm::Type *PaddingTy = + CGM.getTargetCodeGenInfo().getHLSLPadding(CGM, Padding); + assert(PaddingTy && "No padding type for target?"); + auto *PaddedEltTy = + llvm::StructType::get(Context, {EltTy, PaddingTy}, /*isPacked=*/true); + return llvm::StructType::get( + Context, {llvm::ArrayType::get(PaddedEltTy, Count - 1), EltTy}, + /*IsPacked=*/true); +} - if (FieldTy->isConstantArrayType()) { - // Unwrap array to find the element type and get combined array size. - QualType Ty = FieldTy; - while (Ty->isConstantArrayType()) { - auto *ArrayTy = CGM.getContext().getAsConstantArrayType(Ty); - ArrayCount *= ArrayTy->getSExtSize(); - Ty = ArrayTy->getElementType(); - } - // For array of structures, create a new array with a layout type - // instead of the structure type. - if (Ty->isStructureOrClassType()) { - CGHLSLOffsetInfo EmptyOffsets; - llvm::Type *NewTy = cast( - createLayoutType(Ty->getAsCanonical(), EmptyOffsets)); - if (!NewTy) - return false; - assert(isa(NewTy) && "expected target type"); - ElemSize = cast(NewTy)->getIntParameter(0); - ElemLayoutTy = createArrayWithNewElementType( - CGM, cast(FieldTy.getTypePtr()), NewTy); - } else { - // Array of vectors or scalars - ElemSize = - getScalarOrVectorSizeInBytes(CGM.getTypes().ConvertTypeForMem(Ty)); - ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); - } - ArrayStride = llvm::alignTo(ElemSize, CBufferRowSizeInBytes); - ElemOffset = (Packoffset != CGHLSLOffsetInfo::Unspecified) ? 
Packoffset - : NextRowOffset; +llvm::Type *HLSLBufferLayoutBuilder::layOutType(QualType Ty) { + if (const auto *AT = CGM.getContext().getAsConstantArrayType(Ty)) + return layOutArray(AT); - } else if (FieldTy->isStructureOrClassType()) { - // Create a layout type for the structure + if (Ty->isStructureOrClassType()) { CGHLSLOffsetInfo EmptyOffsets; - ElemLayoutTy = createLayoutType( - cast(FieldTy->getAsCanonical()), EmptyOffsets); - if (!ElemLayoutTy) - return false; - assert(isa(ElemLayoutTy) && "expected target type"); - ElemSize = cast(ElemLayoutTy)->getIntParameter(0); - ElemOffset = (Packoffset != CGHLSLOffsetInfo::Unspecified) ? Packoffset - : NextRowOffset; - - } else { - // scalar or vector - find element size and alignment - unsigned Align = 0; - ElemLayoutTy = CGM.getTypes().ConvertTypeForMem(FieldTy); - if (ElemLayoutTy->isVectorTy()) { - // align vectors by sub element size - const llvm::FixedVectorType *FVT = - cast(ElemLayoutTy); - unsigned SubElemSize = FVT->getElementType()->getScalarSizeInBits() / 8; - ElemSize = FVT->getNumElements() * SubElemSize; - Align = SubElemSize; - } else { - assert(ElemLayoutTy->isIntegerTy() || ElemLayoutTy->isFloatingPointTy()); - ElemSize = ElemLayoutTy->getScalarSizeInBits() / 8; - Align = ElemSize; - } - - // calculate or get element offset for the vector or scalar - if (Packoffset != CGHLSLOffsetInfo::Unspecified) { - ElemOffset = Packoffset; - } else { - ElemOffset = llvm::alignTo(EndOffset, Align); - // if the element does not fit, move it to the next row - if (ElemOffset + ElemSize > NextRowOffset) - ElemOffset = NextRowOffset; - } + return layOutStruct(Ty->getAsCanonical(), EmptyOffsets); } - // Update end offset of the layout; do not update it if the EndOffset - // is already bigger than the new value (which may happen with unordered - // packoffset annotations) - unsigned NewEndOffset = - ElemOffset + (ArrayCount - 1) * ArrayStride + ElemSize; - EndOffset = std::max(EndOffset, NewEndOffset); - - // add the 
layout element and offset to the lists - FieldOffset = ElemOffset; - FieldType = ElemLayoutTy; - return true; -} - -bool HLSLBufferLayoutBuilder::layoutField(const FieldDecl *FD, - unsigned &EndOffset, - unsigned &FieldOffset, - llvm::Type *&FieldType) { - return layoutField(FD, EndOffset, FieldOffset, FieldType, - CGHLSLOffsetInfo::Unspecified); + return CGM.getTypes().ConvertTypeForMem(Ty); } } // namespace CodeGen diff --git a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h index 916e60e83e2c0..c55f680fe5a98 100644 --- a/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h +++ b/clang/lib/CodeGen/HLSLBufferLayoutBuilder.h @@ -6,16 +6,15 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/TypeBase.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/DerivedTypes.h" namespace clang { -class RecordType; -class FieldDecl; - namespace CodeGen { class CGHLSLOffsetInfo; class CodeGenModule; +class CGHLSLOffsetInfo; //===----------------------------------------------------------------------===// // Implementation of constant buffer layout common between DirectX and @@ -25,24 +24,30 @@ class CodeGenModule; class HLSLBufferLayoutBuilder { private: CodeGenModule &CGM; - llvm::StringRef LayoutTypeName; public: - HLSLBufferLayoutBuilder(CodeGenModule &CGM, llvm::StringRef LayoutTypeName) - : CGM(CGM), LayoutTypeName(LayoutTypeName) {} - - // Returns LLVM target extension type with the name LayoutTypeName - // for given structure type and layout data. The first number in - // the Layout is the size followed by offsets for each struct element. 
- llvm::TargetExtType *createLayoutType(const RecordType *StructType, - const CGHLSLOffsetInfo &OffsetInfo); - -private: - bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset, - unsigned &FieldOffset, llvm::Type *&FieldType, - uint32_t Packoffset); - bool layoutField(const clang::FieldDecl *FD, unsigned &EndOffset, - unsigned &FieldOffset, llvm::Type *&FieldType); + HLSLBufferLayoutBuilder(CodeGenModule &CGM) : CGM(CGM) {} + + /// Lays out a struct type following HLSL buffer rules and considering any + /// explicit offset information. Previously created layout structs are cached + /// by CGHLSLRuntime. + /// + /// The function iterates over all fields of the record type (including base + /// classes) and works out a padded llvm type to represent the buffer layout. + /// + /// If a non-empty OffsetInfo is provided (i.e., from `packoffset` annotations + /// in the source), any provided offsets will be respected. If the + /// OffsetInfo is available but has empty entries, those will be laid out at + /// the end of the structure. + llvm::StructType *layOutStruct(const RecordType *StructType, + const CGHLSLOffsetInfo &OffsetInfo); + + /// Lays out an array type following HLSL buffer rules. + llvm::Type *layOutArray(const ConstantArrayType *AT); + + /// Lays out a type following HLSL buffer rules. Arrays and structures will be + /// padded appropriately and nested objects will be converted as appropriate. 
+ llvm::Type *layOutType(QualType Type); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 383f52f298d2e..db06584d766bf 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -38,6 +38,7 @@ namespace CodeGen { class ABIInfo; class CallArgList; class CodeGenFunction; +class CGHLSLOffsetInfo; class CGBlockInfo; class CGHLSLOffsetInfo; class SwiftABIInfo; @@ -448,6 +449,15 @@ class TargetCodeGenInfo { return nullptr; } + /// Return an LLVM type that corresponds to padding in HLSL types + virtual llvm::Type *getHLSLPadding(CodeGenModule &CGM, + CharUnits NumBytes) const { + return nullptr; + } + + /// Return true if this is an HLSL padding type. + virtual bool isHLSLPadding(llvm::Type *Ty) const { return false; } + // Set the Branch Protection Attributes of the Function accordingly to the // BPI. Remove attributes that contradict with current BPI. static void diff --git a/clang/lib/CodeGen/Targets/DirectX.cpp b/clang/lib/CodeGen/Targets/DirectX.cpp index f30b30284cb12..a007c90881ab2 100644 --- a/clang/lib/CodeGen/Targets/DirectX.cpp +++ b/clang/lib/CodeGen/Targets/DirectX.cpp @@ -31,6 +31,19 @@ class DirectXTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *T, const CGHLSLOffsetInfo &OffsetInfo) const override; + + llvm::Type *getHLSLPadding(CodeGenModule &CGM, + CharUnits NumBytes) const override { + unsigned Size = NumBytes.getQuantity(); + return llvm::TargetExtType::get(CGM.getLLVMContext(), "dx.Padding", {}, + {Size}); + } + + bool isHLSLPadding(llvm::Type *Ty) const override { + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "dx.Padding"; + return false; + } }; llvm::Type *DirectXTargetCodeGenInfo::getHLSLType( @@ -74,10 +87,9 @@ llvm::Type *DirectXTargetCodeGenInfo::getHLSLType( if (ContainedTy.isNull() || !ContainedTy->isStructureType()) return nullptr; - llvm::Type *BufferLayoutTy = - HLSLBufferLayoutBuilder(CGM, 
"dx.Layout") - .createLayoutType(ContainedTy->castAsCanonical(), - OffsetInfo); + llvm::StructType *BufferLayoutTy = + HLSLBufferLayoutBuilder(CGM).layOutStruct( + ContainedTy->getAsCanonical(), OffsetInfo); if (!BufferLayoutTy) return nullptr; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index be7e9ccecae9f..1a8c85d8871ec 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -55,6 +55,20 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { llvm::Type *getOpenCLType(CodeGenModule &CGM, const Type *T) const override; llvm::Type *getHLSLType(CodeGenModule &CGM, const Type *Ty, const CGHLSLOffsetInfo &OffsetInfo) const override; + + llvm::Type *getHLSLPadding(CodeGenModule &CGM, + CharUnits NumBytes) const override { + unsigned Size = NumBytes.getQuantity(); + return llvm::TargetExtType::get(CGM.getLLVMContext(), "spirv.Padding", {}, + {Size}); + } + + bool isHLSLPadding(llvm::Type *Ty) const override { + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "spirv.Padding"; + return false; + } + llvm::Type *getSPIRVImageTypeFromHLSLResource( const HLSLAttributedResourceType::Attributes &attributes, QualType SampledType, CodeGenModule &CGM) const; @@ -563,10 +577,9 @@ llvm::Type *CommonSPIRTargetCodeGenInfo::getHLSLType( if (ContainedTy.isNull() || !ContainedTy->isStructureType()) return nullptr; - llvm::Type *BufferLayoutTy = - HLSLBufferLayoutBuilder(CGM, "spirv.Layout") - .createLayoutType(ContainedTy->castAsCanonical(), - OffsetInfo); + llvm::StructType *BufferLayoutTy = + HLSLBufferLayoutBuilder(CGM).layOutStruct( + ContainedTy->getAsCanonical(), OffsetInfo); uint32_t StorageClass = /* Uniform storage class */ 2; return llvm::TargetExtType::get(Ctx, "spirv.VulkanBuffer", {BufferLayoutTy}, {StorageClass, false}); diff --git a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl index aaa486eff10b7..d1bfc6db8b504 100644 --- 
a/clang/test/CodeGenHLSL/ArrayAssignable.hlsl +++ b/clang/test/CodeGenHLSL/ArrayAssignable.hlsl @@ -5,18 +5,19 @@ struct S { float f; }; -// CHECK: [[CBLayout:%.*]] = type <{ [2 x float], [2 x <4 x i32>], [2 x [2 x i32]], [1 x target("dx.Layout", %S, 8, 0, 4)] }> -// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", [[CBLayout]], 136, 0, 32, 64, 128)) -// CHECK: @c1 = external hidden addrspace(2) global [2 x float], align 4 +// CHECK: [[CBLayout:%.*]] = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), [2 x <4 x i32>], <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, target("dx.Padding", 12), <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }> }> + +// CHECK: @CBArrays.cb = global target("dx.CBuffer", [[CBLayout]]) +// CHECK: @c1 = external hidden addrspace(2) global <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 // CHECK: @c2 = external hidden addrspace(2) global [2 x <4 x i32>], align 16 -// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x i32]], align 4 -// CHECK: @c4 = external hidden addrspace(2) global [1 x target("dx.Layout", %S, 8, 0, 4)], align 1 +// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, align 4 +// CHECK: @c4 = external hidden addrspace(2) global <{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, align 1 cbuffer CBArrays : register(b0) { float c1[2]; int4 c2[2]; int c3[2][2]; - S c4[1]; + S c4[2]; } // CHECK-LABEL: define hidden void {{.*}}arr_assign1 @@ -140,40 +141,71 @@ void arr_assign7() { // CHECK-LABEL: define hidden void {{.*}}arr_assign8 // CHECK: [[C:%.*]] = alloca [2 x float], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 {{.*}}, i32 8, i1 false) -// 
CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c1, i32 8, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0 +// CHECK-NEXT: [[L0:%.*]] = load float, ptr addrspace(2) @c1, align 4 +// CHECK-NEXT: store float [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x float], ptr [[C]], i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, ptr addrspace(2) @c1, i32 0, i32 1), align 4 +// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4 // CHECK-NEXT: ret void void arr_assign8() { - float C[2] = {1.0, 2.0}; + float C[2]; C = c1; } +// TODO: We should be able to just memcpy here. +// See https://github.com/llvm/wg-hlsl/issues/351 +// // CHECK-LABEL: define hidden void {{.*}}arr_assign9 // CHECK: [[C:%.*]] = alloca [2 x <4 x i32>], align 16 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[C]], ptr align 16 {{.*}}, i32 32, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 16 [[C]], ptr addrspace(2) align 16 @c2, i32 32, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0 +// CHECK-NEXT: [[L0:%.*]] = load <4 x i32>, ptr addrspace(2) @c2, align 16 +// CHECK-NEXT: store <4 x i32> [[L0]], ptr [[V0]], align 16 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[C]], i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([2 x <4 x i32>], ptr addrspace(2) @c2, i32 0, i32 1), align 16 +// CHECK-NEXT: store <4 x i32> [[L1]], ptr [[V1]], align 16 // CHECK-NEXT: ret void void arr_assign9() { - int4 C[2] = {1,2,3,4,5,6,7,8}; + int4 C[2]; C = c2; } // CHECK-LABEL: define hidden void {{.*}}arr_assign10 // CHECK: [[C:%.*]] = alloca [2 x [2 x i32]], align 4 -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[C]], ptr align 4 
{{.*}}, i32 16, i1 false) -// CHECK-NEXT: call void @llvm.memcpy.p0.p2.i32(ptr align 4 [[C]], ptr addrspace(2) align 4 @c3, i32 16, i1 false) +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c3, align 4 +// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 0, i32 0, i32 0, i32 1), align 4 +// CHECK-NEXT: store i32 [[L1]], ptr [[V1]], align 4 +// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 0, i32 0, i32 0), align 4 +// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4 +// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[C]], i32 0, i32 1, i32 1 +// CHECK-NEXT: [[L3:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }>, target("dx.Padding", 12) }>], <{ [1 x <{ i32, target("dx.Padding", 12) }>], i32 }> }>, ptr addrspace(2) @c3, i32 0, i32 1, i32 1), align 4 +// CHECK-NEXT: store i32 [[L3]], ptr [[V3]], align 4 // CHECK-NEXT: ret void void arr_assign10() { - int C[2][2] = {1,2,3,4}; + int C[2][2]; C = c3; } // CHECK-LABEL: define hidden void {{.*}}arr_assign11 -// CHECK: [[C:%.*]] = alloca [1 x %struct.S], align 1 -// CHECK: call void @llvm.memcpy.p0.p2.i32(ptr align 1 [[C]], ptr addrspace(2) align 
1 @c4, i32 8, i1 false) +// CHECK: [[C:%.*]] = alloca [2 x %struct.S], align 1 +// CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[L0:%.*]] = load i32, ptr addrspace(2) @c4, align 4 +// CHECK-NEXT: store i32 [[L0]], ptr [[V0]], align 4 +// CHECK-NEXT: [[V1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 0, i32 1 +// CHECK-NEXT: [[L1:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 0, i32 0, i32 0, i32 1), align 4 +// CHECK-NEXT: store float [[L1]], ptr [[V1]], align 4 +// CHECK-NEXT: [[V2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 0), align 4 +// CHECK-NEXT: store i32 [[L2]], ptr [[V2]], align 4 +// CHECK-NEXT: [[V3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[C]], i32 0, i32 1, i32 1 +// CHECK-NEXT: [[L3:%.*]] = load float, ptr addrspace(2) getelementptr inbounds (<{ [1 x <{ %S, target("dx.Padding", 8) }>], %S }>, ptr addrspace(2) @c4, i32 0, i32 1, i32 1), align 4 +// CHECK-NEXT: store float [[L3]], ptr [[V3]], align 4 // CHECK-NEXT: ret void void arr_assign11() { - S s = {1, 2.0}; - S C[1] = {s}; + S C[2]; C = c4; } diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl index b36682e065b3a..5553f8c17c6c8 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl @@ -37,9 +37,8 @@ void main(unsigned GI : SV_GroupIndex) {} // INLINE-NEXT: alloca // INLINE-NEXT: store i32 12 // INLINE-NEXT: store i32 13 -// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) -// INLINE-NEXT-SAME: 
@"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_$Globalss_4_0tt"(i32 0, i32 0, i32 1, i32 0, i1 false) -// INLINE-NEXT: store target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 4, 0)) %[[HANDLE]], ptr @"$Globals.cb", align 4 +// INLINE-NEXT: %[[HANDLE:.*]] = call target("dx.CBuffer", %"__cblayout_$Globals") @"llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_$Globalsst"(i32 0, i32 0, i32 1, i32 0, ptr @"$Globals.str") +// INLINE-NEXT: store target("dx.CBuffer", %"__cblayout_$Globals") %[[HANDLE]], ptr @"$Globals.cb", align 4 // INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() // INLINE-NEXT: store i32 % // INLINE-NEXT: store i32 0 diff --git a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl index c8efe0d64c985..b72cf587d0f93 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer.hlsl @@ -1,37 +1,123 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -fnative-int16-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -// CHECK: %__cblayout_CBScalars = type <{ float, double, half, i64, i32, i16, i32, i64 }> -// CHECK: %__cblayout_CBVectors = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16>, <3 x i64> }> -// CHECK: %__cblayout_CBArrays = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> -// CHECK: %__cblayout_CBStructs = type <{ target("dx.Layout", %A, 8, 0), target("dx.Layout", %B, 14, 0, 8), -// CHECK-SAME: target("dx.Layout", %C, 24, 0, 16), [5 x target("dx.Layout", %A, 8, 0)], -// CHECK-SAME: target("dx.Layout", %__cblayout_D, 94, 0), half, <3 x i16> }> +// CHECK: %__cblayout_CBScalars = type <{ +// CHECK-SAME: float, target("dx.Padding", 4), double, +// CHECK-SAME: half, target("dx.Padding", 6), i64, +// 
CHECK-SAME: i32, i16, target("dx.Padding", 2), i32, target("dx.Padding", 4), +// CHECK-SAME: i64 +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBVectors = type <{ +// CHECK-SAME: <3 x float>, target("dx.Padding", 4), +// CHECK-SAME: <3 x double>, <2 x half>, target("dx.Padding", 4), +// CHECK-SAME: <3 x i64>, target("dx.Padding", 8), +// CHECK-SAME: <4 x i32>, +// CHECK-SAME: <3 x i16>, target("dx.Padding", 10), +// CHECK-SAME: <3 x i64> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBArrays = type <{ +// CHECK-SAME: <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), +// CHECK-SAME: <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [1 x <{ +// CHECK-SAME: <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], +// CHECK-SAME: <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> +// CHECK-SAME: }>, target("dx.Padding", 14), +// CHECK-SAME: <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), +// CHECK-SAME: [2 x [3 x [4 x <4 x i32>]]] +// CHECK-SAME: [1 x i16], target("dx.Padding", 14), +// CHECK-SAME: <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBStructs = type <{ +// CHECK-SAME: %A, target("dx.Padding", 8), + +// TODO: We should have target("dx.Padding", 2) padding after %B, but we don't +// correctly handle 2- and 3-element vectors inside structs yet because of +// DataLayout rules. See https://github.com/llvm/llvm-project/issues/123968. 
+// +// CHECK-SAME: %B, + +// CHECK-SAME: %C, target("dx.Padding", 8), +// CHECK-SAME: <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, target("dx.Padding", 8), +// CHECK-SAME: %__cblayout_D, half, +// CHECK-SAME: <3 x i16> +// CHECK-SAME: }> // CHECK: %A = type <{ <2 x float> }> // CHECK: %B = type <{ <2 x float>, <3 x i16> }> -// CHECK: %C = type <{ i32, target("dx.Layout", %A, 8, 0) }> -// CHECK: %__cblayout_D = type <{ [2 x [3 x target("dx.Layout", %B, 14, 0, 8)]] }> +// CHECK: %C = type <{ i32, target("dx.Padding", 12), %A }> + +// CHECK: %__cblayout_D = type <{ +// CHECK-SAME: <{ [1 x <{ +// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2) +// CHECK-SAME: }>], +// CHECK-SAME: <{ [2 x <{ %B, target("dx.Padding", 2) }>], %B }> }> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CBClasses = type <{ +// CHECK-SAME: %K, target("dx.Padding", 12), +// CHECK-SAME: %L, target("dx.Padding", 8), +// CHECK-SAME: %M, target("dx.Padding", 12), +// CHECK-SAME: <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }> +// CHECK-SAME: }> -// CHECK: %__cblayout_CBClasses = type <{ target("dx.Layout", %K, 4, 0), target("dx.Layout", %L, 8, 0, 4), -// CHECK-SAME: target("dx.Layout", %M, 68, 0), [10 x target("dx.Layout", %K, 4, 0)] }> // CHECK: %K = type <{ float }> // CHECK: %L = type <{ float, float }> -// CHECK: %M = type <{ [5 x target("dx.Layout", %K, 4, 0)] }> - -// CHECK: %__cblayout_CBMix = type <{ [2 x target("dx.Layout", %Test, 8, 0, 4)], float, [3 x [2 x <2 x float>]], float, -// CHECK-SAME: target("dx.Layout", %anon, 4, 0), double, target("dx.Layout", %anon.0, 8, 0), float, <1 x double>, i16 }> +// CHECK: %M = type <{ <{ [4 x <{ %K, target("dx.Padding", 12) }>], %K }> }> + +// CHECK: %__cblayout_CBMix = type <{ +// CHECK-SAME: <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, float, target("dx.Padding", 4) +// CHECK-SAME: <{ [2 x <{ +// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, 
target("dx.Padding", 8) }>], +// CHECK-SAME: <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> +// CHECK-SAME: }>, float, target("dx.Padding", 4), +// CHECK-SAME: %anon, target("dx.Padding", 4), double, +// CHECK-SAME: %anon.0, float, target("dx.Padding", 4), +// CHECK-SAME: <1 x double>, i16 +// CHECK-SAME: }> // CHECK: %Test = type <{ float, float }> // CHECK: %anon = type <{ float }> // CHECK: %anon.0 = type <{ <2 x i32> }> -// CHECK: %__cblayout_CB_A = type <{ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }> -// CHECK: %__cblayout_CB_B = type <{ [3 x <3 x double>], <3 x half> }> -// CHECK: %__cblayout_CB_C = type <{ i32, target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90), half, target("dx.Layout", %G, 258, 0, 48, 64, 256), double }> - -// CHECK: %F = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }> -// CHECK: %G = type <{ target("dx.Layout", %E, 36, 0, 8, 16, 20, 22, 24, 32), [1 x float], [2 x target("dx.Layout", %F, 96, 0, 16, 28, 32, 56, 64, 80, 84, 90)], half }> -// CHECK: %E = type <{ float, double, float, half, i16, i64, i32 }> +// CHECK: %__cblayout_CB_A = type <{ +// CHECK-SAME: <{ [1 x <{ double, target("dx.Padding", 8) }>], double }>, target("dx.Padding", 8), +// CHECK-SAME: <{ [2 x <{ <3 x float>, target("dx.Padding", 4) }>], <3 x float> }>, float, +// CHECK-SAME: <{ [2 x <{ double, target("dx.Padding", 8) }>], double }>, half, target("dx.Padding", 6), +// CHECK-SAME: [1 x <2 x double>], +// CHECK-SAME: float, target("dx.Padding", 12), +// CHECK-SAME: <{ [1 x <{ <3 x half>, target("dx.Padding", 10) }>], <3 x half> }>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CB_B = type <{ +// CHECK-SAME: <{ [2 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %__cblayout_CB_C = type <{ +// CHECK-SAME: i32, target("dx.Padding", 12), +// 
CHECK-SAME: %F, +// CHECK-SAME: half, target("dx.Padding", 14), +// CHECK-SAME: %G, target("dx.Padding", 6), double +// CHECK-SAME: }> + +// CHECK: %F = type <{ +// CHECK-SAME: double, target("dx.Padding", 8), +// CHECK-SAME: <3 x float>, float, +// CHECK-SAME: <3 x double>, half, target("dx.Padding", 6), +// CHECK-SAME: <2 x double>, +// CHECK-SAME: float, <3 x half>, <3 x half> +// CHECK-SAME: }> + +// CHECK: %G = type <{ +// CHECK-SAME: %E, target("dx.Padding", 12), +// CHECK-SAME: [1 x float], target("dx.Padding", 12), +// CHECK-SAME: [2 x %F], +// CHECK-SAME: half +// CHECK-SAME: }> + +// CHECK: %E = type <{ float, target("dx.Padding", 4), double, float, half, i16, i64, i32 }> cbuffer CBScalars : register(b1, space5) { float a1; @@ -44,8 +130,7 @@ cbuffer CBScalars : register(b1, space5) { int64_t a8; } -// CHECK: @CBScalars.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, -// CHECK-SAME: 56, 0, 8, 16, 24, 32, 36, 40, 48)) +// CHECK: @CBScalars.cb = global target("dx.CBuffer", %__cblayout_CBScalars) // CHECK: @a1 = external hidden addrspace(2) global float, align 4 // CHECK: @a2 = external hidden addrspace(2) global double, align 8 // CHECK: @a3 = external hidden addrspace(2) global half, align 2 @@ -67,8 +152,7 @@ cbuffer CBVectors { // FIXME: add a bool vectors after llvm-project/llvm#91639 is added } -// CHECK: @CBVectors.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, -// CHECK-SAME: 136, 0, 16, 40, 48, 80, 96, 112)) +// CHECK: @CBVectors.cb = global target("dx.CBuffer", %__cblayout_CBVectors) // CHECK: @b1 = external hidden addrspace(2) global <3 x float>, align 16 // CHECK: @b2 = external hidden addrspace(2) global <3 x double>, align 32 // CHECK: @b3 = external hidden addrspace(2) global <2 x half>, align 4 @@ -89,16 +173,15 @@ cbuffer CBArrays : register(b2) { bool c8[4]; } -// CHECK: @CBArrays.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, -// CHECK-SAME: 708, 0, 48, 
112, 176, 224, 608, 624, 656)) -// CHECK: @c1 = external hidden addrspace(2) global [3 x float], align 4 -// CHECK: @c2 = external hidden addrspace(2) global [2 x <3 x double>], align 32 -// CHECK: @c3 = external hidden addrspace(2) global [2 x [2 x half]], align 2 -// CHECK: @c4 = external hidden addrspace(2) global [3 x i64], align 8 +// CHECK: @CBArrays.cb = global target("dx.CBuffer", %__cblayout_CBArrays) +// CHECK: @c1 = external hidden addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 +// CHECK: @c2 = external hidden addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32 +// CHECK: @c3 = external hidden addrspace(2) global <{ [1 x <{ <{ [1 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>], <{ [1 x <{ half, target("dx.Padding", 14) }>], half }> }>, align 2 +// CHECK: @c4 = external hidden addrspace(2) global <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8 // CHECK: @c5 = external hidden addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 // CHECK: @c6 = external hidden addrspace(2) global [1 x i16], align 2 -// CHECK: @c7 = external hidden addrspace(2) global [2 x i64], align 8 -// CHECK: @c8 = external hidden addrspace(2) global [4 x i32], align 4 +// CHECK: @c7 = external hidden addrspace(2) global <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, align 8 +// CHECK: @c8 = external hidden addrspace(2) global <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>, align 4 // CHECK: @CBArrays.str = private unnamed_addr constant [9 x i8] c"CBArrays\00", align 1 typedef uint32_t4 uint32_t8[2]; @@ -110,8 +193,7 @@ cbuffer CBTypedefArray : register(space2) { T2 t2[2]; } -// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, -// CHECK-SAME: 128, 0, 64)) +// CHECK: @CBTypedefArray.cb = global target("dx.CBuffer", %__cblayout_CBTypedefArray) // CHECK: @t1 = external hidden addrspace(2) 
global [2 x [2 x <4 x i32>]], align 16 // CHECK: @t2 = external hidden addrspace(2) global [2 x [2 x <4 x i32>]], align 16 // CHECK: @CBTypedefArray.str = private unnamed_addr constant [15 x i8] c"CBTypedefArray\00", align 1 @@ -135,13 +217,12 @@ struct D { Empty es; }; -// CHECK: @CBStructs.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, -// CHECK-SAME: 246, 0, 16, 32, 64, 144, 238, 240)) -// CHECK: @a = external hidden addrspace(2) global target("dx.Layout", %A, 8, 0), align 1 -// CHECK: @b = external hidden addrspace(2) global target("dx.Layout", %B, 14, 0, 8), align 1 -// CHECK: @c = external hidden addrspace(2) global target("dx.Layout", %C, 24, 0, 16), align 1 -// CHECK: @array_of_A = external hidden addrspace(2) global [5 x target("dx.Layout", %A, 8, 0)], align 1 -// CHECK: @d = external hidden addrspace(2) global target("dx.Layout", %__cblayout_D, 94, 0), align 1 +// CHECK: @CBStructs.cb = global target("dx.CBuffer", %__cblayout_CBStructs) +// CHECK: @a = external hidden addrspace(2) global %A, align 1 +// CHECK: @b = external hidden addrspace(2) global %B, align 1 +// CHECK: @c = external hidden addrspace(2) global %C, align 1 +// CHECK: @array_of_A = external hidden addrspace(2) global <{ [4 x <{ %A, target("dx.Padding", 8) }>], %A }>, align 1 +// CHECK: @d = external hidden addrspace(2) global %__cblayout_D, align 1 // CHECK: @e = external hidden addrspace(2) global half, align 2 // CHECK: @f = external hidden addrspace(2) global <3 x i16>, align 8 // CHECK: @CBStructs.str = private unnamed_addr constant [10 x i8] c"CBStructs\00", align 1 @@ -176,27 +257,25 @@ cbuffer CBClasses { K ka[10]; }; -// CHECK: @CBClasses.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, -// CHECK-SAME: 260, 0, 16, 32, 112)) -// CHECK: @k = external hidden addrspace(2) global target("dx.Layout", %K, 4, 0), align 1 -// CHECK: @l = external hidden addrspace(2) global target("dx.Layout", %L, 8, 0, 4), align 1 -// CHECK: @m = 
external hidden addrspace(2) global target("dx.Layout", %M, 68, 0), align 1 -// CHECK: @ka = external hidden addrspace(2) global [10 x target("dx.Layout", %K, 4, 0)], align 1 +// CHECK: @CBClasses.cb = global target("dx.CBuffer", %__cblayout_CBClasses) +// CHECK: @k = external hidden addrspace(2) global %K, align 1 +// CHECK: @l = external hidden addrspace(2) global %L, align 1 +// CHECK: @m = external hidden addrspace(2) global %M, align 1 +// CHECK: @ka = external hidden addrspace(2) global <{ [9 x <{ %K, target("dx.Padding", 12) }>], %K }>, align 1 // CHECK: @CBClasses.str = private unnamed_addr constant [10 x i8] c"CBClasses\00", align 1 struct Test { float a, b; }; -// CHECK: @CBMix.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, -// CHECK-SAME: 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) -// CHECK: @test = external hidden addrspace(2) global [2 x target("dx.Layout", %Test, 8, 0, 4)], align 1 +// CHECK: @CBMix.cb = global target("dx.CBuffer", %__cblayout_CBMix) +// CHECK: @test = external hidden addrspace(2) global <{ [1 x <{ %Test, target("dx.Padding", 8) }>], %Test }>, align 1 // CHECK: @f1 = external hidden addrspace(2) global float, align 4 -// CHECK: @f2 = external hidden addrspace(2) global [3 x [2 x <2 x float>]], align 8 +// CHECK: @f2 = external hidden addrspace(2) global <{ [2 x <{ <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>], <{ [1 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }> }>, align 8 // CHECK: @f3 = external hidden addrspace(2) global float, align 4 -// CHECK: @f4 = external hidden addrspace(2) global target("dx.Layout", %anon, 4, 0), align 1 +// CHECK: @f4 = external hidden addrspace(2) global %anon, align 1 // CHECK: @f5 = external hidden addrspace(2) global double, align 8 -// CHECK: @f6 = external hidden addrspace(2) global target("dx.Layout", %anon.0, 8, 0), align 1 +// CHECK: @f6 = external hidden addrspace(2) global %anon.0, align 1 // 
CHECK: @f7 = external hidden addrspace(2) global float, align 4 // CHECK: @f8 = external hidden addrspace(2) global <1 x double>, align 8 // CHECK: @f9 = external hidden addrspace(2) global i16, align 2 @@ -215,7 +294,7 @@ cbuffer CBMix { uint16_t f9; }; -// CHECK: @CB_A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) +// CHECK: @CB_A.cb = global target("dx.CBuffer", %__cblayout_CB_A) cbuffer CB_A { double B0[2]; @@ -229,7 +308,7 @@ cbuffer CB_A { half3 B8; } -// CHECK: @CB_B.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) +// CHECK: @CB_B.cb = global target("dx.CBuffer", %__cblayout_CB_B) cbuffer CB_B { double3 B9[3]; half3 B10; @@ -264,7 +343,7 @@ struct G { half C3; }; -// CHECK: @CB_C.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) +// CHECK: @CB_C.cb = global target("dx.CBuffer", %__cblayout_CB_C) cbuffer CB_C { int D0; F D1; @@ -275,63 +354,63 @@ cbuffer CB_C { // CHECK: define internal void @_init_buffer_CBScalars.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) -// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBScalarss_56_0_8_16_24_32_36_40_48tt(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBScalars, 56, 0, 8, 16, 24, 32, 36, 40, 48)) %CBScalars.cb_h, ptr @CBScalars.cb, align 4 +// CHECK-NEXT: %CBScalars.cb_h = call target("dx.CBuffer", %__cblayout_CBScalars) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBScalarsst(i32 5, i32 1, i32 1, i32 0, ptr @CBScalars.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBScalars) %CBScalars.cb_h, ptr @CBScalars.cb, align 4 // CHECK: define internal void @_init_buffer_CBVectors.cb() // CHECK-NEXT: 
entry: -// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBVectorss_136_0_16_40_48_80_96_112tt(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBVectors, 136, 0, 16, 40, 48, 80, 96, 112)) %CBVectors.cb_h, ptr @CBVectors.cb, align 4 +// CHECK-NEXT: %CBVectors.cb_h = call target("dx.CBuffer", %__cblayout_CBVectors) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBVectorsst(i32 0, i32 0, i32 1, i32 0, ptr @CBVectors.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBVectors) %CBVectors.cb_h, ptr @CBVectors.cb, align 4 // CHECK: define internal void @_init_buffer_CBArrays.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) -// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBArrayss_708_0_48_112_176_224_608_624_656tt(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBArrays, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CBArrays.cb_h, ptr @CBArrays.cb, align 4 +// CHECK-NEXT: %CBArrays.cb_h = call target("dx.CBuffer", %__cblayout_CBArrays) +// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBArraysst(i32 0, i32 2, i32 1, i32 0, ptr @CBArrays.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBArrays) %CBArrays.cb_h, ptr @CBArrays.cb, align 4 // CHECK: define internal void @_init_buffer_CBTypedefArray.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) -// CHECK-SAME: 
@llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBTypedefArrays_128_0_64tt(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBTypedefArray, 128, 0, 64)) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4 +// CHECK-NEXT: %CBTypedefArray.cb_h = call target("dx.CBuffer", %__cblayout_CBTypedefArray) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBTypedefArrayst(i32 1, i32 2, i32 1, i32 0, ptr @CBTypedefArray.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBTypedefArray) %CBTypedefArray.cb_h, ptr @CBTypedefArray.cb, align 4 // CHECK: define internal void @_init_buffer_CBStructs.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBStructs.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBStructss_246_0_16_32_64_144_238_240tt(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBStructs, 246, 0, 16, 32, 64, 144, 238, 240)) %CBStructs.cb_h, ptr @CBStructs.cb, align 4 +// CHECK-NEXT: %CBStructs.cb_h = call target("dx.CBuffer", %__cblayout_CBStructs) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBStructsst(i32 2, i32 0, i32 1, i32 0, ptr @CBStructs.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBStructs) %CBStructs.cb_h, ptr @CBStructs.cb, align 4 // CHECK: define internal void @_init_buffer_CBClasses.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBClassess_260_0_16_32_112tt(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str) -// CHECK-NEXT: 
store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBClasses, 260, 0, 16, 32, 112)) %CBClasses.cb_h, ptr @CBClasses.cb, align 4 +// CHECK-NEXT: %CBClasses.cb_h = call target("dx.CBuffer", %__cblayout_CBClasses) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBClassesst(i32 3, i32 0, i32 1, i32 0, ptr @CBClasses.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBClasses) %CBClasses.cb_h, ptr @CBClasses.cb, align 4 // CHECK: define internal void @_init_buffer_CBMix.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBMixs_170_0_24_32_120_128_136_144_152_160_168tt(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CBMix, 170, 0, 24, 32, 120, 128, 136, 144, 152, 160, 168)) %CBMix.cb_h, ptr @CBMix.cb, align 4 +// CHECK-NEXT: %CBMix.cb_h = call target("dx.CBuffer", %__cblayout_CBMix) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CBMixst(i32 4, i32 0, i32 1, i32 0, ptr @CBMix.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CBMix) %CBMix.cb_h, ptr @CBMix.cb, align 4 // CHECK: define internal void @_init_buffer_CB_A.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_As_188_0_32_76_80_120_128_144_160_182tt(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_A, 188, 0, 32, 76, 80, 120, 128, 144, 160, 182)) %CB_A.cb_h, ptr @CB_A.cb, align 4 +// CHECK-NEXT: %CB_A.cb_h = call target("dx.CBuffer", %__cblayout_CB_A) 
+// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Ast(i32 5, i32 0, i32 1, i32 0, ptr @CB_A.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_A) %CB_A.cb_h, ptr @CB_A.cb, align 4 // CHECK: define internal void @_init_buffer_CB_B.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Bs_94_0_88tt(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_B, 94, 0, 88)) %CB_B.cb_h, ptr @CB_B.cb, align 4 +// CHECK-NEXT: %CB_B.cb_h = call target("dx.CBuffer", %__cblayout_CB_B) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Bst(i32 6, i32 0, i32 1, i32 0, ptr @CB_B.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_B) %CB_B.cb_h, ptr @CB_B.cb, align 4 // CHECK: define internal void @_init_buffer_CB_C.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) -// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_tdx.Layout_s___cblayout_CB_Cs_400_0_16_112_128_392tt(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str) -// CHECK-NEXT: store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_C, 400, 0, 16, 112, 128, 392)) %CB_C.cb_h, ptr @CB_C.cb, align 4 +// CHECK-NEXT: %CB_C.cb_h = call target("dx.CBuffer", %__cblayout_CB_C) +// CHECK-SAME: @llvm.dx.resource.handlefromimplicitbinding.tdx.CBuffer_s___cblayout_CB_Cst(i32 7, i32 0, i32 1, i32 0, ptr @CB_C.str) +// CHECK-NEXT: store target("dx.CBuffer", %__cblayout_CB_C) %CB_C.cb_h, ptr @CB_C.cb, align 4 RWBuffer Buf; diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl index b7bdce32e6507..1fe0a68261c94 
100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_and_namespaces.hlsl @@ -4,18 +4,18 @@ // CHECK: %"n0::n1::__cblayout_A" = type <{ float }> // CHECK: %"n0::__cblayout_B" = type <{ float }> -// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Layout", %"n0::Foo", 4, 0) }> +// CHECK: %"n0::n2::__cblayout_C" = type <{ float, target("dx.Padding", 12), %"n0::Foo" }> // CHECK: %"n0::Foo" = type <{ float }> -// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n1::__cblayout_A", 4, 0)) +// CHECK: @A.cb = global target("dx.CBuffer", %"n0::n1::__cblayout_A") // CHECK: @_ZN2n02n11aE = external hidden addrspace(2) global float, align 4 -// CHECK: @B.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::__cblayout_B", 4, 0)) +// CHECK: @B.cb = global target("dx.CBuffer", %"n0::__cblayout_B") // CHECK: @_ZN2n01aE = external hidden addrspace(2) global float, align 4 -// CHECK: @C.cb = global target("dx.CBuffer", target("dx.Layout", %"n0::n2::__cblayout_C", 20, 0, 16)) +// CHECK: @C.cb = global target("dx.CBuffer", %"n0::n2::__cblayout_C") // CHECK: @_ZN2n02n21aE = external hidden addrspace(2) global float, align 4 -// CHECK: external hidden addrspace(2) global target("dx.Layout", %"n0::Foo", 4, 0), align 1 +// CHECK: external hidden addrspace(2) global %"n0::Foo", align 1 namespace n0 { struct Foo { diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_geps.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_geps.hlsl new file mode 100644 index 0000000000000..7a0b45875faf9 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/cbuffer_geps.hlsl @@ -0,0 +1,117 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -fnative-int16-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// Capture the anonymous struct types for check lines below. 
+// CHECK: [[ANON_1:%.*]] = type <{ float, target("dx.Padding", 12), <4 x i32> }> +// CHECK: [[ANON_2:%.*]] = type <{ <2 x i32>, target("dx.Padding", 8), <{ [3 x <{ %ArrayAndScalar, target("dx.Padding", 12) }>], %ArrayAndScalar }> + +template void use(T); + +cbuffer CBArrays : register(b2) { + float c1[30]; + double3 c2[20]; + float16_t c3[10][20]; + uint64_t c4[30]; + int4 c5[20][30][40]; + uint16_t c6[10]; + int64_t c7[20]; + bool c8[40]; +} + +// CHECK-LABEL: define hidden void @_Z8cbarraysv() +void cbarrays() { + // CHECK: load float, ptr addrspace(2) @c1, align 16 + use(c1[0]); + // CHECK: load float, ptr addrspace(2) getelementptr (<{ float, target("dx.Padding", 12) }>, ptr addrspace(2) @c1, i32 7, i32 0), align 16 + use(c1[7]); + // CHECK: load float, ptr addrspace(2) getelementptr (<{ float, target("dx.Padding", 12) }>, ptr addrspace(2) @c1, i32 29, i32 0), align 16 + use(c1[29]); + + // CHECK: load <3 x double>, ptr addrspace(2) getelementptr (<{ <3 x double>, target("dx.Padding", 8) }>, ptr addrspace(2) @c2, i32 8, i32 0), align 32 + use(c2[8]); + // CHECK: load half, ptr addrspace(2) getelementptr (<{ half, target("dx.Padding", 14) }>, ptr addrspace(2) getelementptr (<{ <{ [19 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) }>, ptr addrspace(2) @c3, i32 9, i32 0), i32 5, i32 0), align 16 + use(c3[9][5]); + // CHECK: load i64, ptr addrspace(2) getelementptr (<{ i64, target("dx.Padding", 8) }>, ptr addrspace(2) @c4, i32 6, i32 0), align 16 + use(c4[6]); + // CHECK: load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([40 x <4 x i32>], ptr addrspace(2) getelementptr inbounds ([30 x [40 x <4 x i32>]], ptr addrspace(2) getelementptr inbounds ([20 x [30 x [40 x <4 x i32>]]], ptr addrspace(2) @c5, i32 0, i32 1), i32 0, i32 12), i32 0, i32 15), align 16 + use(c5[1][12][15]); + // CHECK: load i16, ptr addrspace(2) getelementptr (<{ i16, target("dx.Padding", 14) }>, ptr addrspace(2) @c6, i32 4, i32 0), align 16 + use(c6[4]); + // 
CHECK: load i64, ptr addrspace(2) getelementptr (<{ i64, target("dx.Padding", 8) }>, ptr addrspace(2) @c7, i32 17, i32 0), align 16 + use(c7[17]); + // CHECK: load i32, ptr addrspace(2) getelementptr (<{ i32, target("dx.Padding", 12) }>, ptr addrspace(2) @c8, i32 30, i32 0), align 16 + use(c8[30]); +} + +struct A { + float2 a1; +}; + +struct B : A { + uint16_t3 b1; +}; + +struct C { + int c1; + A c2; +}; + +struct D { + B d1[4][6]; +}; + +cbuffer CBStructs { + A s1; + B s2; + C s3; + A s4[5]; + D s5; +}; + +// CHECK-LABEL: define hidden void @_Z9cbstructsv() +void cbstructs() { + // CHECK: load <2 x float>, ptr addrspace(2) @s1, align 8 + use(s1.a1); + // CHECK: load <3 x i16>, ptr addrspace(2) getelementptr inbounds nuw (%B, ptr addrspace(2) @s2, i32 0, i32 1), align 2 + use(s2.b1); + // CHECK: load <2 x float>, ptr addrspace(2) getelementptr inbounds nuw (%C, ptr addrspace(2) @s3, i32 0, i32 1), align 8 + use(s3.c2.a1); + // CHECK: load <2 x float>, ptr addrspace(2) getelementptr (<{ %A, target("dx.Padding", 8) }>, ptr addrspace(2) @s4, i32 2, i32 0), align 8 + use(s4[2].a1); + // CHECK: load <3 x i16>, ptr addrspace(2) getelementptr inbounds nuw (%B, ptr addrspace(2) getelementptr (<{ %B, target("dx.Padding", 2) }>, ptr addrspace(2) getelementptr (<{ <{ [5 x <{ %B, target("dx.Padding", 2) }>], %B }>, target("dx.Padding", 2) }>, ptr addrspace(2) @s5, i32 3, i32 0), i32 5, i32 0), i32 0, i32 1), align 2 + use(s5.d1[3][5].b1); +} + +struct Scalars { + float a, b; +}; + +struct ArrayAndScalar { + uint4 x[5]; + float y; +}; + +cbuffer CBMix { + Scalars m1[3]; + float m2; + ArrayAndScalar m3; + float2 m4[5][4]; + struct { float c; uint4 d; } m5; + struct { int2 i; ArrayAndScalar j[4]; } m6; + vector m7; +}; + +// CHECK-LABEL: define hidden void @_Z5cbmixv() +void cbmix() { + // CHECK: load float, ptr addrspace(2) getelementptr inbounds nuw (%Scalars, ptr addrspace(2) getelementptr (<{ %Scalars, target("dx.Padding", 8) }>, ptr addrspace(2) @m1, i32 2, i32 0), i32 0, 
i32 1), align 4 + use(m1[2].b); + // CHECK: load float, ptr addrspace(2) getelementptr inbounds nuw (%ArrayAndScalar, ptr addrspace(2) @m3, i32 0, i32 1), align 4 + use(m3.y); + // CHECK: load <2 x float>, ptr addrspace(2) getelementptr (<{ <2 x float>, target("dx.Padding", 8) }>, ptr addrspace(2) getelementptr (<{ <{ [3 x <{ <2 x float>, target("dx.Padding", 8) }>], <2 x float> }>, target("dx.Padding", 8) }>, ptr addrspace(2) @m4, i32 2, i32 0), i32 3, i32 0), align 16 + use(m4[2][3]); + // CHECK: load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw ([[ANON_1]], ptr addrspace(2) @m5, i32 0, i32 1), align 16 + use(m5.d); + // CHECK: load <4 x i32>, ptr addrspace(2) getelementptr inbounds ([5 x <4 x i32>], ptr addrspace(2) getelementptr (<{ %ArrayAndScalar, target("dx.Padding", 12) }>, ptr addrspace(2) getelementptr inbounds nuw ([[ANON_2]], ptr addrspace(2) @m6, i32 0, i32 1), i32 2, i32 0), i32 0, i32 2), align 16 + use(m6.j[2].x[2]); + // CHECK: load <1 x double>, ptr addrspace(2) @m7, align 8 + use(m7); +} diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl index 7bedd63c9f65d..68e263b9fc07f 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_packoffset.hlsl @@ -2,13 +2,24 @@ // RUN: dxil-pc-shadermodel6.3-compute %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// CHECK: %__cblayout_CB = type <{ float, double, <2 x i32> }> -// CHECK: %__cblayout_CB_1 = type <{ float, <2 x float> }> - -// CHECK: @CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) -// CHECK: @a = external hidden addrspace(2) global float, align 4 -// CHECK: @b = external hidden addrspace(2) global double, align 8 -// CHECK: @c = external hidden addrspace(2) global <2 x i32>, align 8 +// CHECK: %__cblayout_CB = type <{ +// CHECK-SAME: target("dx.Padding", 16), +// CHECK-SAME: 
float, +// CHECK-SAME: target("dx.Padding", 68), +// CHECK-SAME: <2 x i32>, +// CHECK-SAME: target("dx.Padding", 72), +// CHECK-SAME: double +// CHECK-SAME: }> +// CHECK: %__cblayout_CB_1 = type <{ +// CHECK-SAME: target("dx.Padding", 80), +// CHECK-SAME: <2 x float>, +// CHECK-SAME: float +// CHECK-SAME: }> + +// CHECK-DAG: @CB.cb = global target("dx.CBuffer", %__cblayout_CB) +// CHECK-DAG: @a = external hidden addrspace(2) global float, align 4 +// CHECK-DAG: @b = external hidden addrspace(2) global double, align 8 +// CHECK-DAG: @c = external hidden addrspace(2) global <2 x i32>, align 8 // CHECK: @CB.str = private unnamed_addr constant [3 x i8] c"CB\00", align 1 cbuffer CB : register(b1, space3) { @@ -17,9 +28,9 @@ cbuffer CB : register(b1, space3) { int2 c : packoffset(c5.z); } -// CHECK: @CB.cb.1 = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB_1, 92, 88, 80)) -// CHECK: @x = external hidden addrspace(2) global float, align 4 -// CHECK: @y = external hidden addrspace(2) global <2 x float>, align 8 +// CHECK-DAG: @CB.cb.1 = global target("dx.CBuffer", %__cblayout_CB_1) +// CHECK-DAG: @x = external hidden addrspace(2) global float, align 4 +// CHECK-DAG: @y = external hidden addrspace(2) global <2 x float>, align 8 // Missing packoffset annotation will produce a warning. // Element x will be placed after the element y that has an explicit packoffset. 
@@ -30,8 +41,7 @@ cbuffer CB : register(b0) { // CHECK: define internal void @_init_buffer_CB.cb() // CHECK-NEXT: entry: -// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 176, 16, 168, 88)) -// CHECK-SAME: @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_176_16_168_88tt(i32 3, i32 1, i32 1, i32 0, ptr @CB.str) +// CHECK-NEXT: %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_s___cblayout_CBst(i32 3, i32 1, i32 1, i32 0, ptr @CB.str) float foo() { // CHECK: load float, ptr addrspace(2) @a, align 4 @@ -48,5 +58,5 @@ void main() { } // CHECK: !hlsl.cbs = !{![[CB1:[0-9]+]], ![[CB2:[0-9]+]]} -// CHECK: ![[CB1]] = !{ptr @CB.cb, ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c} -// CHECK: ![[CB2]] = !{ptr @CB.cb.1, ptr addrspace(2) @x, ptr addrspace(2) @y} +// CHECK: ![[CB1]] = !{ptr @CB.cb, ptr addrspace(2) @a, ptr addrspace(2) @c, ptr addrspace(2) @b} +// CHECK: ![[CB2]] = !{ptr @CB.cb.1, ptr addrspace(2) @y, ptr addrspace(2) @x} diff --git a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl index fa3405df9e3d3..b8c7babb8d634 100644 --- a/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl +++ b/clang/test/CodeGenHLSL/resources/cbuffer_with_static_global_and_function.hlsl @@ -2,7 +2,7 @@ // CHECK: %__cblayout_A = type <{ float }> -// CHECK: @A.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_A, 4, 0)) +// CHECK: @A.cb = global target("dx.CBuffer", %__cblayout_A) // CHECK: @a = external hidden addrspace(2) global float, align 4 // CHECK-DAG: @_ZL1b = internal global float 3.000000e+00, align 4 // CHECK-NOT: @B.cb diff --git a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl index ad4d92f8afc02..5333dad962195 100644 --- 
a/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl +++ b/clang/test/CodeGenHLSL/resources/default_cbuffer.hlsl @@ -1,19 +1,18 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,DXIL // RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -fnative-half-type -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,SPIRV -// DXIL: %"__cblayout_$Globals" = type <{ float, float, target("dx.Layout", %__cblayout_S, 4, 0) }> -// SPIRV: %"__cblayout_$Globals" = type <{ float, float, target("spirv.Layout", %__cblayout_S, 4, 0) }> +// CHECK: %"__cblayout_$Globals" = type <{ float, float, target("{{.*}}.Padding", 8), %__cblayout_S }> // CHECK: %__cblayout_S = type <{ float }> -// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", target("dx.Layout", %"__cblayout_$Globals", 20, 0, 4, 16)) +// DXIL-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals") // DXIL-DAG: @a = external hidden addrspace(2) global float // DXIL-DAG: @g = external hidden addrspace(2) global float -// DXIL-DAG: @h = external hidden addrspace(2) global target("dx.Layout", %__cblayout_S, 4, 0), align 4 +// DXIL-DAG: @h = external hidden addrspace(2) global %__cblayout_S, align 4 -// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 20, 0, 4, 16), 2, 0) +// SPIRV-DAG: @"$Globals.cb" = global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) // SPIRV-DAG: @a = external hidden addrspace(12) global float // SPIRV-DAG: @g = external hidden addrspace(12) global float -// SPIRV-DAG: @h = external hidden addrspace(12) global target("spirv.Layout", %__cblayout_S, 4, 0), align 8 +// SPIRV-DAG: @h = external hidden addrspace(12) global %__cblayout_S, align 8 struct EmptyStruct { }; diff --git 
a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl index 1b2cb0e99aa83..7be1f9043042c 100644 --- a/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl +++ b/clang/test/CodeGenHLSL/resources/default_cbuffer_with_layout.hlsl @@ -1,17 +1,26 @@ // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s -// CHECK: %"__cblayout_$Globals" = type <{ i32, float, [4 x double], <4 x i32>, <4 x float>, -// CHECK-SAME: target("dx.Layout", %S, 8, 0) }> +// CHECK: %"__cblayout_$Globals" = type <{ +// CHECK-SAME: float, +// CHECK-SAME: target("dx.Padding", 12), +// CHECK-SAME: <{ [3 x <{ double, target("dx.Padding", 8) }>], double }>, +// CHECK-SAME: target("dx.Padding", 8), +// CHECK-SAME: <4 x i32>, +// CHECK-SAME: %S +// CHECK-SAME: i32, +// CHECK-SAME: target("dx.Padding", 4), +// CHECK-SAME: <4 x float> +// CHECK-SAME: }> + // CHECK: %S = type <{ <2 x float> }> +// CHECK-DAG: @"$Globals.cb" = global target("dx.CBuffer", %"__cblayout_$Globals") +// CHECK-DAG: @a = external hidden addrspace(2) global i32, align 4 // CHECK-DAG: @b = external hidden addrspace(2) global float, align 4 +// CHECK-DAG: @c = external hidden addrspace(2) global <{ [3 x <{ double, target("dx.Padding", 8) }>], double }>, align 8 // CHECK-DAG: @d = external hidden addrspace(2) global <4 x i32>, align 16 -// CHECK-DAG: @"$Globals.cb" = global target("dx.CBuffer", -// CHECK-DAG-SAME: target("dx.Layout", %"__cblayout_$Globals", 144, 120, 16, 32, 64, 128, 112)) -// CHECK-DAG: @a = external hidden addrspace(2) global i32, align 4 -// CHECK-DAG: @c = external hidden addrspace(2) global [4 x double], align 8 // CHECK-DAG: @e = external hidden addrspace(2) global <4 x float>, align 16 -// CHECK-DAG: @s = external hidden addrspace(2) global target("dx.Layout", %S, 8, 0), align 1 +// CHECK-DAG: @s = external hidden addrspace(2) global %S, 
align 1 struct S { float2 v; @@ -19,8 +28,8 @@ struct S { int a; float b : register(c1); +int4 d : register(c6); double c[4] : register(c2); -int4 d : register(c4); float4 e; S s : register(c7); @@ -32,5 +41,4 @@ void main() { } // CHECK: !hlsl.cbs = !{![[CB:.*]]} -// CHECK: ![[CB]] = !{ptr @"$Globals.cb", ptr addrspace(2) @a, ptr addrspace(2) @b, ptr addrspace(2) @c, -// CHECK-SAME: ptr addrspace(2) @d, ptr addrspace(2) @e, ptr addrspace(2) @s} +// CHECK: ![[CB]] = !{ptr @"$Globals.cb", ptr addrspace(2) @b, ptr addrspace(2) @c, ptr addrspace(2) @d, ptr addrspace(2) @s, ptr addrspace(2) @a, ptr addrspace(2) @e} diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index f253e02f4cdd9..db7d4a4342eb7 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -277,7 +277,7 @@ Examples: Accessing Resources as Memory ----------------------------- -*relevant types: Buffers and Textures* +*relevant types: Buffers, Textures, and CBuffers* Loading and storing from resources is generally represented in LLVM using operations on memory that is only accessible via a handle object. Given a @@ -302,14 +302,14 @@ stores are described later in this document. 
- - Pointer - A pointer to an object in the buffer - * - ``%buffer`` + * - ``%resource`` - 0 - - ``target(dx.TypedBuffer, ...)`` - - The buffer to access + - Any buffer, texture, or cbuffer type + - The resource to access * - ``%index`` - 1 - ``i32`` - - Index into the buffer + - Index into the resource Examples: diff --git a/llvm/include/llvm/Frontend/HLSL/CBuffer.h b/llvm/include/llvm/Frontend/HLSL/CBuffer.h index 694a7fa854576..f4e232ffe1745 100644 --- a/llvm/include/llvm/Frontend/HLSL/CBuffer.h +++ b/llvm/include/llvm/Frontend/HLSL/CBuffer.h @@ -46,7 +46,8 @@ class CBufferMetadata { CBufferMetadata(NamedMDNode *MD) : MD(MD) {} public: - static std::optional get(Module &M); + static std::optional + get(Module &M, llvm::function_ref IsPadding); using iterator = SmallVector::iterator; iterator begin() { return Mappings.begin(); } @@ -55,9 +56,6 @@ class CBufferMetadata { void eraseFromModule(); }; -APInt translateCBufArrayOffset(const DataLayout &DL, APInt Offset, - ArrayType *Ty); - } // namespace hlsl } // namespace llvm diff --git a/llvm/lib/Frontend/HLSL/CBuffer.cpp b/llvm/lib/Frontend/HLSL/CBuffer.cpp index 1f53c87bb1683..33d9b1a1b3102 100644 --- a/llvm/lib/Frontend/HLSL/CBuffer.cpp +++ b/llvm/lib/Frontend/HLSL/CBuffer.cpp @@ -15,25 +15,28 @@ using namespace llvm; using namespace llvm::hlsl; -static size_t getMemberOffset(GlobalVariable *Handle, size_t Index) { +static SmallVector +getMemberOffsets(const DataLayout &DL, GlobalVariable *Handle, + llvm::function_ref IsPadding) { + SmallVector Offsets; + auto *HandleTy = cast(Handle->getValueType()); assert((HandleTy->getName().ends_with(".CBuffer") || HandleTy->getName() == "spirv.VulkanBuffer") && "Not a cbuffer type"); assert(HandleTy->getNumTypeParameters() == 1 && "Expected layout type"); + auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); - auto *LayoutTy = cast(HandleTy->getTypeParameter(0)); - assert(LayoutTy->getName().ends_with(".Layout") && "Not a layout type"); - - // Skip the "size" parameter. 
- size_t ParamIndex = Index + 1; - assert(LayoutTy->getNumIntParameters() > ParamIndex && - "Not enough parameters"); + const StructLayout *SL = DL.getStructLayout(LayoutTy); + for (int I = 0, E = LayoutTy->getNumElements(); I < E; ++I) + if (!IsPadding(LayoutTy->getElementType(I))) + Offsets.push_back(SL->getElementOffset(I)); - return LayoutTy->getIntParameter(ParamIndex); + return Offsets; } -std::optional CBufferMetadata::get(Module &M) { +std::optional +CBufferMetadata::get(Module &M, llvm::function_ref IsPadding) { NamedMDNode *CBufMD = M.getNamedMetadata("hlsl.cbs"); if (!CBufMD) return std::nullopt; @@ -52,13 +55,16 @@ std::optional CBufferMetadata::get(Module &M) { cast(cast(OpMD)->getValue()); CBufferMapping &Mapping = Result->Mappings.emplace_back(Handle); + SmallVector MemberOffsets = + getMemberOffsets(M.getDataLayout(), Handle, IsPadding); + for (int I = 1, E = MD->getNumOperands(); I < E; ++I) { Metadata *OpMD = MD->getOperand(I); // Some members may be null if they've been optimized out. 
if (!OpMD) continue; auto *V = cast(cast(OpMD)->getValue()); - Mapping.Members.emplace_back(V, getMemberOffset(Handle, I - 1)); + Mapping.Members.emplace_back(V, MemberOffsets[I - 1]); } } @@ -69,10 +75,3 @@ void CBufferMetadata::eraseFromModule() { // Remove the cbs named metadata MD->eraseFromParent(); } - -APInt hlsl::translateCBufArrayOffset(const DataLayout &DL, APInt Offset, - ArrayType *Ty) { - int64_t TypeSize = DL.getTypeSizeInBits(Ty->getElementType()) / 8; - int64_t RoundUp = alignTo(TypeSize, Align(CBufferRowSizeInBytes)); - return Offset.udiv(TypeSize) * RoundUp; -} diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 682448fe07352..299d07b81837b 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -1007,6 +1007,10 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { } if (Name == "spirv.IntegralConstant" || Name == "spirv.Literal") return TargetTypeInfo(Type::getVoidTy(C)); + if (Name == "spirv.Padding") + return TargetTypeInfo( + ArrayType::get(Type::getInt8Ty(C), Ty->getIntParameter(0)), + TargetExtType::CanBeGlobal); if (Name.starts_with("spirv.")) return TargetTypeInfo(PointerType::get(C, 0), TargetExtType::HasZeroInit, TargetExtType::CanBeGlobal, diff --git a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp index 44277971acd60..56245321a522a 100644 --- a/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp +++ b/llvm/lib/Target/DirectX/DXILCBufferAccess.cpp @@ -8,11 +8,13 @@ #include "DXILCBufferAccess.h" #include "DirectX.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/Frontend/HLSL/CBuffer.h" #include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/ReplaceConstant.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/FormatVariadic.h" @@ -21,297 +23,41 @@ #define DEBUG_TYPE "dxil-cbuffer-access" using namespace llvm; 
-namespace { -/// Helper for building a `load.cbufferrow` intrinsic given a simple type. -struct CBufferRowIntrin { - Intrinsic::ID IID; - Type *RetTy; - unsigned int EltSize; - unsigned int NumElts; - - CBufferRowIntrin(const DataLayout &DL, Type *Ty) { - assert(Ty == Ty->getScalarType() && "Expected scalar type"); - - switch (DL.getTypeSizeInBits(Ty)) { - case 16: - IID = Intrinsic::dx_resource_load_cbufferrow_8; - RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); - EltSize = 2; - NumElts = 8; - break; - case 32: - IID = Intrinsic::dx_resource_load_cbufferrow_4; - RetTy = StructType::get(Ty, Ty, Ty, Ty); - EltSize = 4; - NumElts = 4; - break; - case 64: - IID = Intrinsic::dx_resource_load_cbufferrow_2; - RetTy = StructType::get(Ty, Ty); - EltSize = 8; - NumElts = 2; - break; - default: - llvm_unreachable("Only 16, 32, and 64 bit types supported"); - } - } -}; - -// Helper for creating CBuffer handles and loading data from them -struct CBufferResource { - GlobalVariable *GVHandle; - GlobalVariable *Member; - size_t MemberOffset; - - LoadInst *Handle; - - CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member, - size_t MemberOffset) - : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {} - - const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); } - Type *getValueType() { return Member->getValueType(); } - iterator_range users() { - return Member->users(); - } - - /// Get the byte offset of a Pointer-typed Value * `Val` relative to Member. - /// `Val` can either be Member itself, or a GEP of a constant offset from - /// Member - size_t getOffsetForCBufferGEP(Value *Val) { - assert(isa(Val->getType()) && - "Expected a pointer-typed value"); - - if (Val == Member) - return 0; - - if (auto *GEP = dyn_cast(Val)) { - // Since we should always have a constant offset, we should only ever have - // a single GEP of indirection from the Global. 
- assert(GEP->getPointerOperand() == Member && - "Indirect access to resource handle"); - - const DataLayout &DL = getDataLayout(); - APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); - bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset); - (void)Success; - assert(Success && "Offsets into cbuffer globals must be constant"); - - if (auto *ATy = dyn_cast(Member->getValueType())) - ConstantOffset = - hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy); - - return ConstantOffset.getZExtValue(); - } - - llvm_unreachable("Expected Val to be a GlobalVariable or GEP"); - } - - /// Create a handle for this cbuffer resource using the IRBuilder `Builder` - /// and sets the handle as the current one to use for subsequent calls to - /// `loadValue` - void createAndSetCurrentHandle(IRBuilder<> &Builder) { - Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle, - GVHandle->getName()); +static void replaceUsersOfGlobal(GlobalVariable *Global, + GlobalVariable *HandleGV, size_t Offset) { + for (Use &U : make_early_inc_range(Global->uses())) { + auto UseInst = dyn_cast(U.getUser()); + // TODO: Constants? Metadata? 
+ assert(UseInst && "Non-instruction use of cbuffer"); + + IRBuilder<> Builder(UseInst); + LoadInst *Handle = Builder.CreateLoad(HandleGV->getValueType(), HandleGV, + HandleGV->getName()); + Value *Ptr = Builder.CreateIntrinsic( + Global->getType(), Intrinsic::dx_resource_getpointer, + ArrayRef{Handle, + ConstantInt::get(Builder.getInt32Ty(), Offset)}); + U.set(Ptr); } - /// Load a value of type `Ty` at offset `Offset` using the handle from the - /// last call to `createAndSetCurrentHandle` - Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset, - const Twine &Name = "") { - assert(Handle && - "Expected a handle for this cbuffer global resource to be created " - "before loading a value from it"); - const DataLayout &DL = getDataLayout(); - - size_t TargetOffset = MemberOffset + Offset; - CBufferRowIntrin Intrin(DL, Ty->getScalarType()); - // The cbuffer consists of some number of 16-byte rows. - unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes; - unsigned int CurrentIndex = - (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; - - auto *CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr, - Name + ".load"); - auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract"); - - Value *Result = nullptr; - unsigned int Remaining = - ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; - - if (Remaining == 0) { - // We only have a single element, so we're done. - Result = Elt; - - // However, if we loaded a <1 x T>, then we need to adjust the type here. - if (auto *VT = dyn_cast(Ty)) { - assert(VT->getNumElements() == 1 && - "Can't have multiple elements here"); - Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, - Builder.getInt32(0), Name); - } - return Result; - } - - // Walk each element and extract it, wrapping to new rows as needed. 
- SmallVector Extracts{Elt}; - while (Remaining--) { - CurrentIndex %= Intrin.NumElts; - - if (CurrentIndex == 0) - CBufLoad = Builder.CreateIntrinsic( - Intrin.RetTy, Intrin.IID, - {Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)}, - nullptr, Name + ".load"); - - Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, - Name + ".extract")); - } - - // Finally, we build up the original loaded value. - Result = PoisonValue::get(Ty); - for (int I = 0, E = Extracts.size(); I < E; ++I) - Result = - Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I), - Name + formatv(".upto{}", I)); - return Result; - } -}; - -} // namespace - -/// Replace load via cbuffer global with a load from the cbuffer handle itself. -static void replaceLoad(LoadInst *LI, CBufferResource &CBR, - SmallVectorImpl &DeadInsts) { - size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand()); - IRBuilder<> Builder(LI); - CBR.createAndSetCurrentHandle(Builder); - Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName()); - LI->replaceAllUsesWith(Result); - DeadInsts.push_back(LI); -} - -/// This function recursively copies N array elements from the cbuffer resource -/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional -/// arrays into a sequence of scalar/vector extracts and stores. 
-static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI, - CBufferResource &CBR, ArrayType *ArrTy, - size_t ArrOffset, size_t N, - const Twine &Name = "") { - const DataLayout &DL = MCI->getDataLayout(); - Type *ElemTy = ArrTy->getElementType(); - size_t ElemTySize = DL.getTypeAllocSize(ElemTy); - for (unsigned I = 0; I < N; ++I) { - size_t Offset = ArrOffset + I * ElemTySize; - - // Recursively copy nested arrays - if (ArrayType *ElemArrTy = dyn_cast(ElemTy)) { - copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset, - ElemArrTy->getNumElements(), Name); - continue; - } - - // Load CBuffer value and store it in Dest - APInt CBufArrayOffset( - DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset); - CBufArrayOffset = - hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy); - Value *CBufferVal = - CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name); - Value *GEP = - Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(), - {Builder.getInt32(Offset)}, Name + ".dest"); - Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile()); - } -} - -/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle -/// itself. Assumes the cbuffer global is an array, and the length of bytes to -/// copy is divisible by array element allocation size. -/// The memcpy source must also be a direct cbuffer global reference, not a GEP. 
-static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) { - - ArrayType *ArrTy = dyn_cast(CBR.getValueType()); - assert(ArrTy && "MemCpy lowering is only supported for array types"); - - // This assumption vastly simplifies the implementation - if (MCI->getSource() != CBR.Member) - reportFatalUsageError( - "Expected MemCpy source to be a cbuffer global variable"); - - ConstantInt *Length = dyn_cast(MCI->getLength()); - uint64_t ByteLength = Length->getZExtValue(); - - // If length to copy is zero, no memcpy is needed - if (ByteLength == 0) { - MCI->eraseFromParent(); - return; - } - - const DataLayout &DL = CBR.getDataLayout(); - - Type *ElemTy = ArrTy->getElementType(); - size_t ElemSize = DL.getTypeAllocSize(ElemTy); - assert(ByteLength % ElemSize == 0 && - "Length of bytes to MemCpy must be divisible by allocation size of " - "source/destination array elements"); - size_t ElemsToCpy = ByteLength / ElemSize; - - IRBuilder<> Builder(MCI); - CBR.createAndSetCurrentHandle(Builder); - - copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy, - "memcpy." + MCI->getDest()->getName() + "." + - MCI->getSource()->getName()); - - MCI->eraseFromParent(); -} - -static void replaceAccessesWithHandle(CBufferResource &CBR) { - SmallVector DeadInsts; - - SmallVector ToProcess{CBR.users()}; - while (!ToProcess.empty()) { - User *Cur = ToProcess.pop_back_val(); - - // If we have a load instruction, replace the access. - if (auto *LI = dyn_cast(Cur)) { - replaceLoad(LI, CBR, DeadInsts); - continue; - } - - // If we have a memcpy instruction, replace it with multiple accesses and - // subsequent stores to the destination - if (auto *MCI = dyn_cast(Cur)) { - replaceMemCpy(MCI, CBR); - continue; - } - - // Otherwise, walk users looking for a load... 
- if (isa(Cur) || isa(Cur)) { - ToProcess.append(Cur->user_begin(), Cur->user_end()); - continue; - } - - llvm_unreachable("Unexpected user of Global"); - } - RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + Global->removeFromParent(); } static bool replaceCBufferAccesses(Module &M) { - std::optional CBufMD = hlsl::CBufferMetadata::get(M); + std::optional CBufMD = hlsl::CBufferMetadata::get( + M, [](Type *Ty) { return isa(Ty); }); if (!CBufMD) return false; + SmallVector CBufferGlobals; + for (const hlsl::CBufferMapping &Mapping : *CBufMD) + for (const hlsl::CBufferMember &Member : Mapping.Members) + CBufferGlobals.push_back(Member.GV); + convertUsersOfConstantsToInstructions(CBufferGlobals); + for (const hlsl::CBufferMapping &Mapping : *CBufMD) - for (const hlsl::CBufferMember &Member : Mapping.Members) { - CBufferResource CBR(Mapping.Handle, Member.GV, Member.Offset); - replaceAccessesWithHandle(CBR); - Member.GV->removeFromParent(); - } + for (const hlsl::CBufferMember &Member : Mapping.Members) + replaceUsersOfGlobal(Member.GV, Mapping.Handle, Member.Offset); CBufMD->eraseFromModule(); return true; diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index 84b1a313df2ea..f0c5f523a003c 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -110,9 +110,9 @@ class DirectXPassConfig : public TargetPassConfig { void addCodeGenPrepare() override { addPass(createDXILFinalizeLinkageLegacyPass()); addPass(createGlobalDCEPass()); + addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILResourceAccessLegacyPass()); addPass(createDXILIntrinsicExpansionLegacyPass()); - addPass(createDXILCBufferAccessLegacyPass()); addPass(createDXILDataScalarizationLegacyPass()); ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; diff --git a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp 
b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp index 3ca0b40cac93e..329774df554f4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCBufferAccess.cpp @@ -54,7 +54,12 @@ static Instruction *findHandleDef(GlobalVariable *HandleVar) { } static bool replaceCBufferAccesses(Module &M) { - std::optional CBufMD = hlsl::CBufferMetadata::get(M); + std::optional CBufMD = + hlsl::CBufferMetadata::get(M, [](Type *Ty) { + if (auto *TET = dyn_cast(Ty)) + return TET->getName() == "spirv.Padding"; + return false; + }); if (!CBufMD) return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 599cc35ca2e9d..8e14fb03127fc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -837,9 +837,6 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( if (Ty->isArrayTy()) Ty = Ty->getArrayElementType(); else { - TargetExtType *BufferTy = cast(Ty); - assert(BufferTy->getTargetExtName() == "spirv.Layout"); - Ty = BufferTy->getTypeParameter(0); assert(Ty && Ty->isStructTy()); uint32_t Index = cast(II->getOperand(1))->getZExtValue(); Ty = cast(Ty)->getElementType(Index); diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll deleted file mode 100644 index 52ad0f3df1aba..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/array-typedgep.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -; cbuffer CB : register(b0) { -; float a1[3]; -; } -%__cblayout_CB = type <{ [3 x float] }> - -@CB.cb = global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external addrspace(2) global [3 x float], align 4 - -; CHECK: define void @f -define void @f(ptr %dst) { -entry: - %CB.cb_h = call 
target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) @llvm.dx.resource.handlefrombinding.tdx.CBuffer_tdx.Layout_s___cblayout_CBs_36_0tt(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) %CB.cb_h, ptr @CB.cb, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds ([3 x float], ptr addrspace(2) @a1, i32 0, i32 1), align 4 - store float %a1, ptr %dst, align 32 - - ret void -} - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll index db4e14c1336a6..7bf447f6ff36a 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/arrays.ll @@ -1,124 +1,50 @@ ; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s ; cbuffer CB : register(b0) { -; float a1[3]; -; double3 a2[2]; -; float16_t a3[2][2]; -; uint64_t a4[3]; -; int4 a5[2][3][4]; -; uint16_t a6[1]; -; int64_t a7[2]; -; bool a8[4]; +; float a1[3]; // offset 0, size 4 (+12) * 3 +; double3 a2[2]; // offset 48, size 24 (+8) * 2 +; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4 ; } -%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [2 x [3 x [4 x <4 x i32>]]], [1 x i16], [2 x i64], [4 x i32] }> +%__cblayout_CB = type <{ + <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12), + <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8), + <{ [3 x <{ half, 
target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14) +}> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 -@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32 -@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2 -@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8 -@a5 = external local_unnamed_addr addrspace(2) global [2 x [3 x [4 x <4 x i32>]]], align 16 -@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2 -@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8 -@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4 +@a1 = external local_unnamed_addr addrspace(2) global <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, align 4 +@a2 = external local_unnamed_addr addrspace(2) global <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, align 32 +@a3 = external local_unnamed_addr addrspace(2) global <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, align 2 ; CHECK: define void @f define void @f(ptr %dst) { entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 608, 624, 656)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 - ; CHECK: 
[[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) - ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) - ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 - ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 48) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 32 %a2 = load <3 x double>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr 
addrspace(2) @a2, i32 32), align 8 %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store <3 x double> %a2, ptr %a2.i, align 32 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8) - ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32 - ; CHECK: store half [[X]], ptr [[PTR]] - %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 6), align 2 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 112) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a3 = load half, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a3, i32 16), align 2 %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32 store half %a3, ptr %a3.i, align 2 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12) - ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 - ; CHECK: store i64 [[X]], ptr [[PTR]] - %a4 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a4, i32 8), align 8 - %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 - store i64 %a4, ptr %a4.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 26) - ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = 
extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 - ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 - ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 - ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 - ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 - ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] - %a5 = load <4 x i32>, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a5, i32 272), align 4 - %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 - store <4 x i32> %a5, ptr %a5.i, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 38) - ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 64 - ; CHECK: store i16 [[X]], ptr [[PTR]] - %a6 = load i16, ptr addrspace(2) @a6, align 2 - %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 64 - store i16 %a6, ptr %a6.i, align 2 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 40) - ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 - ; CHECK: store i64 [[X]], ptr [[PTR]] - %a7 = load i64, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a7, i32 8), align 8 - %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 - store i64 %a7, ptr %a7.i, align 8 - - ; 
CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 42) - ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 80 - ; CHECK: store i32 [[X]], ptr [[PTR]] - %a8 = load i32, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a8, i32 4), align 4, !range !1, !noundef !2 - %a8.i = getelementptr inbounds nuw i8, ptr %dst, i32 80 - store i32 %a8, ptr %a8.i, align 4 - ret void } ; CHECK-NOT: !hlsl.cbs = !hlsl.cbs = !{!0} -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} -!1 = !{i32 0, i32 2} -!2 = !{} +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll deleted file mode 100644 index d7272b449166d..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/float.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -%__cblayout_CB = type <{ float }> - -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@x = external local_unnamed_addr addrspace(2) global float, align 4 - -; CHECK: define void @f -define void @f(ptr %dst) { -entry: - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: 
store float [[X]], ptr %dst - %x = load float, ptr addrspace(2) @x, align 4 - store float %x, ptr %dst, align 4 - ret void -} - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @x} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll index abe087dbe6100..8ea6c1c459b5b 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/gep-ce-two-uses.ll @@ -3,28 +3,24 @@ ; cbuffer CB : register(b0) { ; float a1[3]; ; } -%__cblayout_CB = type <{ [3 x float] }> +%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 36, 0)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 +@a1 = external addrspace(2) global <{ [2 x <{ float, [12 x i8] }>], float }>, align 4 ; CHECK: define void @f define void @f(ptr %dst) { entry: - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a1 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a1, ptr %dst, align 32 - ; CHECK: [[CB:%.*]] = load 
target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[X]], ptr %dst - %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 4), align 4 + ; CHECK: [[PTR:%.*]] = call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) + ; CHECK: getelementptr inbounds nuw i8, ptr addrspace(2) [[PTR]], i32 16 + %a2 = load float, ptr addrspace(2) getelementptr inbounds nuw (i8, ptr addrspace(2) @a1, i32 16), align 4 store float %a2, ptr %dst, align 32 ret void diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll deleted file mode 100644 index f1486f974fb36..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/memcpy.ll +++ /dev/null @@ -1,216 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -; cbuffer CB : register(b0) { -; float a1[3]; -; double3 a2[2]; -; float16_t a3[2][2]; -; uint64_t a4[3]; -; int2 a5[3][2]; -; uint16_t a6[1]; -; int64_t a7[2]; -; bool a8[4]; -; } -%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }> - -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison -@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4 -@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32 -@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2 -@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8 -@a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x 
i32>]], align 16 -@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2 -@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8 -@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4 - -; CHECK: define void @f( -define void @f(ptr %dst) { -entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4 - - %a1.copy = alloca [3 x float], align 4 - %a2.copy = alloca [2 x <3 x double>], align 32 - %a3.copy = alloca [2 x [2 x half]], align 2 - %a4.copy = alloca [3 x i64], align 8 - %a5.copy = alloca [3 x [2 x <2 x i32>]], align 16 - %a6.copy = alloca [1 x i16], align 2 - %a7.copy = alloca [2 x i64], align 8 - %a8.copy = alloca [4 x i32], align 4 - - ; Try copying no elements -; CHECK-NOT: memcpy - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 0, i1 false) - - ; Try copying only the first element -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) -; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0 -; CHECK: store float [[X]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 4, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) -; CHECK: [[X:%.*]] = extractvalue { 
float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0 -; CHECK: store float [[X]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) -; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4 -; CHECK: store float [[Y]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) -; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8 -; CHECK: store float [[Z]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3) -; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4) -; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 -; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { double, double } 
@llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) -; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 -; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) -; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 -; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1 -; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 24 -; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 48, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7) -; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0 -; CHECK: store half [[X]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8) -; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2 -; CHECK: store half [[Y]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9) -; CHECK: [[X:%.*]] = extractvalue { half, 
half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4 -; CHECK: store half [[X]], ptr [[DEST]], align 2 -; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10) -; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6 -; CHECK: store half [[Y]], ptr [[DEST]], align 2 - call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11) -; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0 -; CHECK: store i64 [[X]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12) -; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8 -; CHECK: store i64 [[Y]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13) -; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16 -; CHECK: store i64 [[Z]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } 
@llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, 
i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 -; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0 -; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40 -; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17) -; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0 -; CHECK: store i16 [[X]], ptr [[DEST]], align 2 - call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr 
addrspace(2) align 2 @a6, i32 2, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18) -; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0 -; CHECK: store i64 [[X]], ptr [[DEST]], align 8 -; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19) -; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8 -; CHECK: store i64 [[Y]], ptr [[DEST]], align 8 - call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false) - -; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20) -; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0 -; CHECK: store i32 [[X]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21) -; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4 -; CHECK: store i32 [[Y]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22) -; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8 -; CHECK: store i32 [[Z]], ptr [[DEST]], align 4 -; CHECK: [[LOAD:%.*]] = call { i32, i32, 
i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23) -; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 -; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12 -; CHECK: store i32 [[W]], ptr [[DEST]], align 4 - call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false) - - ret void -} - -declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg) - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8} -!1 = !{i32 0, i32 2} -!2 = !{} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll index 7857c25d69636..a55450145c8c6 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/scalars.ll @@ -4,97 +4,42 @@ ; float a1; // offset 0, size 4 ; int a2; // offset 4, size 4 ; bool a3; // offset 8, size 4 -; float16_t a4; // offset 12, size 2 -; uint16_t a5; // offset 14, size 2 -; double a6; // offset 16, size 8 -; int64_t a7; // offset 24, size 8 ; } -%__cblayout_CB = type <{ float, i32, i32, half, i16, double, i64 }> +%__cblayout_CB = type <{ float, i32, i32 }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison ; CHECK: @CB.cb = ; CHECK-NOT: external {{.*}} addrspace(2) global @a1 = external local_unnamed_addr addrspace(2) global float, align 4 @a2 = external local_unnamed_addr addrspace(2) global i32, align 4 @a3 = external local_unnamed_addr addrspace(2) global i32, align 4 -@a4 = external local_unnamed_addr 
addrspace(2) global half, align 2 -@a5 = external local_unnamed_addr addrspace(2) global i16, align 2 -@a6 = external local_unnamed_addr addrspace(2) global double, align 8 -@a7 = external local_unnamed_addr addrspace(2) global i64, align 8 ; CHECK: define void @f define void @f(ptr %dst) { entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 32, 0, 4, 8, 12, 14, 16, 24)) %CB.cb_h.i.i, ptr @CB.cb, align 4 + %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) + store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A1:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: store float [[A1]], ptr %dst + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 0) %a1 = load float, ptr addrspace(2) @a1, align 4 store float %a1, ptr %dst, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A2:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 - ; CHECK: store i32 [[A2]], ptr [[PTR]] + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 4) %a2 = load i32, ptr addrspace(2) @a2, align 4 %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 
store i32 %a2, ptr %a2.i, align 4 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A3:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8 - ; CHECK: store i32 [[A3]], ptr [[PTR]] + ; CHECK: call ptr addrspace(2) @llvm.dx.resource.getpointer.{{.*}}(target("dx.CBuffer", %__cblayout_CB) {{%.*}}, i32 8) %a3 = load i32, ptr addrspace(2) @a3, align 4, !range !1, !noundef !2 %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 8 store i32 %a3, ptr %a3.i, align 8 - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A4:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 6 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 12 - ; CHECK: store half [[A4]], ptr [[PTR]] - %a4 = load half, ptr addrspace(2) @a4, align 2 - %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 12 - store half %a4, ptr %a4.i, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[A5:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 7 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 14 - ; CHECK: store i16 [[A5]], ptr [[PTR]] - %a5 = load i16, ptr addrspace(2) @a5, align 2 - %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 14 - store i16 %a5, ptr %a5.i, align 2 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call 
{ double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[A6:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 - ; CHECK: store double [[A6]], ptr [[PTR]] - %a6 = load double, ptr addrspace(2) @a6, align 8 - %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 - store double %a6, ptr %a6.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[A7:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 24 - ; CHECK: store i64 [[A7]], ptr [[PTR]] - %a7 = load i64, ptr addrspace(2) @a7, align 8 - %a7.i = getelementptr inbounds nuw i8, ptr %dst, i32 24 - store i64 %a7, ptr %a7.i, align 8 - ret void } ; CHECK-NOT: !hlsl.cbs = !hlsl.cbs = !{!0} -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7} +!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3} !1 = !{i32 0, i32 2} !2 = !{} diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll index 6f1bbd050bd7c..8c0d82e43b4b1 100644 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll +++ b/llvm/test/CodeGen/DirectX/CBufferAccess/unused.ll @@ -2,7 +2,7 @@ ; Check that we correctly ignore cbuffers that were nulled out by optimizations. 
%__cblayout_CB = type <{ float }> -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 4, 0)) poison +@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison @x = external local_unnamed_addr addrspace(2) global float, align 4 ; CHECK-NOT: !hlsl.cbs = diff --git a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll b/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll deleted file mode 100644 index 4160008a986af..0000000000000 --- a/llvm/test/CodeGen/DirectX/CBufferAccess/vectors.ll +++ /dev/null @@ -1,119 +0,0 @@ -; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s - -; cbuffer CB { -; float3 a1; // offset 0, size 12 (+4) -; double3 a2; // offset 16, size 24 -; float16_t2 a3; // offset 40, size 4 (+4) -; uint64_t3 a4; // offset 48, size 24 (+8) -; int4 a5; // offset 80, size 16 -; uint16_t3 a6; // offset 96, size 6 (+10) -; }; -%__cblayout_CB = type <{ <3 x float>, <3 x double>, <2 x half>, <3 x i64>, <4 x i32>, <3 x i16> }> - -@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) poison -; CHECK: @CB.cb = -; CHECK-NOT: external {{.*}} addrspace(2) global -@a1 = external local_unnamed_addr addrspace(2) global <3 x float>, align 16 -@a2 = external local_unnamed_addr addrspace(2) global <3 x double>, align 32 -@a3 = external local_unnamed_addr addrspace(2) global <2 x half>, align 4 -@a4 = external local_unnamed_addr addrspace(2) global <3 x i64>, align 32 -@a5 = external local_unnamed_addr addrspace(2) global <4 x i32>, align 16 -@a6 = external local_unnamed_addr addrspace(2) global <3 x i16>, align 8 - -; CHECK: define void @f -define void @f(ptr %dst) { -entry: - %CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) - store target("dx.CBuffer", target("dx.Layout", 
%__cblayout_CB, 102, 0, 16, 40, 48, 80, 96)) %CB.cb_h.i.i, ptr @CB.cb, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0) - ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 1 - ; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 2 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x float> poison, float [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x float> [[VEC0]], float [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x float> [[VEC1]], float [[Z]], i32 2 - ; CHECK: store <3 x float> [[VEC2]], ptr %dst - %a1 = load <3 x float>, ptr addrspace(2) @a1, align 16 - store <3 x float> %a1, ptr %dst, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1) - ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) - ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 16 - ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]] - %a2 = load <3 x double>, ptr addrspace(2) @a2, align 32 - %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 16 - store <3 x double> %a2, ptr 
%a2.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2) - ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 4 - ; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 5 - ; CHECK: [[VEC0:%.*]] = insertelement <2 x half> poison, half [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <2 x half> [[VEC0]], half [[Y]], i32 1 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40 - ; CHECK: store <2 x half> [[VEC1]], ptr [[PTR]] - %a3 = load <2 x half>, ptr addrspace(2) @a3, align 4 - %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 40 - store <2 x half> %a3, ptr %a3.i, align 2 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3) - ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 1 - ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4) - ; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x i64> poison, i64 [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x i64> [[VEC0]], i64 [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x i64> [[VEC1]], i64 [[Z]], i32 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 48 - ; CHECK: store <3 x i64> [[VEC2]], ptr [[PTR]] - %a4 = load <3 x i64>, ptr addrspace(2) @a4, align 32 - %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 48 - store <3 x i64> %a4, ptr %a4.i, align 8 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; 
CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5) - ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 - ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 - ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 - ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <4 x i32> [[VEC0]], i32 [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <4 x i32> [[VEC1]], i32 [[Z]], i32 2 - ; CHECK: [[VEC3:%.*]] = insertelement <4 x i32> [[VEC2]], i32 [[A]], i32 3 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 72 - ; CHECK: store <4 x i32> [[VEC3]], ptr [[PTR]] - %a5 = load <4 x i32>, ptr addrspace(2) @a5, align 16 - %a5.i = getelementptr inbounds nuw i8, ptr %dst, i32 72 - store <4 x i32> %a5, ptr %a5.i, align 4 - - ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb - ; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6) - ; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0 - ; CHECK: [[Y:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 1 - ; CHECK: [[Z:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 2 - ; CHECK: [[VEC0:%.*]] = insertelement <3 x i16> poison, i16 [[X]], i32 0 - ; CHECK: [[VEC1:%.*]] = insertelement <3 x i16> [[VEC0]], i16 [[Y]], i32 1 - ; CHECK: [[VEC2:%.*]] = insertelement <3 x i16> [[VEC1]], i16 [[Z]], i32 2 - ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 88 - ; CHECK: store <3 x i16> [[VEC2]], ptr [[PTR]] - %a6 = load <3 x i16>, ptr addrspace(2) @a6, align 8 - %a6.i = getelementptr inbounds nuw i8, ptr %dst, i32 88 - store <3 x i16> %a6, 
ptr %a6.i, align 2 - - ret void -} - -; CHECK-NOT: !hlsl.cbs = -!hlsl.cbs = !{!0} - -!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index d265826cd2469..4452ffd2d868e 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -15,10 +15,10 @@ ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage ; CHECK-NEXT: Dead Global Elimination +; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: DXIL Resource Access ; CHECK-NEXT: DXIL Intrinsic Expansion -; CHECK-NEXT: DXIL CBuffer Access ; CHECK-NEXT: DXIL Data Scalarization ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll index 1aee688bc37ea..879cb3a651a09 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWBufferDynamicIdx.ll @@ -5,15 +5,15 @@ @i = external hidden local_unnamed_addr addrspace(12) global i32, align 4 @ReadWriteBuf.str = private unnamed_addr constant [13 x i8] c"ReadWriteBuf\00", align 1 -@"$Globals.cb" = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) poison +@"$Globals.cb" = local_unnamed_addr global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) poison @"$Globals.str" = private unnamed_addr constant [9 x i8] c"$Globals\00", align 1 ; CHECK: OpCapability Shader ; CHECK: OpCapability StorageTexelBufferArrayDynamicIndexingEXT define void @main() local_unnamed_addr #0 { entry: - %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", 
%"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") - store target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) %"$Globals.cb_h.i.i", ptr @"$Globals.cb", align 8 + %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") + store target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) %"$Globals.cb_h.i.i", ptr @"$Globals.cb", align 8 %0 = load i32, ptr addrspace(12) @i, align 4 %1 = tail call target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_i32_5_2_0_0_2_33t(i32 0, i32 0, i32 64, i32 %0, ptr nonnull @ReadWriteBuf.str) %2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.Image_i32_5_2_0_0_2_33t(target("spirv.Image", i32, 5, 2, 0, 0, 2, 33) %1, i32 98) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll index 163fc9d97c544..fb4541d7f0121 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/DynamicIdx/RWStructuredBufferDynamicIdx.ll @@ -5,15 +5,15 @@ @i = external hidden local_unnamed_addr addrspace(12) global i32, align 4 @ReadWriteStructuredBuf.str = private unnamed_addr constant [23 x i8] c"ReadWriteStructuredBuf\00", align 1 -@"$Globals.cb" = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) poison +@"$Globals.cb" = local_unnamed_addr global target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) poison @"$Globals.str" = private unnamed_addr constant [9 
x i8] c"$Globals\00", align 1 ; CHECK: OpCapability Shader ; CHECK: OpCapability StorageBufferArrayDynamicIndexing define void @main() local_unnamed_addr #0 { entry: - %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) @"llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_$Globalss_4_0t_2_0t"(i32 2, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") - store target("spirv.VulkanBuffer", target("spirv.Layout", %"__cblayout_$Globals", 4, 0), 2, 0) %"$Globals.cb_h.i.i", ptr @"$Globals.cb", align 8 + %"$Globals.cb_h.i.i" = tail call target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @"$Globals.str") + store target("spirv.VulkanBuffer", %"__cblayout_$Globals", 2, 0) %"$Globals.cb_h.i.i", ptr @"$Globals.cb", align 8 %0 = load i32, ptr addrspace(12) @i, align 4 %1 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 64, i32 %0, ptr nonnull @ReadWriteStructuredBuf.str) %2 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 99) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll index 6d41875798ebc..8f6166b7232c3 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer.ll @@ -21,7 +21,7 @@ %MyStruct = type { <4 x float> } %__cblayout_MyCBuffer = type <{ %MyStruct, <4 x float> }> -@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) poison +@MyCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) poison 
@s = external hidden local_unnamed_addr addrspace(12) global %MyStruct, align 16 @v = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16 @MyCBuffer.str = private unnamed_addr constant [10 x i8] c"MyCBuffer\00", align 1 @@ -30,10 +30,10 @@ define void @main() { entry: ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject %[[wrapper_ptr_t]] %[[MyCBuffer]] - %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_MyCBuffers_32_0_16t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_MyCBuffer, 32, 0, 16), 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 + %MyCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) @llvm.spv.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @MyCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_MyCBuffer, 2, 0) %MyCBuffer.cb_h.i.i, ptr @MyCBuffer.cb, align 8 %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.resource.handlefrombinding.tspirv.Image_f32_5_2_0_0_2_3t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str) - + ; CHECK: %[[tmp_ptr:[0-9]+]] = OpAccessChain {{%[0-9]+}} %[[tmp]] %[[uint_0]] %[[uint_0]] ; CHECK: %[[v_ptr:.+]] = OpAccessChain %[[_ptr_Uniform_v4float]] %[[tmp]] %[[uint_0]] %[[uint_1]] ; CHECK: %[[s_ptr_gep:[0-9]+]] = OpInBoundsAccessChain %[[_ptr_Uniform_float]] %[[tmp_ptr]] %[[uint_0]] %[[uint_1]] diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll index c365452a9b404..670548d3d3e27 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/cbuffer_unused.ll @@ -14,12 +14,12 @@ %__cblayout_PartiallyUsedCBuffer = type <{ float, i32 }> %__cblayout_AnotherCBuffer = 
type <{ <4 x float>, <4 x float> }> -@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) poison +@UnusedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) poison @UnusedCBuffer.str = private unnamed_addr constant [14 x i8] c"UnusedCBuffer\00", align 1 -@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) poison +@PartiallyUsedCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) poison @used_member = external hidden local_unnamed_addr addrspace(12) global float, align 4 @PartiallyUsedCBuffer.str = private unnamed_addr constant [21 x i8] c"PartiallyUsedCBuffer\00", align 1 -@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) poison +@AnotherCBuffer.cb = local_unnamed_addr global target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) poison @a = external hidden local_unnamed_addr addrspace(12) global <4 x float>, align 16 @AnotherCBuffer.str = private unnamed_addr constant [15 x i8] c"AnotherCBuffer\00", align 1 @.str = private unnamed_addr constant [7 x i8] c"output\00", align 1 @@ -28,18 +28,18 @@ ; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none) define void @main() local_unnamed_addr #1 { entry: - %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_UnusedCBuffers_4_0t_2_0t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_UnusedCBuffer, 4, 0), 
2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8 + %UnusedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 0, i32 0, i32 1, i32 0, ptr nonnull @UnusedCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_UnusedCBuffer, 2, 0) %UnusedCBuffer.cb_h.i.i, ptr @UnusedCBuffer.cb, align 8 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[PartiallyUsedCBuffer]] ; CHECK: %[[used_member_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0:[0-9]+]] - %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_PartiallyUsedCBuffers_8_0_4t_2_0t(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_PartiallyUsedCBuffer, 8, 0, 4), 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8 + %PartiallyUsedCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr nonnull @PartiallyUsedCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_PartiallyUsedCBuffer, 2, 0) %PartiallyUsedCBuffer.cb_h.i.i, ptr @PartiallyUsedCBuffer.cb, align 8 ; CHECK: %[[tmp:[0-9]+]] = OpCopyObject {{%[0-9]+}} %[[AnotherCBuffer]] ; CHECK: %[[a_ptr:.+]] = OpAccessChain %{{.+}} %[[tmp]] %{{.+}} %[[uint_0]] - %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", target("spirv.Layout", %__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) @llvm.spv.resource.handlefromimplicitbinding.tspirv.VulkanBuffer_tspirv.Layout_s___cblayout_AnotherCBuffers_32_0_16t_2_0t(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str) - store target("spirv.VulkanBuffer", target("spirv.Layout", 
%__cblayout_AnotherCBuffer, 32, 0, 16), 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8 + %AnotherCBuffer.cb_h.i.i = tail call target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) @llvm.spv.resource.handlefromimplicitbinding(i32 2, i32 0, i32 1, i32 0, ptr nonnull @AnotherCBuffer.str) + store target("spirv.VulkanBuffer", %__cblayout_AnotherCBuffer, 2, 0) %AnotherCBuffer.cb_h.i.i, ptr @AnotherCBuffer.cb, align 8 %0 = tail call target("spirv.Image", float, 5, 2, 0, 0, 2, 1) @llvm.spv.resource.handlefromimplicitbinding.tspirv.Image_f32_5_2_0_0_2_1t(i32 3, i32 0, i32 1, i32 0, ptr nonnull @.str) %2 = load float, ptr addrspace(12) @used_member, align 4