From b93f6a46fd78fe5557f7a5263d8f4885ea3930fd Mon Sep 17 00:00:00 2001 From: alecardoso-tech Date: Fri, 18 Apr 2025 14:59:52 -0700 Subject: [PATCH 01/93] Moved HANDLE management to Test Class Constructor and Destructor (#7357) Test class `ShaderOpTest` had a `HANDLE` not closed after `RunShaderOp` test ran. Created a constructor and destructor for the Test Class to manage the `HANDLE`. --- tools/clang/unittests/HLSLExec/ShaderOpTest.cpp | 13 +++++++++---- tools/clang/unittests/HLSLExec/ShaderOpTest.h | 3 +++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp index e6c9b10f6c..8dde3faa0b 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp @@ -258,6 +258,15 @@ void CommandListRefs::CreateForDevice(ID3D12Device *pDevice, bool compute) { IID_PPV_ARGS(&List))); } +ShaderOpTest::ShaderOpTest() { + m_hFence = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (m_hFence == nullptr) { + AtlThrow(HRESULT_FROM_WIN32(GetLastError())); + } +} + +ShaderOpTest::~ShaderOpTest() { CloseHandle(m_hFence); } + void ShaderOpTest::CopyBackResources() { CommandListRefs ResCommandList; ResCommandList.CreateForDevice(m_pDevice, m_pShaderOp->IsCompute()); @@ -423,10 +432,6 @@ void ShaderOpTest::CreateDevice() { CHECK_HR(m_pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_pFence)); m_pFence->SetName(L"ShaderOpTest Fence"); - m_hFence = CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (m_hFence == nullptr) { - AtlThrow(HRESULT_FROM_WIN32(GetLastError())); - } } static void InitByteCode(D3D12_SHADER_BYTECODE *pBytecode, ID3D10Blob *pBlob) { diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.h b/tools/clang/unittests/HLSLExec/ShaderOpTest.h index e65bd9e4e5..b71ee08765 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.h +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.h @@ -275,6 
+275,9 @@ class ShaderOpTest { typedef std::function TShaderCallbackFn; + + ShaderOpTest(); + ~ShaderOpTest(); void GetPipelineStats(D3D12_QUERY_DATA_PIPELINE_STATISTICS *pStats); void GetReadBackData(LPCSTR pResourceName, MappedData *pData); void RunShaderOp(ShaderOp *pShaderOp); From b4a3076caa92c4e9ed05761cbcd2141591fb3f89 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 21 Apr 2025 12:23:02 -0500 Subject: [PATCH 02/93] Revert ADO pipelines to Ubuntu 22.04 temporarily (#7365) DXC seems to be building incorrectly with GCC-13 and later, which is causing our pre-merge testing on 24.04 to fail. This will take some time to sort out, so in the meantime I'm reverting to 22.04 on our pipelines. --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 33c5349f9e..7967fa03e3 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -52,7 +52,7 @@ stages: variables: macOS: macOS-latest - linux: Ubuntu-latest + linux: Ubuntu-22.04 # FIXME: #7364, DXC does not build correctly with GCC 13+ strategy: matrix: From 8988e473465454f7a6dbc55223c7655c1b5af973 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 22 Apr 2025 18:31:31 +0200 Subject: [PATCH 03/93] [SER] Diagnose payload in HitObject::TraceRay|Invoke (#7356) - Generalize raypayload validation to HitObject::TraceRay|Invoke - Reject non-numeric payload types in [HitObject::]TraceRay|Invoke Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md Bug: #7234 [SER] Diagnose and validate illegal use of HitObject in unsupported contexts --- tools/clang/lib/Sema/SemaDXR.cpp | 141 +++++++++++------- .../hitobject_traceinvoke_payload.hlsl | 27 ++++ .../hitobject_traceinvoke_payload_udt.hlsl | 22 +++ 3 files changed, 133 insertions(+), 57 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl create mode 100644
tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 36ab55ea10..e5b2140cca 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -28,6 +28,7 @@ #include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HlslIntrinsicOp.h" using namespace clang; using namespace sema; @@ -49,9 +50,9 @@ struct PayloadUse { const MemberExpr *Member = nullptr; }; -struct TraceRayCall { - TraceRayCall() = default; - TraceRayCall(const CallExpr *Call, const CFGBlock *Parent) +struct PayloadBuiltinCall { + PayloadBuiltinCall() = default; + PayloadBuiltinCall(const CallExpr *Call, const CFGBlock *Parent) : Call(Call), Parent(Parent) {} const CallExpr *Call = nullptr; const CFGBlock *Parent = nullptr; @@ -71,7 +72,7 @@ struct DxrShaderDiagnoseInfo { const FunctionDecl *funcDecl; const VarDecl *Payload; DXIL::PayloadAccessShaderStage Stage; - std::vector TraceCalls; + std::vector PayloadBuiltinCalls; std::map> WritesPerField; std::map> ReadsPerField; std::vector PayloadAsCallArg; @@ -121,24 +122,42 @@ GetPayloadQualifierForStage(FieldDecl *Field, return DXIL::PayloadAccessQualifier::NoAccess; } -// Returns the declaration of the payload used in a TraceRay call -const VarDecl *GetPayloadParameterForTraceCall(const CallExpr *Trace) { - const Decl *callee = Trace->getCalleeDecl(); - if (!callee) +static int GetPayloadParamIdxForIntrinsic(const FunctionDecl *FD) { + HLSLIntrinsicAttr *IntrinAttr = FD->getAttr(); + if (!IntrinAttr) + return -1; + switch ((IntrinsicOp)IntrinAttr->getOpcode()) { + default: + return -1; + case IntrinsicOp::IOP_TraceRay: + case IntrinsicOp::MOP_DxHitObject_TraceRay: + case IntrinsicOp::MOP_DxHitObject_Invoke: + return FD->getNumParams() - 1; + } +} + +static bool IsBuiltinWithPayload(const FunctionDecl *FD) { + return GetPayloadParamIdxForIntrinsic(FD) >= 0; +} + +// Returns the 
declaration of the payload used in a call to TraceRay, +// HitObject::TraceRay or HitObject::Invoke. +const VarDecl *GetPayloadParameterForBuiltinCall(const CallExpr *Call) { + const Decl *Callee = Call->getCalleeDecl(); + if (!Callee) return nullptr; - if (!isa(callee)) + if (!isa(Callee)) return nullptr; - const FunctionDecl *FD = cast(callee); + int PldParamIdx = GetPayloadParamIdxForIntrinsic(cast(Callee)); + if (PldParamIdx < 0) + return nullptr; - if (FD->isImplicit() && FD->getName() == "TraceRay") { - const Stmt *Param = IgnoreParensAndDecay(Trace->getArg(7)); - if (const DeclRefExpr *ParamRef = dyn_cast(Param)) { - if (const VarDecl *Decl = dyn_cast(ParamRef->getDecl())) - return Decl; - } - } + const Stmt *Param = IgnoreParensAndDecay(Call->getArg(PldParamIdx)); + if (const DeclRefExpr *ParamRef = dyn_cast(Param)) + if (const VarDecl *Decl = dyn_cast(ParamRef->getDecl())) + return Decl; return nullptr; } @@ -190,12 +209,9 @@ void CollectReadsWritesAndCallsForPayload(const Stmt *S, } } -// Collects all TraceRay calls. -void CollectTraceRayCalls(const Stmt *S, DxrShaderDiagnoseInfo &Info, - const CFGBlock *Block) { - // TraceRay has void as return type so it should never be something else - // than a plain CallExpr. - +// Collects all calls to TraceRay, HitObject::TraceRay and HitObject::Invoke. +void CollectBuiltinCallsWithPayload(const Stmt *S, DxrShaderDiagnoseInfo &Info, + const CFGBlock *Block) { if (const CallExpr *Call = dyn_cast(S)) { const Decl *Callee = Call->getCalleeDecl(); @@ -204,11 +220,8 @@ void CollectTraceRayCalls(const Stmt *S, DxrShaderDiagnoseInfo &Info, const FunctionDecl *CalledFunction = cast(Callee); - // Ignore trace calls here. 
- if (CalledFunction->isImplicit() && - CalledFunction->getName() == "TraceRay") { - Info.TraceCalls.push_back({Call, Block}); - } + if (IsBuiltinWithPayload(CalledFunction)) + Info.PayloadBuiltinCalls.push_back({Call, Block}); } } @@ -528,13 +541,14 @@ void TraverseCFG(const CFGBlock &Block, Action PerElementAction, } } -// Forward traverse the CFG and collect calls to TraceRay. -void ForwardTraverseCFGAndCollectTraceCalls( +// Forward traverse the CFG and collect calls to TraceRay, HitObject::TraceRay +// and HitObject::Invoke. +void ForwardTraverseCFGAndCollectBuiltinCallsWithPayload( const CFGBlock &Block, DxrShaderDiagnoseInfo &Info, std::set &Visited) { auto Action = [&Info](const CFGBlock &Block, const CFGElement &Element) { if (Optional S = Element.getAs()) { - CollectTraceRayCalls(S->getStmt(), Info, &Block); + CollectBuiltinCallsWithPayload(S->getStmt(), Info, &Block); } }; @@ -664,9 +678,9 @@ DiagnosePayloadAsFunctionArg( const FunctionDecl *CalledFunction = cast(Callee); // Ignore trace calls here. - if (CalledFunction->isImplicit() && - CalledFunction->getName() == "TraceRay") { - Info.TraceCalls.push_back(TraceRayCall{Call, Use.Parent}); + if (IsBuiltinWithPayload(CalledFunction)) { + Info.PayloadBuiltinCalls.push_back( + PayloadBuiltinCall{Call, Use.Parent}); continue; } @@ -789,10 +803,12 @@ void HandlePayloadInitializer(DxrShaderDiagnoseInfo &Info) { } } -// Emit diagnostics for a TraceRay call. -void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, - const TraceRayCall &Trace, DominatorTree &DT) { - // For each TraceRay call check if write(caller) fields are written. +// Emit diagnostics for this call to either TraceRay, HitObject::TraceRay or +// HitObject::Invoke. +void DiagnoseBuiltinCallWithPayload(Sema &S, const VarDecl *Payload, + const PayloadBuiltinCall &PldCall, + DominatorTree &DT) { + // For each call check if write(caller) fields are written. 
const DXIL::PayloadAccessShaderStage CallerStage = DXIL::PayloadAccessShaderStage::Caller; @@ -810,6 +826,13 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } + // Verify that the payload type is legal + if (!hlsl::IsHLSLCopyableAnnotatableRecord(Payload->getType())) { + S.Diag(Payload->getLocation(), diag::err_payload_attrs_must_be_udt) + << /*payload|attributes|callable*/ 0 << Payload; + return; + } + if (ContainsLongVector(Payload->getType())) { const unsigned PayloadParametersIdx = 10; S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) @@ -832,12 +855,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, std::set Visited; - const CFGBlock *Parent = Trace.Parent; + const CFGBlock *Parent = PldCall.Parent; Visited.insert(Parent); - // Collect payload accesses in the same block until we reach the TraceRay call + // Collect payload accesses in the same block until we reach the call for (auto Element : *Parent) { if (Optional S = Element.getAs()) { - if (S->getStmt() == Trace.Call) + if (S->getStmt() == PldCall.Call) break; CollectReadsWritesAndCallsForPayload(S->getStmt(), TraceInfo, Parent); } @@ -850,10 +873,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, BackwardTraverseCFGAndCollectReadsWrites(*Pred, TraceInfo, Visited); } + int PldArgIdx = PldCall.Call->getNumArgs() - 1; + // Warn if a writeable field has not been written. 
for (const FieldDecl *Field : WriteableFields) { if (!TraceInfo.WritesPerField.count(Field)) { - S.Diag(Trace.Call->getArg(7)->getExprLoc(), + S.Diag(PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_no_write_for_trace_payload) << Field->getName(); } @@ -862,7 +887,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, for (const FieldDecl *Field : NonWriteableFields) { if (TraceInfo.WritesPerField.count(Field)) { S.Diag( - Trace.Call->getArg(7)->getExprLoc(), + PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_write_but_no_write_for_trace_payload) << Field->getName(); } @@ -878,7 +903,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, bool CallFound = false; for (auto Element : *Parent) { // TODO: reverse iterate? if (Optional S = Element.getAs()) { - if (S->getStmt() == Trace.Call) { + if (S->getStmt() == PldCall.Call) { CallFound = true; continue; } @@ -895,7 +920,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, for (const FieldDecl *Field : ReadableFields) { if (!TraceInfo.ReadsPerField.count(Field)) { - S.Diag(Trace.Call->getArg(7)->getExprLoc(), + S.Diag(PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_read_but_no_read_after_trace) << Field->getName(); } @@ -928,27 +953,29 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, } } -// Emit diagnostics for all TraceRay calls. -void DiagnoseTraceCalls(Sema &S, CFG &ShaderCFG, DominatorTree &DT, - DxrShaderDiagnoseInfo &Info) { - // Collect TraceRay calls in the shader. +// Emit diagnostics for all calls to TraceRay, HitObject::TraceRay or +// HitObject::Invoke. +void DiagnoseBuiltinCallsWithPayload(Sema &S, CFG &ShaderCFG, DominatorTree &DT, + DxrShaderDiagnoseInfo &Info) { + // Collect calls with payload in the shader. 
std::set Visited; - ForwardTraverseCFGAndCollectTraceCalls(ShaderCFG.getEntry(), Info, Visited); + ForwardTraverseCFGAndCollectBuiltinCallsWithPayload(ShaderCFG.getEntry(), + Info, Visited); std::set Diagnosed; - for (const TraceRayCall &TraceCall : Info.TraceCalls) { - if (Diagnosed.count(TraceCall.Call)) + for (const PayloadBuiltinCall &PldCall : Info.PayloadBuiltinCalls) { + if (Diagnosed.count(PldCall.Call)) continue; - Diagnosed.insert(TraceCall.Call); + Diagnosed.insert(PldCall.Call); - const VarDecl *Payload = GetPayloadParameterForTraceCall(TraceCall.Call); - DiagnoseTraceCall(S, Payload, TraceCall, DT); + const VarDecl *Payload = GetPayloadParameterForBuiltinCall(PldCall.Call); + DiagnoseBuiltinCallWithPayload(S, Payload, PldCall, DT); } } // Emit diagnostics for all access to the payload of a shader, -// and the input to TraceRay calls. +// and the input to TraceRay, HitObject::TraceRay or HitObject::Invoke calls. std::vector DiagnosePayloadAccess(Sema &S, DxrShaderDiagnoseInfo &Info, const std::set &FieldsToIgnoreRead, @@ -1012,7 +1039,7 @@ DiagnosePayloadAccess(Sema &S, DxrShaderDiagnoseInfo &Info, DiagnosePayloadReads(S, TheCFG, DT, Info, NonReadableFields); } - DiagnoseTraceCalls(S, TheCFG, DT, Info); + DiagnoseBuiltinCallsWithPayload(S, TheCFG, DT, Info); return WrittenFields; } diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl new file mode 100644 index 0000000000..f4781bc796 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_9 %s -D TEST_NUM=0 %s -verify +// RUN: %dxc -T lib_6_9 %s -D TEST_NUM=1 %s -verify + +RaytracingAccelerationStructure scene : register(t0); + +struct Payload +{ + int a : read (caller, closesthit, miss) : write(caller, closesthit, miss); +}; + +struct Attribs +{ + float2 barys; +}; + 
+[shader("raygeneration")] +void RayGen() +{ +// expected-error@+1{{type 'Payload' used as payload requires that it is annotated with the [raypayload] attribute}} + Payload payload_in_rg; + RayDesc ray; +#if TEST_NUM == 0 + dx::HitObject::TraceRay( scene, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload_in_rg ); +#else + dx::HitObject::Invoke( dx::HitObject(), payload_in_rg ); +#endif +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl new file mode 100644 index 0000000000..e89e33a78f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +struct +[raypayload] +Payload +{ + int a : read(caller, closesthit, miss) : write(caller, closesthit, miss); + dx::HitObject hit; +}; + +struct Attribs +{ + float2 barys; +}; + +[shader("raygeneration")] +void RayGen() +{ + // expected-error@+1{{payload parameter 'payload_in_rg' must be a user-defined type composed of only numeric types}} + Payload payload_in_rg; + dx::HitObject::Invoke( dx::HitObject(), payload_in_rg ); +} \ No newline at end of file From f19b5da54170210e3cbc7f080be3f080abc52505 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 22 Apr 2025 18:32:11 +0200 Subject: [PATCH 04/93] [SER] TraceRay|Invoke HLSL -> DXIL lowering (#7355) Lowering for - HitObject::TraceRay - HitObject::Invoke Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: #7214 --- include/dxc/DXIL/DxilConstants.h | 5 + include/dxc/HLSL/HLOperations.h | 4 + lib/HLSL/HLOperationLower.cpp | 126 ++++++++----- .../Scalar/ScalarReplAggregatesHLSL.cpp | 2 + tools/clang/lib/Sema/SemaHLSL.cpp | 2 + .../HitObject/hitobject_traceinvoke.hlsl | 102 +++++++++++
.../DxilGen/hitobject_traceinvoke_dxilgen.ll | 167 ++++++++++++++++++ 7 files changed, 367 insertions(+), 41 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 8c73328fbd..3752274f18 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1556,6 +1556,11 @@ const unsigned kMSStoreOutputColOpIdx = 3; const unsigned kMSStoreOutputVIdxOpIdx = 4; const unsigned kMSStoreOutputValOpIdx = 5; +// HitObject::TraceRay +const unsigned kHitObjectTraceRay_RayDescOpIdx = 7; +const unsigned kHitObjectTraceRay_PayloadOpIdx = 15; +const unsigned kHitObjectTraceRay_NumOp = 16; + // TODO: add operand index for all the OpCodeClass. } // namespace OperandIndex diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index a7db8612a6..970ddd3e85 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -437,6 +437,10 @@ const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; const unsigned kHitObjectMakeMiss_NumOp = 8; const unsigned kHitObjectMakeMissRayDescOpIdx = 4; +// HitObject::TraceRay +const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; +const unsigned kHitObjectTraceRay_NumOp = 10; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index be45021e41..69dd803f7b 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -19,6 +19,8 @@ #include #include +#include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilResourceProperties.h" @@ -5718,23 +5720,9 @@ Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, 
OP::OpCode opcode, return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); } -Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, - HLObjectOperationLowerHelper *pObjHelper, - bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - - Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); - Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx); - - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); - - Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; - Args[0] = opArg; - for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) { - Args[i] = CI->getArgOperand(i); - } - IRBuilder<> Builder(CI); +static unsigned LoadRayDescElementsIntoArgs(Value **Args, hlsl::OP *OP, + IRBuilder<> &Builder, + Value *RayDescPtr, unsigned Index) { // struct RayDesc //{ // float3 Origin; @@ -5742,34 +5730,51 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // float3 Direction; // float TMax; //}; - Value *zeroIdx = hlslOP->GetU32Const(0); - Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx}); - origin = Builder.CreateLoad(origin); - unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx; - Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(origin, 1); - Args[index++] = Builder.CreateExtractElement(origin, 2); + Value *ZeroIdx = OP->GetU32Const(0); + Value *Origin = Builder.CreateGEP(RayDescPtr, {ZeroIdx, ZeroIdx}); + Origin = Builder.CreateLoad(Origin); + Args[Index++] = Builder.CreateExtractElement(Origin, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(Origin, 1); + Args[Index++] = Builder.CreateExtractElement(Origin, 2); - Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)}); - tmin = Builder.CreateLoad(tmin); - Args[index++] = tmin; + Value *TMinPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(1)}); + 
Args[Index++] = Builder.CreateLoad(TMinPtr); - Value *direction = - Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)}); - direction = Builder.CreateLoad(direction); + Value *DirectionPtr = + Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(2)}); + Value *Direction = Builder.CreateLoad(DirectionPtr); - Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(direction, 1); - Args[index++] = Builder.CreateExtractElement(direction, 2); + Args[Index++] = Builder.CreateExtractElement(Direction, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(Direction, 1); + Args[Index++] = Builder.CreateExtractElement(Direction, 2); - Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)}); - tmax = Builder.CreateLoad(tmax); - Args[index++] = tmax; + Value *TMaxPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(3)}); + Args[Index++] = Builder.CreateLoad(TMaxPtr); + return Index; +} + +Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *pObjHelper, + bool &Translated) { + hlsl::OP *OP = &Helper.hlslOP; - Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad; + Value *RayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); + Value *PayLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx); - Type *Ty = payLoad->getType(); - Function *F = hlslOP->GetOpFunc(opcode, Ty); + Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; + Args[0] = OP->GetU32Const(static_cast(OpCode)); + for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) + Args[i] = CI->getArgOperand(i); + + IRBuilder<> Builder(CI); + LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDesc, + DXIL::OperandIndex::kTraceRayRayDescOpIdx); + + Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = PayLoad; + + Type *Ty = PayLoad->getType(); + Function *F = OP->GetOpFunc(OpCode, Ty); return 
Builder.CreateCall(F, Args); } @@ -6307,7 +6312,37 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + const unsigned DxilNumArgs = DxilInst_HitObject_TraceRay::arg_payload + 1; + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectTraceRay_NumOp); + Value *Args[DxilNumArgs]; + Value *OpArg = OP->GetU32Const(static_cast(OpCode)); + Args[0] = OpArg; + + unsigned DestIdx = 1, SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); + for (; SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx; + ++SrcIdx, ++DestIdx) { + Args[DestIdx] = CI->getArgOperand(SrcIdx); + } + + Value *RayDescPtr = CI->getArgOperand(SrcIdx++); + DestIdx = LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDescPtr, DestIdx); + Value *Payload = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = Payload; + + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + DXASSERT_NOMSG(DestIdx == DxilNumArgs); + + Function *F = OP->GetOpFunc(OpCode, Payload->getType()); + + Value *OutHitObject = Builder.CreateCall(F, Args); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, @@ -6315,7 +6350,16 @@ Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return nullptr; // TODO: Merge SER DXIL patches + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Value *Payload = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + TrivialDxilOperation(OpCode, {nullptr, HitObject, 
Payload}, + Payload->getType(), CI, &Helper.hlslOP); + return nullptr; } Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index e487079b94..b13e9a0f5d 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -1540,6 +1540,8 @@ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { // basing on IOP? IntrinsicOp opcode = static_cast(GetHLOpcode(CI)); if (IntrinsicOp::IOP_TraceRay == opcode || + IntrinsicOp::MOP_DxHitObject_TraceRay == opcode || + IntrinsicOp::MOP_DxHitObject_Invoke == opcode || IntrinsicOp::IOP_ReportHit == opcode || IntrinsicOp::IOP_CallShader == opcode) { return MarkUnsafe(Info, User); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 418425a468..bddf834509 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -12066,8 +12066,10 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_Invoke: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + case hlsl::IntrinsicOp::MOP_DxHitObject_TraceRay: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); break; case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl new file mode 100644 index 0000000000..13bff4a3f4 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl @@ -0,0 +1,102 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST +// RUN: %dxc -T 
lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> Invoke +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Tho +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TPayload +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit Invoke 'TResult (Tho, TPayload &) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> ho 'Tho' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Payload 'TPayload &' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used Invoke 'void (dx::HitObject, Payload &)' static +// AST-NEXT: | | | |-TemplateArgument type 'void' +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'Payload' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Invoke 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> ho 'Payload &&__restrict' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 382 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> TraceRay +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAccelerationStructure +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TInstanceInclusionMask +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayContributionToHitGroupIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMultiplierForGeometryContributionToHitGroupIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> 
class TPayload +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit TraceRay 'TResult (TAccelerationStructure, TRayFlags, TInstanceInclusionMask, TRayContributionToHitGroupIndex, TMultiplierForGeometryContributionToHitGroupIndex, TMissShaderIndex, TRay, TPayload &) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'TAccelerationStructure' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> InstanceInclusionMask 'TInstanceInclusionMask' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayContributionToHitGroupIndex 'TRayContributionToHitGroupIndex' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MultiplierForGeometryContributionToHitGroupIndex 'TMultiplierForGeometryContributionToHitGroupIndex' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Payload 'TPayload &' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used TraceRay 'dx::HitObject (RaytracingAccelerationStructure, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, RayDesc, Payload &)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'RaytracingAccelerationStructure' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-TemplateArgument type 'Payload' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> TraceRay 'RaytracingAccelerationStructure' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> 
RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> InstanceInclusionMask 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayContributionToHitGroupIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MultiplierForGeometryContributionToHitGroupIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'Payload &&__restrict' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 389 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %[[HANDLE:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %{{[^ ]+}}, %dx.types.Handle %[[HANDLE]], i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %{{[^ ]+}}, %struct.Payload* %{{[^ ]+}}) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %{{[^ ]+}}, %struct.Payload* %{{[^ ]+}}) + +// DXIL: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure +// DXIL: %[[HIT:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %{{[^ ]+}}) ; 
HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) +// DXIL: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[HIT]], %struct.Payload* nonnull %{{[^ ]+}}) ; HitObject_Invoke(hitObject,payload) + +// DXIL: !dx.dxrPayloadAnnotations = !{![[MDPLD:[^ ]+]]} +// DXIL: ![[MDPLD]] = !{i32 0, %struct.Payload undef, !{{[^ ]+}}} + +RaytracingAccelerationStructure RTAS; +RWStructuredBuffer UAV : register(u0); + +struct [raypayload] +Payload { + float3 dummy : read(closesthit) : write(caller, anyhit); +}; + +[shader("raygeneration")] +void main() { + RayDesc rayDesc; + rayDesc.Origin = float3(0.0, 1.0, 2.0); + rayDesc.TMin = 3.0f; + rayDesc.Direction = float3(4.0, 5.0, 6.0); + rayDesc.TMax = 7.0f; + + Payload pld; + pld.dummy = float3(7.0, 8.0, 9.0); + + dx::HitObject hit = dx::HitObject::TraceRay( + RTAS, + RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, + 1, + 2, + 4, + 0, + rayDesc, + pld); + + dx::HitObject::Invoke(hit, pld); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll new file mode 100644 index 0000000000..6f364a0161 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll @@ -0,0 +1,167 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + 
+%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %rayDesc = alloca %struct.RayDesc, align 4 + %pld = alloca %struct.Payload, align 4 + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !31 ; line:80 col:3 + call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !31 ; line:80 col:3 + %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !35 ; line:81 col:11 + store <3 x float> , <3 x float>* %Origin, align 4, !dbg !36, !tbaa !37 ; line:81 col:18 + %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !40 ; line:82 col:11 + store float 3.000000e+00, float* %TMin, align 4, !dbg !41, !tbaa !42 ; line:82 col:16 + %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !44 ; line:83 col:11 + store <3 x float> , <3 x float>* %Direction, align 4, !dbg !45, !tbaa !37 ; line:83 col:21 + %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !46 ; line:84 col:11 + store float 7.000000e+00, float* %TMax, align 4, !dbg !47, !tbaa !42 ; line:84 col:16 + %1 = bitcast %struct.Payload* %pld to i8*, !dbg !48 ; line:86 col:3 + call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !48 ; line:86 col:3 + %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !49 ; line:87 col:7 + store <3 x float> , <3 x float>* %dummy, align 4, !dbg 
!50, !tbaa !37 ; line:87 col:13 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !51 ; line:89 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !51 ; line:89 col:3 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !52 ; line:89 col:23 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !52 ; line:89 col:23 + %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !52 ; line:89 col:23 + ; CHECK: %[[ORIGINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR:[^ ]+]], i32 0, i32 0 + ; CHECK: %[[ORIGIN:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGINPTR]] + ; CHECK: %[[O0:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 0 + ; CHECK: %[[O1:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 1 + ; CHECK: %[[O2:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 2 + ; CHECK: %[[TMINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 1 + ; CHECK: %[[TMIN:[^ ]+]] = load float, float* %[[TMINPTR]] + ; CHECK: %[[DIRPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 2 + ; CHECK: %[[DIR:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRPTR]] + ; CHECK: %[[D0:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 0 + ; CHECK: %[[D1:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 1 + ; CHECK: %[[D2:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 2 + ; CHECK: %[[TMAXPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 3 + ; CHECK: %[[TMAX:[^ ]+]] = load 
float, float* %[[TMAXPTR]] + ; CHECK: %[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, float %[[O0]], float %[[O1]], float %[[O2]], float %[[TMIN]], float %[[D0]], float %[[D1]], float %[[D2]], float %[[TMAX]], %struct.Payload* %pld) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !52 ; line:89 col:23 + ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %[[HOPTR:[^ ]+]] + ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOPTR]] + ; CHECK: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !53 ; line:99 col:3 + %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !54 ; line:100 col:1 + call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !54 ; line:100 col:1 + %7 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:100 col:1 + call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !54 ; line:100 col:1 + %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !54 ; line:100 col:1 + call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !54 ; line:100 col:1 + ret void, !dbg !54 ; line:100 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, 
i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !19} +!dx.entryPoints = !{!23} +!dx.fnprops = !{!28} +!dx.options = !{!29, !30} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %struct.Payload undef, !15, %"class.dx::HitObject" undef, !17} +!5 = !{i32 4, !6, !7} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!7 = !{i32 0, !8} +!8 = !{!9} +!9 = !{i32 0, float undef} +!10 = !{i32 32, !11, !12, !13, !14} +!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!15 = !{i32 12, !16} +!16 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!17 = !{i32 4, !18} +!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!19 = !{i32 1, void ()* @"\01?main@@YAXXZ", !20} +!20 = !{!21} +!21 = !{i32 1, !22, !22} +!22 = !{} +!23 = !{null, 
!"", null, !24, null} +!24 = !{!25, null, null, null} +!25 = !{!26} +!26 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !27} +!27 = !{i32 0, i32 4} +!28 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!29 = !{i32 -2147483584} +!30 = !{i32 -1} +!31 = !DILocation(line: 80, column: 3, scope: !32) +!32 = !DISubprogram(name: "main", scope: !33, file: !33, line: 79, type: !34, isLocal: false, isDefinition: true, scopeLine: 79, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!33 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") +!34 = !DISubroutineType(types: !22) +!35 = !DILocation(line: 81, column: 11, scope: !32) +!36 = !DILocation(line: 81, column: 18, scope: !32) +!37 = !{!38, !38, i64 0} +!38 = !{!"omnipotent char", !39, i64 0} +!39 = !{!"Simple C/C++ TBAA"} +!40 = !DILocation(line: 82, column: 11, scope: !32) +!41 = !DILocation(line: 82, column: 16, scope: !32) +!42 = !{!43, !43, i64 0} +!43 = !{!"float", !38, i64 0} +!44 = !DILocation(line: 83, column: 11, scope: !32) +!45 = !DILocation(line: 83, column: 21, scope: !32) +!46 = !DILocation(line: 84, column: 11, scope: !32) +!47 = !DILocation(line: 84, column: 16, scope: !32) +!48 = !DILocation(line: 86, column: 3, scope: !32) +!49 = !DILocation(line: 87, column: 7, scope: !32) +!50 = !DILocation(line: 87, column: 13, scope: !32) +!51 = !DILocation(line: 89, column: 3, scope: !32) +!52 = !DILocation(line: 89, column: 23, scope: !32) +!53 = !DILocation(line: 99, column: 3, scope: !32) +!54 = !DILocation(line: 100, column: 1, scope: !32) From ea95489309139c47c87fe2b2a54fc426910e8ccd Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 23 Apr 2025 07:32:35 -0400 Subject: [PATCH 05/93] [SPIRV] Update submodules (#7369) --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- 
tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 0e71067798..aa6cef192b 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 +Subproject commit aa6cef192b8e693916eb713e7a9ccadf06062ceb diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 4bd1536ed7..898ed77be1 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b +Subproject commit 898ed77be18c99418f983ea220be91a926e6e26e diff --git a/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl index da25ead9c1..312476b260 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl @@ -1,4 +1,4 @@ -// RUN: %dxc -T as_6_6 -E main -fspv-target-env=vulkan1.3 -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T as_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability Linkage // CHECK: OpDecorate %external_function LinkageAttributes "external_function" Export @@ -10,4 +10,4 @@ export int external_function() { void main() { external_function(); return; -} \ No newline at end of file +} From 06381f2d7b2c8b32add7839dc068f0e761f4d4b4 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 23 Apr 2025 10:48:19 -0400 Subject: [PATCH 06/93] [SPIRV] Update submodules (#7373) Update the submodules to the latest release candidate for the Vulkan SDK. 
--- external/SPIRV-Tools | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 898ed77be1..a62abcb402 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 898ed77be18c99418f983ea220be91a926e6e26e +Subproject commit a62abcb402009b9ca5975e6167c09f237f630e0e From bddee27c021ce88df9c90ca2d88cf6872f6c0963 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 23 Apr 2025 18:35:56 +0200 Subject: [PATCH 07/93] [SER] HitObject accessors HLSL -> DXIL lowering (#7360) Lowering for all HitObject accessors (except GetAttributes) Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker: https://github.com/microsoft/DirectXShaderCompiler/issues/7214 --- lib/HLSL/HLOperationLower.cpp | 98 ++- .../HitObject/hitobject_accessors.hlsl | 113 +++ .../DxilGen/hitobject_accessors_dxilgen.ll | 687 ++++++++++++++++++ .../HitObject/hitobject_accessors.hlsl | 263 +++++++ 4 files changed, 1142 insertions(+), 19 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 69dd803f7b..1e43cce07c 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5958,19 +5958,31 @@ Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, return retVal; } +template +static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4, + LLVMContext &Ctx) { + if (Is3x4) { + uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; + Rows = ConstantDataVector::get(Ctx, RVals); + ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; + Cols = ConstantDataVector::get(Ctx, CVals); + 
return; + } + uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; + Rows = ConstantDataVector::get(Ctx, RVals); + ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; + Cols = ConstantDataVector::get(Ctx, CVals); +} + Value *TranslateNoArgMatrix3x4Operation( CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; VectorType *Ty = cast(CI->getType()); - uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; - Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); - uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; - Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); - Value *retVal = - TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP); - return retVal; + Constant *Rows, *Cols; + GetMatrixIndices(Rows, Cols, true, CI->getContext()); + return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); } Value *TranslateNoArgTransposedMatrix3x4Operation( @@ -5979,13 +5991,9 @@ Value *TranslateNoArgTransposedMatrix3x4Operation( bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; VectorType *Ty = cast(CI->getType()); - uint32_t rVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; - Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); - uint8_t cVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; - Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); - Value *retVal = - TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP); - return retVal; + Constant *Rows, *Cols; + GetMatrixIndices(Rows, Cols, false, CI->getContext()); + return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); } /* @@ -6375,7 +6383,12 @@ Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER 
DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + return TrivialDxilOperation(OpCode, {nullptr, HitObject}, CI->getType(), CI, + OP); } Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, @@ -6383,7 +6396,24 @@ Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + VectorType *Ty = cast(CI->getType()); + uint32_t Vals[] = {0, 1, 2, 3}; + Constant *Src = ConstantDataVector::get(CI->getContext(), Vals); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Src}, Ty, CI, OP); +} + +static bool IsHitObject3x4Getter(IntrinsicOp IOP) { + switch (IOP) { + default: + return false; + case IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4: + case IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4: + return true; + } } Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, @@ -6391,21 +6421,51 @@ Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + + // Create 3x4 matrix indices + bool Is3x4 = IsHitObject3x4Getter(IOP); + Constant *Rows, *Cols; + GetMatrixIndices(Rows, Cols, Is3x4, CI->getContext()); + + VectorType *Ty = cast(CI->getType()); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Rows, Cols}, Ty, CI, + OP); } Value 
*TranslateHitObjectLoadLocalRootTableConstant( CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *Offset = CI->getArgOperand(2); + + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Offset}, + Helper.voidTy, CI, OP); } Value *TranslateHitObjectSetShaderTableIndex( CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *ShaderTableIndex = CI->getArgOperand(2); + + Value *InHitObject = Builder.CreateLoad(HitObjectPtr); + Value *OutHitObject = TrivialDxilOperation( + OpCode, {nullptr, InHitObject, ShaderTableIndex}, Helper.voidTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } } // namespace diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl new file mode 100644 index 0000000000..bae2b0590c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -0,0 +1,113 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %dx.types.HitObject = type { i8* } + +// DXIL: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL: %[[HIT:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %[[NOP]], i32 1) ; 
HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %[[HIT]]) ; HitObject_IsHit(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %[[HIT]]) ; HitObject_IsMiss(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %[[HIT]]) ; HitObject_IsNop(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %[[HIT]]) ; HitObject_GeometryIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %[[HIT]]) ; HitObject_HitKind(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %[[HIT]]) ; HitObject_InstanceIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %[[HIT]]) ; HitObject_InstanceID(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %[[HIT]]) ; HitObject_PrimitiveIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %[[HIT]]) ; HitObject_ShaderTableIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %[[HIT]], i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 2) ; 
HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject 
%[[HIT]], i32 0, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call 
float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col)
+// DXIL: ret void
+
+RWByteAddressBuffer outbuf;
+
+template <int M, int N> // hashM: sums all M x N elements of `mat` into a single float.
+float hashM(in matrix<float, M, N> mat) {
+  float h = 0.f;
+  for (int i = 0; i < M; ++i)
+    for (int j = 0; j < N; ++j)
+      h += mat[i][j];
+  return h;
+}
+
+[shader("raygeneration")]
+void main() { // Accumulates the HitObject accessor results below and stores both sums to outbuf.
+  dx::HitObject hit;
+  int isum = 0;
+  float fsum = 0.0f;
+  vector<float, 3> vsum = 0;
+
+  ///// Setters
+  hit.SetShaderTableIndex(1);
+
+  ///// Getters
+
+  // i1 accessors
+  isum += hit.IsHit();
+  isum += hit.IsMiss();
+  isum += hit.IsNop();
+
+  // i32 accessors
+  isum += hit.GetGeometryIndex();
+  isum += hit.GetHitKind();
+  isum += hit.GetInstanceIndex();
+  isum += hit.GetInstanceID();
+  isum += hit.GetPrimitiveIndex();
+  isum += hit.GetShaderTableIndex();
+  isum += hit.LoadLocalRootTableConstant(42);
+
+  // float3 accessors
+  vsum += hit.GetWorldRayOrigin();
+  vsum += hit.GetWorldRayDirection();
+  vsum += hit.GetObjectRayOrigin();
+  vsum += hit.GetObjectRayDirection();
+  fsum += vsum[0] + vsum[1] + vsum[2];
+
+  // matrix accessors
+  fsum += hashM<3, 4>(hit.GetObjectToWorld3x4());
+  fsum += hashM<4, 3>(hit.GetObjectToWorld4x3());
+  fsum += hashM<3, 4>(hit.GetWorldToObject3x4());
+  fsum += hashM<4, 3>(hit.GetWorldToObject4x3());
+
+  // f32 accessors
+  isum += hit.GetRayFlags();
+  fsum += hit.GetRayTMin();
+  fsum += hit.GetRayTCurrent();
+
+  outbuf.Store(0, fsum);
+  outbuf.Store(4, isum);
+}
diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll
new file mode 100644
index 0000000000..4fc6a47780
--- /dev/null
+++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll
@@ -0,0 +1,687 @@
+; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s
+; REQUIRES: dxil-1-9
+
+;
+; Buffer Definitions:
+;
+;
+; Resource Bindings:
+;
+; Name Type Format Dim ID HLSL Bind Count
+; ------------------------------ ---------- ------- ----------- ------- -------------- ------
+; outbuf UAV byte r/w U0u4294967295,space4294967295 1
+;
+target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-ms-dx"
+
+%struct.RWByteAddressBuffer = type { i32 }
+%dx.types.HitObject = type { i8* }
+%dx.types.Handle = type { i8* }
+%dx.types.ResourceProperties = type { i32, i32 }
+%"class.dx::HitObject" = type { i32 } + +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %{{[^ ]+}}, i32 42) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, 
%dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 2) + +; CHECK: %[[M34OW00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M34VOW0:[^ ]+]] = insertelement <12 x float> undef, float %[[M34OW00]], i64 0 +; CHECK-NEXT: %[[M34OW01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M34VOW1:[^ ]+]] = insertelement <12 x float> %[[M34VOW0]], float %[[M34OW01]], i64 1 +; CHECK-NEXT: %[[M34OW02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M34VOW2:[^ ]+]] = insertelement <12 x float> %[[M34VOW1]], float %[[M34OW02]], i64 2 +; CHECK-NEXT: %[[M34OW03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M34VOW3:[^ ]+]] = insertelement <12 x float> %[[M34VOW2]], float %[[M34OW03]], i64 3 +; CHECK-NEXT: %[[M34OW10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M34VOW4:[^ ]+]] = insertelement <12 x float> %[[M34VOW3]], float %[[M34OW10]], i64 4 +; CHECK-NEXT: %[[M34OW11:[^ ]+]] = call float 
@dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M34VOW5:[^ ]+]] = insertelement <12 x float> %[[M34VOW4]], float %[[M34OW11]], i64 5 +; CHECK-NEXT: %[[M34OW12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M34VOW6:[^ ]+]] = insertelement <12 x float> %[[M34VOW5]], float %[[M34OW12]], i64 6 +; CHECK-NEXT: %[[M34OW13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M34VOW7:[^ ]+]] = insertelement <12 x float> %[[M34VOW6]], float %[[M34OW13]], i64 7 +; CHECK-NEXT: %[[M34OW20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M34VOW8:[^ ]+]] = insertelement <12 x float> %[[M34VOW7]], float %[[M34OW20]], i64 8 +; CHECK-NEXT: %[[M34OW21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M34VOW9:[^ ]+]] = insertelement <12 x float> %[[M34VOW8]], float %[[M34OW21]], i64 9 +; CHECK-NEXT: %[[M34OW22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M34VOW10:[^ ]+]] = insertelement <12 x float> %[[M34VOW9]], float %[[M34OW22]], i64 10 +; CHECK-NEXT: %[[M34OW23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M34VOW10]], float %[[M34OW23]], i64 11 + +; CHECK: %[[M43OW00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M43VOW0:[^ ]+]] = insertelement <12 x float> undef, float %[[M43OW00]], i64 0 +; CHECK-NEXT: %[[M43OW10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 
0) +; CHECK-NEXT: %[[M43VOW1:[^ ]+]] = insertelement <12 x float> %[[M43VOW0]], float %[[M43OW10]], i64 1 +; CHECK-NEXT: %[[M43OW20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M43VOW2:[^ ]+]] = insertelement <12 x float> %[[M43VOW1]], float %[[M43OW20]], i64 2 +; CHECK-NEXT: %[[M43OW01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M43VOW3:[^ ]+]] = insertelement <12 x float> %[[M43VOW2]], float %[[M43OW01]], i64 3 +; CHECK-NEXT: %[[M43OW11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M43VOW4:[^ ]+]] = insertelement <12 x float> %[[M43VOW3]], float %[[M43OW11]], i64 4 +; CHECK-NEXT: %[[M43OW21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M43VOW5:[^ ]+]] = insertelement <12 x float> %[[M43VOW4]], float %[[M43OW21]], i64 5 +; CHECK-NEXT: %[[M43OW02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M43VOW6:[^ ]+]] = insertelement <12 x float> %[[M43VOW5]], float %[[M43OW02]], i64 6 +; CHECK-NEXT: %[[M43OW12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M43VOW7:[^ ]+]] = insertelement <12 x float> %[[M43VOW6]], float %[[M43OW12]], i64 7 +; CHECK-NEXT: %[[M43OW22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M43VOW8:[^ ]+]] = insertelement <12 x float> %[[M43VOW7]], float %[[M43OW22]], i64 8 +; CHECK-NEXT: %[[M43OW03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M43VOW9:[^ ]+]] = insertelement <12 x float> %[[M43VOW8]], 
float %[[M43OW03]], i64 9 +; CHECK-NEXT: %[[M43OW13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M43VOW10:[^ ]+]] = insertelement <12 x float> %[[M43VOW9]], float %[[M43OW13]], i64 10 +; CHECK-NEXT: %[[M43OW23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M43VOW10]], float %[[M43OW23]], i64 11 + +; CHECK: %[[M34WO00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M34VWO0:[^ ]+]] = insertelement <12 x float> undef, float %[[M34WO00]], i64 0 +; CHECK-NEXT: %[[M34WO01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M34VWO1:[^ ]+]] = insertelement <12 x float> %[[M34VWO0]], float %[[M34WO01]], i64 1 +; CHECK-NEXT: %[[M34WO02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M34VWO2:[^ ]+]] = insertelement <12 x float> %[[M34VWO1]], float %[[M34WO02]], i64 2 +; CHECK-NEXT: %[[M34WO03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M34VWO3:[^ ]+]] = insertelement <12 x float> %[[M34VWO2]], float %[[M34WO03]], i64 3 +; CHECK-NEXT: %[[M34WO10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M34VWO4:[^ ]+]] = insertelement <12 x float> %[[M34VWO3]], float %[[M34WO10]], i64 4 +; CHECK-NEXT: %[[M34WO11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M34VWO5:[^ ]+]] = insertelement <12 x float> %[[M34VWO4]], float %[[M34WO11]], i64 5 +; CHECK-NEXT: %[[M34WO12:[^ ]+]] = call float 
@dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M34VWO6:[^ ]+]] = insertelement <12 x float> %[[M34VWO5]], float %[[M34WO12]], i64 6 +; CHECK-NEXT: %[[M34WO13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M34VWO7:[^ ]+]] = insertelement <12 x float> %[[M34VWO6]], float %[[M34WO13]], i64 7 +; CHECK-NEXT: %[[M34WO20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M34VWO8:[^ ]+]] = insertelement <12 x float> %[[M34VWO7]], float %[[M34WO20]], i64 8 +; CHECK-NEXT: %[[M34WO21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M34VWO9:[^ ]+]] = insertelement <12 x float> %[[M34VWO8]], float %[[M34WO21]], i64 9 +; CHECK-NEXT: %[[M34WO22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M34VWO10:[^ ]+]] = insertelement <12 x float> %[[M34VWO9]], float %[[M34WO22]], i64 10 +; CHECK-NEXT: %[[M34WO23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M34VWO10]], float %[[M34WO23]], i64 11 + +; CHECK: %[[M43WO00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M43VWO0:[^ ]+]] = insertelement <12 x float> undef, float %[[M43WO00]], i64 0 +; CHECK-NEXT: %[[M43WO10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M43VWO1:[^ ]+]] = insertelement <12 x float> %[[M43VWO0]], float %[[M43WO10]], i64 1 +; CHECK-NEXT: %[[M43WO20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 
0) +; CHECK-NEXT: %[[M43VWO2:[^ ]+]] = insertelement <12 x float> %[[M43VWO1]], float %[[M43WO20]], i64 2 +; CHECK-NEXT: %[[M43WO01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M43VWO3:[^ ]+]] = insertelement <12 x float> %[[M43VWO2]], float %[[M43WO01]], i64 3 +; CHECK-NEXT: %[[M43WO11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M43VWO4:[^ ]+]] = insertelement <12 x float> %[[M43VWO3]], float %[[M43WO11]], i64 4 +; CHECK-NEXT: %[[M43WO21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M43VWO5:[^ ]+]] = insertelement <12 x float> %[[M43VWO4]], float %[[M43WO21]], i64 5 +; CHECK-NEXT: %[[M43WO02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M43VWO6:[^ ]+]] = insertelement <12 x float> %[[M43VWO5]], float %[[M43WO02]], i64 6 +; CHECK-NEXT: %[[M43WO12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M43VWO7:[^ ]+]] = insertelement <12 x float> %[[M43VWO6]], float %[[M43WO12]], i64 7 +; CHECK-NEXT: %[[M43WO22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M43VWO8:[^ ]+]] = insertelement <12 x float> %[[M43VWO7]], float %[[M43WO22]], i64 8 +; CHECK-NEXT: %[[M43WO03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M43VWO9:[^ ]+]] = insertelement <12 x float> %[[M43VWO8]], float %[[M43WO03]], i64 9 +; CHECK-NEXT: %[[M43WO13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M43VWO10:[^ ]+]] = insertelement <12 x float> 
%[[M43VWO9]], float %[[M43WO13]], i64 10 +; CHECK-NEXT: %[[M43WO23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M43VWO10]], float %[[M43WO23]], i64 11 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +for.body.i.lr.ph: + %0 = alloca [12 x float] + %1 = alloca [3 x i32] + %2 = alloca [12 x float] + %3 = alloca [4 x i32] + %4 = alloca [12 x float] + %5 = alloca [3 x i32] + %6 = alloca [12 x float] + %7 = alloca [4 x i32] + %hit = alloca %dx.types.HitObject, align 4 + %8 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:69 col:3 + call void @llvm.lifetime.start(i64 4, i8* %8) #0, !dbg !19 ; line:69 col:3 + %9 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:69 col:17 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 388, %dx.types.HitObject* %hit, i32 1), !dbg !24 ; line:75 col:3 + %10 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 383, %dx.types.HitObject* %hit), !dbg !25 ; line:80 col:11 + %conv = zext i1 %10 to i32, !dbg !25 ; line:80 col:11 + %11 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 384, %dx.types.HitObject* %hit), !dbg !26 ; line:81 col:11 + %conv3 = zext i1 %11 to i32, !dbg !26 ; line:81 col:11 + %add4 = add nsw i32 %conv, %conv3, !dbg !27 ; line:81 col:8 + %12 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 385, %dx.types.HitObject* %hit), !dbg !28 ; line:82 col:11 + %conv6 = zext i1 %12 to i32, !dbg !28 ; line:82 col:11 + %add7 = add nsw i32 %add4, %conv6, !dbg !29 ; line:82 col:8 + %13 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 365, %dx.types.HitObject* %hit), !dbg !30 ; line:85 col:11 + %add9 = add i32 %add7, %13, !dbg !31 ; line:85 col:8 + %14 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 366, %dx.types.HitObject* 
%hit), !dbg !32 ; line:86 col:11 + %add11 = add i32 %add9, %14, !dbg !33 ; line:86 col:8 + %15 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 368, %dx.types.HitObject* %hit), !dbg !34 ; line:87 col:11 + %add13 = add i32 %add11, %15, !dbg !35 ; line:87 col:8 + %16 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 367, %dx.types.HitObject* %hit), !dbg !36 ; line:88 col:11 + %add15 = add i32 %add13, %16, !dbg !37 ; line:88 col:8 + %17 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 373, %dx.types.HitObject* %hit), !dbg !38 ; line:89 col:11 + %add17 = add i32 %add15, %17, !dbg !39 ; line:89 col:8 + %18 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 377, %dx.types.HitObject* %hit), !dbg !40 ; line:90 col:11 + %add19 = add i32 %add17, %18, !dbg !41 ; line:90 col:8 + %19 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %hit, i32 42), !dbg !42 ; line:91 col:11 + %add21 = add i32 %add19, %19, !dbg !43 ; line:91 col:8 + %20 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 379, %dx.types.HitObject* %hit), !dbg !44 ; line:94 col:11 + %add23 = fadd <3 x float> zeroinitializer, %20, !dbg !45 ; line:94 col:8 + %21 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 378, %dx.types.HitObject* %hit), !dbg !46 ; line:95 col:11 + %add25 = fadd <3 x float> %add23, %21, !dbg !47 ; line:95 col:8 + %22 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 370, %dx.types.HitObject* %hit), !dbg !48 ; line:96 col:11 + %add27 = fadd <3 x float> %add25, %22, !dbg !49 ; line:96 col:8 + %23 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 369, %dx.types.HitObject* %hit), !dbg !50 ; line:97 col:11 + %add29 = fadd <3 x float> %add27, %23, !dbg !51 ; line:97 col:8 + %vsum.0.vec.extract = extractelement <3 x float> %add29, i32 0, !dbg !52 ; line:98 col:11 + %vsum.4.vec.extract = extractelement 
<3 x float> %add29, i32 1, !dbg !53 ; line:98 col:21 + %add30 = fadd float %vsum.0.vec.extract, %vsum.4.vec.extract, !dbg !54 ; line:98 col:19 + %vsum.8.vec.extract = extractelement <3 x float> %add29, i32 2, !dbg !55 ; line:98 col:31 + %add31 = fadd float %add30, %vsum.8.vec.extract, !dbg !56 ; line:98 col:29 + %add32 = fadd float 0.000000e+00, %add31, !dbg !57 ; line:98 col:8 + %24 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 371, %dx.types.HitObject* %hit), !dbg !58 ; line:101 col:23 + %row2col = shufflevector <12 x float> %24, <12 x float> %24, <12 x i32> , !dbg !59 ; line:101 col:11 + br label %for.body.7.i.lr.ph, !dbg !60 ; line:61 col:3 + +for.body.7.i.lr.ph: ; preds = %for.cond.cleanup.6.i, %for.body.i.lr.ph + %i.i.0 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc9.i, %for.cond.cleanup.6.i ] + %h.i.0 = phi float [ 0.000000e+00, %for.body.i.lr.ph ], [ %add.i, %for.cond.cleanup.6.i ] + br label %for.body.7.i, !dbg !63 ; line:62 col:5 + +for.cond.cleanup.6.i: ; preds = %for.body.7.i + %inc9.i = add nsw i32 %i.i.0, 1, !dbg !64 ; line:61 col:26 + %cmp.i = icmp slt i32 %inc9.i, 3, !dbg !65 ; line:61 col:21 + br i1 %cmp.i, label %for.body.7.i.lr.ph, label %for.body.i.8.lr.ph, !dbg !60 ; line:61 col:3 + +for.body.7.i: ; preds = %for.body.7.i.lr.ph, %for.body.7.i + %h.i.263 = phi float [ %h.i.0, %for.body.7.i.lr.ph ], [ %add.i, %for.body.7.i ] + %j.i.0 = phi i32 [ 0, %for.body.7.i.lr.ph ], [ %inc.i, %for.body.7.i ] + %25 = add i32 3, %i.i.0, !dbg !66 ; line:63 col:12 + %26 = add i32 6, %i.i.0, !dbg !66 ; line:63 col:12 + %27 = add i32 9, %i.i.0, !dbg !66 ; line:63 col:12 + %28 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 0, !dbg !66 ; line:63 col:12 + store i32 %i.i.0, i32* %28, !dbg !66 ; line:63 col:12 + %29 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 1, !dbg !66 ; line:63 col:12 + store i32 %25, i32* %29, !dbg !66 ; line:63 col:12 + %30 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 2, !dbg !66 ; line:63 col:12 
+ store i32 %26, i32* %30, !dbg !66 ; line:63 col:12 + %31 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 3, !dbg !66 ; line:63 col:12 + store i32 %27, i32* %31, !dbg !66 ; line:63 col:12 + %32 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 %j.i.0, !dbg !66 ; line:63 col:12 + %33 = load i32, i32* %32, !dbg !66 ; line:63 col:12 + %34 = extractelement <12 x float> %row2col, i64 0, !dbg !66 ; line:63 col:12 + %35 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 0, !dbg !66 ; line:63 col:12 + store float %34, float* %35, !dbg !66 ; line:63 col:12 + %36 = extractelement <12 x float> %row2col, i64 1, !dbg !66 ; line:63 col:12 + %37 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 1, !dbg !66 ; line:63 col:12 + store float %36, float* %37, !dbg !66 ; line:63 col:12 + %38 = extractelement <12 x float> %row2col, i64 2, !dbg !66 ; line:63 col:12 + %39 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 2, !dbg !66 ; line:63 col:12 + store float %38, float* %39, !dbg !66 ; line:63 col:12 + %40 = extractelement <12 x float> %row2col, i64 3, !dbg !66 ; line:63 col:12 + %41 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 3, !dbg !66 ; line:63 col:12 + store float %40, float* %41, !dbg !66 ; line:63 col:12 + %42 = extractelement <12 x float> %row2col, i64 4, !dbg !66 ; line:63 col:12 + %43 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 4, !dbg !66 ; line:63 col:12 + store float %42, float* %43, !dbg !66 ; line:63 col:12 + %44 = extractelement <12 x float> %row2col, i64 5, !dbg !66 ; line:63 col:12 + %45 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 5, !dbg !66 ; line:63 col:12 + store float %44, float* %45, !dbg !66 ; line:63 col:12 + %46 = extractelement <12 x float> %row2col, i64 6, !dbg !66 ; line:63 col:12 + %47 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 6, !dbg !66 ; line:63 col:12 + store float %46, float* %47, !dbg !66 ; line:63 col:12 + %48 = extractelement <12 x float> %row2col, 
i64 7, !dbg !66 ; line:63 col:12 + %49 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 7, !dbg !66 ; line:63 col:12 + store float %48, float* %49, !dbg !66 ; line:63 col:12 + %50 = extractelement <12 x float> %row2col, i64 8, !dbg !66 ; line:63 col:12 + %51 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 8, !dbg !66 ; line:63 col:12 + store float %50, float* %51, !dbg !66 ; line:63 col:12 + %52 = extractelement <12 x float> %row2col, i64 9, !dbg !66 ; line:63 col:12 + %53 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 9, !dbg !66 ; line:63 col:12 + store float %52, float* %53, !dbg !66 ; line:63 col:12 + %54 = extractelement <12 x float> %row2col, i64 10, !dbg !66 ; line:63 col:12 + %55 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 10, !dbg !66 ; line:63 col:12 + store float %54, float* %55, !dbg !66 ; line:63 col:12 + %56 = extractelement <12 x float> %row2col, i64 11, !dbg !66 ; line:63 col:12 + %57 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 11, !dbg !66 ; line:63 col:12 + store float %56, float* %57, !dbg !66 ; line:63 col:12 + %58 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 %33, !dbg !66 ; line:63 col:12 + %59 = load float, float* %58, !dbg !66 ; line:63 col:12 + %add.i = fadd float %h.i.263, %59, !dbg !67 ; line:63 col:9 + %inc.i = add nsw i32 %j.i.0, 1, !dbg !68 ; line:62 col:28 + %cmp3.i = icmp slt i32 %inc.i, 4, !dbg !69 ; line:62 col:23 + br i1 %cmp3.i, label %for.body.7.i, label %for.cond.cleanup.6.i, !dbg !63 ; line:62 col:5 + +for.body.i.8.lr.ph: ; preds = %for.cond.cleanup.6.i + %add35 = fadd float %add32, %add.i, !dbg !70 ; line:101 col:8 + %60 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 372, %dx.types.HitObject* %hit), !dbg !71 ; line:102 col:23 + %row2col52 = shufflevector <12 x float> %60, <12 x float> %60, <12 x i32> , !dbg !72 ; line:102 col:11 + br label %for.body.7.i.15.lr.ph, !dbg !73 ; line:61 col:3 + +for.body.7.i.15.lr.ph: ; 
preds = %for.cond.cleanup.6.i.12, %for.body.i.8.lr.ph + %i.i.3.0 = phi i32 [ 0, %for.body.i.8.lr.ph ], [ %inc9.i.11, %for.cond.cleanup.6.i.12 ] + %h.i.2.0 = phi float [ 0.000000e+00, %for.body.i.8.lr.ph ], [ %add.i.13, %for.cond.cleanup.6.i.12 ] + br label %for.body.7.i.15, !dbg !76 ; line:62 col:5 + +for.cond.cleanup.6.i.12: ; preds = %for.body.7.i.15 + %inc9.i.11 = add nsw i32 %i.i.3.0, 1, !dbg !77 ; line:61 col:26 + %cmp.i.6 = icmp slt i32 %inc9.i.11, 4, !dbg !78 ; line:61 col:21 + br i1 %cmp.i.6, label %for.body.7.i.15.lr.ph, label %for.body.i.23.lr.ph, !dbg !73 ; line:61 col:3 + +for.body.7.i.15: ; preds = %for.body.7.i.15.lr.ph, %for.body.7.i.15 + %j.i.5.0 = phi i32 [ 0, %for.body.7.i.15.lr.ph ], [ %inc.i.14, %for.body.7.i.15 ] + %h.i.2.2 = phi float [ %h.i.2.0, %for.body.7.i.15.lr.ph ], [ %add.i.13, %for.body.7.i.15 ] + %61 = add i32 4, %i.i.3.0, !dbg !79 ; line:63 col:12 + %62 = add i32 8, %i.i.3.0, !dbg !79 ; line:63 col:12 + %63 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 0, !dbg !79 ; line:63 col:12 + store i32 %i.i.3.0, i32* %63, !dbg !79 ; line:63 col:12 + %64 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 1, !dbg !79 ; line:63 col:12 + store i32 %61, i32* %64, !dbg !79 ; line:63 col:12 + %65 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 2, !dbg !79 ; line:63 col:12 + store i32 %62, i32* %65, !dbg !79 ; line:63 col:12 + %66 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 %j.i.5.0, !dbg !79 ; line:63 col:12 + %67 = load i32, i32* %66, !dbg !79 ; line:63 col:12 + %68 = extractelement <12 x float> %row2col52, i64 0, !dbg !79 ; line:63 col:12 + %69 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 0, !dbg !79 ; line:63 col:12 + store float %68, float* %69, !dbg !79 ; line:63 col:12 + %70 = extractelement <12 x float> %row2col52, i64 1, !dbg !79 ; line:63 col:12 + %71 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 1, !dbg !79 ; line:63 col:12 + store float %70, float* %71, !dbg !79 ; line:63 col:12 + %72 = 
extractelement <12 x float> %row2col52, i64 2, !dbg !79 ; line:63 col:12 + %73 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 2, !dbg !79 ; line:63 col:12 + store float %72, float* %73, !dbg !79 ; line:63 col:12 + %74 = extractelement <12 x float> %row2col52, i64 3, !dbg !79 ; line:63 col:12 + %75 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 3, !dbg !79 ; line:63 col:12 + store float %74, float* %75, !dbg !79 ; line:63 col:12 + %76 = extractelement <12 x float> %row2col52, i64 4, !dbg !79 ; line:63 col:12 + %77 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 4, !dbg !79 ; line:63 col:12 + store float %76, float* %77, !dbg !79 ; line:63 col:12 + %78 = extractelement <12 x float> %row2col52, i64 5, !dbg !79 ; line:63 col:12 + %79 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 5, !dbg !79 ; line:63 col:12 + store float %78, float* %79, !dbg !79 ; line:63 col:12 + %80 = extractelement <12 x float> %row2col52, i64 6, !dbg !79 ; line:63 col:12 + %81 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 6, !dbg !79 ; line:63 col:12 + store float %80, float* %81, !dbg !79 ; line:63 col:12 + %82 = extractelement <12 x float> %row2col52, i64 7, !dbg !79 ; line:63 col:12 + %83 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 7, !dbg !79 ; line:63 col:12 + store float %82, float* %83, !dbg !79 ; line:63 col:12 + %84 = extractelement <12 x float> %row2col52, i64 8, !dbg !79 ; line:63 col:12 + %85 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 8, !dbg !79 ; line:63 col:12 + store float %84, float* %85, !dbg !79 ; line:63 col:12 + %86 = extractelement <12 x float> %row2col52, i64 9, !dbg !79 ; line:63 col:12 + %87 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 9, !dbg !79 ; line:63 col:12 + store float %86, float* %87, !dbg !79 ; line:63 col:12 + %88 = extractelement <12 x float> %row2col52, i64 10, !dbg !79 ; line:63 col:12 + %89 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 10, 
!dbg !79 ; line:63 col:12 + store float %88, float* %89, !dbg !79 ; line:63 col:12 + %90 = extractelement <12 x float> %row2col52, i64 11, !dbg !79 ; line:63 col:12 + %91 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 11, !dbg !79 ; line:63 col:12 + store float %90, float* %91, !dbg !79 ; line:63 col:12 + %92 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 %67, !dbg !79 ; line:63 col:12 + %93 = load float, float* %92, !dbg !79 ; line:63 col:12 + %add.i.13 = fadd float %h.i.2.2, %93, !dbg !80 ; line:63 col:9 + %inc.i.14 = add nsw i32 %j.i.5.0, 1, !dbg !81 ; line:62 col:28 + %cmp3.i.9 = icmp slt i32 %inc.i.14, 3, !dbg !82 ; line:62 col:23 + br i1 %cmp3.i.9, label %for.body.7.i.15, label %for.cond.cleanup.6.i.12, !dbg !76 ; line:62 col:5 + +for.body.i.23.lr.ph: ; preds = %for.cond.cleanup.6.i.12 + %add38 = fadd float %add35, %add.i.13, !dbg !83 ; line:102 col:8 + %94 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 380, %dx.types.HitObject* %hit), !dbg !84 ; line:103 col:23 + %row2col53 = shufflevector <12 x float> %94, <12 x float> %94, <12 x i32> , !dbg !85 ; line:103 col:11 + br label %for.body.7.i.30.lr.ph, !dbg !86 ; line:61 col:3 + +for.body.7.i.30.lr.ph: ; preds = %for.cond.cleanup.6.i.27, %for.body.i.23.lr.ph + %i.i.18.0 = phi i32 [ 0, %for.body.i.23.lr.ph ], [ %inc9.i.26, %for.cond.cleanup.6.i.27 ] + %h.i.17.0 = phi float [ 0.000000e+00, %for.body.i.23.lr.ph ], [ %add.i.28, %for.cond.cleanup.6.i.27 ] + br label %for.body.7.i.30, !dbg !88 ; line:62 col:5 + +for.cond.cleanup.6.i.27: ; preds = %for.body.7.i.30 + %inc9.i.26 = add nsw i32 %i.i.18.0, 1, !dbg !89 ; line:61 col:26 + %cmp.i.21 = icmp slt i32 %inc9.i.26, 3, !dbg !90 ; line:61 col:21 + br i1 %cmp.i.21, label %for.body.7.i.30.lr.ph, label %for.body.i.39.lr.ph, !dbg !86 ; line:61 col:3 + +for.body.7.i.30: ; preds = %for.body.7.i.30.lr.ph, %for.body.7.i.30 + %j.i.20.0 = phi i32 [ 0, %for.body.7.i.30.lr.ph ], [ %inc.i.29, %for.body.7.i.30 ] + %h.i.17.2 = 
phi float [ %h.i.17.0, %for.body.7.i.30.lr.ph ], [ %add.i.28, %for.body.7.i.30 ] + %95 = add i32 3, %i.i.18.0, !dbg !91 ; line:63 col:12 + %96 = add i32 6, %i.i.18.0, !dbg !91 ; line:63 col:12 + %97 = add i32 9, %i.i.18.0, !dbg !91 ; line:63 col:12 + %98 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 0, !dbg !91 ; line:63 col:12 + store i32 %i.i.18.0, i32* %98, !dbg !91 ; line:63 col:12 + %99 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 1, !dbg !91 ; line:63 col:12 + store i32 %95, i32* %99, !dbg !91 ; line:63 col:12 + %100 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 2, !dbg !91 ; line:63 col:12 + store i32 %96, i32* %100, !dbg !91 ; line:63 col:12 + %101 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 3, !dbg !91 ; line:63 col:12 + store i32 %97, i32* %101, !dbg !91 ; line:63 col:12 + %102 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 %j.i.20.0, !dbg !91 ; line:63 col:12 + %103 = load i32, i32* %102, !dbg !91 ; line:63 col:12 + %104 = extractelement <12 x float> %row2col53, i64 0, !dbg !91 ; line:63 col:12 + %105 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 0, !dbg !91 ; line:63 col:12 + store float %104, float* %105, !dbg !91 ; line:63 col:12 + %106 = extractelement <12 x float> %row2col53, i64 1, !dbg !91 ; line:63 col:12 + %107 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 1, !dbg !91 ; line:63 col:12 + store float %106, float* %107, !dbg !91 ; line:63 col:12 + %108 = extractelement <12 x float> %row2col53, i64 2, !dbg !91 ; line:63 col:12 + %109 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 2, !dbg !91 ; line:63 col:12 + store float %108, float* %109, !dbg !91 ; line:63 col:12 + %110 = extractelement <12 x float> %row2col53, i64 3, !dbg !91 ; line:63 col:12 + %111 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 3, !dbg !91 ; line:63 col:12 + store float %110, float* %111, !dbg !91 ; line:63 col:12 + %112 = extractelement <12 x float> %row2col53, i64 4, !dbg !91 ; line:63 
col:12 + %113 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 4, !dbg !91 ; line:63 col:12 + store float %112, float* %113, !dbg !91 ; line:63 col:12 + %114 = extractelement <12 x float> %row2col53, i64 5, !dbg !91 ; line:63 col:12 + %115 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 5, !dbg !91 ; line:63 col:12 + store float %114, float* %115, !dbg !91 ; line:63 col:12 + %116 = extractelement <12 x float> %row2col53, i64 6, !dbg !91 ; line:63 col:12 + %117 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 6, !dbg !91 ; line:63 col:12 + store float %116, float* %117, !dbg !91 ; line:63 col:12 + %118 = extractelement <12 x float> %row2col53, i64 7, !dbg !91 ; line:63 col:12 + %119 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 7, !dbg !91 ; line:63 col:12 + store float %118, float* %119, !dbg !91 ; line:63 col:12 + %120 = extractelement <12 x float> %row2col53, i64 8, !dbg !91 ; line:63 col:12 + %121 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 8, !dbg !91 ; line:63 col:12 + store float %120, float* %121, !dbg !91 ; line:63 col:12 + %122 = extractelement <12 x float> %row2col53, i64 9, !dbg !91 ; line:63 col:12 + %123 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 9, !dbg !91 ; line:63 col:12 + store float %122, float* %123, !dbg !91 ; line:63 col:12 + %124 = extractelement <12 x float> %row2col53, i64 10, !dbg !91 ; line:63 col:12 + %125 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 10, !dbg !91 ; line:63 col:12 + store float %124, float* %125, !dbg !91 ; line:63 col:12 + %126 = extractelement <12 x float> %row2col53, i64 11, !dbg !91 ; line:63 col:12 + %127 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 11, !dbg !91 ; line:63 col:12 + store float %126, float* %127, !dbg !91 ; line:63 col:12 + %128 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 %103, !dbg !91 ; line:63 col:12 + %129 = load float, float* %128, !dbg !91 ; line:63 col:12 + %add.i.28 = fadd float 
%h.i.17.2, %129, !dbg !92 ; line:63 col:9 + %inc.i.29 = add nsw i32 %j.i.20.0, 1, !dbg !93 ; line:62 col:28 + %cmp3.i.24 = icmp slt i32 %inc.i.29, 4, !dbg !94 ; line:62 col:23 + br i1 %cmp3.i.24, label %for.body.7.i.30, label %for.cond.cleanup.6.i.27, !dbg !88 ; line:62 col:5 + +for.body.i.39.lr.ph: ; preds = %for.cond.cleanup.6.i.27 + %add41 = fadd float %add38, %add.i.28, !dbg !95 ; line:103 col:8 + %130 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 381, %dx.types.HitObject* %hit), !dbg !96 ; line:104 col:23 + %row2col54 = shufflevector <12 x float> %130, <12 x float> %130, <12 x i32> , !dbg !97 ; line:104 col:11 + br label %for.body.7.i.46.lr.ph, !dbg !98 ; line:61 col:3 + +for.body.7.i.46.lr.ph: ; preds = %for.cond.cleanup.6.i.43, %for.body.i.39.lr.ph + %i.i.34.0 = phi i32 [ 0, %for.body.i.39.lr.ph ], [ %inc9.i.42, %for.cond.cleanup.6.i.43 ] + %h.i.33.0 = phi float [ 0.000000e+00, %for.body.i.39.lr.ph ], [ %add.i.44, %for.cond.cleanup.6.i.43 ] + br label %for.body.7.i.46, !dbg !100 ; line:62 col:5 + +for.cond.cleanup.6.i.43: ; preds = %for.body.7.i.46 + %inc9.i.42 = add nsw i32 %i.i.34.0, 1, !dbg !101 ; line:61 col:26 + %cmp.i.37 = icmp slt i32 %inc9.i.42, 4, !dbg !102 ; line:61 col:21 + br i1 %cmp.i.37, label %for.body.7.i.46.lr.ph, label %"\01??$hashM@$03$02@@YAMV?$matrix@M$03$02@@@Z.exit.47", !dbg !98 ; line:61 col:3 + +for.body.7.i.46: ; preds = %for.body.7.i.46.lr.ph, %for.body.7.i.46 + %j.i.36.0 = phi i32 [ 0, %for.body.7.i.46.lr.ph ], [ %inc.i.45, %for.body.7.i.46 ] + %h.i.33.2 = phi float [ %h.i.33.0, %for.body.7.i.46.lr.ph ], [ %add.i.44, %for.body.7.i.46 ] + %131 = add i32 4, %i.i.34.0, !dbg !103 ; line:63 col:12 + %132 = add i32 8, %i.i.34.0, !dbg !103 ; line:63 col:12 + %133 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 0, !dbg !103 ; line:63 col:12 + store i32 %i.i.34.0, i32* %133, !dbg !103 ; line:63 col:12 + %134 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 1, !dbg !103 ; line:63 col:12 + store i32 
%131, i32* %134, !dbg !103 ; line:63 col:12 + %135 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 2, !dbg !103 ; line:63 col:12 + store i32 %132, i32* %135, !dbg !103 ; line:63 col:12 + %136 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 %j.i.36.0, !dbg !103 ; line:63 col:12 + %137 = load i32, i32* %136, !dbg !103 ; line:63 col:12 + %138 = extractelement <12 x float> %row2col54, i64 0, !dbg !103 ; line:63 col:12 + %139 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 0, !dbg !103 ; line:63 col:12 + store float %138, float* %139, !dbg !103 ; line:63 col:12 + %140 = extractelement <12 x float> %row2col54, i64 1, !dbg !103 ; line:63 col:12 + %141 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 1, !dbg !103 ; line:63 col:12 + store float %140, float* %141, !dbg !103 ; line:63 col:12 + %142 = extractelement <12 x float> %row2col54, i64 2, !dbg !103 ; line:63 col:12 + %143 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 2, !dbg !103 ; line:63 col:12 + store float %142, float* %143, !dbg !103 ; line:63 col:12 + %144 = extractelement <12 x float> %row2col54, i64 3, !dbg !103 ; line:63 col:12 + %145 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 3, !dbg !103 ; line:63 col:12 + store float %144, float* %145, !dbg !103 ; line:63 col:12 + %146 = extractelement <12 x float> %row2col54, i64 4, !dbg !103 ; line:63 col:12 + %147 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 4, !dbg !103 ; line:63 col:12 + store float %146, float* %147, !dbg !103 ; line:63 col:12 + %148 = extractelement <12 x float> %row2col54, i64 5, !dbg !103 ; line:63 col:12 + %149 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 5, !dbg !103 ; line:63 col:12 + store float %148, float* %149, !dbg !103 ; line:63 col:12 + %150 = extractelement <12 x float> %row2col54, i64 6, !dbg !103 ; line:63 col:12 + %151 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 6, !dbg !103 ; line:63 col:12 + store float %150, float* %151, !dbg 
!103 ; line:63 col:12 + %152 = extractelement <12 x float> %row2col54, i64 7, !dbg !103 ; line:63 col:12 + %153 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 7, !dbg !103 ; line:63 col:12 + store float %152, float* %153, !dbg !103 ; line:63 col:12 + %154 = extractelement <12 x float> %row2col54, i64 8, !dbg !103 ; line:63 col:12 + %155 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 8, !dbg !103 ; line:63 col:12 + store float %154, float* %155, !dbg !103 ; line:63 col:12 + %156 = extractelement <12 x float> %row2col54, i64 9, !dbg !103 ; line:63 col:12 + %157 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 9, !dbg !103 ; line:63 col:12 + store float %156, float* %157, !dbg !103 ; line:63 col:12 + %158 = extractelement <12 x float> %row2col54, i64 10, !dbg !103 ; line:63 col:12 + %159 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 10, !dbg !103 ; line:63 col:12 + store float %158, float* %159, !dbg !103 ; line:63 col:12 + %160 = extractelement <12 x float> %row2col54, i64 11, !dbg !103 ; line:63 col:12 + %161 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 11, !dbg !103 ; line:63 col:12 + store float %160, float* %161, !dbg !103 ; line:63 col:12 + %162 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 %137, !dbg !103 ; line:63 col:12 + %163 = load float, float* %162, !dbg !103 ; line:63 col:12 + %add.i.44 = fadd float %h.i.33.2, %163, !dbg !104 ; line:63 col:9 + %inc.i.45 = add nsw i32 %j.i.36.0, 1, !dbg !105 ; line:62 col:28 + %cmp3.i.40 = icmp slt i32 %inc.i.45, 3, !dbg !106 ; line:62 col:23 + br i1 %cmp3.i.40, label %for.body.7.i.46, label %for.cond.cleanup.6.i.43, !dbg !100 ; line:62 col:5 + +"\01??$hashM@$03$02@@YAMV?$matrix@M$03$02@@@Z.exit.47": ; preds = %for.cond.cleanup.6.i.43 + %add44 = fadd float %add41, %add.i.44, !dbg !107 ; line:104 col:8 + %164 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 374, %dx.types.HitObject* %hit), !dbg !108 ; line:107 col:11 + %add46 = add i32 
%add21, %164, !dbg !109 ; line:107 col:8 + %165 = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 376, %dx.types.HitObject* %hit), !dbg !110 ; line:108 col:11 + %add48 = fadd float %add44, %165, !dbg !111 ; line:108 col:8 + %166 = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 375, %dx.types.HitObject* %hit), !dbg !112 ; line:109 col:11 + %add50 = fadd float %add48, %166, !dbg !113 ; line:109 col:8 + %167 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !114 ; line:111 col:3 + %168 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %167), !dbg !114 ; line:111 col:3 + %169 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %168, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !114 ; line:111 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %169, i32 0, float %add50), !dbg !114 ; line:111 col:3 + %170 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !115 ; line:112 col:3 + %171 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %170), !dbg !115 ; line:112 col:3 + %172 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %171, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !115 ; line:112 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32)"(i32 277, %dx.types.Handle %172, i32 4, i32 %add46), !dbg !115 ; line:112 col:3 + %173 = bitcast 
%dx.types.HitObject* %hit to i8*, !dbg !116 ; line:113 col:1 + call void @llvm.lifetime.end(i64 4, i8* %173) #0, !dbg !116 ; line:113 col:1 + ret void, !dbg !116 ; line:113 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #0 + +; Function Attrs: nounwind readnone +declare i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readnone +declare i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readonly +declare i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #2 + +; Function Attrs: nounwind readnone +declare <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readnone +declare float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32)"(i32, 
%dx.types.Handle, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4891 (staging/ser_hlslaccessors_patch, 1ca27ee12)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, !14, null, null} +!14 = !{!15} +!15 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 69, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 68, type: !22, isLocal: false, isDefinition: true, scopeLine: 68, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 69, column: 17, scope: !20) +!24 = !DILocation(line: 75, column: 3, scope: !20) +!25 = !DILocation(line: 80, column: 11, scope: !20) +!26 = !DILocation(line: 81, column: 11, scope: !20) +!27 = !DILocation(line: 81, column: 8, scope: !20) +!28 = 
!DILocation(line: 82, column: 11, scope: !20) +!29 = !DILocation(line: 82, column: 8, scope: !20) +!30 = !DILocation(line: 85, column: 11, scope: !20) +!31 = !DILocation(line: 85, column: 8, scope: !20) +!32 = !DILocation(line: 86, column: 11, scope: !20) +!33 = !DILocation(line: 86, column: 8, scope: !20) +!34 = !DILocation(line: 87, column: 11, scope: !20) +!35 = !DILocation(line: 87, column: 8, scope: !20) +!36 = !DILocation(line: 88, column: 11, scope: !20) +!37 = !DILocation(line: 88, column: 8, scope: !20) +!38 = !DILocation(line: 89, column: 11, scope: !20) +!39 = !DILocation(line: 89, column: 8, scope: !20) +!40 = !DILocation(line: 90, column: 11, scope: !20) +!41 = !DILocation(line: 90, column: 8, scope: !20) +!42 = !DILocation(line: 91, column: 11, scope: !20) +!43 = !DILocation(line: 91, column: 8, scope: !20) +!44 = !DILocation(line: 94, column: 11, scope: !20) +!45 = !DILocation(line: 94, column: 8, scope: !20) +!46 = !DILocation(line: 95, column: 11, scope: !20) +!47 = !DILocation(line: 95, column: 8, scope: !20) +!48 = !DILocation(line: 96, column: 11, scope: !20) +!49 = !DILocation(line: 96, column: 8, scope: !20) +!50 = !DILocation(line: 97, column: 11, scope: !20) +!51 = !DILocation(line: 97, column: 8, scope: !20) +!52 = !DILocation(line: 98, column: 11, scope: !20) +!53 = !DILocation(line: 98, column: 21, scope: !20) +!54 = !DILocation(line: 98, column: 19, scope: !20) +!55 = !DILocation(line: 98, column: 31, scope: !20) +!56 = !DILocation(line: 98, column: 29, scope: !20) +!57 = !DILocation(line: 98, column: 8, scope: !20) +!58 = !DILocation(line: 101, column: 23, scope: !20) +!59 = !DILocation(line: 101, column: 11, scope: !20) +!60 = !DILocation(line: 61, column: 3, scope: !61, inlinedAt: !62) +!61 = !DISubprogram(name: "hashM<3, 4>", scope: !21, file: !21, line: 59, type: !22, isLocal: false, isDefinition: true, scopeLine: 59, flags: DIFlagPrototyped, isOptimized: false) +!62 = distinct !DILocation(line: 101, column: 11, scope: !20) +!63 = 
!DILocation(line: 62, column: 5, scope: !61, inlinedAt: !62) +!64 = !DILocation(line: 61, column: 26, scope: !61, inlinedAt: !62) +!65 = !DILocation(line: 61, column: 21, scope: !61, inlinedAt: !62) +!66 = !DILocation(line: 63, column: 12, scope: !61, inlinedAt: !62) +!67 = !DILocation(line: 63, column: 9, scope: !61, inlinedAt: !62) +!68 = !DILocation(line: 62, column: 28, scope: !61, inlinedAt: !62) +!69 = !DILocation(line: 62, column: 23, scope: !61, inlinedAt: !62) +!70 = !DILocation(line: 101, column: 8, scope: !20) +!71 = !DILocation(line: 102, column: 23, scope: !20) +!72 = !DILocation(line: 102, column: 11, scope: !20) +!73 = !DILocation(line: 61, column: 3, scope: !74, inlinedAt: !75) +!74 = !DISubprogram(name: "hashM<4, 3>", scope: !21, file: !21, line: 59, type: !22, isLocal: false, isDefinition: true, scopeLine: 59, flags: DIFlagPrototyped, isOptimized: false) +!75 = distinct !DILocation(line: 102, column: 11, scope: !20) +!76 = !DILocation(line: 62, column: 5, scope: !74, inlinedAt: !75) +!77 = !DILocation(line: 61, column: 26, scope: !74, inlinedAt: !75) +!78 = !DILocation(line: 61, column: 21, scope: !74, inlinedAt: !75) +!79 = !DILocation(line: 63, column: 12, scope: !74, inlinedAt: !75) +!80 = !DILocation(line: 63, column: 9, scope: !74, inlinedAt: !75) +!81 = !DILocation(line: 62, column: 28, scope: !74, inlinedAt: !75) +!82 = !DILocation(line: 62, column: 23, scope: !74, inlinedAt: !75) +!83 = !DILocation(line: 102, column: 8, scope: !20) +!84 = !DILocation(line: 103, column: 23, scope: !20) +!85 = !DILocation(line: 103, column: 11, scope: !20) +!86 = !DILocation(line: 61, column: 3, scope: !61, inlinedAt: !87) +!87 = distinct !DILocation(line: 103, column: 11, scope: !20) +!88 = !DILocation(line: 62, column: 5, scope: !61, inlinedAt: !87) +!89 = !DILocation(line: 61, column: 26, scope: !61, inlinedAt: !87) +!90 = !DILocation(line: 61, column: 21, scope: !61, inlinedAt: !87) +!91 = !DILocation(line: 63, column: 12, scope: !61, inlinedAt: !87) 
+!92 = !DILocation(line: 63, column: 9, scope: !61, inlinedAt: !87) +!93 = !DILocation(line: 62, column: 28, scope: !61, inlinedAt: !87) +!94 = !DILocation(line: 62, column: 23, scope: !61, inlinedAt: !87) +!95 = !DILocation(line: 103, column: 8, scope: !20) +!96 = !DILocation(line: 104, column: 23, scope: !20) +!97 = !DILocation(line: 104, column: 11, scope: !20) +!98 = !DILocation(line: 61, column: 3, scope: !74, inlinedAt: !99) +!99 = distinct !DILocation(line: 104, column: 11, scope: !20) +!100 = !DILocation(line: 62, column: 5, scope: !74, inlinedAt: !99) +!101 = !DILocation(line: 61, column: 26, scope: !74, inlinedAt: !99) +!102 = !DILocation(line: 61, column: 21, scope: !74, inlinedAt: !99) +!103 = !DILocation(line: 63, column: 12, scope: !74, inlinedAt: !99) +!104 = !DILocation(line: 63, column: 9, scope: !74, inlinedAt: !99) +!105 = !DILocation(line: 62, column: 28, scope: !74, inlinedAt: !99) +!106 = !DILocation(line: 62, column: 23, scope: !74, inlinedAt: !99) +!107 = !DILocation(line: 104, column: 8, scope: !20) +!108 = !DILocation(line: 107, column: 11, scope: !20) +!109 = !DILocation(line: 107, column: 8, scope: !20) +!110 = !DILocation(line: 108, column: 11, scope: !20) +!111 = !DILocation(line: 108, column: 8, scope: !20) +!112 = !DILocation(line: 109, column: 11, scope: !20) +!113 = !DILocation(line: 109, column: 8, scope: !20) +!114 = !DILocation(line: 111, column: 3, scope: !20) +!115 = !DILocation(line: 112, column: 3, scope: !20) +!116 = !DILocation(line: 113, column: 1, scope: !20) diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl new file mode 100644 index 0000000000..7b4182b739 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -0,0 +1,263 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s 
--check-prefix FCGL + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetHitKind +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetHitKind 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetHitKind 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 366 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetInstanceID +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetInstanceID 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetInstanceID 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 367 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetInstanceIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetInstanceIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetInstanceIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 368 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectRayDirection +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectRayDirection 'TResult () const' +// 
AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectRayDirection 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 369 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectRayOrigin +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectRayOrigin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectRayOrigin 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 370 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectToWorld3x4 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectToWorld3x4 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectToWorld3x4 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 371 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectToWorld4x3 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectToWorld4x3 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectToWorld4x3 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 372 
+// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetPrimitiveIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetPrimitiveIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetPrimitiveIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 373 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetRayFlags 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayFlags 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 374 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayTCurrent +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetRayTCurrent 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayTCurrent 'float ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'float' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 375 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayTMin +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | 
|-CXXMethodDecl {{[^ ]+}} <> implicit GetRayTMin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayTMin 'float ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'float' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 376 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetShaderTableIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetShaderTableIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetShaderTableIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 377 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldRayDirection +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldRayDirection 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldRayDirection 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 378 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldRayOrigin +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldRayOrigin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldRayOrigin 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | 
|-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 379 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldToObject3x4 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldToObject3x4 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldToObject3x4 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 380 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldToObject4x3 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldToObject4x3 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldToObject4x3 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 381 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsHit +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsHit 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsHit 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 383 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl 
{{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsMiss 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsMiss 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 384 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsNop 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsNop 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 385 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> LoadLocalRootTableConstant +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRootConstantOffsetInBytes +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit LoadLocalRootTableConstant 'TResult (TRootConstantOffsetInBytes) const' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> RootConstantOffsetInBytes 'TRootConstantOffsetInBytes' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used LoadLocalRootTableConstant 'unsigned int (unsigned int)' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> LoadLocalRootTableConstant 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 386 +// AST-NEXT: | | | |-PureAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | 
|-FunctionTemplateDecl {{[^ ]+}} <> SetShaderTableIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRecordIndex +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit SetShaderTableIndex 'TResult (TRecordIndex) const' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> RecordIndex 'TRecordIndex' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used SetShaderTableIndex 'void (unsigned int)' extern +// AST-NEXT: | | | |-TemplateArgument type 'void' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> SetShaderTableIndex 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 388 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: define void @"\01?main@@YAXXZ"() #0 { +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT:[^ ]+]]) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 388, %dx.types.HitObject* %[[HIT]], i32 1) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 383, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 384, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 385, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 365, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 366, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 368, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 367, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} 
= call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 373, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 377, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %[[HIT]], i32 42) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 379, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 378, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 370, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 369, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.3.4 @"dx.hl.op.rn.%class.matrix.float.3.4 (i32, %dx.types.HitObject*)"(i32 371, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.4.3 @"dx.hl.op.rn.%class.matrix.float.4.3 (i32, %dx.types.HitObject*)"(i32 372, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.3.4 @"dx.hl.op.rn.%class.matrix.float.3.4 (i32, %dx.types.HitObject*)"(i32 380, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.4.3 @"dx.hl.op.rn.%class.matrix.float.4.3 (i32, %dx.types.HitObject*)"(i32 381, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 374, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 376, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 375, %dx.types.HitObject* %[[HIT]]) +// FCGL: ret void + +RWByteAddressBuffer outbuf; + +template +float hashM(in matrix mat) { + float h = 0.f; 
+ for (int i = 0; i < M; ++i) + for (int j = 0; j < N; ++j) + h += mat[i][j]; + return h; +} + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + int isum = 0; + float fsum = 0.0f; + vector vsum = 0; + + ///// Setters + hit.SetShaderTableIndex(1); + + ///// Getters + + // i1 accessors + isum += hit.IsHit(); + isum += hit.IsMiss(); + isum += hit.IsNop(); + + // i32 accessors + isum += hit.GetGeometryIndex(); + isum += hit.GetHitKind(); + isum += hit.GetInstanceIndex(); + isum += hit.GetInstanceID(); + isum += hit.GetPrimitiveIndex(); + isum += hit.GetShaderTableIndex(); + isum += hit.LoadLocalRootTableConstant(42); + + // float3 accessors + vsum += hit.GetWorldRayOrigin(); + vsum += hit.GetWorldRayDirection(); + vsum += hit.GetObjectRayOrigin(); + vsum += hit.GetObjectRayDirection(); + fsum += vsum[0] + vsum[1] + vsum[2]; + + // matrix accessors + fsum += hashM<3, 4>(hit.GetObjectToWorld3x4()); + fsum += hashM<4, 3>(hit.GetObjectToWorld4x3()); + fsum += hashM<3, 4>(hit.GetWorldToObject3x4()); + fsum += hashM<4, 3>(hit.GetWorldToObject4x3()); + + // f32 accessors + isum += hit.GetRayFlags(); + fsum += hit.GetRayTMin(); + fsum += hit.GetRayTCurrent(); + + outbuf.Store(0, fsum); + outbuf.Store(4, isum); +} From 847d5ad29ed989b489e9404e0a94f6d1bdfeeb25 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Wed, 23 Apr 2025 18:36:15 +0200 Subject: [PATCH 08/93] [SER] HitObject::GetAttributes HLSL -> DXIL lowering and attributes sema (#7361) Lowering for `HitObject::GetAttributes()` Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md DXC SER implementation tracker:: https://github.com/microsoft/DirectXShaderCompiler/issues/7214 --- lib/HLSL/HLOperationLower.cpp | 18 ++- .../clang/Basic/DiagnosticSemaKinds.td | 4 +- tools/clang/lib/Sema/SemaDXR.cpp | 12 +- tools/clang/lib/Sema/SemaHLSL.cpp | 63 ++++++--- .../HitObject/hitobject_attributes.hlsl | 26 ++++ .../DxilGen/hitobject_attributes_dxilgen.ll | 130 
++++++++++++++++++ .../HitObject/hitobject_attributes.hlsl | 28 ++++ .../hitobject_attributes_invalid_longvec.hlsl | 14 ++ .../hitobject_attributes_invalid_udt.hlsl | 14 ++ 9 files changed, 284 insertions(+), 25 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 1e43cce07c..f8a9f528cc 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6375,7 +6375,23 @@ Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + + Type *AttrTy = cast(CI->getType())->getPointerElementType(); + + IRBuilder<> EntryBuilder( + dxilutil::FindAllocaInsertionPt(CI->getParent()->getParent())); + unsigned AttrAlign = Helper.dataLayout.getABITypeAlignment(AttrTy); + AllocaInst *AttrMem = EntryBuilder.CreateAlloca(AttrTy); + AttrMem->setAlignment(AttrAlign); + Constant *opArg = OP->GetU32Const((unsigned)OpCode); + TrivialDxilOperation(OpCode, {opArg, HitObject, AttrMem}, CI->getType(), + Helper.voidTy, OP, Builder); + return AttrMem; } Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6254e5fc71..ae7e777180 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7646,7 +7646,7 @@ def err_payload_requires_inout : Error< def err_attributes_requiers_in : Error< "intersection attributes parameter %0 must be 'in'">; def err_payload_attrs_must_be_udt : Error< - "%select{payload|attributes|callable}0 parameter %1 must be a user-defined type composed of only numeric types">; + "%select{payload|attributes|callable}0 %select{parameter %2|type}1 must be a user-defined type composed of only numeric types">; def err_shader_must_return_void : Error< "return type for '%0' shaders must be void">; def err_raytracing_entry_param_count : Error< @@ -7885,7 +7885,7 @@ def err_hlsl_unsupported_long_vector "cbuffers or tbuffers|user-defined struct parameter|" "entry function parameters|entry function return type|" "patch constant function parameters|patch constant function return type|" - "payload parameters}0 are not supported">; + "payload parameters|attributes}0 are not supported">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index e5b2140cca..f0102f9e3f 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -829,7 +829,8 @@ void DiagnoseBuiltinCallWithPayload(Sema &S, const VarDecl *Payload, // Verify that the payload type is legal if (!hlsl::IsHLSLCopyableAnnotatableRecord(Payload->getType())) { S.Diag(Payload->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ 0 << Payload; + << /*payload|attributes|callable*/ 0 << /*parameter %2|type*/ 0 + << Payload; return; } @@ -1194,7 +1195,8 @@ void DiagnoseCallableEntry(Sema &S, 
FunctionDecl *FD, if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ 2 << Param; + << /*payload|attributes|callable*/ 2 << /*parameter %2|type*/ 0 + << Param; } return; } @@ -1235,7 +1237,8 @@ void DiagnoseMissOrAnyHitEntry(Sema &S, FunctionDecl *FD, if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ Idx << Param; + << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 + << Param; } } return; @@ -1288,7 +1291,8 @@ void DiagnoseClosestHitEntry(Sema &S, FunctionDecl *FD, if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ Idx << Param; + << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 + << Param; } } return; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index bddf834509..6eadfeaac9 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -10770,6 +10770,22 @@ HLSLExternalSource::ApplyTypeSpecSignToParsedType(clang::QualType &type, } } +bool DiagnoseIntersectionAttributes(Sema &S, SourceLocation Loc, QualType Ty) { + // Must be a UDT + if (Ty.isNull() || !hlsl::IsHLSLCopyableAnnotatableRecord(Ty)) { + S.Diag(Loc, diag::err_payload_attrs_must_be_udt) + << /*payload|attributes|callable*/ 1 << /*parameter %2|type*/ 1; + return false; + } + + if (ContainsLongVector(Ty)) { + const unsigned AttributesIdx = 11; + S.Diag(Loc, diag::err_hlsl_unsupported_long_vector) << AttributesIdx; + return false; + } + return true; +} + Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL( FunctionTemplateDecl *FunctionTemplate, @@ -10878,6 +10894,7 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( LPCSTR tableName = cursor.GetTableName(); // Currently only 
intrinsic we allow for explicit template arguments are // for Load/Store for ByteAddressBuffer/RWByteAddressBuffer + // and HitObject::GetAttributes with user-defined intersection attributes. // Check Explicit template arguments UINT intrinsicOp = (*cursor)->Op; @@ -10892,28 +10909,38 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( IsBABLoad = intrinsicOp == (UINT)IntrinsicOp::MOP_Load; IsBABStore = intrinsicOp == (UINT)IntrinsicOp::MOP_Store; } - if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() > 0) { - bool isLegalTemplate = false; + bool IsHitObjectGetAttributes = + intrinsicOp == (UINT)IntrinsicOp::MOP_DxHitObject_GetAttributes; + if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() >= 1) { SourceLocation Loc = ExplicitTemplateArgs->getLAngleLoc(); - auto TemplateDiag = diag::err_hlsl_intrinsic_template_arg_unsupported; - if (ExplicitTemplateArgs->size() >= 1 && (IsBABLoad || IsBABStore)) { - TemplateDiag = diag::err_hlsl_intrinsic_template_arg_requires_2018; - Loc = (*ExplicitTemplateArgs)[0].getLocation(); - if (Is2018) { - TemplateDiag = diag::err_hlsl_intrinsic_template_arg_numeric; - if (ExplicitTemplateArgs->size() == 1 && - !functionTemplateTypeArg.isNull() && - hlsl::IsHLSLNumericOrAggregateOfNumericType( - functionTemplateTypeArg)) { - isLegalTemplate = true; - } - } + if (!IsBABLoad && !IsBABStore && !IsHitObjectGetAttributes) { + getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_unsupported) + << intrinsicName; + return Sema::TemplateDeductionResult::TDK_Invalid; } - - if (!isLegalTemplate) { - getSema()->Diag(Loc, TemplateDiag) << intrinsicName; + Loc = (*ExplicitTemplateArgs)[0].getLocation(); + if (!Is2018) { + getSema()->Diag(Loc, + diag::err_hlsl_intrinsic_template_arg_requires_2018) + << intrinsicName; return Sema::TemplateDeductionResult::TDK_Invalid; } + + if (IsBABLoad || IsBABStore) { + const bool IsLegalTemplate = + !functionTemplateTypeArg.isNull() && + hlsl::IsHLSLNumericOrAggregateOfNumericType( + 
functionTemplateTypeArg); + if (!IsLegalTemplate) { + getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_numeric) + << intrinsicName; + return Sema::TemplateDeductionResult::TDK_Invalid; + } + } + if (IsHitObjectGetAttributes && + !DiagnoseIntersectionAttributes(*getSema(), Loc, + functionTemplateTypeArg)) + return Sema::TemplateDeductionResult::TDK_Invalid; } else if (IsBABStore) { // Prior to HLSL 2018, Store operation only stored scalar uint. if (!Is2018) { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl new file mode 100644 index 0000000000..03cefe8e48 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %[[APTR:[^ ]+]] = alloca %struct.CustomAttrs, align 4 +// DXIL: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL: call void @dx.op.hitObject_Attributes.struct.CustomAttrs(i32 289, %dx.types.HitObject %[[NOP]], %struct.CustomAttrs* nonnull %[[APTR]]) ; HitObject_Attributes(hitObject,attributes) +// DXIL: %[[VPTR:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[APTR]], i32 0, i32 0 +// DXIL: %{{[^ ]+}} = load <4 x float>, <4 x float>* %[[VPTR]], align 4 +// DXIL: %[[IPTR:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[APTR]], i32 0, i32 1 +// DXIL: %{{[^ ]+}} = load i32, i32* %[[IPTR]], align 4 +// DXIL: ret void + +RWByteAddressBuffer outbuf; + +struct +CustomAttrs { + float4 v; + int y; +}; + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + CustomAttrs attrs = hit.GetAttributes(); + float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y; + outbuf.Store(0, sum); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll 
b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll new file mode 100644 index 0000000000..4887be4d58 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll @@ -0,0 +1,130 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; outbuf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%dx.types.HitObject = type { i8* } +%struct.CustomAttrs = type { <4 x float>, i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; CHECK: %[[ATTRA:[^ ]+]] = alloca %struct.CustomAttrs, align 4 +; CHECK: call void @dx.op.hitObject_Attributes.struct.CustomAttrs(i32 289, %dx.types.HitObject %{{[^ ]+}}, %struct.CustomAttrs* %[[ATTRA]]) + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !21 ; line:22 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !21 ; line:22 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !25 ; line:22 col:17 + %2 = call %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %hit), !dbg !26 ; line:23 col:23 + %3 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %2, i32 0, i32 0, !dbg !26 ; line:23 col:23 + %4 = load <4 x float>, <4 x 
float>* %3, !dbg !26 ; line:23 col:23 + %5 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %2, i32 0, i32 1, !dbg !26 ; line:23 col:23 + %6 = load i32, i32* %5, !dbg !26 ; line:23 col:23 + %7 = extractelement <4 x float> %4, i32 0, !dbg !27 ; line:24 col:15 + %8 = extractelement <4 x float> %4, i32 1, !dbg !28 ; line:24 col:27 + %add = fadd float %7, %8, !dbg !29 ; line:24 col:25 + %9 = extractelement <4 x float> %4, i32 2, !dbg !30 ; line:24 col:39 + %add4 = fadd float %add, %9, !dbg !31 ; line:24 col:37 + %10 = extractelement <4 x float> %4, i32 3, !dbg !32 ; line:24 col:51 + %add6 = fadd float %add4, %10, !dbg !33 ; line:24 col:49 + %conv = sitofp i32 %6 to float, !dbg !34 ; line:24 col:63 + %add7 = fadd float %add6, %conv, !dbg !35 ; line:24 col:61 + %11 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !36 ; line:25 col:3 + %12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %11), !dbg !36 ; line:25 col:3 + %13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %12, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !36 ; line:25 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %13, i32 0, float %add7), !dbg !36 ; line:25 col:3 + %14 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !37 ; line:26 col:1 + call void @llvm.lifetime.end(i64 4, i8* %14) #0, !dbg !37 ; line:26 col:1 + ret void, !dbg !37 ; line:26 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* 
(i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !10} +!dx.entryPoints = !{!14} +!dx.fnprops = !{!18} +!dx.options = !{!19, !20} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.CustomAttrs undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 20, !8, !9} +!8 = !{i32 6, !"v", i32 3, i32 0, i32 7, i32 9, i32 13, i32 4} +!9 = !{i32 6, !"y", i32 3, i32 16, i32 7, i32 4} +!10 = !{i32 1, void ()* @"\01?main@@YAXXZ", !11} +!11 = !{!12} +!12 = !{i32 1, !13, !13} +!13 = !{} +!14 = !{null, !"", null, !15, null} +!15 = !{null, !16, null, null} +!16 = !{!17} +!17 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!18 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!19 = !{i32 -2147483584} +!20 = !{i32 -1} +!21 = !DILocation(line: 22, column: 3, 
scope: !22) +!22 = !DISubprogram(name: "main", scope: !23, file: !23, line: 21, type: !24, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!23 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl", directory: "") +!24 = !DISubroutineType(types: !13) +!25 = !DILocation(line: 22, column: 17, scope: !22) +!26 = !DILocation(line: 23, column: 23, scope: !22) +!27 = !DILocation(line: 24, column: 15, scope: !22) +!28 = !DILocation(line: 24, column: 27, scope: !22) +!29 = !DILocation(line: 24, column: 25, scope: !22) +!30 = !DILocation(line: 24, column: 39, scope: !22) +!31 = !DILocation(line: 24, column: 37, scope: !22) +!32 = !DILocation(line: 24, column: 51, scope: !22) +!33 = !DILocation(line: 24, column: 49, scope: !22) +!34 = !DILocation(line: 24, column: 63, scope: !22) +!35 = !DILocation(line: 24, column: 61, scope: !22) +!36 = !DILocation(line: 25, column: 3, scope: !22) +!37 = !DILocation(line: 26, column: 1, scope: !22) diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl new file mode 100644 index 0000000000..79db78cdaf --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetAttributes +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetAttributes 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetAttributes 'CustomAttrs &()' extern +// AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> 
Implicit "op" "" 364 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %{{[^ ]+}} = call %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}) + +RWByteAddressBuffer outbuf; + +struct +CustomAttrs { + float4 v; + int y; +}; + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + CustomAttrs attrs = hit.GetAttributes(); + float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y; + outbuf.Store(0, sum); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl new file mode 100644 index 0000000000..240ccfb9d4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +struct +CustomAttrs { + vector v; + int y; +}; + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + // expected-error@+1{{vectors of over 4 elements in attributes are not supported}} + CustomAttrs attrs = hit.GetAttributes(); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl new file mode 100644 index 0000000000..0f27f089e4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T lib_6_9 -E main %s -verify + +struct +CustomAttrs { + vector v; + RWStructuredBuffer buf; +}; + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + // expected-error@+1{{attributes type must be a user-defined type composed of only numeric types}} + CustomAttrs attrs = hit.GetAttributes(); +} From 624665f3987d379a299aa14dbc53e0cd3b96afea Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 24 Apr 2025 
19:01:41 +0200 Subject: [PATCH 09/93] [SER] HitObject::FromRayQuery HLSL -> DXIL lowering (#7370) * HLSL -> DXIL lowering * ast, hlsl->dxil, dxilgen, and ScalarReplAggregatesHLSL tests SER implementation tracker (#7214) --- include/dxc/HLSL/HLOperations.h | 4 + lib/HLSL/HLOperationLower.cpp | 27 +- .../Scalar/ScalarReplAggregatesHLSL.cpp | 25 ++ tools/clang/lib/Sema/SemaHLSL.cpp | 1 + .../HitObject/hitobject_fromrayquery.hlsl | 37 ++ .../DxilGen/hitobject_fromrayquery_dxilgen.ll | 146 +++++++ .../hitobject_fromrayquery_scalarrepl.ll | 383 ++++++++++++++++++ .../HitObject/hitobject_fromrayquery.hlsl | 72 ++++ 8 files changed, 694 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 970ddd3e85..0e9b8c2710 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -441,6 +441,10 @@ const unsigned kHitObjectMakeMissRayDescOpIdx = 4; const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; const unsigned kHitObjectTraceRay_NumOp = 10; +// HitObject::FromRayQuery +const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; +const unsigned kHitObjectFromRayQuery_WithAttrs_NumOp = 5; + } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index f8a9f528cc..4ef7591e89 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6312,7 +6312,32 @@ Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper 
*pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Value *RayQuery = CI->getArgOperand(SrcIdx++); + + if (CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { + Value *HitKind = CI->getArgOperand(SrcIdx++); + Value *AttribSrc = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + OpCode = DXIL::OpCode::HitObject_FromRayQueryWithAttrs; + Type *AttrTy = AttribSrc->getType(); + Value *OutHitObject = TrivialDxilOperation( + OpCode, {nullptr, RayQuery, HitKind, AttribSrc}, AttrTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; + } + + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + OpCode = DXIL::OpCode::HitObject_FromRayQuery; + Value *OutHitObject = + TrivialDxilOperation(OpCode, {nullptr, RayQuery}, Helper.voidTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index b13e9a0f5d..20265af40a 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2795,6 +2795,31 @@ void SROA_Helper::RewriteCall(CallInst *CI) { } } LLVM_FALLTHROUGH; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: { + const bool IsWithAttrs = + CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp; + if (IsWithAttrs && + (OldVal == + CI->getArgOperand( + HLOperandIndex:: + kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx))) { + RewriteCallArg( + CI, + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, + /*bIn*/ true, /*bOut*/ false); + break; + } + + // For RayQuery methods, we want to 
replace the RayQuery this pointer + // with a load and use of the underlying handle value. + // This will allow elimination of RayQuery types earlier. + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + break; + } + LLVM_FALLTHROUGH; default: // RayQuery this pointer replacement. if (OldVal->getType()->isPointerTy() && diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6eadfeaac9..5131d39f44 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -12093,6 +12093,7 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_FromRayQuery: case hlsl::IntrinsicOp::MOP_DxHitObject_Invoke: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl new file mode 100644 index 0000000000..33ea2719be --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %[[RQ:[^ ]+]]) ; HitObject_FromRayQuery(rayQueryHandle) +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %[[RQ]], i32 16, %struct.CustomAttrs* nonnull %{{[^ ]+}}) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + +RaytracingAccelerationStructure RTAS; +RWStructuredBuffer UAV : register(u0); + +RayDesc MakeRayDesc() { + RayDesc desc; + desc.Origin = float3(0, 0, 0); + desc.Direction = float3(1, 0, 0); + desc.TMin = 0.0f; + 
desc.TMax = 9999.0; + return desc; +} + +struct CustomAttrs { + float x; + float y; +}; + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + RayQuery q; + RayDesc ray = MakeRayDesc(); + q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray); + + Use(dx::HitObject::FromRayQuery(q)); + + CustomAttrs attrs = {1.f, 2.f}; + Use(dx::HitObject::FromRayQuery(q, 16, attrs)); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll new file mode 100644 index 0000000000..0ae8e36fa7 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll @@ -0,0 +1,146 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.CustomAttrs = type { float, float } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.dx::HitObject" = type { i32 } +%"class.RayQuery<5, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; CHECK: %[[ATTRA:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQH:[^ ]+]], %dx.types.Handle %{{[^ ]+}}, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, 
float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %[[RQH]]) +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %[[RQH]], i32 16, %struct.CustomAttrs* %[[ATTRA]]) + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %0 = alloca %struct.CustomAttrs + %agg.tmp = alloca %dx.types.HitObject, align 4 + %agg.tmp1 = alloca %dx.types.HitObject, align 4 + %q2 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 5, i32 0), !dbg !38 ; line:29 col:78 + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !42 ; line:31 col:3 + %2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %1), !dbg !42 ; line:31 col:3 + %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !42 ; line:31 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %q2, %dx.types.Handle %3, i32 0, i32 255, <3 x float> zeroinitializer, float 0.000000e+00, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float 9.999000e+03), !dbg !42 ; line:31 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 363, %dx.types.HitObject* %agg.tmp, i32 %q2), !dbg !43 ; line:33 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp) #0, !dbg !44 ; line:24 col:3 + %.0 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %0, i32 0, i32 0 + 
store float 1.000000e+00, float* %.0 + %.1 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %0, i32 0, i32 1 + store float 2.000000e+00, float* %.1, align 4 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp1, i32 %q2, i32 16, %struct.CustomAttrs* %0), !dbg !47 ; line:36 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp1) #0, !dbg !48 ; line:24 col:3 + ret void, !dbg !50 ; line:37 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #0 + +attributes #0 = { nounwind } 
+attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !26} +!dx.entryPoints = !{!30} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %"class.dx::HitObject" undef, !15, %"class.RayQuery<5, 0>" undef, !17, %struct.CustomAttrs undef, !23} +!5 = !{i32 4, !6, !7} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!7 = !{i32 0, !8} +!8 = !{!9} +!9 = !{i32 0, float undef} +!10 = !{i32 32, !11, !12, !13, !14} +!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!15 = !{i32 4, !16} +!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!17 = !{i32 4, !18, !19} +!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!19 = !{i32 0, !20} +!20 = !{!21, !22} +!21 = !{i32 1, i64 5} +!22 = !{i32 1, i64 0} +!23 = !{i32 8, !24, !25} +!24 = !{i32 6, !"x", i32 3, i32 0, i32 7, i32 9} +!25 = !{i32 6, !"y", i32 3, i32 4, i32 7, i32 9} +!26 = !{i32 1, void ()* @"\01?main@@YAXXZ", !27} +!27 = !{!28} +!28 = !{i32 1, !29, !29} +!29 = !{} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, null, null, null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!36 = !{i32 -2147483584} +!37 = !{i32 -1} +!38 = !DILocation(line: 29, column: 78, scope: !39) +!39 = !DISubprogram(name: "main", scope: !40, file: !40, line: 28, type: !41, isLocal: false, isDefinition: true, 
scopeLine: 28, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!40 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl", directory: "") +!41 = !DISubroutineType(types: !29) +!42 = !DILocation(line: 31, column: 3, scope: !39) +!43 = !DILocation(line: 33, column: 7, scope: !39) +!44 = !DILocation(line: 24, column: 3, scope: !45, inlinedAt: !46) +!45 = !DISubprogram(name: "Use", scope: !40, file: !40, line: 23, type: !41, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false) +!46 = distinct !DILocation(line: 33, column: 3, scope: !39) +!47 = !DILocation(line: 36, column: 7, scope: !39) +!48 = !DILocation(line: 24, column: 3, scope: !45, inlinedAt: !49) +!49 = distinct !DILocation(line: 36, column: 3, scope: !39) +!50 = !DILocation(line: 37, column: 1, scope: !39) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll new file mode 100644 index 0000000000..5afd30b524 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll @@ -0,0 +1,383 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; COM: Original HLSL code +; COM: RaytracingAccelerationStructure RTAS; +; COM: RWStructuredBuffer UAV : register(u0); +; COM: RWByteAddressBuffer inbuf; +; COM: RWByteAddressBuffer outbuf; +; COM: +; COM: RayDesc MakeRayDesc() { +; COM: RayDesc desc; +; COM: desc.Origin = float3(0, 0, 0); +; COM: desc.Direction = float3(1, 0, 0); +; COM: desc.TMin = 0.0f; +; COM: desc.TMax = 9999.0; +; COM: return desc; +; COM: } +; COM: +; COM: struct CustomAttrs { +; COM: float x; +; COM: float y; +; COM: }; +; COM: +; COM: void Use(in dx::HitObject hit) { +; COM: dx::MaybeReorderThread(hit); +; COM: } +; COM: +; COM: [shader("raygeneration")] +; COM: void main() { +; COM: 
RayQuery q; +; COM: RayDesc ray = MakeRayDesc(); +; COM: q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray); +; COM: +; COM: Use(dx::HitObject::FromRayQuery(q)); +; COM: +; COM: CustomAttrs attrs; +; COM: attrs.x = inbuf.Load(0); +; COM: attrs.y = inbuf.Load(4); +; COM: Use(dx::HitObject::FromRayQuery(q, 16, attrs)); +; COM: +; COM: attrs.x = inbuf.Load(8); +; COM: attrs.y = inbuf.Load(12); +; COM: Use(dx::HitObject::FromRayQuery(q, 17, attrs)); +; COM: +; COM: outbuf.Store(0, attrs.x); +; COM: outbuf.Store(4, attrs.y); +; COM: } + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; Resource bind info for UAV +; { +; +; float $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; UAV UAV struct r/w U0 u0 1 +; inbuf UAV byte r/w U1u4294967295,space4294967295 1 +; outbuf UAV byte r/w U2u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%ConstantBuffer = type opaque +%"class.RayQuery<5, 0>" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%dx.types.HitObject = type { i8* } +%struct.CustomAttrs = type { float, float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?UAV@@3V?$RWStructuredBuffer@M@@A" = external global %"class.RWStructuredBuffer", align 4 
+@"\01?inbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"$Globals" = external constant %ConstantBuffer + +; CHECK: %[[RQA:[^ ]+]] = alloca i32 +; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: %[[XATTRA:[^ ]+]] = alloca float +; CHECK: %[[YATTRA:[^ ]+]] = alloca float + +; COM: Check same query handle used for TraceRayInline and the FromRayQuery calls +; CHECK: %[[RQH:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQH]], + +; COM: Check RQ handle loaded for first FromRayQuery call +; CHECK: %[[RQH0:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH0]]) + +; COM: Check buffer loads for first FromRayQuery-with-attrs call +; CHECK: %[[XI0:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 0) +; CHECK: %[[XF0:[^ ]+]] = uitofp i32 %[[XI0]] to float +; CHECK: store float %[[XF0]], float* %[[XATTRA]], align 4 +; CHECK: %[[YI0:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 4) +; CHECK: %[[YF0:[^ ]+]] = uitofp i32 %[[YI0]] to float +; CHECK: store float %[[YF0]], float* %[[YATTRA]], align 4 + +; COM: Check that values from buffer flow into first FromRayQuery-with-attrs call +; CHECK: %[[XPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 0 +; CHECK: %[[XF1:[^ ]+]] = load float, float* %[[XATTRA]] +; CHECK: store float %[[XF1]], float* %[[XPTR0]] +; CHECK: %[[YPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 1 +; CHECK: 
%[[YF1:[^ ]+]] = load float, float* %[[YATTRA]] +; CHECK: store float %[[YF1]], float* %[[YPTR0]], align 4 +; CHECK: %[[RQH1:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH1]], i32 16, %struct.CustomAttrs* %[[ATTRA0]]) + +; COM: Check buffer loads for second FromRayQuery-with-attrs call +; CHECK: %[[XI1:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 8) +; CHECK: %[[XF1:[^ ]+]] = uitofp i32 %[[XI1]] to float +; CHECK: store float %[[XF1]], float* %[[XATTRA]], align 4 +; CHECK: %[[YI1:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 12) +; CHECK: %[[YF1:[^ ]+]] = uitofp i32 %[[YI1]] to float +; CHECK: store float %[[YF1]], float* %[[YATTRA]], align 4 + +; COM: Check that values from buffer flow into second FromRayQuery-with-attrs call +; CHECK: %[[XPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 0 +; CHECK: %[[XF2:[^ ]+]] = load float, float* %[[XATTRA]] +; CHECK: store float %[[XF2]], float* %[[XPTR1]] +; CHECK: %[[YPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 1 +; CHECK: %[[YF2:[^ ]+]] = load float, float* %[[YATTRA]] +; CHECK: store float %[[YF2]], float* %[[YPTR1]], align 4 +; CHECK: %[[RQH2:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH2]], i32 17, %struct.CustomAttrs* %[[ATTRA1]]) + + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %q = alloca %"class.RayQuery<5, 0>", align 4 + %ray = alloca %struct.RayDesc, align 4 + %agg.tmp = alloca %dx.types.HitObject, align 4 + %attrs = alloca %struct.CustomAttrs, align 4 + %agg.tmp4 = alloca 
%dx.types.HitObject, align 4 + %agg.tmp11 = alloca %dx.types.HitObject, align 4 + %0 = bitcast %"class.RayQuery<5, 0>"* %q to i8*, !dbg !45 ; line:26 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !45 ; line:26 col:3 + %q14 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 5, i32 0), !dbg !49 ; line:26 col:78 + %1 = getelementptr inbounds %"class.RayQuery<5, 0>", %"class.RayQuery<5, 0>"* %q, i32 0, i32 0, !dbg !49 ; line:26 col:78 + store i32 %q14, i32* %1, !dbg !49 ; line:26 col:78 + %2 = bitcast %struct.RayDesc* %ray to i8*, !dbg !50 ; line:27 col:3 + call void @llvm.lifetime.start(i64 32, i8* %2) #0, !dbg !50 ; line:27 col:3 + %Origin.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !51 ; line:8 col:8 + store <3 x float> zeroinitializer, <3 x float>* %Origin.i, align 4, !dbg !54, !tbaa !55, !alias.scope !58 ; line:8 col:15 + %Direction.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !61 ; line:9 col:8 + store <3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, <3 x float>* %Direction.i, align 4, !dbg !62, !tbaa !55, !alias.scope !58 ; line:9 col:18 + %TMin.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !63 ; line:10 col:8 + store float 0.000000e+00, float* %TMin.i, align 4, !dbg !64, !tbaa !65, !alias.scope !58 ; line:10 col:13 + %TMax.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !67 ; line:11 col:8 + store float 9.999000e+03, float* %TMax.i, align 4, !dbg !68, !tbaa !65, !alias.scope !58 ; line:11 col:13 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !69 ; line:28 col:3 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !69 ; line:28 col:3 + %5 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !69 ; line:28 col:3 + call void @"dx.hl.op..void (i32, %\22class.RayQuery<5, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<5, 0>"* %q, %dx.types.Handle %5, i32 0, i32 255, %struct.RayDesc* %ray), !dbg !69 ; line:28 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32 363, %dx.types.HitObject* %agg.tmp, %"class.RayQuery<5, 0>"* %q), !dbg !70 ; line:30 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp) #0, !dbg !71 ; line:21 col:3 + %6 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !74 ; line:32 col:3 + call void @llvm.lifetime.start(i64 8, i8* %6) #0, !dbg !74 ; line:32 col:3 + %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !75 ; line:33 col:13 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7), !dbg !75 ; line:33 col:13 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !75 ; line:33 col:13 + %10 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 0), !dbg !75 ; line:33 col:13 + %conv = uitofp i32 %10 to float, !dbg !75 ; line:33 col:13 + %x = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !76 ; line:33 col:9 + store float %conv, float* %x, align 4, !dbg !77, !tbaa !65 ; line:33 col:11 + %11 = load 
%struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !78 ; line:34 col:13 + %12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %11), !dbg !78 ; line:34 col:13 + %13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %12, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !78 ; line:34 col:13 + %14 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %13, i32 4), !dbg !78 ; line:34 col:13 + %conv3 = uitofp i32 %14 to float, !dbg !78 ; line:34 col:13 + %y = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !79 ; line:34 col:9 + store float %conv3, float* %y, align 4, !dbg !80, !tbaa !65 ; line:34 col:11 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp4, %"class.RayQuery<5, 0>"* %q, i32 16, %struct.CustomAttrs* %attrs), !dbg !81 ; line:35 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp4) #0, !dbg !82 ; line:21 col:3 + %15 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !84 ; line:37 col:13 + %16 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %15), !dbg !84 ; line:37 col:13 + %17 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %16, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !84 ; line:37 col:13 + %18 = call i32 @"dx.hl.op.ro.i32 
(i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %17, i32 8), !dbg !84 ; line:37 col:13 + %conv6 = uitofp i32 %18 to float, !dbg !84 ; line:37 col:13 + %x7 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !85 ; line:37 col:9 + store float %conv6, float* %x7, align 4, !dbg !86, !tbaa !65 ; line:37 col:11 + %19 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !87 ; line:38 col:13 + %20 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %19), !dbg !87 ; line:38 col:13 + %21 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %20, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !87 ; line:38 col:13 + %22 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %21, i32 12), !dbg !87 ; line:38 col:13 + %conv9 = uitofp i32 %22 to float, !dbg !87 ; line:38 col:13 + %y10 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !88 ; line:38 col:9 + store float %conv9, float* %y10, align 4, !dbg !89, !tbaa !65 ; line:38 col:11 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp11, %"class.RayQuery<5, 0>"* %q, i32 17, %struct.CustomAttrs* %attrs), !dbg !90 ; line:39 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp11) #0, !dbg !91 ; line:21 col:3 + %x12 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !93 ; line:41 col:25 + %23 = load float, float* %x12, align 4, !dbg !93, !tbaa !65 ; line:41 col:25 + %24 = load %struct.RWByteAddressBuffer, 
%struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !94 ; line:41 col:3 + %25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %24), !dbg !94 ; line:41 col:3 + %26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %25, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !94 ; line:41 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %26, i32 0, float %23), !dbg !94 ; line:41 col:3 + %y13 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !95 ; line:42 col:25 + %27 = load float, float* %y13, align 4, !dbg !95, !tbaa !65 ; line:42 col:25 + %28 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !96 ; line:42 col:3 + %29 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %28), !dbg !96 ; line:42 col:3 + %30 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %29, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !96 ; line:42 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %30, i32 4, float %27), !dbg !96 ; line:42 col:3 + %31 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 8, i8* %31) #0, !dbg !97 ; line:43 col:1 + %32 = bitcast %struct.RayDesc* %ray to i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 32, i8* %32) #0, !dbg !97 ; line:43 col:1 + %33 = bitcast %"class.RayQuery<5, 0>"* %q to 
i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 4, i8* %33) #0, !dbg !97 ; line:43 col:1 + ret void, !dbg !97 ; line:43 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<5, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<5, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32, %dx.types.HitObject*, %"class.RayQuery<5, 0>"*) #0 + +; Function Attrs: nounwind readonly +declare i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, 
%dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, %"class.RayQuery<5, 0>"*, i32, %struct.CustomAttrs*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !26} +!dx.entryPoints = !{!30} +!dx.fnprops = !{!42} +!dx.options = !{!43, !44} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %"class.dx::HitObject" undef, !15, %"class.RayQuery<5, 0>" undef, !17, %struct.CustomAttrs undef, !23} +!5 = !{i32 4, !6, !7} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!7 = !{i32 0, !8} +!8 = !{!9} +!9 = !{i32 0, float undef} +!10 = !{i32 32, !11, !12, !13, !14} +!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!15 = !{i32 4, !16} +!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!17 = !{i32 4, !18, !19} +!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!19 = !{i32 0, !20} +!20 = !{!21, !22} +!21 = !{i32 1, i64 5} +!22 = !{i32 1, i64 0} +!23 = !{i32 8, !24, !25} +!24 = !{i32 6, !"x", i32 3, i32 0, i32 7, i32 9} +!25 = !{i32 6, !"y", i32 3, i32 4, i32 7, i32 9} +!26 = !{i32 1, void ()* @"\01?main@@YAXXZ", !27} +!27 = !{!28} +!28 = !{i32 1, !29, !29} +!29 = !{} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, !35, !40, 
null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{!36, !38, !39} +!36 = !{i32 0, %"class.RWStructuredBuffer"* @"\01?UAV@@3V?$RWStructuredBuffer@M@@A", !"UAV", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !37} +!37 = !{i32 1, i32 4} +!38 = !{i32 1, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !"inbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!39 = !{i32 2, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!40 = !{!41} +!41 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!42 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!43 = !{i32 -2147483584} +!44 = !{i32 -1} +!45 = !DILocation(line: 26, column: 3, scope: !46) +!46 = !DISubprogram(name: "main", scope: !47, file: !47, line: 25, type: !48, isLocal: false, isDefinition: true, scopeLine: 25, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!47 = !DIFile(filename: "hitobject_fromrayquery_scalarrepl.hlsl", directory: "") +!48 = !DISubroutineType(types: !29) +!49 = !DILocation(line: 26, column: 78, scope: !46) +!50 = !DILocation(line: 27, column: 3, scope: !46) +!51 = !DILocation(line: 8, column: 8, scope: !52, inlinedAt: !53) +!52 = !DISubprogram(name: "MakeRayDesc", scope: !47, file: !47, line: 6, type: !48, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: false) +!53 = distinct !DILocation(line: 27, column: 17, scope: !46) +!54 = !DILocation(line: 8, column: 15, scope: !52, inlinedAt: !53) +!55 = !{!56, !56, i64 0} +!56 = !{!"omnipotent char", !57, i64 0} +!57 = !{!"Simple C/C++ TBAA"} +!58 = !{!59} +!59 = distinct !{!59, !60, !"\01?MakeRayDesc@@YA?AURayDesc@@XZ: %agg.result"} +!60 = 
distinct !{!60, !"\01?MakeRayDesc@@YA?AURayDesc@@XZ"} +!61 = !DILocation(line: 9, column: 8, scope: !52, inlinedAt: !53) +!62 = !DILocation(line: 9, column: 18, scope: !52, inlinedAt: !53) +!63 = !DILocation(line: 10, column: 8, scope: !52, inlinedAt: !53) +!64 = !DILocation(line: 10, column: 13, scope: !52, inlinedAt: !53) +!65 = !{!66, !66, i64 0} +!66 = !{!"float", !56, i64 0} +!67 = !DILocation(line: 11, column: 8, scope: !52, inlinedAt: !53) +!68 = !DILocation(line: 11, column: 13, scope: !52, inlinedAt: !53) +!69 = !DILocation(line: 28, column: 3, scope: !46) +!70 = !DILocation(line: 30, column: 7, scope: !46) +!71 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !73) +!72 = !DISubprogram(name: "Use", scope: !47, file: !47, line: 20, type: !48, isLocal: false, isDefinition: true, scopeLine: 20, flags: DIFlagPrototyped, isOptimized: false) +!73 = distinct !DILocation(line: 30, column: 3, scope: !46) +!74 = !DILocation(line: 32, column: 3, scope: !46) +!75 = !DILocation(line: 33, column: 13, scope: !46) +!76 = !DILocation(line: 33, column: 9, scope: !46) +!77 = !DILocation(line: 33, column: 11, scope: !46) +!78 = !DILocation(line: 34, column: 13, scope: !46) +!79 = !DILocation(line: 34, column: 9, scope: !46) +!80 = !DILocation(line: 34, column: 11, scope: !46) +!81 = !DILocation(line: 35, column: 7, scope: !46) +!82 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !83) +!83 = distinct !DILocation(line: 35, column: 3, scope: !46) +!84 = !DILocation(line: 37, column: 13, scope: !46) +!85 = !DILocation(line: 37, column: 9, scope: !46) +!86 = !DILocation(line: 37, column: 11, scope: !46) +!87 = !DILocation(line: 38, column: 13, scope: !46) +!88 = !DILocation(line: 38, column: 9, scope: !46) +!89 = !DILocation(line: 38, column: 11, scope: !46) +!90 = !DILocation(line: 39, column: 7, scope: !46) +!91 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !92) +!92 = distinct !DILocation(line: 39, column: 3, scope: !46) +!93 = !DILocation(line: 
41, column: 25, scope: !46) +!94 = !DILocation(line: 41, column: 3, scope: !46) +!95 = !DILocation(line: 42, column: 25, scope: !46) +!96 = !DILocation(line: 42, column: 3, scope: !46) +!97 = !DILocation(line: 43, column: 1, scope: !46) diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl new file mode 100644 index 0000000000..004d25156a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> FromRayQuery +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Trq +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit FromRayQuery 'TResult (Trq) const' static +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> rq 'Trq' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used FromRayQuery 'dx::HitObject (RayQuery)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> FromRayQuery 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> FromRayQuery +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Trq +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class THitKind +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAttributes +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit FromRayQuery 'TResult (Trq, 
THitKind, TAttributes) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> rq 'Trq' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'THitKind' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Attributes 'TAttributes' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used FromRayQuery 'dx::HitObject (RayQuery, unsigned int, CustomAttrs)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> FromRayQuery 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'CustomAttrs' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32 363, %dx.types.HitObject* %[[HITPTR0:[^ ]+]], %"class.RayQuery<5, 0>"* %[[RQ:[^ ]+]]) +// FCGL-NEXT: call void @"\01?Use@@YAXVHitObject@dx@@@Z"(%dx.types.HitObject* %[[HITPTR0]]) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %[[HITPTR1:[^ ]+]], %"class.RayQuery<5, 0>"* %[[RQ]], i32 16, %struct.CustomAttrs* %{{[^ ]+}}) +// FCGL-NEXT: call void @"\01?Use@@YAXVHitObject@dx@@@Z"(%dx.types.HitObject* %[[HITPTR1]]) + +RaytracingAccelerationStructure RTAS; +RWStructuredBuffer UAV : register(u0); + +RayDesc MakeRayDesc() { + RayDesc desc; + desc.Origin = float3(0, 0, 0); + desc.Direction = float3(1, 0, 0); + desc.TMin = 0.0f; + desc.TMax = 9999.0; + return desc; +} + +struct CustomAttrs { + float x; + float y; +}; + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + 
+[shader("raygeneration")] +void main() { + RayQuery q; + RayDesc ray = MakeRayDesc(); + q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray); + + Use(dx::HitObject::FromRayQuery(q)); + + CustomAttrs attrs = {1.f, 2.f}; + Use(dx::HitObject::FromRayQuery(q, 16, attrs)); +} From 0f7af1be3fcd5ca99877b084afe7801c4e8597fd Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 25 Apr 2025 20:59:25 +0200 Subject: [PATCH 10/93] [SER] Validate HitObject accessors (#7371) Validate: HitObject_GeometryIndex HitObject_HitKind HitObject_InstanceID HitObject_InstanceIndex HitObject_IsHit HitObject_IsMiss HitObject_IsNop HitObject_LoadLocalRootTableConstant HitObject_ObjectRayDirection HitObject_ObjectRayOrigin HitObject_ObjectToWorld3x4 HitObject_PrimitiveIndex HitObject_RayFlags HitObject_RayTCurrent HitObject_RayTMin HitObject_SetShaderTableIndex HitObject_ShaderTableIndex HitObject_WorldRayDirection HitObject_WorldRayOrigin Rules: * No undef parameters (HitObject, RecordIndex, RootTableOffset) * row/col/component indices are constant and in-bounds * If constant, RootTableOffset % 4 == 0 SER implementation tracker: https://github.com/microsoft/DirectXShaderCompiler/issues/7214 --- docs/DXIL.rst | 1 + lib/DxilValidation/DxilValidation.cpp | 110 ++++++++++ .../HitObject/hitobject_accessors.hlsl | 4 +- .../ser_hitobject_accessors_failing.ll | 202 ++++++++++++++++++ .../ser_hitobject_accessors_passing.ll | 2 +- .../HitObject/hitobject_accessors.hlsl | 4 +- utils/hct/hctdb.py | 5 + 7 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a1c5055085..a55f476450 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3161,6 +3161,7 @@ INSTR.OPCODERESERVED Instructions must not refe INSTR.OPCONST DXIL intrinsic requires an immediate constant operand INSTR.OPCONSTRANGE Constant values must be in-range for operation. 
INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PARAMMULTIPLE Parameter must be a valid multiple INSTR.PTRBITCAST Pointer type bitcast must be have same size. INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 00a6b9ae14..69eb2a88f2 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1644,6 +1644,46 @@ static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); } +StringRef GetOpCodeName(DXIL::OpCode OpCode) { + switch (OpCode) { + default: + DXASSERT(false, "Unexpected op code"); + return ""; + case DXIL::OpCode::HitObject_ObjectRayOrigin: + return "HitObject_ObjectRayOrigin"; + case DXIL::OpCode::HitObject_WorldRayDirection: + return "HitObject_WorldRayDirection"; + case DXIL::OpCode::HitObject_WorldRayOrigin: + return "HitObject_WorldRayOrigin"; + case DXIL::OpCode::HitObject_ObjectRayDirection: + return "HitObject_ObjectRayDirection"; + case DXIL::OpCode::HitObject_WorldToObject3x4: + return "HitObject_WorldToObject3x4"; + case DXIL::OpCode::HitObject_ObjectToWorld3x4: + return "HitObject_ObjectToWorld3x4"; + } +} + +static void ValidateConstantRangeUnsigned(Value *Val, StringRef Name, + uint64_t LowerBound, + uint64_t UpperBound, CallInst *CI, + DXIL::OpCode OpCode, + ValidationContext &ValCtx) { + ConstantInt *C = dyn_cast(Val); + if (!C) { + ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, + {Name, GetOpCodeName(OpCode)}); + return; + } + if (C->uge(UpperBound + 1U) || !C->uge(LowerBound)) { + std::string Range = + std::to_string(LowerBound) + "~" + std::to_string(UpperBound); + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrOperandRange, + {Name, Range, C->getValue().toString(10, false)}); + } +} 
+ static void ValidateDxilOperationCallInProfile(CallInst *CI, DXIL::OpCode Opcode, const ShaderModel *pSM, @@ -1910,6 +1950,76 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); } break; + case DXIL::OpCode::HitObject_LoadLocalRootTableConstant: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Offset = CI->getArgOperand(2); + if (isa(Offset)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + if (ConstantInt *COffset = dyn_cast(Offset)) { + if (COffset->getLimitedValue() % 4 != 0) + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrParamMultiple, + {"offset", "4", COffset->getValue().toString(10, false)}); + } + break; + } + case DXIL::OpCode::HitObject_SetShaderTableIndex: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *RecordIndex = CI->getArgOperand(2); + if (isa(RecordIndex)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + break; + } + + // Shader Execution Reordering - scalar getters + case DXIL::OpCode::HitObject_GeometryIndex: + case DXIL::OpCode::HitObject_HitKind: + case DXIL::OpCode::HitObject_InstanceID: + case DXIL::OpCode::HitObject_InstanceIndex: + case DXIL::OpCode::HitObject_IsHit: + case DXIL::OpCode::HitObject_IsMiss: + case DXIL::OpCode::HitObject_IsNop: + case DXIL::OpCode::HitObject_PrimitiveIndex: + case DXIL::OpCode::HitObject_RayFlags: + case DXIL::OpCode::HitObject_RayTCurrent: + case DXIL::OpCode::HitObject_RayTMin: + case DXIL::OpCode::HitObject_ShaderTableIndex: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + break; + } + + // Shader Execution Reordering - vector getters + case DXIL::OpCode::HitObject_ObjectRayDirection: + case 
DXIL::OpCode::HitObject_ObjectRayOrigin: + case DXIL::OpCode::HitObject_WorldRayDirection: + case DXIL::OpCode::HitObject_WorldRayOrigin: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Col = CI->getArgOperand(2); + ValidateConstantRangeUnsigned(Col, "component", 0, 2, CI, Opcode, ValCtx); + break; + } + + // Shader Execution Reordering - matrix getters + case DXIL::OpCode::HitObject_WorldToObject3x4: + case DXIL::OpCode::HitObject_ObjectToWorld3x4: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Row = CI->getArgOperand(2); + ValidateConstantRangeUnsigned(Row, "row", 0, 2, CI, Opcode, ValCtx); + Value *Col = CI->getArgOperand(3); + ValidateConstantRangeUnsigned(Col, "column", 0, 3, CI, Opcode, ValCtx); + break; + } + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl index bae2b0590c..daeabf9710 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -14,7 +14,7 @@ // DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %[[HIT]]) ; HitObject_InstanceID(hitObject) // DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %[[HIT]]) ; HitObject_PrimitiveIndex(hitObject) // DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %[[HIT]]) ; HitObject_ShaderTableIndex(hitObject) -// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject 
%[[HIT]], i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %[[HIT]], i32 40) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) // DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) // DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_ObjectRayOrigin(hitObject,component) // DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_ObjectRayOrigin(hitObject,component) @@ -88,7 +88,7 @@ void main() { isum += hit.GetInstanceID(); isum += hit.GetPrimitiveIndex(); isum += hit.GetShaderTableIndex(); - isum += hit.LoadLocalRootTableConstant(42); + isum += hit.LoadLocalRootTableConstant(40); // float3 accessors vsum += hit.GetWorldRayOrigin(); diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll new file mode 100644 index 0000000000..7270996b91 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll @@ -0,0 +1,202 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK: note: at '%r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 1)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. 
+; CHECK: note: at '%r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: parameter 'offset' must be a multiple of 4, got 7 +; CHECK: note: at '%r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: parameter 'offset' must be a multiple of 4, got 42 +; CHECK: note: at '%r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r278_oobc = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_ObjectRayDirection must be an immediate constant. 
+; CHECK: note: at '%r278_dync = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r277_oobc = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_ObjectRayOrigin must be an immediate constant. +; CHECK: note: at '%r277_dync = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r276_oobc = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_WorldRayDirection must be an immediate constant. +; CHECK: note: at '%r276_dync = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. 
+; CHECK: note: at '%r275_oobc = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_WorldRayOrigin must be an immediate constant. +; CHECK: note: at '%r275_dync = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect column between 0~3, got 4. +; CHECK: note: at '%r280_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 4)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: column of HitObject_WorldToObject3x4 must be an immediate constant. +; CHECK: note: at '%r280_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect row between 0~2, got 3. +; CHECK: note: at '%r280_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 3, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: row of HitObject_WorldToObject3x4 must be an immediate constant. 
+; CHECK: note: at '%r280_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 %r272, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject undef, i32 0, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect column between 0~3, got 4. +; CHECK: note: at '%r279_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 4)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: column of HitObject_ObjectToWorld3x4 must be an immediate constant. +; CHECK: note: at '%r279_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect row between 0~2, got 3. +; CHECK: note: at '%r279_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 3, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: row of HitObject_ObjectToWorld3x4 must be an immediate constant. +; CHECK: note: at '%r279_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 %r272, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject undef, i32 0, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +%nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject undef) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject undef) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject undef) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject undef) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject undef) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject undef) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject undef, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + %r275_dync = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 %r272) ; HitObject_WorldRayOrigin(hitObject,component) + %r275_oobc = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 3) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject undef, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + %r276_dync = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 %r272) ; HitObject_WorldRayDirection(hitObject,component) + %r276_oobc = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 3) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject undef, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + %r277_dync = call float @dx.op.hitObject_StateVector.f32(i32 
277, %dx.types.HitObject %nop, i32 %r272) ; HitObject_ObjectRayOrigin(hitObject,component) + %r277_oobc = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 3) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject undef, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + %r278_dync = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 %r272) ; HitObject_ObjectRayDirection(hitObject,component) + %r278_oobc = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 3) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject undef, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 %r272, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 3, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 %r272) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 4) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject undef, i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 %r272, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 3, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_dync = call float 
@dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 %r272) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 4) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject undef) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject undef) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject undef) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject undef) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject undef) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject undef) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + %r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + %r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, i32) #1 
+ +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll index e527125009..74cc94fb78 100644 --- a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -52,7 +52,7 @@ define void @"\01?main@@YAXXZ"() #0 { %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) - %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 
42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 16) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) ret void diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl index 7b4182b739..05aa790ad4 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -189,7 +189,7 @@ // FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 367, %dx.types.HitObject* %[[HIT]]) // FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 373, %dx.types.HitObject* %[[HIT]]) // FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 377, %dx.types.HitObject* %[[HIT]]) -// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %[[HIT]], i32 42) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %[[HIT]], i32 40) // FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 379, %dx.types.HitObject* %[[HIT]]) // FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 378, %dx.types.HitObject* %[[HIT]]) // FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 370, %dx.types.HitObject* %[[HIT]]) @@ -238,7 +238,7 @@ void main() { isum += hit.GetInstanceID(); isum += hit.GetPrimitiveIndex(); isum += hit.GetShaderTableIndex(); - isum += hit.LoadLocalRootTableConstant(42); + isum += 
hit.LoadLocalRootTableConstant(40); // float3 accessors vsum += hit.GetWorldRayOrigin(); diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 6344fb5849..b1460de9b8 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8298,6 +8298,11 @@ def build_valrules(self): "Instr.UndefHitObject", "HitObject is undef.", ) + self.add_valrule_msg( + "Instr.ParamMultiple", + "Parameter must be a valid multiple", + "parameter '%0' must be a multiple of %1, got %2", + ) self.add_valrule( "Instr.MayReorderThreadUndefCoherenceHintParam", "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", From b4d773fdf89a43cf983e28e11f0d8102e8723e18 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 25 Apr 2025 20:59:38 +0200 Subject: [PATCH 11/93] [SER] Validate MakeMiss (#7372) Validate: HitObject_MakeMiss Rules: No undef MissShaderIndex or RayFlags SER implementation tracker: #7214 --- lib/DxilValidation/DxilValidation.cpp | 6 +++ .../ser_hitobject_make_failing.ll | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 69eb2a88f2..1ef64ee1bb 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1949,6 +1949,12 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValCtx.EmitInstrError( CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); } break; + case DXIL::OpCode::HitObject_MakeMiss: { + DxilInst_HitObject_MakeMiss MakeMiss(CI); + if (isa(MakeMiss.get_RayFlags()) || + isa(MakeMiss.get_MissShaderIndex())) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } break; case DXIL::OpCode::HitObject_LoadLocalRootTableConstant: { Value *HitObject = CI->getArgOperand(1); diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll 
b/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll new file mode 100644 index 0000000000..b47f178ca2 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll @@ -0,0 +1,44 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK: note: at '%r265_udmiss = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK: note: at '%r265_udflags = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 undef, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %r265_udflags = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 undef, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + %r265_udmiss = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} From 8209d53f0ef0257e5b8c78d22057086403946cca Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 25 Apr 2025 20:59:48 +0200 Subject: [PATCH 12/93] Fix static member call instantiation (#7377) The first parameter of the HLSL_INTRINSIC record is actually a dummy entry for the function name. 
When builtin member function templates are instantiated, the 'this' ptr is added as the first ParmVarDecl, which has the same index as the first argument in the HLSL_INTRINSIC record. This shifts the parameter names by one for static member functions, as those do not have a 'this' pointer, as in: ``` | | | `-CXXMethodDecl {{[^ ]+}} <> used MakeMiss 'dx::HitObject (unsigned int, unsigned int, RayDesc)' static | | | |-TemplateArgument type 'dx::HitObject' | | | |-TemplateArgument type 'unsigned int' | | | |-TemplateArgument type 'unsigned int' | | | |-TemplateArgument type 'RayDesc' | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' ``` The fix is to take the first actual parameter name of the HLSL_INTRINSIC if a static member function template is declared. Closes #7374 --- tools/clang/lib/Sema/SemaHLSL.cpp | 10 ++++++++-- .../hlsl/objects/HitObject/hitobject_make.hlsl | 4 ++-- .../objects/HitObject/hitobject_traceinvoke.hlsl | 12 ++++++------ .../objects/HitObject/hitobject_fromrayquery.hlsl | 8 ++++---- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 5131d39f44..43946bc78a 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5945,6 +5945,8 @@ class HLSLExternalSource : public ExternalSemaSource { "otherwise caller didn't initialize - there should be at least a " "void return type"); + const bool IsStatic = IsStaticMember(intrinsic); + // Create the template arguments. 
SmallVector templateArgs; for (size_t i = 0; i < parameterTypeCount; i++) { @@ -6010,15 +6012,19 @@ class HLSLExternalSource : public ExternalSemaSource { SmallVector Params; for (unsigned int i = 1; i < parameterTypeCount; i++) { + // The first parameter in the HLSL intrinsic record is just the intrinsic + // name and aliases with the 'this' pointer for non-static members. Skip + // this first parameter for static functions. + unsigned ParamIdx = IsStatic ? i : i - 1; IdentifierInfo *id = - &m_context->Idents.get(StringRef(intrinsic->pArgs[i - 1].pName)); + &m_context->Idents.get(StringRef(intrinsic->pArgs[ParamIdx].pName)); ParmVarDecl *paramDecl = ParmVarDecl::Create( *m_context, nullptr, NoLoc, NoLoc, id, parameterTypes[i], nullptr, StorageClass::SC_None, nullptr, paramMods[i - 1]); Params.push_back(paramDecl); } - StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + StorageClass SC = IsStatic ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl index 1e947b2296..cc9515d7c1 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -25,9 +25,9 @@ // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'RayDesc' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'RayDesc' // 
AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl index 13bff4a3f4..4ea00475f1 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl @@ -13,8 +13,8 @@ // AST-NEXT: | | | |-TemplateArgument type 'void' // AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject':'dx::HitObject' // AST-NEXT: | | | |-TemplateArgument type 'Payload' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Invoke 'dx::HitObject':'dx::HitObject' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> ho 'Payload &&__restrict' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> ho 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Payload 'Payload &&__restrict' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 382 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" @@ -47,14 +47,14 @@ // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'RayDesc' // AST-NEXT: | | | |-TemplateArgument type 'Payload' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> TraceRay 'RaytracingAccelerationStructure' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'RaytracingAccelerationStructure' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> InstanceInclusionMask 'unsigned int' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayContributionToHitGroupIndex 'unsigned int' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MultiplierForGeometryContributionToHitGroupIndex 'unsigned int' -// 
AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'Payload &&__restrict' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Payload 'Payload &&__restrict' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 389 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl index 004d25156a..e4a13d8a62 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl @@ -9,7 +9,7 @@ // AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used FromRayQuery 'dx::HitObject (RayQuery)' static // AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' // AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> FromRayQuery 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'RayQuery':'RayQuery<5, 0>' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" @@ -27,9 +27,9 @@ // AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>' // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> FromRayQuery 'RayQuery':'RayQuery<5, 0>' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'unsigned int' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'CustomAttrs' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'RayQuery':'RayQuery<5, 0>' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'unsigned 
int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Attributes 'CustomAttrs' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" From 34b6d0f91e6afd523bdc574836093f021713cce7 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Mon, 28 Apr 2025 10:23:47 -0700 Subject: [PATCH 13/93] Implementation of the CoopVec Inference and Training builtin intrinsics (#7290) (#7381) Authored-by: Anupama Chandrasekhar Implements HLSL: __builtin_MatVecMul __builtin_MatVecMulAdd __builtin_OuterProductAccumulate __builtin_VectorAccumulate Lowered to DXIL: @dx.op.matVecMul @dx.op.matVecMulAdd @dx.op.outerProductAccumulate @dx.op.vectorAccumulate --------- Co-authored-by: Anupama Chandrasekhar Co-authored-by: Simon Moll (cherry picked from commit 1db8c5b30b41f600c4c014fad7669d0e8f154a45) --- docs/DXIL.rst | 12 + include/dxc/DXIL/DxilConstants.h | 96 +++++-- include/dxc/DXIL/DxilInstructions.h | 230 +++++++++++++++ .../dxc/DxilContainer/RDAT_LibraryTypes.inl | 6 +- include/dxc/HLSL/HLOperations.h | 48 ++++ include/dxc/HlslIntrinsicOp.h | 6 +- lib/DXIL/DxilOperations.cpp | 108 ++++++- lib/DxilValidation/DxilValidation.cpp | 271 ++++++++++++++++++ lib/HLSL/HLOperationLower.cpp | 203 +++++++++++++ tools/clang/lib/Sema/SemaHLSL.cpp | 12 + .../linalg_builtins/check-shader-stages.hlsl | 135 +++++++++ .../linalg_builtins/linalg-builtins.hlsl | 79 +++++ .../intrinsics/linalg_builtins/lit.local.cfg | 1 + .../mat-vec-mul-add_multioverload.hlsl | 108 +++++++ .../mat-vec-mul_multioverload.hlsl | 104 +++++++ ...uter-product-accumulate-multioverload.hlsl | 70 +++++ .../linalg_builtins/vector-accumulate.hlsl | 16 ++ .../DXC/Passes/DxilGen/linalg-builtins.ll | 189 ++++++++++++ .../hlsl/linalg/unavailable-pre-sm69.hlsl | 59 ++++ utils/hct/gen_intrin_main.txt | 8 + utils/hct/hctdb.py | 151 ++++++++++ utils/hct/hlsl_intrinsic_opcodes.json | 8 +- 22 files changed, 1894 insertions(+), 26 deletions(-) create mode 100644 
tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a55f476450..69bcae8c53 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -2419,6 +2419,10 @@ ID Name Description 302 ReservedC9 reserved 303 RawBufferVectorLoad reads from a raw buffer and structured buffer 304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer +305 MatVecMul Multiplies a MxK dimension matrix and a K sized input vector +306 MatVecMulAdd multiplies a MxK dimension matrix and a K sized input vector and adds an M-sized bias vector +307 OuterProductAccumulate Computes the outer product between column vectors and an MxN matrix is accumulated component-wise atomically (with device scope) in memory +308 VectorAccumulate Accumulates the components of a vector component-wise atomically (with device scope) to the corresponding elements of an array in memory === ===================================================== 
======================================================================================================================================================================================================================= @@ -3134,6 +3138,14 @@ INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0] INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.LINALGINTERPRETATIONPARAMARECONST In Linalg operations, Interpretation value is a constant. +INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFORMATVECOPS Matrix Layout for Linalg Mul/MulAdd operation must be valid. +INSTR.LINALGINVALIDMEMORYINTERPVALUE In Memory Interpolation value must be valid. +INSTR.LINALGINVALIDREGISTERINTERPVALUE From Register Interpretation value must be valid. +INSTR.LINALGMATRIXLAYOUTNOTTRANSPOSABLE Row Major and Column Major matrix layouts are not transposable. +INSTR.LINALGMATRIXSHAPEPARAMSARECONST Matrix Layout, Dimensions and isTranspose are constants +INSTR.LINALGNOTANUNSIGNEDTYPE Unsigned flag set for a float signed type +INSTR.MATVECOPISUNSIGNEDFLAGSARECONST In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant. INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. 
diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 3752274f18..fe32c06f63 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -162,24 +162,32 @@ const unsigned kDxilMaxOloadDims = 2; enum class ComponentType : uint32_t { Invalid = 0, - I1, - I16, - U16, - I32, - U32, - I64, - U64, - F16, - F32, - F64, - SNormF16, - UNormF16, - SNormF32, - UNormF32, - SNormF64, - UNormF64, - PackedS8x32, - PackedU8x32, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, + // END + LastEntry }; @@ -743,6 +751,19 @@ enum class OpCode : unsigned { CreateHandleForLib = 160, // create resource handle from resource struct for library + // Linear Algebra Operations + MatVecMul = + 305, // Multiplies a MxK dimension matrix and a K sized input vector + MatVecMulAdd = 306, // multiplies a MxK dimension matrix and a K sized input + // vector and adds an M-sized bias vector + OuterProductAccumulate = + 307, // Computes the outer product between column vectors and an MxN + // matrix is accumulated component-wise atomically (with device + // scope) in memory + VectorAccumulate = 308, // Accumulates the components of a vector + // component-wise atomically (with device scope) to + // the corresponding elements of an array in memory + // Mesh shader instructions EmitIndices = 169, // emit a primitive's vertex indices in a mesh shader GetMeshPayload = @@ -1060,7 +1081,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 305 // exclusive last value of enumeration + NumOpCodes = 309 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1201,6 +1222,12 @@ enum class OpCodeClass : unsigned { // 
Library create handle from resource struct (like HL intrinsic) CreateHandleForLib, + // Linear Algebra Operations + MatVecMul, + MatVecMulAdd, + OuterProductAccumulate, + VectorAccumulate, + // Mesh shader instructions EmitIndices, GetMeshPayload, @@ -1385,7 +1412,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 190 // exclusive last value of enumeration + NumOpClasses = 194 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1561,6 +1588,28 @@ const unsigned kHitObjectTraceRay_RayDescOpIdx = 7; const unsigned kHitObjectTraceRay_PayloadOpIdx = 15; const unsigned kHitObjectTraceRay_NumOp = 16; +// MatVec Ops +const unsigned kMatVecMulInputVectorIdx = 1; +const unsigned kMatVecMulIsInputUnsignedIdx = 2; +const unsigned kMatVecMulInputInterpretationIdx = 3; +const unsigned kMatVecMulMatrixBufferIdx = 4; +const unsigned kMatVecMulMatrixOffsetIdx = 5; +const unsigned kMatVecMulMatrixInterpretationIdx = 6; +const unsigned kMatVecMulMatrixMIdx = 7; +const unsigned kMatVecMulMatrixKIdx = 8; +const unsigned kMatVecMulMatrixLayoutIdx = 9; +const unsigned kMatVecMulMatrixTransposeIdx = 10; +const unsigned kMatVecMulMatrixStrideIdx = 11; +const unsigned kMatVecMulIsOutputUnsignedIdx = 12; + +// MatVecAdd +const unsigned kMatVecMulAddBiasInterpretation = 14; +const unsigned kMatVecMulAddIsOutputUnsignedIdx = 15; + +// Outer Product Accumulate +const unsigned kOuterProdAccMatrixInterpretation = 5; +const unsigned kOuterProdAccMatrixLayout = 6; + // TODO: add operand index for all the OpCodeClass. 
} // namespace OperandIndex @@ -2132,6 +2181,13 @@ extern const char *kHostLayoutTypePrefix; extern const char *kWaveOpsIncludeHelperLanesString; +enum class LinalgMatrixLayout : uint32_t { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + } // namespace DXIL } // namespace hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index a99c5360d4..9a4030fd8e 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -9918,5 +9918,235 @@ struct DxilInst_RawBufferVectorStore { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Multiplies a MxK dimension matrix and a K sized input +/// vector +struct DxilInst_MatVecMul { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MatVecMul(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::MatVecMul); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (13 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_isInputUnsigned = 2, + arg_inputInterpretation = 3, + arg_matrixBuffer = 4, + arg_matrixOffset = 5, + arg_matrixIntepretation = 6, + arg_matrixM = 7, + arg_matrixK = 8, + arg_matrixLayout = 9, + arg_matrixTranspose = 10, + arg_matrixStride = 11, + arg_isOutputUnsigned = 12, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); } + void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); } + void 
set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixOffset() const { return Instr->getOperand(5); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_matrixM() const { return Instr->getOperand(7); } + void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_matrixK() const { return Instr->getOperand(8); } + void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); } + void set_matrixLayout(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); } + void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_matrixStride() const { return Instr->getOperand(11); } + void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(12); } + void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(12, val); } +}; + +/// This instruction multiplies a MxK dimension matrix and a K sized input +/// vector and adds an M-sized bias vector +struct DxilInst_MatVecMulAdd { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MatVecMulAdd(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MatVecMulAdd); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } 
+ // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_isInputUnsigned = 2, + arg_inputInterpretation = 3, + arg_matrixBuffer = 4, + arg_matrixOffset = 5, + arg_matrixIntepretation = 6, + arg_matrixM = 7, + arg_matrixK = 8, + arg_matrixLayout = 9, + arg_matrixTranspose = 10, + arg_matrixStride = 11, + arg_biasBuffer = 12, + arg_biasOffset = 13, + arg_biasIntepretation = 14, + arg_isOutputUnsigned = 15, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); } + void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); } + void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixOffset() const { return Instr->getOperand(5); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_matrixM() const { return Instr->getOperand(7); } + void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_matrixK() const { return Instr->getOperand(8); } + void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); } + void set_matrixLayout(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); } + void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); } + 
llvm::Value *get_matrixStride() const { return Instr->getOperand(11); } + void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_biasBuffer() const { return Instr->getOperand(12); } + void set_biasBuffer(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_biasOffset() const { return Instr->getOperand(13); } + void set_biasOffset(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_biasIntepretation() const { return Instr->getOperand(14); } + void set_biasIntepretation(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(15); } + void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(15, val); } +}; + +/// This instruction Computes the outer product between column vectors and an +/// MxN matrix is accumulated component-wise atomically (with device scope) in +/// memory +struct DxilInst_OuterProductAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_OuterProductAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::OuterProductAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (8 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector1 = 1, + arg_inputVector2 = 2, + arg_matrixBuffer = 3, + arg_matrixOffset = 4, + arg_matrixIntepretation = 5, + arg_matrixLayout = 6, + arg_matrixStride = 7, + }; + // Accessors + llvm::Value *get_inputVector1() const { return Instr->getOperand(1); } + void set_inputVector1(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_inputVector2() const { return Instr->getOperand(2); } + void set_inputVector2(llvm::Value *val) { 
Instr->setOperand(2, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(3); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixOffset() const { return Instr->getOperand(4); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(5); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_matrixIntepretation_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_matrixIntepretation_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(6); } + void set_matrixLayout(llvm::Value *val) { Instr->setOperand(6, val); } + int32_t get_matrixLayout_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(6)) + ->getZExtValue()); + } + void set_matrixLayout_val(int32_t val) { + Instr->setOperand(6, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_matrixStride() const { return Instr->getOperand(7); } + void set_matrixStride(llvm::Value *val) { Instr->setOperand(7, val); } +}; + +/// This instruction Accumulates the components of a vector component-wise +/// atomically (with device scope) to the corresponding elements of an array in +/// memory +struct DxilInst_VectorAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_VectorAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::VectorAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != 
llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_arrayBuffer = 2, + arg_arrayOffset = 3, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_arrayBuffer() const { return Instr->getOperand(2); } + void set_arrayBuffer(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_arrayOffset() const { return Instr->getOperand(3); } + void set_arrayOffset(llvm::Value *val) { Instr->setOperand(3, val); } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 4b58b406c2..902f2e9652 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -565,9 +565,13 @@ RDAT_DXIL_ENUM_START(hlsl::DXIL::ComponentType, uint32_t) RDAT_ENUM_VALUE_NODEF(UNormF64) RDAT_ENUM_VALUE_NODEF(PackedS8x32) RDAT_ENUM_VALUE_NODEF(PackedU8x32) + RDAT_ENUM_VALUE_NODEF(U8) + RDAT_ENUM_VALUE_NODEF(I8) + RDAT_ENUM_VALUE_NODEF(F8_E4M3) + RDAT_ENUM_VALUE_NODEF(F8_E5M2) RDAT_ENUM_VALUE_NODEF(LastEntry) #if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL - static_assert((unsigned)hlsl::DXIL::ComponentType::LastEntry == 19, + static_assert((unsigned)hlsl::DXIL::ComponentType::LastEntry == 23, "otherwise, RDAT_DXIL_ENUM definition needs updating"); #endif RDAT_ENUM_END() diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 0e9b8c2710..c75318da99 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -445,6 +445,54 @@ const unsigned kHitObjectTraceRay_NumOp = 10; const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; const unsigned kHitObjectFromRayQuery_WithAttrs_NumOp = 5; +// 
// Linear Algebra Operations
//
// Argument positions of the high-level linear-algebra builtin calls. The
// lowering code reads them with CI->getArgOperand(HLOperandIndex::k...).
// Numbering starts at 1; argument 0 is presumably the HL opcode, as for the
// other HL intrinsics -- TODO confirm.

// MatVecMul
// The output vector and its signedness flag come first in the HL call, ahead
// of the input vector and matrix arguments.
const unsigned kMatVecMulOutputVectorIdx = 1;
const unsigned kMatVecMulIsOutputUnsignedIdx = 2;
const unsigned kMatVecMulInputVectorIdx = 3;
const unsigned kMatVecMulIsInputUnsignedIdx = 4;
const unsigned kMatVecMulInputInterpretationIdx = 5;
const unsigned kMatVecMulMatrixBufferIdx = 6;
const unsigned kMatVecMulMatrixOffsetIdx = 7;
const unsigned kMatVecMulMatrixInterpretationIdx = 8;
const unsigned kMatVecMulMatrixMIdx = 9;
const unsigned kMatVecMulMatrixKIdx = 10;
const unsigned kMatVecMulMatrixLayoutIdx = 11;
const unsigned kMatVecMulMatrixTransposeIdx = 12;
const unsigned kMatVecMulMatrixStrideIdx = 13;

// MatVecMulAdd
// Same positions as MatVecMul for arguments 1-13, followed by the three bias
// arguments.
const unsigned kMatVecMulAddOutputVectorIdx = 1;
const unsigned kMatVecMulAddIsOutputUnsignedIdx = 2;
const unsigned kMatVecMulAddInputVectorIdx = 3;
const unsigned kMatVecMulAddIsInputUnsignedIdx = 4;
const unsigned kMatVecMulAddInputInterpretationIdx = 5;
const unsigned kMatVecMulAddMatrixBufferIdx = 6;
const unsigned kMatVecMulAddMatrixOffsetIdx = 7;
const unsigned kMatVecMulAddMatrixInterpretationIdx = 8;
const unsigned kMatVecMulAddMatrixMIdx = 9;
const unsigned kMatVecMulAddMatrixKIdx = 10;
const unsigned kMatVecMulAddMatrixLayoutIdx = 11;
const unsigned kMatVecMulAddMatrixTransposeIdx = 12;
const unsigned kMatVecMulAddMatrixStrideIdx = 13;
const unsigned kMatVecMulAddBiasBufferIdx = 14;
const unsigned kMatVecMulAddBiasOffsetIdx = 15;
const unsigned kMatVecMulAddBiasInterpretationIdx = 16;

// OuterProductAccumulate
// kOuterProdAccMatrixIdx is the destination matrix buffer argument.
const unsigned kOuterProdAccInputVec1Idx = 1;
const unsigned kOuterProdAccInputVec2Idx = 2;
const unsigned kOuterProdAccMatrixIdx = 3;
const unsigned kOuterProdAccMatrixOffsetIdx = 4;
const unsigned kOuterProdAccMatrixInterpretationIdx = 5;
const unsigned kOuterProdAccMatrixLayoutIdx = 6;
const unsigned kOuterProdAccMatrixStrideIdx = 7;

// Vector Accumulate
// NOTE(review): these two are named "Matrix", but the DXIL-side helper
// DxilInst_VectorAccumulate calls the same operands arrayBuffer/arrayOffset.
const unsigned kVectorAccInputVecIdx = 1;
const unsigned kVectorAccMatrixIdx = 2;
const unsigned kVectorAccMatrixOffsetIdx = 3;
a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -970,6 +970,267 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, } } +static bool CheckLinalgInterpretation(uint32_t Input, bool InRegister) { + using CT = DXIL::ComponentType; + switch (static_cast(Input)) { + case CT::I16: + case CT::U16: + case CT::I32: + case CT::U32: + case CT::F16: + case CT::F32: + case CT::U8: + case CT::I8: + case CT::F8_E4M3: + case CT::F8_E5M2: + return true; + case CT::PackedS8x32: + case CT::PackedU8x32: + return InRegister; + default: + return false; + } +} + +static bool CheckMatrixLayoutForMatVecMulOps(unsigned Layout) { + return Layout <= + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal); +} + +std::string GetMatrixLayoutStr(unsigned Layout) { + switch (static_cast(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + return "RowMajor"; + case DXIL::LinalgMatrixLayout::ColumnMajor: + return "ColumnMajor"; + case DXIL::LinalgMatrixLayout::MulOptimal: + return "MulOptimal"; + case DXIL::LinalgMatrixLayout::OuterProductOptimal: + return "OuterProductOptimal"; + default: + DXASSERT_NOMSG(false); + return "Invalid"; + } +} + +static bool CheckTransposeForMatrixLayout(unsigned Layout, bool Transposed) { + switch (static_cast(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + case DXIL::LinalgMatrixLayout::ColumnMajor: + return !Transposed; + + default: + return true; + } +} + +static bool CheckUnsignedFlag(Type *VecTy, bool IsUnsigned) { + Type *ElemTy = VecTy->getScalarType(); + if (ElemTy->isFloatingPointTy()) + return !IsUnsigned; + + return true; +} + +static Value *GetMatVecOpIsOutputUnsigned(CallInst *CI, DXIL::OpCode OpCode) { + switch (OpCode) { + case DXIL::OpCode::MatVecMul: + return CI->getOperand(DXIL::OperandIndex::kMatVecMulIsOutputUnsignedIdx); + case DXIL::OpCode::MatVecMulAdd: + return CI->getOperand(DXIL::OperandIndex::kMatVecMulAddIsOutputUnsignedIdx); + + default: + 
DXASSERT_NOMSG(false); + return nullptr; + } +} + +static void ValidateImmOperandsForMatVecOps(CallInst *CI, DXIL::OpCode OpCode, + ValidationContext &ValCtx) { + + llvm::Value *IsInputUnsigned = + CI->getOperand(DXIL::OperandIndex::kMatVecMulIsInputUnsignedIdx); + ConstantInt *IsInputUnsignedConst = + dyn_cast(IsInputUnsigned); + if (!IsInputUnsignedConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst, + {"IsInputUnsigned"}); + return; + } + + llvm::Value *IsOutputUnsigned = GetMatVecOpIsOutputUnsigned(CI, OpCode); + ConstantInt *IsOutputUnsignedConst = + dyn_cast(IsOutputUnsigned); + if (!IsOutputUnsignedConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst, + {"IsOutputUnsigned"}); + return; + } + + llvm::Value *InputInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulInputInterpretationIdx); + ConstantInt *II = dyn_cast(InputInterpretation); + if (!II) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"InputInterpretation"}); + return; + } + uint64_t IIValue = II->getLimitedValue(); + if (!CheckLinalgInterpretation(IIValue, true)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidRegisterInterpValue, + {std::to_string(IIValue), "Input"}); + return; + } + + llvm::Value *MatrixInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixInterpretationIdx); + ConstantInt *MI = dyn_cast(MatrixInterpretation); + if (!MI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"MatrixInterpretation"}); + return; + } + uint64_t MIValue = MI->getLimitedValue(); + if (!CheckLinalgInterpretation(MIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(MIValue), "Matrix"}); + return; + } + + llvm::Value *MatrixM = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixMIdx); + if 
(!llvm::isa(MatrixM)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix M dimension"}); + return; + } + + llvm::Value *MatrixK = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixKIdx); + if (!llvm::isa(MatrixK)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix K dimension"}); + return; + } + + llvm::Value *MatrixLayout = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixLayoutIdx); + + ConstantInt *MatrixLayoutConst = dyn_cast(MatrixLayout); + if (!MatrixLayoutConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix Layout"}); + return; + } + uint64_t MLValue = MatrixLayoutConst->getLimitedValue(); + if (!CheckMatrixLayoutForMatVecMulOps(MLValue)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMatrixLayoutValueForMatVecOps, + {std::to_string(MLValue), + std::to_string( + static_cast(DXIL::LinalgMatrixLayout::RowMajor)), + std::to_string(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal))}); + return; + } + + llvm::Value *MatrixTranspose = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixTransposeIdx); + ConstantInt *MatrixTransposeConst = dyn_cast(MatrixTranspose); + if (!MatrixTransposeConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"MatrixTranspose"}); + return; + } + + if (!CheckTransposeForMatrixLayout(MLValue, + MatrixTransposeConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixLayoutNotTransposable, + {GetMatrixLayoutStr(MLValue)}); + return; + } + + llvm::Value *InputVector = + CI->getOperand(DXIL::OperandIndex::kMatVecMulInputVectorIdx); + if (!CheckUnsignedFlag(InputVector->getType(), + IsInputUnsignedConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Input"}); + return; + } + + if 
(!CheckUnsignedFlag(CI->getType(), + IsOutputUnsignedConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Output"}); + return; + } + + switch (OpCode) { + case DXIL::OpCode::MatVecMulAdd: { + llvm::Value *BiasInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulAddBiasInterpretation); + ConstantInt *BI = cast(BiasInterpretation); + if (!BI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"BiasInterpretation"}); + return; + } + uint64_t BIValue = BI->getLimitedValue(); + if (!CheckLinalgInterpretation(BIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(BIValue), "Bias vector"}); + return; + } + } break; + default: + break; + } +} + +static void ValidateImmOperandsForOuterProdAcc(CallInst *CI, + ValidationContext &ValCtx) { + + llvm::Value *MatrixInterpretation = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixInterpretation); + ConstantInt *MI = cast(MatrixInterpretation); + if (!MI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"MatrixInterpretation"}); + return; + } + uint64_t MIValue = MI->getLimitedValue(); + if (!CheckLinalgInterpretation(MIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(MIValue), "Matrix"}); + return; + } + + llvm::Value *MatrixLayout = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixLayout); + if (!llvm::isa(MatrixLayout)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"MatrixLayout"}); + return; + } +} + // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation @@ -2110,6 +2371,16 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, 
GetLaunchTypeStr(NodeLaunchType)}); break; + case DXIL::OpCode::MatVecMul: + case DXIL::OpCode::MatVecMulAdd: + ValidateImmOperandsForMatVecOps(CI, Opcode, ValCtx); + break; + case DXIL::OpCode::OuterProductAccumulate: + ValidateImmOperandsForOuterProdAcc(CI, ValCtx); + break; + case DXIL::OpCode::VectorAccumulate: + + break; default: // TODO: make sure every Opcode is checked. diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4ef7591e89..18d003a764 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6562,6 +6562,200 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Builder.CreateSelect(cond, t, f); } + +Value *TranslateMatVecMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input parameters + Value *InputVector = + CI->getArgOperand(HLOperandIndex::kMatVecMulInputVectorIdx); + Value *InputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulIsInputUnsignedIdx); + Value *InputInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulInputInterpretationIdx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixBufferIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixInterpretationIdx); + Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixMIdx); + Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixKIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixLayoutIdx); + Value *MatrixTranspose = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixTransposeIdx); + Value *MatrixStride = + 
CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixStrideIdx); + + // Output parameters + Value *OutputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulIsOutputUnsignedIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx) + ->getType() + ->getPointerElementType(), + InputVector->getType()}); + + // Create a call to the DXIL function + Value *NewCI = Builder.CreateCall( + DxilFunc, + {OpArg, InputVector, InputIsUnsigned, InputInterpretation, MatrixBuffer, + MatrixOffset, MatrixInterpretation, MatrixM, MatrixK, MatrixLayout, + MatrixTranspose, MatrixStride, OutputIsUnsigned}); + + // Get the output parameter and store the result + Value *OutParam = + CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx); + + Builder.CreateStore(NewCI, OutParam); + + return nullptr; +} + +Value *TranslateMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameters + Value *InputVector = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputVectorIdx); + Value *InputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsInputUnsignedIdx); + Value *InputInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputInterpretationIdx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixBufferIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixInterpretationIdx); + Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixMIdx); + Value *MatrixK = 
CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixKIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixLayoutIdx); + Value *MatrixTranspose = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixTransposeIdx); + Value *MatrixStride = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixStrideIdx); + + // Bias parameters + Value *BiasBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasBufferIdx); + Value *BiasOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasOffsetIdx); + Value *BiasInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasInterpretationIdx); + + // Output parameters + Value *OutputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsOutputUnsignedIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx) + ->getType() + ->getPointerElementType(), + InputVector->getType()}); + + // Create a call to the DXIL function + Value *NewCI = Builder.CreateCall( + DxilFunc, {OpArg, InputVector, InputIsUnsigned, InputInterpretation, + MatrixBuffer, MatrixOffset, MatrixInterpretation, MatrixM, + MatrixK, MatrixLayout, MatrixTranspose, MatrixStride, + BiasBuffer, BiasOffset, BiasInterpretation, OutputIsUnsigned}); + + // Store the result in the output parameter + Value *OutParam = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx); + Builder.CreateStore(NewCI, OutParam); + + return nullptr; +} + +Value *TranslateOuterProductAccumulate(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameters + Value *InputVector1 = + CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec1Idx); + Value *InputVector2 = + 
CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec2Idx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixInterpretationIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixLayoutIdx); + Value *MatrixStride = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixStrideIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {InputVector1->getType(), InputVector2->getType()}); + + return Builder.CreateCall( + DxilFunc, {OpArg, InputVector1, InputVector2, MatrixBuffer, MatrixOffset, + MatrixInterpretation, MatrixLayout, MatrixStride}); +} + +Value *TranslateVectorAccumulate(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameter + Value *InputVector = CI->getArgOperand(HLOperandIndex::kVectorAccInputVecIdx); + + // Matrix parameters + Value *MatrixBuffer = CI->getArgOperand(HLOperandIndex::kVectorAccMatrixIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kVectorAccMatrixOffsetIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType()); + + return Builder.CreateCall(DxilFunc, + {OpArg, InputVector, MatrixBuffer, MatrixOffset}); +} + } // namespace // Lower table. 
@@ -7275,6 +7469,15 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::HitObject_SetShaderTableIndex}, {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, DXIL::OpCode::HitObject_TraceRay}, + + {IntrinsicOp::IOP___builtin_MatVecMul, TranslateMatVecMul, + DXIL::OpCode::MatVecMul}, + {IntrinsicOp::IOP___builtin_MatVecMulAdd, TranslateMatVecMulAdd, + DXIL::OpCode::MatVecMulAdd}, + {IntrinsicOp::IOP___builtin_OuterProductAccumulate, + TranslateOuterProductAccumulate, DXIL::OpCode::OuterProductAccumulate}, + {IntrinsicOp::IOP___builtin_VectorAccumulate, TranslateVectorAccumulate, + DXIL::OpCode::VectorAccumulate}, }; } // namespace static_assert( diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 43946bc78a..6e58c0e872 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -12108,6 +12108,18 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, break; case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); + break; + case hlsl::IntrinsicOp::IOP___builtin_MatVecMul: + case hlsl::IntrinsicOp::IOP___builtin_MatVecMulAdd: + case hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate: + case hlsl::IntrinsicOp::IOP___builtin_VectorAccumulate: + if (!SM->IsSM69Plus()) { + Diags.Report(CE->getExprLoc(), + diag::warn_hlsl_intrinsic_in_wrong_shader_model) + << FD->getNameAsString() << EntryDecl->getNameAsString() << "6.9"; + return; + } + break; default: break; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl new file mode 100644 index 0000000000..74cb51260c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl @@ -0,0 +1,135 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +ByteAddressBuffer matrix_buffer; 
+ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer output_vector_buffer; + +void UseCoopVec() { + vector<float, 4> output_vector; + static const uint is_output_unsigned = 0; + + vector<float, 4> input_vector = input_vector_buffer.Load<vector<float, 4> >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + output_vector_buffer.Store(1024, output_vector); + + vector<uint, 8> input_vector1; + vector<uint, 8> input_vector2; + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 64; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); +} + +// CHECK: define void @ps_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// 
CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("pixel")] +void ps_main() +{ + UseCoopVec(); +} + +// CHECK: define void @cs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("compute")] +[NumThreads(1,1,1)] +void cs_main() +{ + UseCoopVec(); +} + +// CHECK: define void @vs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("vertex")] +void vs_main() +{ + UseCoopVec(); +} + +struct MyRecord{ + uint a; +}; + +// CHECK: define void @ns_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("node")] +[NodeLaunch("thread")] +void ns_main(ThreadNodeInputRecord<MyRecord> input) +{ + UseCoopVec(); +} + +// Vertex shader output structure +struct VS_OUT { + float3 Color : COLOR0; +}; + +// Geometry shader output structure +struct GS_OUT { + float3 Color : COLOR0; + float2 TexCoord : TEXCOORD0; +}; + +// CHECK: define void @gs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[shader("geometry")] +[maxvertexcount(3)] +void gs_main(point VS_OUT input[1], + inout TriangleStream<GS_OUT> OutputStream) +{ + UseCoopVec(); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl new file mode 100644 index 0000000000..c3b4a3a8d7 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl @@ -0,0 +1,79 @@ +// RUN: %dxc -fcgl -T cs_6_9 -E cs_main %s | FileCheck %s + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer opa_input_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +[Shader("compute")] +[NumThreads(1,1,1)] +void cs_main() +{ + vector<float, 4> output_vector; + static const uint is_output_unsigned = 0; + + vector<float, 4> input_vector = input_vector_buffer.Load<vector<float, 4> >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + // CHECK: %[[MLD0:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + // CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD0]]) + // CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH0]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64) + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, 
matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + // CHECK: %[[MLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + // CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD1]]) + // CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK-NEXT: %[[BLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A" + // CHECK-NEXT: %[[BCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[BLD1]]) + // CHECK-NEXT: %[[BAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[BCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK-NEXT: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9) + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, 
matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + output_vector_buffer.Store(1024, output_vector); + + vector<uint, 8> input_vector1 = opa_input_buffer.Load<vector<uint, 8> >(0); + vector<uint, 8> input_vector2 = opa_input_buffer.Load<vector<uint, 8> >(128); + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 64; + + // CHECK: %[[MLD2:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + // CHECK: %[[MCH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD2]]) + // CHECK: %[[MAH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH2]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH2]], i32 0, i32 5, i32 3, i32 64) + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + // CHECK: %[[MLD3:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + // CHECK: %[[MCH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD3]]) + // CHECK: %[[MAH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, 
%dx.types.Handle %[[MCH3]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH3]], i32 0) + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg new file mode 100644 index 0000000000..c2417a9e43 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl new file mode 100644 index 0000000000..98a568fa22 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl @@ -0,0 +1,108 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s 
-enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -fcgl | FileCheck %s 
--check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 + + +// COMMON: define void @main() + +// Test minimum support set of combinations for matVecMul +// HLOP-0: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) +// DXIL-0: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-1: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) +// DXIL-1: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// 
HLOP-2: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) +// DXIL-2: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-3: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) +// DXIL-3: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-4: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 
false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) +// DXIL-4: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// Test unsigned variations +// HLOP-5: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) +// DXIL-5: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 true) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-6: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) +// DXIL-6: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, 
i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-7: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) +// DXIL-7: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + +[NumThreads(1,1,1)] +void main() +{ + vector<OTY, 4> output_vector; + static const uint is_output_unsigned = OU; + + vector<ITY, 8> input_vector = input_vector_buffer.Load<vector<ITY, 8> >(0); + const uint is_input_unsigned = IU; + const uint input_interpretation = II; + + 
const uint matrix_offset = 0; + const uint matrix_interpretation = MI; + const uint matrix_dimM = 8; + const uint matrix_dimK = 8; + const uint matrix_layout = ML; + const bool matrix_is_transposed = (bool) MT; + const uint matrix_stride = 64; + + const uint bias_offset = 0; + const uint bias_interpretation = BI; + + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, + matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, bias_interpretation); + output_vector_buffer.Store(0, output_vector); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl new file mode 100644 index 0000000000..2ca2648503 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl @@ -0,0 +1,104 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types 
-DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 + +// COMMON: define void @main() + +// Test minimum support set of combinations for 
matVecMul +// HLOP-0: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) +// DXIL-0: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-1: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64) +// DXIL-1: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-2: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64) +// DXIL-2: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, i1 false) ; 
MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-3: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64) +// DXIL-3: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-4: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64) +// DXIL-4: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// Test unsigned variations +// HLOP-5: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) +// DXIL-5: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, 
%dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-6: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64) +// DXIL-6: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-7: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64) +// DXIL-7: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, 
+ SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + +[NumThreads(1,1,1)] +void main() +{ + vector output_vector; + static const uint is_output_unsigned = OU; + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = IU; + const uint input_interpretation = II; + + const uint matrix_offset = 0; + const uint matrix_interpretation = MI; + const uint matrix_dimM = 8; + const uint matrix_dimK = 8; + const uint matrix_layout = ML; + const bool matrix_is_transposed = (bool) MT; + const uint matrix_stride = 64; + + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, + matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl new file mode 100644 index 0000000000..40bbe62284 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl @@ -0,0 +1,70 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=RowMajor | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 
-DML=RowMajor -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer input_vector_buffer2; +RWByteAddressBuffer matrix_buffer; + +// COMMON: define void @main() +// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64) +// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64) +// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, 
<8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64) + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + + +[Numthreads(1,1,1)] +void main() +{ + vector input_vector1 = input_vector_buffer.Load >(0); + vector input_vector2 = input_vector_buffer2.Load >(0); + + const uint matrix_interpretation = MI; + const uint matrix_layout = ML; + const uint matrix_offset = 0; + const uint matrix_stride = 64; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl new file mode 100644 index 0000000000..dc1bb6c563 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -T cs_6_9 %s | FileCheck %s + +RWByteAddressBuffer matrix_buffer; + +// Test use of __builtin_VectorAccumulate in compute shader +// CHECK: define void @main() +// CHECK: call void @dx.op.vectorAccumulate.v2i32(i32 {{[0-9]+}}, <2 x i32> , %dx.types.Handle {{%[0-9]+}}, i32 0) + +[NumThreads(1,1,1)] +void main() +{ + vector input_vector1 = 5; + const uint matrix_offset = 0; + + __builtin_VectorAccumulate(input_vector1, matrix_buffer, matrix_offset); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll new file mode 100644 index 
0000000000..6623f63031 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll @@ -0,0 +1,189 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?input_vector_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?opa_input_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?matrix_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?bias_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?output_vector_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; Function Attrs: nounwind +define void @cs_main() #0 { +entry: + ;CHECK-DAG: %[[MLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + ;CHECK-DAG: %[[BLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A" + ;CHECK-DAG: %[[RWMLD0:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + %output_vector = alloca <4 x float>, align 4 + %tmp = bitcast <4 x float>* %output_vector to i8*, !dbg !21 ; line:14 col:5 + call void @llvm.lifetime.start(i64 16, i8* %tmp) #0, !dbg !21 ; line:14 col:5 + %tmp1 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !dbg !25 ; line:17 col:37 + %tmp2 = call %dx.types.Handle 
@"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp1), !dbg !25 ; line:17 col:37 + %tmp3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !25 ; line:17 col:37 + %tmp4 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp3, i32 0), !dbg !25 ; line:17 col:37 + %tmp5 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !26 ; line:33 col:5 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp5), !dbg !26 ; line:33 col:5 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !26 ; line:33 col:5 + + ;CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]] + ;CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH0]] + ;CHECK: call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, i1 false) + call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp7, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64), !dbg !26 ; line:33 col:5 + + %tmp8 = load <4 x 
float>, <4 x float>* %output_vector, align 4, !dbg !27, !tbaa !28 ; line:37 col:35 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !31 ; line:37 col:5 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9), !dbg !31 ; line:37 col:5 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !31 ; line:37 col:5 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp11, i32 0, <4 x float> %tmp8), !dbg !31 ; line:37 col:5 + %tmp12 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5 + %tmp13 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp12), !dbg !32 ; line:49 col:5 + %tmp14 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp13, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5 + %tmp15 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5 + %tmp16 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp15), !dbg !32 ; line:49 col:5 + %tmp17 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle 
%tmp16, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5 + + ;CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]] + ;CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH1]] + ;CHECK: %[[BCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[BLD]] + ;CHECK: %[[BAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[BCH1]] + ;CHECK: call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9, i1 false) + call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp14, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %tmp17, i32 0, i32 9), !dbg !32 ; line:49 col:5 + + %tmp18 = load <4 x float>, <4 x float>* %output_vector, align 4, !dbg !33, !tbaa !28 ; line:54 col:38 + %tmp19 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !34 ; line:54 col:5 + %tmp20 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp19), !dbg !34 ; line:54 col:5 + %tmp21 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp20, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !34 ; 
line:54 col:5 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp21, i32 1024, <4 x float> %tmp18), !dbg !34 ; line:54 col:5 + %tmp22 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !35 ; line:56 col:37 + %tmp23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp22), !dbg !35 ; line:56 col:37 + %tmp24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !35 ; line:56 col:37 + %tmp25 = call <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp24, i32 0), !dbg !35 ; line:56 col:37 + %tmp26 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !36 ; line:57 col:37 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp26), !dbg !36 ; line:57 col:37 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !36 ; line:57 col:37 + %tmp29 = call <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp28, i32 128), !dbg !36 ; line:57 col:37 + %tmp30 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !37 ; line:67 col:5 + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, 
%struct.RWByteAddressBuffer %tmp30), !dbg !37 ; line:67 col:5 + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp31, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !37 ; line:67 col:5 + + ;CHECK: %[[RWMCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]] + ;CHECK: %[[RWMAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH0]] + ;CHECK: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH0]], i32 0, i32 5, i32 3, i32 64) + call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %tmp25, <8 x i32> %tmp29, %dx.types.Handle %tmp32, i32 0, i32 5, i32 3, i32 64), !dbg !37 ; line:67 col:5 + + + %tmp33 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !38 ; line:77 col:5 + %tmp34 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp33), !dbg !38 ; line:77 col:5 + %tmp35 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp34, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !38 ; line:77 col:5 + + ;CHECK: %[[RWMCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]] + ;CHECK: %[[RWMAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH1]] + ;CHECK: call void 
@dx.op.vectorAccumulate.v8i32(i32 308, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH1]], i32 0) + call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %tmp25, %dx.types.Handle %tmp35, i32 0), !dbg !38 ; line:77 col:5 + + %tmp36 = bitcast <4 x float>* %output_vector to i8*, !dbg !39 ; line:79 col:1 + call void @llvm.lifetime.end(i64 16, i8* %tmp36) #0, !dbg !39 ; line:79 col:1 + ret void, !dbg !39 ; line:79 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readonly +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32, %struct.ByteAddressBuffer) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer) #2 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, 
%dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #2 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32) #0 + +; Function Attrs: nounwind readonly +declare <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32, <8 x i32>, %dx.types.Handle, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4} +!dx.entryPoints = !{!8} +!dx.fnprops = !{!18} +!dx.options = !{!19, !20} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"cs", i32 6, i32 9} +!4 = !{i32 1, void ()* @cs_main, !5} +!5 = !{!6} +!6 = !{i32 1, !7, !7} +!7 = !{} +!8 = !{void ()* @cs_main, !"cs_main", null, !9, null} +!9 = !{!10, !15, null, null} +!10 = !{!11, !12, !13, !14} +!11 = !{i32 0, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !"input_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!12 = !{i32 1, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !"opa_input_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!13 = !{i32 2, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !"matrix_buffer", i32 -1, i32 -1, i32 1, 
i32 11, i32 0, null} +!14 = !{i32 3, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !"bias_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!15 = !{!16, !17} +!16 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !"rw_matrix_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!17 = !{i32 1, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !"output_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!18 = !{void ()* @cs_main, i32 5, i32 1, i32 1, i32 1} +!19 = !{i32 -2147483584} +!20 = !{i32 -1} +!21 = !DILocation(line: 14, column: 5, scope: !22) +!22 = !DISubprogram(name: "cs_main", scope: !23, file: !23, line: 12, type: !24, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @cs_main) +!23 = !DIFile(filename: "DirectXShaderCompiler\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cintrinsics\5Clinalg_builtins\5Clinalg-builtins.hlsl", directory: "") +!24 = !DISubroutineType(types: !7) +!25 = !DILocation(line: 17, column: 37, scope: !22) +!26 = !DILocation(line: 33, column: 5, scope: !22) +!27 = !DILocation(line: 37, column: 35, scope: !22) +!28 = !{!29, !29, i64 0} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !DILocation(line: 37, column: 5, scope: !22) +!32 = !DILocation(line: 49, column: 5, scope: !22) +!33 = !DILocation(line: 54, column: 38, scope: !22) +!34 = !DILocation(line: 54, column: 5, scope: !22) +!35 = !DILocation(line: 56, column: 37, scope: !22) +!36 = !DILocation(line: 57, column: 37, scope: !22) +!37 = !DILocation(line: 67, column: 5, scope: !22) +!38 = !DILocation(line: 77, column: 5, scope: !22) +!39 = !DILocation(line: 79, column: 1, scope: !22) diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl new file 
mode 100644 index 0000000000..d5e251ae8b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl @@ -0,0 +1,59 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; + +[Shader("compute")] +[Numthreads(1,1,1)] +void cs_main() +{ + vector output_vector; + static const uint is_output_unsigned = 0; + + vector input_vector; + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + //expected-error@+1{{intrinsic __builtin_MatVecMul potentially used by 'cs_main' requires shader model 6.9 or greater}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + //expected-error@+1{{intrinsic __builtin_MatVecMulAdd potentially used by 'cs_main' requires shader model 6.9 or greater}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + + vector input_vector1; + vector input_vector2; + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 64; + + //expected-error@+1{{intrinsic __builtin_OuterProductAccumulate potentially used by 
'cs_main' requires shader model 6.9 or greater}} + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + //expected-error@+1{{intrinsic __builtin_VectorAccumulate potentially used by 'cs_main' requires shader model 6.9 or greater}} + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); +} \ No newline at end of file diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index f1274fd308..c394611302 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -383,6 +383,14 @@ void [[]] Barrier(in NodeRecordOrUAV o, in uint SemanticFlags); uint [[]] GetRemainingRecursionLevels(); +void [[]] __builtin_MatVecMul(out numeric OutputVector, in bool OutputIsUnsigned, in numeric InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride); + +void [[]] __builtin_MatVecMulAdd(out numeric OutputVector, in bool OutputIsUnsigned, in numeric InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride, in ByteAddressBuffer BiasVector, in uint BiasOffset, in uint BiasInterpretation); + +void [[]] __builtin_OuterProductAccumulate(in numeric InputVector1, in numeric InputVector2, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint MatrixLayout, in uint MatrixStride); + +void [[]] __builtin_VectorAccumulate(in numeric InputVector, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset); + } namespace diff --git a/utils/hct/hctdb.py 
b/utils/hct/hctdb.py index b1460de9b8..57f2574005 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -873,6 +873,11 @@ def populate_categories_and_models(self): "library", "raygeneration", ) + for i in ( + "MatVecMul,MatVecMulAdd,OuterProductAccumulate,VectorAccumulate" + ).split(","): + self.name_idx[i].category = "Linear Algebra Operations" + self.name_idx[i].shader_model = 6, 9 def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. @@ -6340,6 +6345,103 @@ def UFI(name, **mappings): ) next_op_idx += 1 + self.add_dxil_op( + "MatVecMul", + next_op_idx, + "MatVecMul", + "Multiplies a MxK dimension matrix and a K sized input vector", + " Date: Tue, 29 Apr 2025 09:35:06 -0400 Subject: [PATCH 14/93] [SPIRV] Refactor OpExecutionModeId (#7378) The current implementation of OpExecutionModeId assumes that the operands must be OpConstants. However, they could be the id of a spec constant. The first step in allowing OpExecutionModeId is to modify the internal representation of the instruction to hold general spirv instructions. Part of https://github.com/microsoft/DirectXShaderCompiler/issues/5960 and https://github.com/microsoft/DirectXShaderCompiler/issues/3092. 
--- .../clang/include/clang/SPIRV/SpirvBuilder.h | 48 +++++++++++++--- .../include/clang/SPIRV/SpirvInstruction.h | 55 ++++++++++++++++--- tools/clang/include/clang/SPIRV/SpirvModule.h | 8 +-- .../clang/include/clang/SPIRV/SpirvVisitor.h | 2 +- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 2 +- tools/clang/lib/SPIRV/CapabilityVisitor.h | 2 +- tools/clang/lib/SPIRV/EmitVisitor.cpp | 19 ++++--- tools/clang/lib/SPIRV/EmitVisitor.h | 2 +- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 52 ++++++++++++++---- tools/clang/lib/SPIRV/SpirvEmitter.h | 5 +- tools/clang/lib/SPIRV/SpirvInstruction.cpp | 19 ++++--- tools/clang/lib/SPIRV/SpirvModule.cpp | 9 +-- .../spv.intrinsicExecutionModeId.hlsl | 6 +- 13 files changed, 171 insertions(+), 58 deletions(-) diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 5e03d1ef96..e4e6ef308f 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -615,8 +615,15 @@ class SpirvBuilder { inline SpirvInstruction *addExecutionMode(SpirvFunction *entryPoint, spv::ExecutionMode em, llvm::ArrayRef params, - SourceLocation, - bool useIdParams = false); + SourceLocation); + + /// \brief Adds an execution mode to the module under construction if it does + /// not already exist. Return the newly added instruction or the existing + /// instruction, if one already exists. + inline SpirvInstruction * + addExecutionModeId(SpirvFunction *entryPoint, spv::ExecutionMode em, + llvm::ArrayRef params, + SourceLocation loc); /// \brief Adds an OpModuleProcessed instruction to the module under /// construction. 
@@ -963,17 +970,44 @@ SpirvBuilder::setDebugSource(uint32_t major, uint32_t minor, SpirvInstruction * SpirvBuilder::addExecutionMode(SpirvFunction *entryPoint, spv::ExecutionMode em, llvm::ArrayRef params, - SourceLocation loc, bool useIdParams) { + SourceLocation loc) { SpirvExecutionMode *mode = nullptr; - SpirvExecutionMode *existingInstruction = + SpirvExecutionModeBase *existingInstruction = mod->findExecutionMode(entryPoint, em); if (!existingInstruction) { - mode = new (context) - SpirvExecutionMode(loc, entryPoint, em, params, useIdParams); + mode = new (context) SpirvExecutionMode(loc, entryPoint, em, params); + mod->addExecutionMode(mode); + } else { + // No execution mode can be used with both OpExecutionMode and + // OpExecutionModeId. If this assert is triggered, then either this + // `addExecutionModeId` should have been called with `em` or the existing + // instruction is wrong. + assert(existingInstruction->getKind() == + SpirvInstruction::IK_ExecutionMode); + mode = cast(existingInstruction); + } + + return mode; +} + +SpirvInstruction *SpirvBuilder::addExecutionModeId( + SpirvFunction *entryPoint, spv::ExecutionMode em, + llvm::ArrayRef params, SourceLocation loc) { + SpirvExecutionModeId *mode = nullptr; + SpirvExecutionModeBase *existingInstruction = + mod->findExecutionMode(entryPoint, em); + if (!existingInstruction) { + mode = new (context) SpirvExecutionModeId(loc, entryPoint, em, params); mod->addExecutionMode(mode); } else { - mode = existingInstruction; + // No execution mode can be used with both OpExecutionMode and + // OpExecutionModeId. If this assert is triggered, then either this + // `addExecutionMode` should have been called with `em` or the existing + // instruction is wrong. 
+ assert(existingInstruction->getKind() == + SpirvInstruction::IK_ExecutionModeId); + mode = cast(existingInstruction); } return mode; diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index f49a295610..6d95459373 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -57,6 +57,7 @@ class SpirvInstruction { IK_MemoryModel, // OpMemoryModel IK_EntryPoint, // OpEntryPoint IK_ExecutionMode, // OpExecutionMode + IK_ExecutionModeId, // OpExecutionModeId IK_String, // OpString (debug) IK_Source, // OpSource (debug) IK_ModuleProcessed, // OpModuleProcessed (debug) @@ -404,12 +405,34 @@ class SpirvEntryPoint : public SpirvInstruction { llvm::SmallVector interfaceVec; }; +class SpirvExecutionModeBase : public SpirvInstruction { +public: + SpirvExecutionModeBase(Kind kind, spv::Op opcode, SourceLocation loc, + SpirvFunction *entryPointFunction, + spv::ExecutionMode executionMode) + : SpirvInstruction(kind, opcode, QualType(), loc), + entryPoint(entryPointFunction), execMode(executionMode) {} + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionModeBase) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { return false; } + + bool invokeVisitor(Visitor *v) override; + + SpirvFunction *getEntryPoint() const { return entryPoint; } + spv::ExecutionMode getExecutionMode() const { return execMode; } + +private: + SpirvFunction *entryPoint; + spv::ExecutionMode execMode; +}; + /// \brief OpExecutionMode and OpExecutionModeId instructions -class SpirvExecutionMode : public SpirvInstruction { +class SpirvExecutionMode : public SpirvExecutionModeBase { public: SpirvExecutionMode(SourceLocation loc, SpirvFunction *entryPointFunction, - spv::ExecutionMode, llvm::ArrayRef params, - bool usesIdParams); + spv::ExecutionMode, llvm::ArrayRef params); DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionMode) @@ -420,16 +443,34 @@ class 
SpirvExecutionMode : public SpirvInstruction { bool invokeVisitor(Visitor *v) override; - SpirvFunction *getEntryPoint() const { return entryPoint; } - spv::ExecutionMode getExecutionMode() const { return execMode; } llvm::ArrayRef getParams() const { return params; } private: - SpirvFunction *entryPoint; - spv::ExecutionMode execMode; llvm::SmallVector params; }; +/// \brief OpExecutionModeId +class SpirvExecutionModeId : public SpirvExecutionModeBase { +public: + SpirvExecutionModeId(SourceLocation loc, SpirvFunction *entryPointFunction, + spv::ExecutionMode em, + llvm::ArrayRef params); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionModeId) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ExecutionModeId; + } + + bool invokeVisitor(Visitor *v) override; + + llvm::ArrayRef getParams() const { return params; } + +private: + llvm::SmallVector params; +}; + /// \brief OpString instruction class SpirvString : public SpirvInstruction { public: diff --git a/tools/clang/include/clang/SPIRV/SpirvModule.h b/tools/clang/include/clang/SPIRV/SpirvModule.h index 298c06d65e..9ab0c296b8 100644 --- a/tools/clang/include/clang/SPIRV/SpirvModule.h +++ b/tools/clang/include/clang/SPIRV/SpirvModule.h @@ -119,11 +119,11 @@ class SpirvModule { // Returns an existing execution mode instruction that is the same as em if it // exists. Return nullptr otherwise. - SpirvExecutionMode *findExecutionMode(SpirvFunction *entryPoint, - spv::ExecutionMode em); + SpirvExecutionModeBase *findExecutionMode(SpirvFunction *entryPoint, + spv::ExecutionMode em); // Adds an execution mode to the module. - void addExecutionMode(SpirvExecutionMode *); + void addExecutionMode(SpirvExecutionModeBase *em); // Adds an extension to the module. Returns true if the extension was added. // Returns false otherwise (e.g. if the extension already existed). 
@@ -194,7 +194,7 @@ class SpirvModule { llvm::SmallVector extInstSets; SpirvMemoryModel *memoryModel; llvm::SmallVector entryPoints; - llvm::SmallVector executionModes; + llvm::SmallVector executionModes; llvm::SmallVector constStrings; std::vector sources; std::vector moduleProcesses; diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 93682518a1..95bc46aa5f 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -64,7 +64,7 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvExtInstImport) DEFINE_VISIT_METHOD(SpirvMemoryModel) DEFINE_VISIT_METHOD(SpirvEntryPoint) - DEFINE_VISIT_METHOD(SpirvExecutionMode) + DEFINE_VISIT_METHOD(SpirvExecutionModeBase) DEFINE_VISIT_METHOD(SpirvString) DEFINE_VISIT_METHOD(SpirvSource) DEFINE_VISIT_METHOD(SpirvModuleProcessed) diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 24dfdc2e9a..9ca9cbc6cd 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -639,7 +639,7 @@ bool CapabilityVisitor::visit(SpirvEntryPoint *entryPoint) { return true; } -bool CapabilityVisitor::visit(SpirvExecutionMode *execMode) { +bool CapabilityVisitor::visit(SpirvExecutionModeBase *execMode) { spv::ExecutionMode executionMode = execMode->getExecutionMode(); SourceLocation execModeSourceLocation = execMode->getSourceLocation(); SourceLocation entryPointSourceLocation = diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.h b/tools/clang/lib/SPIRV/CapabilityVisitor.h index 95db110cce..35d4b5a18b 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.h +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.h @@ -31,7 +31,7 @@ class CapabilityVisitor : public Visitor { bool visit(SpirvDecoration *decor) override; bool visit(SpirvEntryPoint *) override; - bool visit(SpirvExecutionMode *) override; + bool visit(SpirvExecutionModeBase *execMode) 
override; bool visit(SpirvImageQuery *) override; bool visit(SpirvImageOp *) override; bool visit(SpirvImageSparseTexelsResident *) override; diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index eb00f59632..2a3ffd82f4 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -617,19 +617,20 @@ bool EmitVisitor::visit(SpirvEntryPoint *inst) { return true; } -bool EmitVisitor::visit(SpirvExecutionMode *inst) { +bool EmitVisitor::visit(SpirvExecutionModeBase *inst) { initInstruction(inst); curInst.push_back(getOrAssignResultId(inst->getEntryPoint())); curInst.push_back(static_cast(inst->getExecutionMode())); - if (inst->getopcode() == spv::Op::OpExecutionMode) { - curInst.insert(curInst.end(), inst->getParams().begin(), - inst->getParams().end()); - } else { - for (uint32_t param : inst->getParams()) { - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, param), context.getUIntType(32), - /*isSpecConst */ false)); + if (auto *exeModeId = dyn_cast(inst)) { + for (SpirvInstruction *param : exeModeId->getParams()) { + if (auto *ConstantInst = dyn_cast(param)) + typeHandler.getOrCreateConstant(ConstantInst); + curInst.push_back(getOrAssignResultId(param)); } + } else { + auto *exeMode = llvm::cast(inst); + ArrayRef params = exeMode->getParams(); + curInst.insert(curInst.end(), params.begin(), params.end()); } finalizeInstruction(&preambleBinary); return true; diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 1f9b0939e6..bfa0710998 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -233,7 +233,7 @@ class EmitVisitor : public Visitor { bool visit(SpirvEmitVertex *) override; bool visit(SpirvEndPrimitive *) override; bool visit(SpirvEntryPoint *) override; - bool visit(SpirvExecutionMode *) override; + bool visit(SpirvExecutionModeBase *) override; bool visit(SpirvString *) override; 
bool visit(SpirvSource *) override; bool visit(SpirvModuleProcessed *) override; diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index cd5f860555..7337a33b01 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -1146,8 +1146,9 @@ void SpirvEmitter::doStmt(const Stmt *stmt, // All cases for expressions used as statements SpirvInstruction *result = doExpr(expr); - if (result && result->getKind() == SpirvInstruction::IK_ExecutionMode && - !attrs.empty()) { + if (result && !attrs.empty() && + (result->getKind() == SpirvInstruction::IK_ExecutionMode || + result->getKind() == SpirvInstruction::IK_ExecutionModeId)) { // Handle [[vk::ext_capability(..)]] and [[vk::ext_extension(..)]] // attributes for vk::ext_execution_mode[_id](..). createSpirvIntrInstExt( @@ -9161,10 +9162,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { retVal = processRawBufferStore(callExpr); break; case hlsl::IntrinsicOp::IOP_Vkext_execution_mode: - retVal = processIntrinsicExecutionMode(callExpr, false); + retVal = processIntrinsicExecutionMode(callExpr); break; case hlsl::IntrinsicOp::IOP_Vkext_execution_mode_id: - retVal = processIntrinsicExecutionMode(callExpr, true); + retVal = processIntrinsicExecutionModeId(callExpr); break; case hlsl::IntrinsicOp::IOP_saturate: retVal = processIntrinsicSaturate(callExpr); @@ -15120,8 +15121,7 @@ SpirvEmitter::processCooperativeMatrixGetLength(const CallExpr *call) { } SpirvInstruction * -SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr, - bool useIdParams) { +SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr) { llvm::SmallVector execModesParams; uint32_t exeMode = 0; const auto args = expr->getArgs(); @@ -15145,9 +15145,38 @@ SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr, assert(entryFunction != nullptr); assert(exeMode != 0); - return spvBuilder.addExecutionMode( - entryFunction, 
static_cast(exeMode), execModesParams, - expr->getExprLoc(), useIdParams); + return spvBuilder.addExecutionMode(entryFunction, + static_cast(exeMode), + execModesParams, expr->getExprLoc()); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicExecutionModeId(const CallExpr *expr) { + assert(expr->getNumArgs() > 0); + uint32_t exeMode = 0; + const Expr *modeExpr = expr->getArg(0); + Expr::EvalResult evalResult; + if (modeExpr->EvaluateAsRValue(evalResult, astContext) && + !evalResult.HasSideEffects && evalResult.Val.isInt()) { + exeMode = evalResult.Val.getInt().getZExtValue(); + } else { + emitError("The execution mode must be constant integer", + expr->getExprLoc()); + return nullptr; + } + + llvm::SmallVector execModesParams; + const auto args = expr->getArgs(); + for (uint32_t i = 1; i < expr->getNumArgs(); ++i) { + const Expr *argExpr = args[i]; + SpirvInstruction *argInst = doExpr(argExpr); + execModesParams.push_back(argInst); + } + + assert(entryFunction != nullptr); + return spvBuilder.addExecutionModeId(entryFunction, + static_cast(exeMode), + execModesParams, expr->getExprLoc()); } SpirvInstruction * @@ -15218,8 +15247,9 @@ bool SpirvEmitter::spirvToolsValidate(std::vector *mod, void SpirvEmitter::addDerivativeGroupExecutionMode() { assert(spvContext.isCS()); - SpirvExecutionMode *numThreadsEm = spvBuilder.getModule()->findExecutionMode( - entryFunction, spv::ExecutionMode::LocalSize); + SpirvExecutionMode *numThreadsEm = + cast(spvBuilder.getModule()->findExecutionMode( + entryFunction, spv::ExecutionMode::LocalSize)); auto numThreads = numThreadsEm->getParams(); // The layout of the quad is determined by the numer of threads in each diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 79d2c43c35..6c1e12989c 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -781,8 +781,9 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction 
*processCooperativeMatrixGetLength(const CallExpr *call); /// Process vk::ext_execution_mode intrinsic - SpirvInstruction *processIntrinsicExecutionMode(const CallExpr *expr, - bool useIdParams); + SpirvInstruction *processIntrinsicExecutionMode(const CallExpr *expr); + /// Process vk::ext_execution_mode_id intrinsic + SpirvInstruction *processIntrinsicExecutionModeId(const CallExpr *expr); /// Processes the 'firstbit{high|low}' intrinsic functions. SpirvInstruction *processIntrinsicFirstbit(const CallExpr *, diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index f41de03adc..3b5861710d 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -33,7 +33,9 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExtension) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExtInstImport) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvMemoryModel) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvEntryPoint) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionModeBase) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionMode) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionModeId) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvString) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvSource) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvModuleProcessed) @@ -207,13 +209,16 @@ SpirvEntryPoint::SpirvEntryPoint(SourceLocation loc, // OpExecutionMode and OpExecutionModeId instructions SpirvExecutionMode::SpirvExecutionMode(SourceLocation loc, SpirvFunction *entry, spv::ExecutionMode em, - llvm::ArrayRef paramsVec, - bool usesIdParams) - : SpirvInstruction(IK_ExecutionMode, - usesIdParams ? 
spv::Op::OpExecutionModeId - : spv::Op::OpExecutionMode, - QualType(), loc), - entryPoint(entry), execMode(em), + llvm::ArrayRef paramsVec) + : SpirvExecutionModeBase(IK_ExecutionMode, spv::Op::OpExecutionMode, loc, + entry, em), + params(paramsVec.begin(), paramsVec.end()) {} + +SpirvExecutionModeId::SpirvExecutionModeId( + SourceLocation loc, SpirvFunction *entry, spv::ExecutionMode em, + llvm::ArrayRef paramsVec) + : SpirvExecutionModeBase(IK_ExecutionModeId, spv::Op::OpExecutionModeId, + loc, entry, em), params(paramsVec.begin(), paramsVec.end()) {} SpirvString::SpirvString(SourceLocation loc, llvm::StringRef stringLiteral) diff --git a/tools/clang/lib/SPIRV/SpirvModule.cpp b/tools/clang/lib/SPIRV/SpirvModule.cpp index 9c6a826a5b..ed6aca7488 100644 --- a/tools/clang/lib/SPIRV/SpirvModule.cpp +++ b/tools/clang/lib/SPIRV/SpirvModule.cpp @@ -294,9 +294,10 @@ void SpirvModule::addEntryPoint(SpirvEntryPoint *ep) { entryPoints.push_back(ep); } -SpirvExecutionMode *SpirvModule::findExecutionMode(SpirvFunction *entryPoint, - spv::ExecutionMode em) { - for (SpirvExecutionMode *cem : executionModes) { +SpirvExecutionModeBase * +SpirvModule::findExecutionMode(SpirvFunction *entryPoint, + spv::ExecutionMode em) { + for (SpirvExecutionModeBase *cem : executionModes) { if (cem->getEntryPoint() != entryPoint) continue; if (cem->getExecutionMode() != em) @@ -306,7 +307,7 @@ SpirvExecutionMode *SpirvModule::findExecutionMode(SpirvFunction *entryPoint, return nullptr; } -void SpirvModule::addExecutionMode(SpirvExecutionMode *em) { +void SpirvModule::addExecutionMode(SpirvExecutionModeBase *em) { assert(em && "cannot add null execution mode"); executionModes.push_back(em); } diff --git a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl index 0d63662ef8..beb0e23a95 100644 --- a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl @@ -2,11 +2,11 @@ // CHECK: OpCapability ShaderClockKHR // CHECK: OpExtension "SPV_KHR_shader_clock" -// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeId %uint_8 %uint_8 %uint_8 -// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeHintId %uint_4 %uint_4 %uint_4 +// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeId %uint_8 %uint_6 %uint_8 +// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeHintId %int_4 %int_4 %int_4 int main() : SV_Target0 { - vk::ext_execution_mode_id(/*LocalSizeId*/38, 8, 8, 8); + vk::ext_execution_mode_id(/*LocalSizeId*/38, 8u, 6u, 8u); [[vk::ext_capability(5055)]] [[vk::ext_extension("SPV_KHR_shader_clock")]] From 75220978aab87a3d483ce6aadaeb87d7024e20e5 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 29 Apr 2025 10:58:12 -0700 Subject: [PATCH 15/93] [CoopVec] Add Linear Algebra common header with tests (#7350) (#7388) This PR introduces the linear algebra header file, and places it in a location that is by default included in all HLSL compilation. The builtins in the API aren't yet defined, and depend on the #7290 PR merging first. The tests that have been added have temporary diagnostic messages while 7290 is in progress. They will need to be updated. Open to feedback on better / suggested error messages, or whether there shouldn't be any sema-level validation for these errors. 
Fixes [#7304](https://github.com/microsoft/DirectXShaderCompiler/issues/7304) Cherrypick of https://github.com/microsoft/DirectXShaderCompiler/pull/7350 Co-authored-by: github-actions[bot] --- tools/clang/lib/Headers/hlsl/dx/linalg.h | 182 ++++++++++++++++++ .../CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl | 40 ++++ .../hlsl/linalg/mat-vec-muladd.hlsl | 90 +++++++++ .../hlsl/linalg/outerproductaccumulate.hlsl | 16 ++ .../hlsl/linalg/vectoraccumulate.hlsl | 14 ++ .../hlsl/linalg/make-interp-vec-errors.hlsl | 33 ++++ .../hlsl/linalg/mat-vec-mul-errors.hlsl | 16 ++ .../linalg/mat-vec-mul-transpose-errors.hlsl | 30 +++ .../hlsl/linalg/mat-vec-muladd-errors.hlsl | 16 ++ .../linalg/outerproductaccumulate-errors.hlsl | 44 +++++ .../outerproductaccumulate-spirv-errors.hlsl | 19 ++ .../hlsl/linalg/vectoraccumulate-errors.hlsl | 16 ++ 12 files changed, 516 insertions(+) create mode 100644 tools/clang/lib/Headers/hlsl/dx/linalg.h create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h new file mode 100644 index 
0000000000..51e662bbc9 --- /dev/null +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -0,0 +1,182 @@ +// Header for linear algebra APIs. + +#if __spirv__ +#error "Cooperative vectors not (yet) supported for SPIRV" +#endif + +#if ((__SHADER_TARGET_MAJOR > 6) || \ + (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 9)) && \ + (__HLSL_VERSION >= 2021) + +namespace dx { +namespace linalg { + +// NOTE: can't be an enum class because we get this error: +// error: non-type template argument of type 'dx::linalg::DataType' is not +// an integral constant expression +// +enum DataType { + DATA_TYPE_SINT16 = 2, // ComponentType::I16 + DATA_TYPE_UINT16 = 3, // ComponentType::U16 + DATA_TYPE_SINT32 = 4, // ComponentType::I32 + DATA_TYPE_UINT32 = 5, // ComponentType::U32 + DATA_TYPE_FLOAT16 = 8, // ComponentType::F16 + DATA_TYPE_FLOAT32 = 9, // ComponentType::F32 + DATA_TYPE_SINT8_T4_PACKED = 17, // ComponentType::PackedS8x32 + DATA_TYPE_UINT8_T4_PACKED = 18, // ComponentType::PackedU8x32 + DATA_TYPE_UINT8 = 19, // ComponentType::U8 + DATA_TYPE_SINT8 = 20, // ComponentType::I8 + DATA_TYPE_FLOAT8_E4M3 = 21, // ComponentType::F8_E4M3 + // (1 sign, 4 exp, 3 mantissa bits) + DATA_TYPE_FLOAT8_E5M2 = 22, // ComponentType::F8_E5M2 + // (1 sign, 5 exp, 2 mantissa bits) +}; + +enum MatrixLayout { + MATRIX_LAYOUT_ROW_MAJOR = 0, + MATRIX_LAYOUT_COLUMN_MAJOR = 1, + MATRIX_LAYOUT_MUL_OPTIMAL = 2, + MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = 3 +}; + +// +// Helper for signedness +// +namespace details { +template bool IsUnsigned() { return false; } + +#ifdef __HLSL_ENABLE_16_BIT +template <> bool IsUnsigned() { return true; } +#endif + +template <> bool IsUnsigned() { return true; } +template <> bool IsUnsigned() { return true; } +} // namespace details + +// +// (RW)MatrixRef +// + +template +struct MatrixRefImpl { + BufferTy Buffer; + uint StartOffset; + uint Stride; +}; + +template +using MatrixRef = MatrixRefImpl; + +template +using RWMatrixRef = MatrixRefImpl; + +// +// (RW)VectorRef 
+// + +template struct VectorRefImpl { + BufferTy Buffer; + uint StartOffset; +}; + +template using VectorRef = VectorRefImpl; + +template +using RWVectorRef = VectorRefImpl; + +// +// Vector +// + +template struct InterpretedVector { + vector Data; +}; + +template +InterpretedVector MakeInterpretedVector(vector Vec) { + InterpretedVector IV = {Vec}; + return IV; +} + +// +// Mul +// + +template +vector +Mul(MatrixRefImpl + Matrix, + InterpretedVector InputVector) { + + vector OutputVector; + + __builtin_MatVecMul( + /*out*/ OutputVector, details::IsUnsigned(), InputVector.Data, + details::IsUnsigned(), InputDT, Matrix.Buffer, + Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, MatrixLayout, + MatrixTranspose, Matrix.Stride); + + return OutputVector; +} + +// +// MulAdd +// + +template +vector +MulAdd(MatrixRefImpl + Matrix, + InterpretedVector InputVector, + VectorRefImpl BiasVector) { + + vector OutputVector; + + __builtin_MatVecMulAdd( + /*out*/ OutputVector, details::IsUnsigned(), InputVector.Data, + details::IsUnsigned(), InputDT, Matrix.Buffer, + Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, MatrixLayout, + MatrixTranspose, Matrix.Stride, BiasVector.Buffer, BiasVector.StartOffset, + BiasVectorDT); + + return OutputVector; +} + +// +// OuterProductAccumulate +// + +template +void OuterProductAccumulate( + vector InputVector1, vector InputVector2, + RWMatrixRef Matrix) { + __builtin_OuterProductAccumulate(InputVector1, InputVector2, Matrix.Buffer, + Matrix.StartOffset, MatrixDT, MatrixLayout, + Matrix.Stride); +} + +// +// VectorAccumulate +// + +template +void VectorAccumulate(vector InputVector, + RWByteAddressBuffer Buffer, uint Offset) { + __builtin_VectorAccumulate(InputVector, Buffer, Offset); +} + +} // namespace linalg +} // namespace dx + +#endif // SM 6.9 check and HV version check diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl new file mode 100644 index 
0000000000..141801c71c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl @@ -0,0 +1,40 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s | FileCheck %s + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, i1 false) + return Mul( + Matrix, MakeInterpretedVector(Input)); +} + +export vector Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // note the stride argument is dropped. + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6f32(i32 305, <6 x float> %{{.+}}, i1 false, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 2, i1 false, i32 0, i1 false) + return Mul(Matrix, + MakeInterpretedVector(Input)); +} + +// test that "stride" isn't ignored in non-optimal layouts +export vector Test3(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6f32(i32 305, <6 x float> %{{.+}}, i1 false, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 false) + return Mul(Matrix, + MakeInterpretedVector(Input)); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl new file mode 100644 index 0000000000..c19e601904 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl @@ -0,0 +1,90 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = {Buf, + 0, 0}; + VectorRef biasVector = {Buf, 256}; + + 
InterpretedVector theVector = {input}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 false, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, theVector, + biasVector); +} + +export float4 Test2(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = { + Buf, 0, 0}; + VectorRef biasVector = {Buf, 256}; + + InterpretedVector theVector = {input}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, theVector, + biasVector); +} + +export float4 Test3(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = { + Buf, 0, 0}; + VectorRef biasVector = {Buf, 256}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, MakeInterpretedVector(input), + biasVector); +} + +namespace ProposalExample { + +ByteAddressBuffer model; + +vector ApplyNeuralMaterial(vector inputVector) { + using namespace dx::linalg; + + MatrixRef matrix0 = { + model, 0, 0}; + + VectorRef biasVector0 = {model, 1024}; + + MatrixRef matrix1 = + {model, 2048, 0}; + + VectorRef biasVector1 = {model, 3072}; + + MatrixRef matrix2 = { + model, 4096, 0}; + + VectorRef biasVector2 = {model, 5120}; + + vector layer0 = MulAdd( + matrix0, MakeInterpretedVector(inputVector), + biasVector0); + layer0 = max(layer0, 0); + + vector layer1 = MulAdd( + matrix1, MakeInterpretedVector(layer0), + biasVector1); + layer1 = max(layer1, 0); + + vector output = MulAdd( + matrix2, 
MakeInterpretedVector(layer1), + biasVector2); + output = exp(output); + + return output; +} + +} // namespace ProposalExample diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl new file mode 100644 index 0000000000..eda15c66f6 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // CHECK: call void @dx.op.outerProductAccumulate.v128f16.v64f16(i32 307, <128 x half> %{{.+}}, <64 x half> %{{.+}}, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 3, i32 0) + + OuterProductAccumulate(Input1, Input2, matrix); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl new file mode 100644 index 0000000000..9157156f10 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test5(vector Input) { + using namespace dx::linalg; + + RWBuf.Store >(0, Input); + + // CHECK: call void @dx.op.vectorAccumulate.v128f32(i32 308, <128 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 0) + VectorAccumulate(Input, RWBuf, 0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl new file mode 100644 index 0000000000..9f2793d417 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include +ByteAddressBuffer Buf; + +export float4 Test1(vector Input) { + using 
namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} + // expected-note@dx/linalg.h:97{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + return Mul( + Matrix, MakeInterpretedVector<2>(Input)); +} + +enum DataType { + DATA_TYPE_InvalidType = 40 +}; + +export float4 Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} + // expected-note@dx/linalg.h:97{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + return Mul( + Matrix, MakeInterpretedVector(Input)); +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl new file mode 100644 index 0000000000..2d5a11e83e --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include + +ByteAddressBuffer Buf; + +vector MixUpVectorAndMatrixArguments(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+2{{no matching function for call to 'Mul'}} + // expected-note@dx/linalg.h:111{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + return Mul(MakeInterpretedVector(Input), Matrix); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl new file mode 100644 index 0000000000..2018acafab --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl @@ -0,0 +1,30 @@ +// XFAIL: * +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(vector 
Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // PREVIEW CHECK TODO: + // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}} + return Mul( + Matrix, MakeInterpretedVector(Input)); +} + +export vector Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // PREVIEW CHECK TODO: + // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}} + return Mul(Matrix, + MakeInterpretedVector(Input)); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl new file mode 100644 index 0000000000..f444f81c3a --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include + +ByteAddressBuffer Buf; + +vector MixUpVectorAndMatrixArguments(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+2{{no matching function for call to 'MulAdd'}} + // expected-note@dx/linalg.h:137{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + return MulAdd(MakeInterpretedVector(Input), Matrix, MakeInterpretedVector(Input)); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl new file mode 100644 index 0000000000..6f503b367b --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl @@ -0,0 +1,44 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +RWByteAddressBuffer RWBuf; + +// test for inputs of different size +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching function 
for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:161{{candidate template ignored: could not match 0 against 1}} + + OuterProductAccumulate(Input1, Input2, matrix); +} + +// now test for an error when element types differ +export void Test5(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:161{{candidate template ignored: could not match 0 against 1}} + + OuterProductAccumulate(Input1, Input2, matrix); +} + +// now test for an error when matrix transpose parameter is true +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:161{{candidate template ignored: deduced conflicting types for parameter 'ElTy' ('int' vs. 'unsigned int')}} + + OuterProductAccumulate(Input1, Input2, matrix); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl new file mode 100644 index 0000000000..0213103926 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types -spirv %s -verify + +// Tests that the header file cannot be included for spirv compilations +// This is a copy of \tools\clang\test\CodeGenDXIL\hlsl\linalg\outerproductaccumulate.hlsl +// except that spirv is targeted + +// expected-error@dx/linalg.h:4{{Cooperative vectors not (yet) supported for SPIRV}} +#include + +RWByteAddressBuffer RWBuf; + +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + OuterProductAccumulate(Input1, Input2, matrix); +} 
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl new file mode 100644 index 0000000000..4c8ae6f049 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl @@ -0,0 +1,16 @@ +// XFAIL: * +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test5(vector Input) { + using namespace dx::linalg; + + RWBuf.Store >(0, Input); + + // PREVIEW CHECK TODO: + // CHECK: Something about an error due to illegal conversions + VectorAccumulate(Input, RWBuf, 0); +} From 3bf630525da490d3c62d06e26922d00661822438 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 29 Apr 2025 23:44:59 +0200 Subject: [PATCH 16/93] [SER] Validate %dx.types.HitObject as legal type same as handle (#7390) Validator did not recognize %dx.types.HitObject as an allowed type. This led to validation failures in -Od compiles where allocas, loads and stores remain in the generated DXIL: ``` dxc.exe -T lib_6_9 -Od \tools\clang\test\CodeGenDXIL\hlsl\intrinsics\maybereorder.hlsl error: validation errors error: Declaration '%dx.types.HitObject = type { i8* }' uses a reserved prefix. ``` Closes #7387 --- lib/DxilValidation/DxilValidation.cpp | 3 +++ .../hlsl/intrinsics/maybereorder_od.hlsl | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index c4448d1ec4..694ab43a7a 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2599,6 +2599,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, if (ValCtx.HandleTy == Ty) return true; hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + // Allow HitObject type. 
+ if (ST == HlslOP->GetHitObjectType()) + return true; if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); Result = false; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl new file mode 100644 index 0000000000..42dff9c52c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_9 -E main %s -Od | FileCheck %s --check-prefix DXIL + +// DXIL: %[[HOA:[^ ]+]] = alloca %dx.types.HitObject, align 4 +// DXIL-NEXT: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NEXT: store %dx.types.HitObject %[[NOP]], %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: %[[LD0:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[LD0]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: %[[LD1:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[LD1]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: %[[NOP2:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP2]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} From 0fd79eba6bb23f50ec21a7a7daeee3614bebe12b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 29 Apr 2025 15:25:25 -0700 Subject: [PATCH 17/93] Change default validation 
behavior (#7392) This PR changes the default validation behavior to use the internal validator by default. If no options are specified, the internal validator will be used, and if it fails, then compilation fails. The external validator can still be run but must be explicitly chosen. Specifying internal works just as before. There is plenty of testing and infrastructure that needs to be added to verify this change, but that needs to be added in a separate change. This change is step 1. Addresses https://github.com/microsoft/DirectXShaderCompiler/issues/7389 --- include/dxc/Support/HLSLOptions.h | 2 +- tools/clang/tools/dxcompiler/dxcutil.cpp | 24 +++++++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 56e95a1659..bad330747b 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -115,7 +115,7 @@ struct RewriterOpts { }; enum class ValidatorSelection : int { - Auto, // Try DXIL.dll; fallback to internal validator + Auto, // Force internal validator (even if DXIL.dll is present) Internal, // Force internal validator (even if DXIL.dll is present) External, // Use DXIL.dll, failing compilation if not available Invalid = -1 // Invalid diff --git a/tools/clang/tools/dxcompiler/dxcutil.cpp b/tools/clang/tools/dxcompiler/dxcutil.cpp index d3a531d4c6..ea3f72dcb4 100644 --- a/tools/clang/tools/dxcompiler/dxcutil.cpp +++ b/tools/clang/tools/dxcompiler/dxcutil.cpp @@ -49,6 +49,7 @@ HRESULT RunInternalValidator(IDxcValidator *pValidator, namespace { // AssembleToContainer helper functions. 
+// return true if the internal validator was used, false otherwise bool CreateValidator(CComPtr &pValidator, hlsl::options::ValidatorSelection SelectValidator = hlsl::options::ValidatorSelection::Auto) { @@ -56,16 +57,25 @@ bool CreateValidator(CComPtr &pValidator, SelectValidator == hlsl::options::ValidatorSelection::Internal; bool bExternal = SelectValidator == hlsl::options::ValidatorSelection::External; - if (!bInternal && DxilLibIsEnabled()) - DxilLibCreateInstance(CLSID_DxcValidator, &pValidator); + bool bAuto = SelectValidator == hlsl::options::ValidatorSelection::Auto; - bool bInternalValidator = false; - if (pValidator == nullptr) { - IFTBOOL(!bExternal, DXC_E_VALIDATOR_MISSING); + // default behavior uses internal validator, as well as + // explicitly specifying internal + if (bInternal || bAuto) { IFT(CreateDxcValidator(IID_PPV_ARGS(&pValidator))); - bInternalValidator = true; + return true; + } + + if (bExternal) { + // if external was explicitly specified, but no + // external validator could be found (no DXIL.dll), then error + IFTBOOL(DxilLibIsEnabled(), DXC_E_VALIDATOR_MISSING); + IFT(DxilLibCreateInstance(CLSID_DxcValidator, &pValidator)); + + return false; } - return bInternalValidator; + + return false; } } // namespace From 4d7c704e42921fa4a4b545963b3b353a6cdb7363 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 29 Apr 2025 17:04:49 -0700 Subject: [PATCH 18/93] Update version to 1.8.2505 (#7398) Update version to 1.8.2505 --- docs/ReleaseNotes.md | 5 ++--- utils/version/latest-release.json | 4 ++-- utils/version/version.inc | 6 +++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 637bd8dae8..5d7dfcb9f4 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -17,11 +17,10 @@ The included licenses apply to the following files: ## Changelog -### Upcoming Release - -Place release notes for the upcoming release below this line and remove this line upon naming this 
release. +### Version 1.8.2505 - Typed buffers (including ROV buffers) no longer accept types other than vectors and scalars. Any other types will produce descriptive errors. This removes support for appropriately sized matrices and structs. Though it worked in some contexts, code generated from such types was unreliable. +- By default, the internal validator will be used instead of searching externally for an existing DXIL.dll. ### Version 1.8.2502 diff --git a/utils/version/latest-release.json b/utils/version/latest-release.json index 3138ccd2b1..146acf0708 100644 --- a/utils/version/latest-release.json +++ b/utils/version/latest-release.json @@ -2,7 +2,7 @@ "version": { "major": "1", "minor": "8", - "rev": "2502" + "rev": "2505" }, - "sha": "070d0d5a2beacef9eeb51037a9b04665716fd6f3" + "sha": "0fd79eba6bb23f50ec21a7a7daeee3614bebe12b" } diff --git a/utils/version/version.inc b/utils/version/version.inc index 2577daa529..1d33b63ee2 100644 --- a/utils/version/version.inc +++ b/utils/version/version.inc @@ -18,7 +18,7 @@ #ifdef RC_VERSION_FIELD_3 #undef RC_VERSION_FIELD_3 #endif -#define RC_VERSION_FIELD_3 2502 +#define RC_VERSION_FIELD_3 2505 #ifdef RC_VERSION_FIELD_4 #undef RC_VERSION_FIELD_4 @@ -28,7 +28,7 @@ #ifdef RC_FILE_VERSION #undef RC_FILE_VERSION #endif -#define RC_FILE_VERSION "1.8.2502.0" +#define RC_FILE_VERSION "1.8.2505.0" #ifdef RC_FILE_DESCRIPTION #undef RC_FILE_DESCRIPTION @@ -49,7 +49,7 @@ #ifdef RC_PRODUCT_VERSION #undef RC_PRODUCT_VERSION #endif -#define RC_PRODUCT_VERSION "1.8.2502.0" +#define RC_PRODUCT_VERSION "1.8.2505.0" #ifdef HLSL_TOOL_NAME #undef HLSL_TOOL_NAME From b98c00f56818348ceb8da053794e44cb3245bafb Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Tue, 29 Apr 2025 19:44:51 -0700 Subject: [PATCH 19/93] Add upcoming release (#7399) Add upcoming release section to the release notes. 
--- docs/ReleaseNotes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 5d7dfcb9f4..7788c57726 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -17,6 +17,10 @@ The included licenses apply to the following files: ## Changelog +### Upcoming Release + +Place release notes for the upcoming release below this line and remove this line upon naming this release. + ### Version 1.8.2505 - Typed buffers (including ROV buffers) no longer accept types other than vectors and scalars. Any other types will produce descriptive errors. This removes support for appropriately sized matrices and structs. Though it worked in some contexts, code generated from such types was unreliable. From 6d67e4a0af4710d18515ccaf0153922c172d415d Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 30 Apr 2025 13:04:21 -0700 Subject: [PATCH 20/93] Move spirv file to appropriate dir with lit config settings (#7406) This test that enables spirv codegen needs to be in a directory that has a lit cfg file that specifies that spirv is required, and that a lack of spirv codegen means the test is unsupported. 
--- .../linalg/outerproductaccumulate-spirv-errors.hlsl | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tools/clang/test/{SemaHLSL/hlsl => CodeGenSPIRV}/linalg/outerproductaccumulate-spirv-errors.hlsl (100%) diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl b/tools/clang/test/CodeGenSPIRV/linalg/outerproductaccumulate-spirv-errors.hlsl similarity index 100% rename from tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-spirv-errors.hlsl rename to tools/clang/test/CodeGenSPIRV/linalg/outerproductaccumulate-spirv-errors.hlsl From 1a75a30f815309ae5bb4cf0211450751e8f753c1 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 1 May 2025 07:54:04 -0700 Subject: [PATCH 21/93] Don't pass through to DXIL.dll in DxcCreateInstance for CLSID_DxcValidator (#7409) This change removes the code meant to pass through to DXIL.dll when creating a CLSID_DxcValidator object with DxcCreateInstance. Since the internal validator is now the default, and it will sign shaders, there is no reason to pass through to DXIL.dll when using DxcCreateInstance on DxCompiler.dll. Testing will come with planned work to include the ability to globally override the default validator to external. 
--- tools/clang/tools/dxcompiler/dxcapi.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/clang/tools/dxcompiler/dxcapi.cpp b/tools/clang/tools/dxcompiler/dxcapi.cpp index a6a877cba4..ab2cf1f40e 100644 --- a/tools/clang/tools/dxcompiler/dxcapi.cpp +++ b/tools/clang/tools/dxcompiler/dxcapi.cpp @@ -87,11 +87,7 @@ static HRESULT ThreadMallocDxcCreateInstance(REFCLSID rclsid, REFIID riid, } else if (IsEqualCLSID(rclsid, CLSID_DxcUtils)) { hr = CreateDxcUtils(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcValidator)) { - if (DxilLibIsEnabled()) { - hr = DxilLibCreateInstance(rclsid, riid, (IUnknown **)ppv); - } else { - hr = CreateDxcValidator(riid, ppv); - } + hr = CreateDxcValidator(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcAssembler)) { hr = CreateDxcAssembler(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcOptimizer)) { From 11e28952e8b7ee7d20d7bcdb1577c781c08fbe51 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 1 May 2025 10:39:22 -0700 Subject: [PATCH 22/93] Add DXIL REQUIRES to certain tests that require a minimum validation version (#7408) Internal testing using older validators reveals some failures on tests that were intended to be run on newer validators. This PR changes the tests to require a minimum validation version to run. --- tools/clang/test/LitDXILValidation/load-store-validation.ll | 1 + tools/clang/test/LitDXILValidation/vector-validation.ll | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll index 34b2f6b602..16c64672bd 100644 --- a/tools/clang/test/LitDXILValidation/load-store-validation.ll +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -1,3 +1,4 @@ +; REQUIRES: dxil-1-9 ; RUN: not %dxv %s 2>&1 | FileCheck %s ; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll index 74e8116e88..b32ac0cd5c 100644 --- a/tools/clang/test/LitDXILValidation/vector-validation.ll +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -1,3 +1,4 @@ +; REQUIRES: dxil-1-9 ; RUN: not %dxv %s 2>&1 | FileCheck %s ; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 From a4f8cf9483ba4b07b498eb8add736dfbb1821404 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 2 May 2025 18:23:05 +0200 Subject: [PATCH 23/93] [SER] Validate HitObject_FromRayQuery[WithAttrs] (#7402) Validate: HitObject_FromRayQuery HitObject_FromRayQueryWithAttrs Rules: No `undef` inputs SER implementation tracker: #7214 --- lib/DxilValidation/DxilValidation.cpp | 11 +++ .../ser_hitobject_fromrayquery_failing.ll | 99 +++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 694ab43a7a..11dfb42a6c 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2287,6 +2287,17 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; } + // Shader Execution Reordering - from ray query + case DXIL::OpCode::HitObject_FromRayQuery: + case DXIL::OpCode::HitObject_FromRayQueryWithAttrs: { + for (unsigned i = 1; i < CI->getNumOperands(); ++i) { + Value *Arg = CI->getArgOperand(i); + if (isa(Arg)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } + break; + } + case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll new file 
mode 100644 index 0000000000..602ff99a55 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll @@ -0,0 +1,99 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud3 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud2 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 undef, %struct.CustomAttrs* nonnull %attra)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 undef, i32 16, %struct.CustomAttrs* nonnull %attra)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%ud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %ldh = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %attra = alloca %struct.CustomAttrs, align 4 + %rq = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %createh = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %ldh) ; CreateHandleForLib(Resource) + %annoth = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %createh, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %rq, %dx.types.Handle %annoth, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + + %ok = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %rq) ; HitObject_FromRayQuery(rayQueryHandle) + %ud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 undef) ; 
HitObject_FromRayQuery(rayQueryHandle) + + %attrsok = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 undef, i32 16, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud2 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 undef, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud3 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull undef) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = 
!{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} From 9536291dccd13728f9d54fbc8adbb3d8ba73f0dd Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 2 May 2025 19:38:54 +0200 Subject: [PATCH 24/93] [SER] Validate HitObject::TraceRay|Invoke (#7384) Validate: HitObject_Invoke HitObject_TraceRay Rules: No undef params Resource handle must be AS SER implementation tracker: #7214 --- lib/DxilValidation/DxilValidation.cpp | 15 +++ .../ser_hitobject_invoke_failing.ll | 58 +++++++++ .../ser_hitobject_trace_failing.ll | 114 ++++++++++++++++++ .../ser_hitobject_trace_invaliduav.ll | 108 +++++++++++++++++ 4 files changed, 295 insertions(+) create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll create mode 100644 tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 11dfb42a6c..bd69cdaf5d 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -2298,6 +2298,21 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, break; } + case 
DXIL::OpCode::HitObject_Invoke: { + if (isa(CI->getArgOperand(1))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + if (isa(CI->getArgOperand(2))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } break; + case DXIL::OpCode::HitObject_TraceRay: { + Value *Hdl = CI->getArgOperand( + DxilInst_HitObject_TraceRay::arg_accelerationStructure); + ValidateASHandle(CI, Hdl, ValCtx); + for (unsigned ArgIdx = 2; ArgIdx < CI->getNumArgOperands(); ++ArgIdx) + if (isa(CI->getArgOperand(ArgIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + DxilInst_HitObject_TraceRay HOTraceRay(CI); + } break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll new file mode 100644 index 0000000000..a6bdd49f72 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll @@ -0,0 +1,58 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at 'call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject undef, %struct.Payload* nonnull %pld)' in block '#0' of function '?main@@YAXXZ'. + +; CHECK-NEXT: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %pld = alloca %struct.Payload, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull %pld) ; HitObject_Invoke(hitObject,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject undef, %struct.Payload* nonnull %pld) ; HitObject_Invoke(hitObject,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull undef) ; HitObject_Invoke(hitObject,payload) + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.dxrPayloadAnnotations = !{!3} +!dx.entryPoints = !{!4, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!3 = !{i32 0, %struct.Payload undef, !8} +!4 = !{null, !"", null, null, !5} +!5 = !{i32 0, i64 0} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!7 = !{!10} +!8 = !{!11} +!9 = !{i32 8, i32 7, i32 5, !12} +!10 = !{i32 1, !13, !13} +!11 = !{i32 0, i32 8210} +!12 = !{i32 0} +!13 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll new file mode 100644 index 0000000000..eb0d2576b0 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll @@ -0,0 +1,114 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = 
"e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud16 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud15 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float undef, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud14 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float undef, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK-NEXT: note: at '%tud13 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float undef, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud12 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud11 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float undef, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud10 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float undef, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK-NEXT: note: at '%tud9 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float undef, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud8 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud7 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 undef, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud6 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 undef, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK-NEXT: note: at '%tud5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 undef, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud4 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 undef, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud3 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 undef, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: TraceRay should only use RTAccelerationStructure. 
+; CHECK-NEXT: note: at '%tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. + +; CHECK-NEXT: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + + %tok = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; 
HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud3 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 undef, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud4 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 undef, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 undef, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; 
HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud6 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 undef, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud7 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 undef, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud8 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; 
HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud9 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float undef, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud10 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float undef, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud11 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float undef, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud12 = call 
%dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud13 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float undef, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud14 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float undef, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud15 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 
6.000000e+00, float undef, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud16 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* undef) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 
= !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll new file mode 100644 index 0000000000..c4f3a918f8 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll @@ -0,0 +1,108 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; shader hash: b22988e7874179601860019e56fb877e +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; nonas_buf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 +@"\01?nonas_buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: TraceRay should only use RTAccelerationStructure. 
+; CHECK-NEXT: note: at '%invalid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %7, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?nonas_buf@@3URWByteAddressBuffer@@A", align 4 + %3 = alloca %struct.Payload, align 4 + %4 = bitcast %struct.Payload* %3 to i8* + call void @llvm.lifetime.start(i64 12, i8* %4) #0 + %5 = getelementptr inbounds %struct.Payload, %struct.Payload* %3, i32 0, i32 0 + store <3 x float> , <3 x float>* %5, align 4, !tbaa !20 + %6 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %2) ; CreateHandleForLib(Resource) + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %7, i32 0, i32 undef, float 1.100000e+01, float undef, float undef, float undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %8 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %9 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + + %valid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %9, i32 513, i32 1, i32 
2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + %invalid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %7, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + call void @llvm.lifetime.end(i64 12, i8* %4) #0 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone 
} +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!8} +!dx.dxrPayloadAnnotations = !{!12} +!dx.entryPoints = !{!15, !17} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?nonas_buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"nonas_buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{i32 0, %struct.Payload undef, !13} +!13 = !{!14} +!14 = !{i32 0, i32 8210} +!15 = !{null, !"", null, !2, !16} +!16 = !{i32 0, i64 8589934608} +!17 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !18} +!18 = !{i32 8, i32 7, i32 5, !19} +!19 = !{i32 0} +!20 = !{!21, !21, i64 0} +!21 = !{!"omnipotent char", !22, i64 0} +!22 = !{!"Simple C/C++ TBAA"} From 1198c30f05ed944873ca55e89970fae407e2aacc Mon Sep 17 00:00:00 2001 From: Russell Liu Date: Sat, 3 May 2025 15:00:29 +0800 Subject: [PATCH 25/93] [SPIRV] Fix constant value in function (#7415) DXC crashes when a SPIR-V intrinsic is used to create a constant value:
```fundamental fatal error: generated SPIR-V is invalid: Constant cannot appear in a function declaration %spirvIntrinsicType_42 = OpConstant %spirvIntrinsicType 42 ``` --- tools/clang/lib/SPIRV/EmitVisitor.cpp | 8 +++++++- .../CodeGenSPIRV/spv.intrinsicConstantValue.hlsl | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 2a3ffd82f4..f58160254a 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -2000,7 +2000,13 @@ bool EmitVisitor::visit(SpirvIntrinsicInstruction *inst) { } } - finalizeInstruction(&mainBinary); + auto opcode = static_cast(inst->getInstruction()); + if ((opcode == spv::Op::OpSpecConstant || opcode == spv::Op::OpConstant) && + !inst->getInstructionSet()) { + finalizeInstruction(&typeConstantBinary); + } else { + finalizeInstruction(&mainBinary); + } return true; } diff --git a/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl b/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl new file mode 100644 index 0000000000..a592863f1b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl @@ -0,0 +1,13 @@ +// RUN: %dxc -Od -T cs_6_8 -spirv -fcgl %s | FileCheck %s + +// CHECK: %spirvIntrinsicType = OpTypeInt 8 0 +using uint8_t [[vk::ext_capability(/* Int8 */ 39)]] = + vk::SpirvType >, + vk::Literal > >; + +[[vk::ext_instruction(/* OpConstant */ 43)]] uint8_t mkconsant([[vk::ext_literal]] int v); + +// CHECK: OpConstant %spirvIntrinsicType 42 +static const uint8_t K = mkconsant(42); + +[numthreads(1, 1, 1)] void main() {} From 6f17379e95b70943d8be4e52079509ac7c0cec02 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 6 May 2025 17:59:21 +0200 Subject: [PATCH 26/93] [SER] REORDER_SCOPE check-fail validation tests (#7428) Show 'REORDER_SCOPE' memory flag rejected pre DXIL 
1.9. REORDER_SCOPE validation logic already implemented in #7263 Move REORDER_SCOPE validation tests to `LitDXILValidation/` and adapt RUN lines as we are touching this (#7359). SER implementation tracker: #7214 --- .../ser_reorder_scope_sm68_failing.ll | 77 +++++++++++++++++++ .../ser_reorder_scope_sm69_passing.ll | 5 +- 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll rename tools/clang/test/{HLSLFileCheck/validation => LitDXILValidation}/ser_reorder_scope_sm69_passing.ll (96%) diff --git a/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll new file mode 100644 index 0000000000..cd93eca793 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll @@ -0,0 +1,77 @@ +; REQUIRES: dxil-1-8 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: Invalid semantic flags on DXIL operation 'BarrierByMemoryType' +; CHECK-NEXT: note: at 'call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8)' in block '#0' of function '?main@@YAXXZ'. 
+; CHECK-NEXT: Function: ?main@@YAXXZ: error: Invalid semantic flags on DXIL operation 'barrierByMemoryHandle' +; CHECK-NEXT: note: at 'call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Function uses features incompatible with the shader model. +; CHECK-NEXT: Validation failed. + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = 
!{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 8} +!1 = !{!"lib", i32 6, i32 8} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll similarity index 96% rename from tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll rename to tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll index cab9942b02..fa2733ef22 100644 --- a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll +++ b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll @@ -1,4 +1,7 @@ -; RUN: %dxilver 1.9 | %dxv %s +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. ; Buffer Definitions: ; From 556fc5b3307a0f9571a7d567116e01f8977c8d85 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 7 May 2025 01:06:46 -0700 Subject: [PATCH 27/93] [Internal] Remove dead fallthrough statement (#7433) This PR removes a dead fallthrough statement after a switch-statement's case that is terminated by a break; This needs to be removed, because the way linux builds dxc in our internal infrastructure, it errors when a dead fallthrough statement is detected. 
For context, here is the relevant error: ``` ScalarReplAggregatesHLSL.cpp:2822:9: error: fallthrough annotation in unreachable code [-Werror,-Wimplicit-fallthrough] LLVM_FALLTHROUGH; ^ ... DXC/include/llvm/Support/Compiler.h:224:26: note: expanded from macro 'LLVM_FALLTHROUGH' #define LLVM_FALLTHROUGH [[fallthrough]] ^ 1 error generated. ``` --- lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 20265af40a..8bd78dd9a6 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -2819,7 +2819,6 @@ void SROA_Helper::RewriteCall(CallInst *CI) { DeadInsts.push_back(CI); break; } - LLVM_FALLTHROUGH; default: // RayQuery this pointer replacement. if (OldVal->getType()->isPointerTy() && From 8df744951f4389c2d682faae1cb98a8475c74c04 Mon Sep 17 00:00:00 2001 From: Chris B Date: Wed, 7 May 2025 10:51:53 -0700 Subject: [PATCH 28/93] Pull format checking tools from `main` (#7403) This updates the clang-format-checker action workflow to pull the format checker and tools from `main` instead of from the PR. Note: This PR basically can't be tested pre-merge since the pre-merge check will use the version of the action in `main`. 
--- .github/workflows/clang-format-checker.yml | 45 ++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index d1887e4519..74b734a7c0 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -12,7 +12,7 @@ jobs: permissions: pull-requests: write steps: - - name: Fetch LLVM sources + - name: Fetch DirectXShaderCompiler sources uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} @@ -31,6 +31,20 @@ jobs: separator: "," skip_initial_fetch: true + # We need to pull the script from the main branch, so that we ensure + # we get the latest version of this script. + - name: Fetch code formatting utils + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: microsoft/DirectXShaderCompiler + ref: ${{ github.base_ref }} + sparse-checkout: | + utils/git/requirements_formatting.txt + utils/git/code-format-helper.py + utils/git/code-format-save-diff.py + sparse-checkout-cone-mode: false + path: code-format-tools + - name: "Listed files" env: LISTED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} @@ -48,10 +62,10 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'utils/git/requirements_formatting.txt' + cache-dependency-path: 'code-format-tools/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r utils/git/requirements_formatting.txt + run: pip install -r code-format-tools/utils/git/requirements_formatting.txt - name: Run code formatter id: formatter @@ -61,7 +75,7 @@ jobs: END_REV: ${{ github.event.pull_request.head.sha }} CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} run: | - python utils/git/code-format-helper.py \ + python code-format-tools/utils/git/code-format-helper.py \ --token ${{ 
secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ --start-rev $START_REV \ @@ -92,28 +106,37 @@ jobs: } catch (err) { core.setFailed(`Request failed with error ${err}`) } - - name: Fetch LLVM sources - uses: actions/checkout@v4 + + # We need to pull the script from the main branch, so that we ensure + # we get the latest version of this script. + - name: Fetch code formatting utils + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 - path: build/main_src + repository: microsoft/DirectXShaderCompiler + ref: ${{ github.base_ref }} + sparse-checkout: | + utils/git/requirements_formatting.txt + utils/git/code-format-helper.py + utils/git/code-format-save-diff.py + sparse-checkout-cone-mode: false + path: code-format-tools - name: Setup Python env uses: actions/setup-python@v4 with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'build/main_src/utils/git/requirements_formatting.txt' + cache-dependency-path: 'code-format-tools/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r build/main_src/utils/git/requirements_formatting.txt + run: pip install -r code-format-tools/utils/git/requirements_formatting.txt - name: Apply code diff env: GITHUB_PR_NUMBER: ${{ github.event.issue.number }} COMMENT_ID: ${{ github.event.comment.id }} run: | - python build/main_src/utils/git/code-format-save-diff.py \ + python code-format-tools/utils/git/code-format-save-diff.py \ --token ${{ secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ --tmp-diff-file $TMP_DIFF_FILE \ From 231d648af0f9bf9ecb9a61c591337ceca67c6cd3 Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 7 May 2025 13:34:07 -0700 Subject: [PATCH 29/93] [SM6.9] Disable native vec deriv ops and expand testing (#7432) Several intrinsics that were enabled for native vectors late got their testing removed as it expected scalarized forms. 
This adds tests for pow, modf, and abs in their native vector intrinsic forms. It removes native vector intrinsics for derivative operations as they require more scalarization removal due to their convergent markers and restores the scalarized testing for them. The 1024 size was removed from longvec-intrinsics as the verbose way that constant vectors are represented in the disassembly made the test take significantly longer. --- lib/DXIL/DxilOperations.cpp | 16 +++--- .../hlsl/types/longvec-intrinsics.hlsl | 50 ++++++++++++++++++- ...longvec-trivial-scalarized-intrinsics.hlsl | 7 +++ ...ongvec-trivial-unary-float-intrinsics.hlsl | 2 + utils/hct/hctdb.py | 8 +-- 5 files changed, 70 insertions(+), 13 deletions(-) diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index 95e8dfaeba..a66dfc68d4 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x403}}, - {{0x3}}}, // Overloads: hf [[tmp]], @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp2]]) ; Exp(value) + hRes += pow(hVec2, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]]) ; Exp(value) + fRes += pow(fVec2, fVec1); + + vector hVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 29, <[[NUM]] x half> [[hvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x half> [[hvec1]], 
[[tmp]] + hRes *= modf(hVec1, hVal); + hRes += hVal; + + vector fVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 29, <[[NUM]] x float> [[fvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x float> [[fvec1]], [[tmp]] + fRes *= modf(fVec1, fVal); + fRes += fVal; + // CHECK-NOT: extractelement // CHECK-NOT: insertelement // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[hvec2]], [[hvec1]] @@ -227,6 +256,25 @@ void main() { // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] fRes += smoothstep(fVec1, fVec2, fVec3); + // Note that Fabs is tested in longvec-trivial-unary-float-intrinsics. + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i16> zeroinitializer, [[svec1]] + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[tmp]]) ; IMax(a,b) + sRes += abs(sVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i32> zeroinitializer, [[ivec1]] + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[tmp]]) ; IMax(a,b) + iRes += abs(iVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i64> zeroinitializer, [[lvec1]] + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[tmp]]) ; IMax(a,b) + lRes += abs(lVec1); + // Intrinsics that expand into llvm ops. 
// CHECK-NOT: extractelement diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl index 6ebb511b00..37fb1d2e15 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -9,6 +9,13 @@ // RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl index 91ab631a7e..9cc3d23b66 100644 --- 
a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -1,3 +1,5 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 57f2574005..7954faf2af 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -2629,7 +2629,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2647,7 +2647,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2665,7 +2665,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2683,7 +2683,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param( From 422604be6845851bb955579cab21498b5b38301a Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 8 May 2025 16:32:27 -0700 Subject: [PATCH 30/93] [HLSL Options] Remove select-validator option (#7423) This PR removes the select-validator option. It is being deprecated, and it wasn't ever officially documented. 
Fixes https://github.com/microsoft/DirectXShaderCompiler/issues/7419 --- include/dxc/Support/HLSLOptions.td | 2 -- lib/DxcSupport/HLSLOptions.cpp | 14 -------------- .../test/DXC/deprecated-select-validator.hlsl | 14 ++++++++++++++ .../hlsl/types/struct/struct-annotations.hlsl | 6 +++--- .../test/HLSLFileCheck/infra/auto-dxilver.hlsl | 11 ++++++----- .../WrongShaderModel.hlsl | 4 +--- 6 files changed, 24 insertions(+), 27 deletions(-) create mode 100644 tools/clang/test/DXC/deprecated-select-validator.hlsl diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index ea000f4877..4d72cb2312 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -317,8 +317,6 @@ def print_before_all : Flag<["-", "/"], "print-before-all">, Group; def print_before : Separate<["-", "/"], "print-before">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Print LLVM IR before a specific pass. May be specificied multiple times.">; -def select_validator : Separate<["-", "/"], "select-validator">, Group, Flags<[CoreOption, HelpHidden]>, - HelpText<"Select validator: auto: (default) use DXIL.dll if found, otherwise use internal; internal: internal non-signing validator; external: use DXIL.dll if found, otherwise fail compilation.">; def print_after_all : Flag<["-", "/"], "print-after-all">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Print LLVM IR after each pass.">; def print_after : Separate<["-", "/"], "print-after">, Group, Flags<[CoreOption, HelpHidden]>, diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 1ce7d0dfc0..eb071eb0a6 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1033,20 +1033,6 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, opts.ValVerMinor = (unsigned long)minor64; } - llvm::StringRef valSelectStr = Args.getLastArgValue(OPT_select_validator); - if (!valSelectStr.empty()) { - opts.SelectValidator = 
llvm::StringSwitch(valSelectStr) - .Case("auto", ValidatorSelection::Auto) - .Case("internal", ValidatorSelection::Internal) - .Case("external", ValidatorSelection::External) - .Default(ValidatorSelection::Invalid); - if (opts.SelectValidator == ValidatorSelection::Invalid) { - errors << "Unsupported value '" << valSelectStr - << "for -select-validator option."; - return 1; - } - } - if (opts.IsLibraryProfile() && Minor == 0xF) { if (opts.ValVerMajor != UINT_MAX && opts.ValVerMajor != 0) { errors << "Offline library profile cannot be used with non-zero " diff --git a/tools/clang/test/DXC/deprecated-select-validator.hlsl b/tools/clang/test/DXC/deprecated-select-validator.hlsl new file mode 100644 index 0000000000..2ad3e5199c --- /dev/null +++ b/tools/clang/test/DXC/deprecated-select-validator.hlsl @@ -0,0 +1,14 @@ +// Test that the deprecated option, select-validator, doesn't work. +// RUN: not %dxc -E main -T vs_6_7 -select-validator internal %s 2>&1 | FileCheck %s + +// CHECK: dxc failed : Unknown argument: '-select-validator' + +float4 main(int loc : SV_StartVertexLocation + , uint loc2 : SV_StartInstanceLocation + ) : SV_Position +{ + float4 r = 0; + r += loc; + r += loc2; + return r; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl index 5a1b5e43d8..4ffb325c8b 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T ps_6_8 -E main -Qkeep_reflect_in_dxil -select-validator internal %s | FileCheck -check-prefix=CHECK68 %s -// RUN: %dxc -T ps_6_7 -E main -Qkeep_reflect_in_dxil -select-validator internal %s | FileCheck -check-prefix=CHECK67 %s +// RUN: %dxc -T ps_6_8 -E main -Qkeep_reflect_in_dxil %s | FileCheck -check-prefix=CHECK68 %s +// RUN: %dxc -T ps_6_7 -E main -Qkeep_reflect_in_dxil %s | FileCheck 
-check-prefix=CHECK67 %s // Make sure the vector is annotated with vector size (DXIL 1.8 and higher), // matrix is annotated with matrix size and orientation, and scalar does not @@ -47,4 +47,4 @@ StructuredBuffer g_myStruct; float main() : SV_Target { return g_myStruct[0].vec.x + g_myStruct[0].vec.y; -} \ No newline at end of file +} diff --git a/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl b/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl index 166fa5918d..14ee7f7bf9 100644 --- a/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl +++ b/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl @@ -5,14 +5,17 @@ // This should implicitly require dxilver 1.8. // RUN: %dxc -T vs_6_8 -Vd %s | FileCheck %s -// Even though this is using -Vd, the validator version is set by the available -// validator. If that isn't version 1.8 or above, we'll see an error. +// Even though this is using -Vd, the validator version being checked is the internal +// validator's version. If a pre-DXIL-1.8 DXC was used to run this test, we expect failure, +// since the internal validator will be the same version as the older DXC. // The implicit dxilver logic should not skip the check when -Vd is used. // CHECK-NOT: error: validator version {{.*}} does not support target profile. // RUN: %dxc -T vs_6_0 -validator-version 1.8 %s | FileCheck %s // Even though target is 6.0, the explicit -validator-version should add an -// implicit dxilver 1.8 requirement. +// implicit dxilver 1.8 requirement. The requirement should pass for DXCs that +// are newer than DXIL Version 1.8, since then, the internal validator's version will +// be sufficiently new for this check. // CHECK-NOT: error: The module cannot be validated by the version of the validator currently attached. // This error would occur if run against wrong compiler. @@ -21,8 +24,6 @@ // Catch any other unexpected error cases. 
// CHECK-NOT: error -// RUN: %dxc -T vs_6_8 -select-validator internal %s | FileCheck %s -// This should always be run, and always succeed. // CHECK: define void @main() void main() {} diff --git a/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl b/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl index 667e1f4579..4bddf37acd 100644 --- a/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl @@ -1,7 +1,5 @@ // TODO: use -verify instead of FileCheck after fix https://github.com/microsoft/DirectXShaderCompiler/issues/5768 -// -select-validator internal used to avoid downlevel validator testing -// incompatibility with shader model 6.7. -// RUN: not %dxc -E main -T vs_6_7 -select-validator internal %s 2>&1 | FileCheck %s --check-prefix=SM67 +// RUN: not %dxc -E main -T vs_6_7 %s 2>&1 | FileCheck %s --check-prefix=SM67 // SM67:invalid semantic 'SV_StartVertexLocation' for vs 6.7 // SM67:invalid semantic 'SV_StartInstanceLocation' for vs 6.7 From 474f9d28a334ac3abd449e4afa823a14719fad19 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 9 May 2025 18:42:44 +0200 Subject: [PATCH 31/93] [SER] Validate 'reordercoherent' resource property (#7429) Validates: All resources All instructions using resources Rules: 'reordercoherent' may only be used in SM6.9+ in resource handles and resource declarations. Increment/DecrementCounter unsupported on 'reordercoherent' resources. Create a new DXIL 1.9 variant of the 'CompileWhenOkThenCheckRDAT' container test and restore the old one without 'reordercoherent' (pre-#7250). The validator now rejects 'reordercoherent' in DXIL 1.3 and accepts from DXIL 1.9+. 
SER implementation tracker: #7214 --------- Co-authored-by: Tex Riddell Co-authored-by: github-actions[bot] --- docs/DXIL.rst | 3 +- lib/DxilValidation/DxilValidation.cpp | 24 ++- .../ser_reordercoherent_invalid_incdec.ll | 92 +++++++++++ .../ser_reordercoherent_invalid_sm.ll | 83 ++++++++++ .../unittests/HLSL/DxilContainerTest.cpp | 143 +++++++++++++++++- tools/clang/unittests/HLSL/ValidationTest.cpp | 2 +- utils/hct/hctdb.py | 11 +- 7 files changed, 345 insertions(+), 13 deletions(-) create mode 100644 tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll create mode 100644 tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 69bcae8c53..7532ec3c42 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3175,6 +3175,7 @@ INSTR.OPCONSTRANGE Constant values must be in INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range INSTR.PARAMMULTIPLE Parameter must be a valid multiple INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.REORDERCOHERENTREQUIRESSM69 reordercoherent requires SM 6.9 or later. INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. @@ -3216,6 +3217,7 @@ META.BARYCENTRICSTWOPERSPECTIVES There can only be up to tw META.BRANCHFLATTEN Can't use branch and flatten attributes together. META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COHERENCENOTONAPPENDCONSUME globally/reorder coherent incompatible with append/consume/counter buffers META.COMPUTEWITHNODE Compute entry must not have node metadata META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. 
META.DENSERESIDS Resource identifiers must be zero-based and dense. @@ -3223,7 +3225,6 @@ META.DUPLICATESYSVALUE System value may only appe META.ENTRYFUNCTION entrypoint not found. META.FLAGSUSAGE Flags must match usage. META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. META.INTEGERINTERPMODE Interpolation mode on integer must be Constant META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. META.INTERPMODEVALID Interpolation mode must be valid diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index bd69cdaf5d..28917e0600 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -165,7 +165,8 @@ ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, static DxilResourceProperties GetResourceFromHandle(Value *Handle, ValidationContext &ValCtx) { - if (!isa(Handle)) { + CallInst *HandleCall = dyn_cast(Handle); + if (!HandleCall) { if (Instruction *I = dyn_cast(Handle)) ValCtx.EmitInstrError(I, ValidationRule::InstrHandleNotFromCreateHandle); else @@ -175,10 +176,13 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, } DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) ValCtx.EmitInstrError(cast(Handle), ValidationRule::InstrHandleNotFromCreateHandle); - } + if (RP.Basic.IsReorderCoherent && + !ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + ValCtx.EmitInstrError(HandleCall, + ValidationRule::InstrReorderCoherentRequiresSM69); return RP; } @@ -4182,6 +4186,9 @@ static void ValidateResourceOverlap( static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { + if (Res.IsReorderCoherent() && !ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + 
ValCtx.EmitResourceError(&Res, + ValidationRule::InstrReorderCoherentRequiresSM69); switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: @@ -4413,10 +4420,13 @@ static void ValidateResources(ValidationContext &ValCtx) { ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (Uav->HasCounter() && Uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(Uav.get(), - ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(Uav.get())}); + const bool UavIsCoherent = + Uav->IsGloballyCoherent() || Uav->IsReorderCoherent(); + if (Uav->HasCounter() && UavIsCoherent) { + StringRef Prefix = Uav->IsGloballyCoherent() ? "globally" : "reorder"; + ValCtx.EmitResourceFormatError( + Uav.get(), ValidationRule::MetaCoherenceNotOnAppendConsume, {Prefix}); + } ValidateResource(*Uav, ValCtx); ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); diff --git a/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll new file mode 100644 index 0000000000..1f68a9a95f --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll @@ -0,0 +1,92 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; COM: Original HLSL source: +; COM: reordercoherent RWStructuredBuffer buffer; +; COM: +; COM: +; COM: [Shader("raygeneration")] +; COM: void +; COM: main() +; COM: { +; COM: buffer.IncrementCounter(); +; COM: buffer.DecrementCounter(); +; COM: } + +; CHECK: error: reordercoherent cannot be used on buffer with counter 'buffer' +; CHECK-NEXT: Validation failed. 
+ +; shader hash: 638950814a9023bf537d61dbb330a4c8 +; +; Buffer Definitions: +; +; Resource bind info for buffer +; { +; +; float $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; buffer UAV struct r/w+cnt U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float } + +@"\01?buffer@@3V?$RWStructuredBuffer@M@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buffer@@3V?$RWStructuredBuffer@M@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 102412, i32 4 }) ; AnnotateHandle(res,props) resource: reordercoherent RWStructuredBuffer + %4 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %3, i8 1) ; BufferUpdateCounter(uav,inc) + %5 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %6 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 102412, i32 4 }) ; AnnotateHandle(res,props) resource: reordercoherent RWStructuredBuffer + %7 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %6, i8 -1) ; BufferUpdateCounter(uav,inc) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle 
@dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!10, !12} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %"class.RWStructuredBuffer"* bitcast (%dx.types.Handle* @"\01?buffer@@3V?$RWStructuredBuffer@M@@A" to %"class.RWStructuredBuffer"*), !"buffer", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 true, i1 false, !5} +!5 = !{i32 1, i32 4, i32 4, i1 true} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{null, !"", null, !2, !11} +!11 = !{i32 0, i64 8589934608} +!12 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !13} +!13 = !{i32 8, i32 7, i32 5, !14} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll new file mode 100644 index 0000000000..efcb7d3c2b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll @@ -0,0 +1,83 @@ +; REQUIRES: dxil-1-8 +; RUN: not %dxv %s 2>&1 | FileCheck %s + + +; CHECK: error: reordercoherent requires SM 6.9 or later. 'buf' +; CHECK-NEXT: Function: ?main@@YAXXZ: error: reordercoherent requires SM 6.9 or later. +; CHECK-NEXT: note: at '%3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 })' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: reordercoherent requires SM 6.9 or later. 
+; CHECK-NEXT: note: at '%3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 })' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. +; COM: Original HLSL source: +; COM: reordercoherent RWByteAddressBuffer buf; +; COM: +; COM: [Shader("raygeneration")] +; COM: void main() +; COM: { +; COM: buf.Store(0, 11.f); +; COM: } + +; shader hash: f7be6354830d1423764991adcfc26b0b +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; buf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 }) ; AnnotateHandle(res,props) resource: reordercoherent RWByteAddressBuffer + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %3, i32 0, i32 undef, float 1.100000e+01, float undef, float undef, float undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + ret void +} + +; Function Attrs: nounwind +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 + +; Function Attrs: 
nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!4, !5} + +!0 = !{i32 1, i32 8} +!1 = !{!"lib", i32 6, i32 8} +!2 = !{null, !6, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!4 = !{null, !"", null, !2, !8} +!5 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!6 = !{!10} +!7 = !{!11} +!8 = !{i32 0, i64 8589934608} +!9 = !{i32 8, i32 7, i32 5, !12} +!10 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, !13} +!11 = !{i32 1, !14, !14} +!12 = !{i32 0} +!13 = !{i32 4, i1 true} +!14 = !{} diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index 339b33c655..34b4d338fe 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -103,6 +103,7 @@ class DxilContainerTest : public ::testing::Test { TEST_METHOD(CompileCSWaveSizeRange_CheckPSV0) TEST_METHOD(CompileWhenOkThenCheckRDAT) TEST_METHOD(CompileWhenOkThenCheckRDAT2) + TEST_METHOD(CompileWhenOkThenCheckRDATSM69) TEST_METHOD(CompileWhenOkThenCheckReflection1) TEST_METHOD(DxcUtils_CreateReflection) TEST_METHOD(CheckReflectionQueryInterface) @@ -1444,6 +1445,146 @@ TEST_F(DxilContainerTest, CompileCSWaveSizeRange_CheckPSV0) { TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { if (m_ver.SkipDxilVersion(1, 3)) return; + const char *shader = + "float c_buf;" + 
"RWTexture1D tex : register(u5);" + "Texture1D tex2 : register(t0);" + "RWByteAddressBuffer b_buf;" + "struct Foo { float2 f2; int2 i2; };" + "AppendStructuredBuffer append_buf;" + "ConsumeStructuredBuffer consume_buf;" + "RasterizerOrderedByteAddressBuffer rov_buf;" + "globallycoherent RWByteAddressBuffer gc_buf;" + "float function_import(float x);" + "export float function0(min16float x) { " + " return x + 1 + tex[0].x; }" + "export float function1(float x, min12int i) {" + " return x + c_buf + b_buf.Load(x) + tex2[i].x; }" + "export float function2(float x) { return x + function_import(x); }" + "export void function3(int i) {" + " Foo f = consume_buf.Consume();" + " f.f2 += 0.5; append_buf.Append(f);" + " rov_buf.Store(i, f.i2.x);" + " gc_buf.Store(i, f.i2.y);" + " b_buf.Store(i, f.i2.x + f.i2.y); }"; + CComPtr pCompiler; + CComPtr pSource; + CComPtr pProgram; + CComPtr pDisassembly; + CComPtr pResult; + + struct CheckResFlagInfo { + std::string name; + hlsl::DXIL::ResourceKind kind; + hlsl::RDAT::DxilResourceFlag flag; + }; + const unsigned numResFlagCheck = 5; + CheckResFlagInfo resFlags[numResFlagCheck] = { + {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::None}, + {"append_buf", hlsl::DXIL::ResourceKind::StructuredBuffer, + hlsl::RDAT::DxilResourceFlag::UAVCounter}, + {"consume_buf", hlsl::DXIL::ResourceKind::StructuredBuffer, + hlsl::RDAT::DxilResourceFlag::UAVCounter}, + {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; + + VERIFY_SUCCEEDED(CreateCompiler(&pCompiler)); + CreateBlobFromText(shader, &pSource); + VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", + L"lib_6_3", nullptr, 0, nullptr, 0, + nullptr, &pResult)); + HRESULT hrStatus; + VERIFY_SUCCEEDED(pResult->GetStatus(&hrStatus)); + VERIFY_SUCCEEDED(hrStatus); + 
VERIFY_SUCCEEDED(pResult->GetResult(&pProgram)); + CComPtr containerReflection; + uint32_t partCount; + IFT(m_dllSupport.CreateInstance(CLSID_DxcContainerReflection, + &containerReflection)); + IFT(containerReflection->Load(pProgram)); + IFT(containerReflection->GetPartCount(&partCount)); + bool blobFound = false; + for (uint32_t i = 0; i < partCount; ++i) { + uint32_t kind; + IFT(containerReflection->GetPartKind(i, &kind)); + if (kind == (uint32_t)hlsl::DxilFourCC::DFCC_RuntimeData) { + blobFound = true; + using namespace hlsl::RDAT; + CComPtr pBlob; + IFT(containerReflection->GetPartContent(i, &pBlob)); + // Validate using DxilRuntimeData + DxilRuntimeData context; + context.InitFromRDAT((char *)pBlob->GetBufferPointer(), + pBlob->GetBufferSize()); + auto funcTable = context.GetFunctionTable(); + auto resTable = context.GetResourceTable(); + VERIFY_ARE_EQUAL(funcTable.Count(), 4U); + std::string str("function"); + for (uint32_t j = 0; j < funcTable.Count(); ++j) { + auto funcReader = funcTable[j]; + std::string funcName(funcReader.getUnmangledName()); + VERIFY_IS_TRUE(str.compare(funcName.substr(0, 8)) == 0); + std::string cur_str = str; + cur_str.push_back('0' + j); + if (cur_str.compare("function0") == 0) { + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 1U); + hlsl::ShaderFlags flag; + flag.SetUAVLoadAdditionalFormats(true); + flag.SetLowPrecisionPresent(true); + uint64_t rawFlag = flag.GetFeatureInfo(); + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags(), rawFlag); + auto resReader = funcReader.getResources()[0]; + VERIFY_ARE_EQUAL(resReader.getClass(), + hlsl::DXIL::ResourceClass::UAV); + VERIFY_ARE_EQUAL(resReader.getKind(), + hlsl::DXIL::ResourceKind::Texture1D); + } else if (cur_str.compare("function1") == 0) { + hlsl::ShaderFlags flag; + flag.SetLowPrecisionPresent(true); + uint64_t rawFlag = flag.GetFeatureInfo(); + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags(), rawFlag); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 3U); + } else if 
(cur_str.compare("function2") == 0) { + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags() & 0xffffffffffffffff, + 0U); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 0U); + std::string dependency = funcReader.getFunctionDependencies()[0]; + VERIFY_IS_TRUE(dependency.find("function_import") != + std::string::npos); + } else if (cur_str.compare("function3") == 0) { + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags() & 0xffffffffffffffff, + 0U); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), numResFlagCheck); + for (unsigned i = 0; i < funcReader.getResources().Count(); ++i) { + auto resReader = funcReader.getResources()[0]; + VERIFY_ARE_EQUAL(resReader.getClass(), + hlsl::DXIL::ResourceClass::UAV); + unsigned j = 0; + for (; j < numResFlagCheck; ++j) { + if (resFlags[j].name.compare(resReader.getName()) == 0) + break; + } + VERIFY_IS_LESS_THAN(j, numResFlagCheck); + VERIFY_ARE_EQUAL(resReader.getKind(), resFlags[j].kind); + VERIFY_ARE_EQUAL(resReader.getFlags(), + static_cast(resFlags[j].flag)); + } + } else { + IFTBOOLMSG(false, E_FAIL, "unknown function name"); + } + } + VERIFY_ARE_EQUAL(resTable.Count(), 8U); + } + } + IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); +} + +TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDATSM69) { + if (m_ver.SkipDxilVersion(1, 9)) + return; const char *shader = "float c_buf;" "RWTexture1D tex : register(u5);" @@ -1497,7 +1638,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { VERIFY_SUCCEEDED(CreateCompiler(&pCompiler)); CreateBlobFromText(shader, &pSource); VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", - L"lib_6_3", nullptr, 0, nullptr, 0, + L"lib_6_9", nullptr, 0, nullptr, 0, nullptr, &pResult)); HRESULT hrStatus; VERIFY_SUCCEEDED(pResult->GetStatus(&hrStatus)); diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index 01f24e0227..980bf6c7c2 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ 
b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1488,7 +1488,7 @@ TEST_F(ValidationTest, StructBufGlobalCoherentAndCounter) { L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", "!\"buf2\", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false", "!\"buf2\", i32 0, i32 0, i32 1, i32 12, i1 true, i1 true", - "globallycoherent cannot be used with append/consume buffers: 'buf2'"); + "globallycoherent cannot be used on buffer with counter 'buf2'"); } TEST_F(ValidationTest, StructBufStrideAlign) { diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 7954faf2af..65f9aa1d80 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8015,9 +8015,10 @@ def build_valrules(self): "Hull Shader MaxTessFactor must be [%0..%1]. %2 specified.", ) self.add_valrule("Meta.ValidSamplerMode", "Invalid sampler mode on sampler .") - self.add_valrule( - "Meta.GlcNotOnAppendConsume", - "globallycoherent cannot be used with append/consume buffers: '%0'.", + self.add_valrule_msg( + "Meta.CoherenceNotOnAppendConsume", + "globally/reorder coherent incompatible with append/consume/counter buffers", + "%0coherent cannot be used on buffer with counter", ) self.add_valrule_msg( "Meta.StructBufAlignment", @@ -8409,6 +8410,10 @@ def build_valrules(self): "Instr.MayReorderThreadUndefCoherenceHintParam", "Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.", ) + self.add_valrule( + "Instr.ReorderCoherentRequiresSM69", + "reordercoherent requires SM 6.9 or later.", + ) # Linalg ops self.add_valrule_msg( From 377c4ca6d82adb83bf2eaf978a7040443848d6fd Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Mon, 12 May 2025 10:35:22 -0700 Subject: [PATCH 32/93] Fix the return types of `dot4add_i8packed` and `dot4add_u8packed`. (#7401) Change the definition of the HLSL `dot4add_i8packed` and `dot4add_u8packed` intrinsics in `utils/hct/gen_intrin_main.txt` to simply spell out the return types, rather than saying that their return type is determined by their third argument. 
This prevents DXC from trying to give those functions declarations like declare i64 @"\01?dot4add_u8packed@hlsl@@YA_JII_J@Z"(i32, i32, i64 signext) #1 which seems to expect a 64-bit third argument and return value. These functions are not generic, and they have only one overload, so there is no need to use interesting `uComponentTypeId` values to get the right effects, and `HLSLExternalSource::MatchArguments` seems to get confused about how to treat argument types that affect the return types. Fixes #7400. --- tools/clang/lib/Sema/SemaHLSL.cpp | 4 +-- .../test/DXC/dot4add_i8_u8_packed-types.hlsl | 34 +++++++++++++++++++ utils/hct/gen_intrin_main.txt | 6 ++-- 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6e58c0e872..b15068638d 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6737,8 +6737,8 @@ bool HLSLExternalSource::MatchArguments( (iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId); // For literal arg which don't affect return type, find concrete type. // For literal arg affect return type, - // TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases - // where all argumentss are literal. + // TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of + // cases where all arguments are literal. // CombineBasicTypes will cover the rest cases. 
if (!affectRetType) { TypeInfoEltKind = diff --git a/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl new file mode 100644 index 0000000000..53c87bb9c1 --- /dev/null +++ b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s + +// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked: +// +// DXASSERT( +// !accTy->isVectorTy() && accTy->isIntegerTy(32), +// "otherwise, unexpected vector support in high level intrinsic template"); +// +// Bug was fixed by changing the declarations of dot4add_i8packed and +// dot4add_u8packed in utils/hct/gen_intrin_main.txt to simply write +// out their argument and return types, rather than using the $typeN +// reference syntax. + +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b) +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b) +// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by) + +RWByteAddressBuffer buf; + +[numthreads(1, 1, 1)] +void main() +{ + int a = dot4add_i8packed(0, 0, 0); + int b = dot4add_i8packed(0, 0, a); + buf.Store(0, b); + + uint c = dot4add_u8packed(0, 0, 0); + uint d = dot4add_u8packed(0, 0, c); + buf.Store(4, d); + + float e = dot2add(half2(0,0), half2(0,0), 1.0); + float f = dot2add(half2(0,0), half2(0,0), e); + buf.Store(8, f); +} diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index c394611302..e5e4119330 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -339,9 +339,9 @@ float<4,3> [[rn]] ObjectToWorld4x3(); float<4,3> [[rn]] WorldToObject4x3(); // Packed dot products with accumulate: -$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c); -$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); -$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); +uint [[rn]] dot4add_u8packed(in uint 
a, in $type1 b, in uint c); +int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); +float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); // Unpacking intrinsics int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk); From fb4d7d17bd5104cbbf259e8484390175fdc23192 Mon Sep 17 00:00:00 2001 From: Anupama Chandrasekhar Date: Tue, 13 May 2025 09:06:24 -0700 Subject: [PATCH 33/93] [0029] [Main] For OuterProductAccumulate, matrix layout must be outerproductoptimal and matrix stride must be zero (#7417) Implements the DXIL portion of https://github.com/microsoft/hlsl-specs/pull/494, The HLSL checks will be a part of the HLSL validation checks, did not add it to this PR due to shared infrastructure. --- docs/DXIL.rst | 584 +++++++++--------- include/dxc/DXIL/DxilConstants.h | 1 + lib/DxilValidation/DxilValidation.cpp | 26 + .../linalg_builtins/check-shader-stages.hlsl | 2 +- .../linalg_builtins/linalg-builtins.hlsl | 4 +- ...uter-product-accumulate-multioverload.hlsl | 18 +- ...uter-product-accumulate-matrix-layout.hlsl | 28 + .../DXC/Passes/DxilGen/linalg-builtins.ll | 4 +- ...roduct-accumulate-matrix-layout-failing.ll | 86 +++ ...roduct-accumulate-matrix-layout-passing.ll | 65 ++ utils/hct/hctdb.py | 12 + 11 files changed, 525 insertions(+), 305 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl create mode 100644 tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll create mode 100644 tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll diff --git a/docs/DXIL.rst b/docs/DXIL.rst index 7532ec3c42..1a2a691d27 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -3069,297 +3069,299 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. 
VALRULES-RST:BEGIN -===================================================== ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -===================================================== ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.CONTENTINVALID DXIL Container Content is well-formed -CONTAINER.CONTENTMATCHES DXIL Container Content must match Module -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -CONTAINER.UNUSEDITEMINTABLE Items in Table must be used -DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant -DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant -DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS 
Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. -INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. 
-INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. -INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. 
-INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. -INSTR.LINALGINTERPRETATIONPARAMARECONST In Linalg operations, Interpretation value is a constant. -INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFORMATVECOPS Matrix Layout for Linalg Mul/MulAdd operation must be valid. -INSTR.LINALGINVALIDMEMORYINTERPVALUE In Memory Interpolation value must be valid. -INSTR.LINALGINVALIDREGISTERINTERPVALUE From Register Interpretation value must be valid. -INSTR.LINALGMATRIXLAYOUTNOTTRANSPOSABLE Row Major and Column Major matrix layouts are not transposable. -INSTR.LINALGMATRIXSHAPEPARAMSARECONST Matrix Layout, Dimensions and isTranspose are constants -INSTR.LINALGNOTANUNSIGNEDTYPE Unsigned flag set for a float signed type -INSTR.MATVECOPISUNSIGNEDFLAGSARECONST In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant. -INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. -INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. -INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. 
-INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. -INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PARAMMULTIPLE Parameter must be a valid multiple -INSTR.PTRBITCAST Pointer type bitcast must be have same size. -INSTR.REORDERCOHERENTREQUIRESSM69 reordercoherent requires SM 6.9 or later. -INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. 
-INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. -INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. -INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFHITOBJECT HitObject is undef. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. 
-INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. -META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COHERENCENOTONAPPENDCONSUME globally/reorder coherent incompatible with append/consume/counter buffers -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. -META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. 
-META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. -META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. 
-META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. -SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. -SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. 
-SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. -SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. 
-SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. -SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
-SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. -SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. 
Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. -TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
-TYPES.NOVECTOR Vector types must not be present -===================================================== ======================================================================================================================================================================================================================================================================================================== +============================================================= ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +============================================================= ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant 
+DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. +FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. +INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. 
+INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. 
+INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.LINALGINTERPRETATIONPARAMARECONST In Linalg operations, Interpretation value is a constant. +INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFORMATVECOPS Matrix Layout for Linalg Mul/MulAdd operation must be valid. +INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFOROUTERPRODUCTACCUMULATE Matrix Layout for Linalg Mul/MulAdd operation must be valid. +INSTR.LINALGINVALIDMEMORYINTERPVALUE In Memory Interpolation value must be valid. +INSTR.LINALGINVALIDREGISTERINTERPVALUE From Register Interpretation value must be valid. +INSTR.LINALGMATRIXLAYOUTNOTTRANSPOSABLE Row Major and Column Major matrix layouts are not transposable. +INSTR.LINALGMATRIXSHAPEPARAMSARECONST Matrix Layout, Dimensions and isTranspose are constants +INSTR.LINALGMATRIXSTRIDEZEROFOROPTIMALLAYOUTS For optimal layouts, matrix stride must be zero. +INSTR.LINALGNOTANUNSIGNEDTYPE Unsigned flag set for a float signed type +INSTR.MATVECOPISUNSIGNEDFLAGSARECONST In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. 
+INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. +INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. +INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PARAMMULTIPLE Parameter must be a valid multiple +INSTR.PTRBITCAST Pointer type bitcast must be have same size. 
+INSTR.REORDERCOHERENTREQUIRESSM69 reordercoherent requires SM 6.9 or later. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. +INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. +INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. 
+INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. +INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. +INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. 
+META.COHERENCENOTONAPPENDCONSUME globally/reorder coherent incompatible with append/consume/counter buffers +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. +META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. +META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. 
+META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. +SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. 
+SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. +SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. 
+SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
+SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. 
Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. +SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. +TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. 
+TYPES.NOVECTOR Vector types must not be present +============================================================= ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index fe32c06f63..bf6de7ed3b 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1609,6 +1609,7 @@ const unsigned kMatVecMulAddIsOutputUnsignedIdx = 15; // Outer Product Accumulate const unsigned kOuterProdAccMatrixInterpretation = 5; const unsigned kOuterProdAccMatrixLayout = 6; +const unsigned kOuterProdAccMatrixStride = 7; // TODO: add operand index for all the OpCodeClass. } // namespace OperandIndex diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 28917e0600..db596a3821 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1233,6 +1233,32 @@ static void ValidateImmOperandsForOuterProdAcc(CallInst *CI, {"MatrixLayout"}); return; } + ConstantInt *ML = cast(MatrixLayout); + uint64_t MLValue = ML->getLimitedValue(); + if (MLValue != + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal)) + ValCtx.EmitInstrFormatError( + CI, + ValidationRule:: + InstrLinalgInvalidMatrixLayoutValueForOuterProductAccumulate, + {GetMatrixLayoutStr(MLValue), + GetMatrixLayoutStr(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal))}); + + llvm::Value *MatrixStride = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixStride); + if (!llvm::isa(MatrixStride)) { + ValCtx.EmitInstrError( + CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts); + return; + } + ConstantInt *MS = cast(MatrixStride); + uint64_t MSValue = 
MS->getLimitedValue(); + if (MSValue != 0) { + ValCtx.EmitInstrError( + CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts); + return; + } } // Validate the type-defined mask compared to the store value mask which diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl index 74cb51260c..75e7c8a5cd 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl @@ -43,7 +43,7 @@ void UseCoopVec() { const uint opa_matrix_offset = 0; const uint opa_matrix_interpretation = 5; /*U32*/ const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ - const uint opa_matrix_stride = 64; + const uint opa_matrix_stride = 0; __builtin_OuterProductAccumulate(input_vector1, input_vector2, rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl index c3b4a3a8d7..f1badb9101 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl @@ -58,12 +58,12 @@ void cs_main() const uint opa_matrix_offset = 0; const uint opa_matrix_interpretation = 5; /*U32*/ const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ - const uint opa_matrix_stride = 64; + const uint opa_matrix_stride = 0; // CHECK: %[[MLD2:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" // CHECK: %[[MCH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD2]]) // CHECK: %[[MAH2:[^ ]+]] = 
call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH2]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) - // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH2]], i32 0, i32 5, i32 3, i32 64) + // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH2]], i32 0, i32 5, i32 3, i32 0) __builtin_OuterProductAccumulate(input_vector1, input_vector2, rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, opa_matrix_layout, opa_matrix_stride); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl index 40bbe62284..c40365078f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl @@ -1,8 +1,8 @@ -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=RowMajor | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-0 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 
-DML=RowMajor -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 @@ -11,12 +11,12 @@ ByteAddressBuffer input_vector_buffer2; RWByteAddressBuffer matrix_buffer; // COMMON: define void @main() -// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) -// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 0, i32 64) -// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) -// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 64) -// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) -// HLOP-2: call 
void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 64) +// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) +// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) +// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) enum CompType { Invalid = 0, @@ -63,7 +63,7 @@ void main() const uint matrix_interpretation = MI; const uint matrix_layout = ML; const uint matrix_offset = 0; - const uint matrix_stride = 64; + const uint matrix_stride = 0; 
__builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl new file mode 100644 index 0000000000..e930557cf9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -I %hlsl_headers -T cs_6_9 %s -enable-16bit-types -DML=MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL -DSTRIDE=0 2>&1 | FileCheck %s + +//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-failing.ll +//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-passing.ll + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer input_vector_buffer2; +RWByteAddressBuffer matrix_buffer; + +#include + +// CHECK: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) +using namespace dx::linalg; + +[Numthreads(1,1,1)] +[shader("compute")] +void main() +{ + vector input_vector1 = input_vector_buffer.Load >(0); + vector input_vector2 = input_vector_buffer2.Load >(0); + + const uint matrix_interpretation = DATA_TYPE_FLOAT16; + const uint matrix_layout = ML; + const uint matrix_offset = 0; + const uint matrix_stride = STRIDE; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll index 6623f63031..ea1be46c4c 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll @@ -76,8 +76,8 @@ 
entry: ;CHECK: %[[RWMCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]] ;CHECK: %[[RWMAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH0]] - ;CHECK: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH0]], i32 0, i32 5, i32 3, i32 64) - call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %tmp25, <8 x i32> %tmp29, %dx.types.Handle %tmp32, i32 0, i32 5, i32 3, i32 64), !dbg !37 ; line:67 col:5 + ;CHECK: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH0]], i32 0, i32 5, i32 3, i32 0) + call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %tmp25, <8 x i32> %tmp29, %dx.types.Handle %tmp32, i32 0, i32 5, i32 3, i32 0), !dbg !37 ; line:67 col:5 %tmp33 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !38 ; line:77 col:5 diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll new file mode 100644 index 0000000000..33591126e5 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll @@ -0,0 +1,86 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Original Source: \tools\clang\test\CodeGenDXIL\hlsl\linalg\outer-product-accumulate-matrix-layout.hlsl +; The failing tests were generated by manually editing the IR from the passing +; case generated by running the hlsl above (Original Source) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" 
+target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v8f16 = type { <8 x half>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +; As noted in other tests, the validation errors come out in +; an order different from the IR. So listed them here in the +; order they appear and added comments for correlation + +;CHECK: error: matrix stride must be a constant zero for optimal layouts +;CHECK: error: matrix stride must be a constant zero for optimal layouts +;CHECK-NOT: error: matrix layout value 'OuterProductOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +; CHECK: Validation failed. 
+ +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0 + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0 + %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 0, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix 
layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 1, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 2, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix stride must be a constant zero for optimal layouts + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix stride must be a constant zero for optimal layouts + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 63) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ret void +} + +; Function Attrs: nounwind readonly +declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, 
i1) #2 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.entryPoints = !{!8} + +!0 = !{i32 1, i32 9} +!1 = !{!"cs", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4, !5} +!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null} +!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{void ()* @main, !"main", null, !2, !9} +!9 = !{i32 0, i64 8598323216, i32 4, !10} +!10 = !{i32 1, i32 1, i32 1} diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll new file mode 100644 index 0000000000..44cd3e48b3 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll @@ -0,0 +1,65 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +;Original Source: \tools\clang\test\CodeGenDXIL\hlsl\linalg\outer-product-accumulate-matrix-layout.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v8f16 = type { <8 x half>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +;CHECK: Validation succeeded. 
+ +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0 + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0 + %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ret void +} + +; Function Attrs: nounwind readonly +declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, 
%dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.entryPoints = !{!8} + +!0 = !{i32 1, i32 9} +!1 = !{!"cs", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4, !5} +!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null} +!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{void ()* @main, !"main", null, !2, !9} +!9 = !{i32 0, i64 8598323216, i32 4, !10} +!10 = !{i32 1, i32 1, i32 1} diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 65f9aa1d80..5567a6a88d 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8452,6 +8452,12 @@ def build_valrules(self): "matrix layout value '%0' is not valid. 
Must be between [%1 - %2]", ) + self.add_valrule_msg( + "Instr.LinalgMatrixStrideZeroForOptimalLayouts", + "For optimal layouts, matrix stride must be zero.", + "matrix stride must be a constant zero for optimal layouts", + ) + self.add_valrule_msg( "Instr.LinalgMatrixLayoutNotTransposable", "Row Major and Column Major matrix layouts are not transposable.", @@ -8464,6 +8470,12 @@ def build_valrules(self): "IsUnsigned flag set to true for a float type '%0' vector", ) + self.add_valrule_msg( + "Instr.LinalgInvalidMatrixLayoutValueForOuterProductAccumulate", + "Matrix Layout for Linalg Mul/MulAdd operation must be valid.", + "matrix layout value '%0' is not valid for outerproductaccumulate, must be '%1'", + ) + # Some legacy rules: # - space is only supported for shader targets 5.1 and higher # - multiple rules regarding derivatives, which isn't a supported feature for DXIL From f5214f17ec23fe3fd263315e643fe5f470a8ea84 Mon Sep 17 00:00:00 2001 From: Tim Corringham Date: Wed, 14 May 2025 18:45:50 +0100 Subject: [PATCH 34/93] Support SV_DispatchGrid semantic in a nested record (#6931) The SV_DispatchGrid DXIL metadata for a node input record was not generated in cases where: - the field with the SV_DispatchGrid semantic was in a nested record - the field with the SV_DispatchGrid semantic was in a record field - the field with the SV_DispatchGrid semantic was inherited from a base record - in any combinations of the above Added FindDispatchGridSemantic() to be used by the AddHLSLNodeRecordTypeInfo() function, and added a test case. 
Fixes #6928 --------- Co-authored-by: Tim Corringham Co-authored-by: Tex Riddell --- tools/clang/lib/CodeGen/CGHLSLMS.cpp | 121 ++++++++-------- .../workgraph/nested_sv_dispatchgrid.hlsl | 130 ++++++++++++++++++ 2 files changed, 196 insertions(+), 55 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 16ddeaec60..b5add521a6 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -288,6 +288,9 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { llvm::Value *DestPtr, clang::QualType DestTy) override; void AddHLSLFunctionInfo(llvm::Function *, const FunctionDecl *FD) override; + bool FindDispatchGridSemantic(const CXXRecordDecl *RD, + hlsl::SVDispatchGrid &SDGRec, + CharUnits Offset = CharUnits()); void AddHLSLNodeRecordTypeInfo(const clang::ParmVarDecl *parmDecl, hlsl::NodeIOProperties &node); void EmitHLSLFunctionProlog(llvm::Function *, @@ -2560,6 +2563,66 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { m_ScopeMap[F] = ScopeInfo(F, FD->getLocation()); } +// Find the input node record field with the SV_DispatchGrid semantic. +// We have already diagnosed any error conditions in Sema, so we +// expect valid size and types, and use the first occurrence found. +// We return true if we have populated the SV_DispatchGrid values. 
+bool CGMSHLSLRuntime::FindDispatchGridSemantic(const CXXRecordDecl *RD, + hlsl::SVDispatchGrid &SDGRec, + CharUnits Offset) { + const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); + + // Check (non-virtual) bases + for (const CXXBaseSpecifier &Base : RD->bases()) { + DXASSERT(!Base.getType()->isDependentType(), + "Node Record with dependent base class not caught by Sema"); + if (Base.getType()->isDependentType()) + continue; + CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl); + if (FindDispatchGridSemantic(BaseDecl, SDGRec, BaseOffset)) + return true; + } + + // Check each field in this record. + for (FieldDecl *Field : RD->fields()) { + uint64_t FieldNo = Field->getFieldIndex(); + CharUnits FieldOffset = Offset + CGM.getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FieldNo)); + + // If this field is a record check its fields + if (const CXXRecordDecl *D = Field->getType()->getAsCXXRecordDecl()) { + if (FindDispatchGridSemantic(D, SDGRec, FieldOffset)) + return true; + } + // Otherwise check this field for the SV_DispatchGrid semantic annotation + for (const hlsl::UnusualAnnotation *UA : Field->getUnusualAnnotations()) { + if (UA->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { + const hlsl::SemanticDecl *SD = cast(UA); + if (SD->SemanticName.equals("SV_DispatchGrid")) { + const llvm::Type *FTy = CGM.getTypes().ConvertType(Field->getType()); + const llvm::Type *ElTy = FTy; + SDGRec.NumComponents = 1; + SDGRec.ByteOffset = (unsigned)FieldOffset.getQuantity(); + if (const llvm::VectorType *VT = dyn_cast(FTy)) { + SDGRec.NumComponents = VT->getNumElements(); + ElTy = VT->getElementType(); + } else if (const llvm::ArrayType *AT = + dyn_cast(FTy)) { + SDGRec.NumComponents = AT->getNumElements(); + ElTy = AT->getElementType(); + } + SDGRec.ComponentType = (ElTy->getIntegerBitWidth() == 16) + ? 
DXIL::ComponentType::U16 + : DXIL::ComponentType::U32; + return true; + } + } + } + } + return false; +} + void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( const clang::ParmVarDecl *parmDecl, hlsl::NodeIOProperties &node) { clang::QualType paramTy = parmDecl->getType().getCanonicalType(); @@ -2577,7 +2640,6 @@ void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( DiagnosticsEngine &Diags = CGM.getDiags(); auto &Rec = TemplateArgs.get(0); clang::QualType RecType = Rec.getAsType(); - llvm::Type *Type = CGM.getTypes().ConvertType(RecType); CXXRecordDecl *RD = RecType->getAsCXXRecordDecl(); // Get the TrackRWInputSharing flag from the record attribute @@ -2597,63 +2659,12 @@ void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( // Ex: For DispatchNodeInputRecord, set size = // size(MY_RECORD), alignment = alignof(MY_RECORD) + llvm::Type *Type = CGM.getTypes().ConvertType(RecType); node.RecordType.size = CGM.getDataLayout().getTypeAllocSize(Type); node.RecordType.alignment = CGM.getDataLayout().getABITypeAlignment(Type); - // Iterate over fields of the MY_RECORD(example) struct - for (auto fieldDecl : RD->fields()) { - // Check if any of the fields have a semantic annotation = - // SV_DispatchGrid - for (const hlsl::UnusualAnnotation *it : - fieldDecl->getUnusualAnnotations()) { - if (it->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { - const hlsl::SemanticDecl *sd = cast(it); - // if we find a field with SV_DispatchGrid, fill out the - // SV_DispatchGrid member with byteoffset of the field, - // NumComponents (3 for uint3 etc) and U32 vs U16 types, which are - // the only types allowed - if (sd->SemanticName.equals("SV_DispatchGrid")) { - clang::QualType FT = fieldDecl->getType(); - auto &DL = CGM.getDataLayout(); - auto &SDGRec = node.RecordType.SV_DispatchGrid; - - DXASSERT_NOMSG(SDGRec.NumComponents == 0); - - unsigned fieldIdx = fieldDecl->getFieldIndex(); - if (StructType *ST = dyn_cast(Type)) { - SDGRec.ByteOffset = - 
DL.getStructLayout(ST)->getElementOffset(fieldIdx); - } - const llvm::Type *lTy = CGM.getTypes().ConvertType(FT); - if (const llvm::VectorType *VT = - dyn_cast(lTy)) { - DXASSERT(VT->getElementType()->isIntegerTy(), "invalid type"); - SDGRec.NumComponents = VT->getNumElements(); - SDGRec.ComponentType = - (VT->getElementType()->getIntegerBitWidth() == 16) - ? DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } else if (const llvm::ArrayType *AT = - dyn_cast(lTy)) { - DXASSERT(AT->getElementType()->isIntegerTy(), "invalid type"); - DXASSERT_NOMSG(AT->getNumElements() <= 3); - SDGRec.NumComponents = AT->getNumElements(); - SDGRec.ComponentType = - (AT->getElementType()->getIntegerBitWidth() == 16) - ? DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } else { - // Scalar U16 or U32 - DXASSERT(lTy->isIntegerTy(), "invalid type"); - SDGRec.NumComponents = 1; - SDGRec.ComponentType = (lTy->getIntegerBitWidth() == 16) - ? DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } - } - } - } - } + + FindDispatchGridSemantic(RD, node.RecordType.SV_DispatchGrid); } } } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl new file mode 100644 index 0000000000..1da45dae1d --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl @@ -0,0 +1,130 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Check that the SV_DispatchGrid DXIL metadata for a node input record is +// generated in cases where: +// node1 - the field with the SV_DispatchGrid semantic is in a nested record +// node2 - the field with the SV_DispatchGrid semantic is in a record field +// node3 - the field with the SV_DispatchGrid semantic is inherited from a base record +// node4 - the field with the SV_DispatchGrid semantic is within a nested record inherited from a base record +// node5 - the field with the SV_DispatchGrid semantic is within a base 
record of a nested record +// node6 - the field with the SV_DispatchGrid semantic is within a templated base record +// node7 - the field with the SV_DispatchGrid semantic is within a templated base record of a templated record +// node8 - the field with the SV_DispatchGrid semantic has templated type + +struct Record1 { + struct { + // SV_DispatchGrid is within a nested record + uint3 grid : SV_DispatchGrid; + }; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node1(DispatchNodeInputRecord input) {} +// CHECK: {!"node1" +// CHECK: , i32 1, ![[SVDG_1:[0-9]+]] +// CHECK: [[SVDG_1]] = !{i32 0, i32 5, i32 3} + +struct Record2a { + uint u; + uint2 grid : SV_DispatchGrid; +}; + +struct Record2 { + uint a; + // SV_DispatchGrid is within a record field + Record2a b; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node2(DispatchNodeInputRecord input) {} +// CHECK: {!"node2" +// CHECK: , i32 1, ![[SVDG_2:[0-9]+]] +// CHECK: [[SVDG_2]] = !{i32 8, i32 5, i32 2} + +struct Record3 : Record2a { + // SV_DispatchGrid is inherited + uint4 n; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node3(DispatchNodeInputRecord input) {} +// CHECK: {!"node3" +// CHECK: , i32 1, ![[SVDG_3:[0-9]+]] +// CHECK: [[SVDG_3]] = !{i32 4, i32 5, i32 2} + +struct Record4 : Record2 { + // SV_DispatchGrid is in a nested field in a base record + float f; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node4(DispatchNodeInputRecord input) {} +// CHECK: {!"node4" +// CHECK: , i32 1, ![[SVDG_2]] + +struct Record5 { + uint4 x; + // SV_DispatchGrid is in a base record of a record field + Record3 r; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node5(DispatchNodeInputRecord input) {} +// CHECK: {!"node5" +// CHECK: , i32 1, ![[SVDG_5:[0-9]+]] +// CHECK: [[SVDG_5]] = !{i32 20, i32 5, i32 2} + +template 
+struct Base { + T DG : SV_DispatchGrid; +}; + +struct Derived1 : Base { + int4 x; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node6(DispatchNodeInputRecord input) {} +// CHECK: {!"node6" +// CHECK: , i32 1, ![[SVDG_1]] + +template +struct Derived2 : Base { + T Y; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node7(DispatchNodeInputRecord > input) {} +// CHECK: {!"node7" +// CHECK: , i32 1, ![[SVDG_7:[0-9]+]] +// CHECK: [[SVDG_7]] = !{i32 0, i32 5, i32 2} + +template +struct Derived3 { + Derived2 V; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node8(DispatchNodeInputRecord< Derived3 > input) {} +// CHECK: {!"node8" +// CHECK: , i32 1, ![[SVDG_1]] From 8b406b5717ca17874bd6b2ce832a8802c6fb3979 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Wed, 14 May 2025 17:57:23 -0700 Subject: [PATCH 35/93] Fix detection of builtin UDT DXR struct types (#7452) Built-in DXR struct types RayDesc and BuiltInTriangleIntersectionAttributes were not treated identically to other UDT types. This caused differences in intrinsic codegen when one of these types is returned. This change corrects this difference so these builtin structs are handled in the same way as other UDTs. Fixes #7450. 
--- tools/clang/include/clang/AST/HlslTypes.h | 1 - tools/clang/lib/AST/HlslTypes.cpp | 28 ++++++------- .../hitobject_attributes_builtin.hlsl | 42 +++++++++++++++++++ 3 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3a02824b3a..c14f562101 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -494,7 +494,6 @@ DXIL::NodeIOKind GetNodeIOType(clang::QualType type); bool IsHLSLStructuredBufferType(clang::QualType type); bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type); -bool IsHLSLNumericUserDefinedType(clang::QualType type); bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT); bool IsHLSLBuiltinRayAttributeStruct(clang::QualType QT); bool IsHLSLAggregateType(clang::QualType type); diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 5b19e064a3..07efb53c8c 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -103,14 +103,19 @@ bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type) { BuiltinTy->getKind() != BuiltinType::Kind::Char_S; } -bool IsHLSLNumericUserDefinedType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); +// In some cases we need record types that are annotatable and trivially +// copyable from outside the shader. This excludes resource types which may be +// trivially copyable inside the shader, and builtin matrix and vector types +// which can't be annotated. But includes UDTs of trivially copyable data and +// the builtin trivially copyable raytracing structs. 
+bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { + const clang::Type *Ty = QT.getCanonicalType().getTypePtr(); if (const RecordType *RT = dyn_cast(Ty)) { const RecordDecl *RD = RT->getDecl(); - if (!IsUserDefinedRecordType(type)) + if (!IsUserDefinedRecordType(QT)) return false; - for (auto member : RD->fields()) { - if (!IsHLSLNumericOrAggregateOfNumericType(member->getType())) + for (auto Member : RD->fields()) { + if (!IsHLSLNumericOrAggregateOfNumericType(Member->getType())) return false; } return true; @@ -118,16 +123,6 @@ bool IsHLSLNumericUserDefinedType(clang::QualType type) { return false; } -// In some cases we need record types that are annotatable and trivially -// copyable from outside the shader. This excludes resource types which may be -// trivially copyable inside the shader, and builtin matrix and vector types -// which can't be annotated. But includes UDTs of trivially copyable data and -// the builtin trivially copyable raytracing structs. -bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { - return IsHLSLNumericUserDefinedType(QT) || - IsHLSLBuiltinRayAttributeStruct(QT); -} - bool IsHLSLBuiltinRayAttributeStruct(clang::QualType QT) { QT = QT.getCanonicalType(); const clang::Type *Ty = QT.getTypePtr(); @@ -609,7 +604,8 @@ bool IsUserDefinedRecordType(clang::QualType QT) { const clang::Type *Ty = QT.getCanonicalType().getTypePtr(); if (const RecordType *RT = dyn_cast(Ty)) { const RecordDecl *RD = RT->getDecl(); - if (RD->isImplicit()) + // Built-in ray tracing struct types are considered user defined types. 
+ if (RD->isImplicit() && !IsHLSLBuiltinRayAttributeStruct(QT)) return false; if (auto TD = dyn_cast(RD)) if (TD->getSpecializedTemplate()->isImplicit()) diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl new file mode 100644 index 0000000000..a096bb6f11 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl @@ -0,0 +1,42 @@ +// RUN: %dxc /Tlib_6_9 %s | FileCheck %s +// RUN: %dxc /Tlib_6_9 -fcgl %s | FileCheck %s -check-prefix=FCGL + +// Make sure that we can use the BuiltInTriangleIntersectionAttributes struct +// as a template argument to GetAttributes. + +// For -fcgl, just check the form of the HL call. +// FCGL: %{{[^ ]+}} = call %struct.BuiltInTriangleIntersectionAttributes* @"dx.hl.op..%struct.BuiltInTriangleIntersectionAttributes* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}) + +// CHECK: %[[ATTR:[^ ]+]] = alloca %struct.BuiltInTriangleIntersectionAttributes +// CHECK: call void @dx.op.hitObject_Attributes.struct.BuiltInTriangleIntersectionAttributes(i32 289, %dx.types.HitObject %{{[^ ]+}}, %struct.BuiltInTriangleIntersectionAttributes* nonnull %[[ATTR]]) + +RaytracingAccelerationStructure Scene : register(t0, space0); +RWTexture2D RenderTarget : register(u0); + +struct [raypayload] RayPayload +{ + float4 color : write(caller, closesthit, miss) : read(caller); +}; + +typedef BuiltInTriangleIntersectionAttributes MyAttribs; + +[shader("raygeneration")] +void MyRaygenShader() +{ + RayDesc ray; + ray.Origin = float3(0,0,0); + ray.Direction = float3(0, 0, 1); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = { float4(0, 0, 0, 0) }; + float4 color = float4(1,1,1,1); + + dx::HitObject hit = dx::HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, ray, payload); + + MyAttribs attr = hit.GetAttributes(); + payload.color += 
float4(attr,0,1); + + // Write the raytraced color to the output texture. + RenderTarget[DispatchRaysIndex().xy] = payload.color; +} From 9b04d69dbfc181966a06fce46b9005d685558724 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 15 May 2025 14:38:09 -0400 Subject: [PATCH 36/93] [SPIRV] Cast derivative opts to 32-bits. (#7445) The SPIR-V operations require 32-bit floats. Smaller float type can be cast to 32-bits to perform the operation. The FE already emits a warning for 64-bits. Fixes #7431 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 88 +++++++++++++++++-- tools/clang/lib/SPIRV/SpirvEmitter.h | 15 ++++ .../CodeGenSPIRV/intrinsics.ddx.double.hlsl | 21 +++++ .../CodeGenSPIRV/intrinsics.ddx.half.hlsl | 19 ++++ 4 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 7337a33b01..4da8584eee 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -9484,12 +9484,17 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { retVal = processIntrinsicPointerCast(callExpr, true); break; } - INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); - INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); - INTRINSIC_SPIRV_OP_CASE(ddx_fine, DPdxFine, false); - INTRINSIC_SPIRV_OP_CASE(ddy, DPdy, true); - INTRINSIC_SPIRV_OP_CASE(ddy_coarse, DPdyCoarse, false); - INTRINSIC_SPIRV_OP_CASE(ddy_fine, DPdyFine, false); + case hlsl::IntrinsicOp::IOP_ddx: + case hlsl::IntrinsicOp::IOP_ddx_coarse: + case hlsl::IntrinsicOp::IOP_ddx_fine: + case hlsl::IntrinsicOp::IOP_ddy: + case hlsl::IntrinsicOp::IOP_ddy_coarse: + case hlsl::IntrinsicOp::IOP_ddy_fine: { + retVal = processDerivativeIntrinsic(hlslOpcode, callExpr->getArg(0), + callExpr->getExprLoc(), + callExpr->getSourceRange()); + break; + } 
INTRINSIC_SPIRV_OP_CASE(countbits, BitCount, false); INTRINSIC_SPIRV_OP_CASE(fmod, FRem, true); INTRINSIC_SPIRV_OP_CASE(fwidth, Fwidth, true); @@ -9572,6 +9577,77 @@ SpirvEmitter::processIntrinsicFirstbit(const CallExpr *callExpr, srcRange); } +SpirvInstruction *SpirvEmitter::processMatrixDerivativeIntrinsic( + hlsl::IntrinsicOp hlslOpcode, const Expr *arg, SourceLocation loc, + SourceRange range) { + const auto actOnEachVec = [this, hlslOpcode, loc, range]( + uint32_t /*index*/, QualType inType, + QualType outType, SpirvInstruction *curRow) { + return processDerivativeIntrinsic(hlslOpcode, curRow, loc, range); + }; + + return processEachVectorInMatrix(arg, arg->getType(), doExpr(arg), + actOnEachVec, loc, range); +} + +SpirvInstruction * +SpirvEmitter::processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, SourceLocation loc, + SourceRange range) { + if (isMxNMatrix(arg->getType())) { + return processMatrixDerivativeIntrinsic(hlslOpcode, arg, loc, range); + } + return processDerivativeIntrinsic(hlslOpcode, doExpr(arg), loc, range); +} + +SpirvInstruction *SpirvEmitter::processDerivativeIntrinsic( + hlsl::IntrinsicOp hlslOpcode, SpirvInstruction *arg, SourceLocation loc, + SourceRange range) { + QualType returnType = arg->getAstResultType(); + assert(isFloatOrVecOfFloatType(returnType)); + + if (!spvContext.isPS()) + addDerivativeGroupExecutionMode(); + needsLegalization = true; + + QualType B32Type = astContext.FloatTy; + uint32_t vectorSize = 0; + QualType elementType = returnType; + if (isVectorType(returnType, &elementType, &vectorSize)) { + B32Type = astContext.getExtVectorType(B32Type, vectorSize); + } + + // Derivative operations work on 32-bit floats only. Cast to 32-bit if needed. 
+ SpirvInstruction *operand = castToType(arg, returnType, B32Type, loc, range); + + spv::Op opcode = spv::Op::OpNop; + switch (hlslOpcode) { + case hlsl::IntrinsicOp::IOP_ddx: + opcode = spv::Op::OpDPdx; + break; + case hlsl::IntrinsicOp::IOP_ddx_coarse: + opcode = spv::Op::OpDPdxCoarse; + break; + case hlsl::IntrinsicOp::IOP_ddx_fine: + opcode = spv::Op::OpDPdxFine; + break; + case hlsl::IntrinsicOp::IOP_ddy: + opcode = spv::Op::OpDPdy; + break; + case hlsl::IntrinsicOp::IOP_ddy_coarse: + opcode = spv::Op::OpDPdyCoarse; + break; + case hlsl::IntrinsicOp::IOP_ddy_fine: + opcode = spv::Op::OpDPdyFine; + break; + }; + + SpirvInstruction *result = + spvBuilder.createUnaryOp(opcode, B32Type, operand, loc, range); + result = castToType(result, B32Type, returnType, loc, range); + return result; +} + // Returns true is the given expression can be used as an output parameter. // // Warning: this function could return false negatives. diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 6c1e12989c..10694313a8 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -789,6 +789,21 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processIntrinsicFirstbit(const CallExpr *, GLSLstd450 glslOpcode); + SpirvInstruction * + processMatrixDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, SourceLocation loc, + SourceRange range); + + SpirvInstruction *processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, + SourceLocation loc, + SourceRange range); + + SpirvInstruction *processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + SpirvInstruction *arg, + SourceLocation loc, + SourceRange range); + private: /// Returns the for constant value 0 of the given type. 
SpirvConstant *getValueZero(QualType type); diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl new file mode 100644 index 0000000000..a306463466 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc -T ps_6_2 -E main -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: :14:22: warning: conversion from larger type 'double' to smaller type 'float', possible loss of data [-Wconversion] +// CHECK: :20:22: warning: conversion from larger type 'double2' to smaller type 'vector', possible loss of data [-Wconversion] + +void main() { + double a; + double2 b; + +// CHECK: [[a:%[0-9]+]] = OpLoad %double %a +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %float [[a]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %float [[c]] +// CHECK-NEXT: OpFConvert %double [[r]] + double da = ddx(a); + +// CHECK: [[b:%[0-9]+]] = OpLoad %v2double %b +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %v2float [[b]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %v2float [[c]] +// CHECK-NEXT: OpFConvert %v2double [[r]] + double2 db = ddx(b); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl new file mode 100644 index 0000000000..11b63151ee --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T ps_6_2 -E main -enable-16bit-types -fcgl %s -spirv | FileCheck %s + +void main() { + + half a; + half2 b; + +// CHECK: [[a:%[0-9]+]] = OpLoad %half %a +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %float [[a]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %float [[c]] +// CHECK-NEXT: OpFConvert %half [[r]] + half da = ddx(a); + +// CHECK: [[b:%[0-9]+]] = OpLoad %v2half %b +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %v2float [[b]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %v2float [[c]] +// CHECK-NEXT: OpFConvert %v2half [[r]] + half2 db = ddx(b); +} From 
14e1f83cd1c437cd74804d4a99861c4961ded646 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 15 May 2025 14:38:47 -0400 Subject: [PATCH 37/93] [SPIRV] Allow decoration attributes on fields. (#7453) Fixes #7270 --- tools/clang/include/clang/Basic/Attr.td | 6 ++++-- .../spv.inline.decorate.member.hlsl | 18 +++++++++++++----- .../attributes/spv.inline.decorate.member.hlsl | 13 ------------- 3 files changed, 17 insertions(+), 20 deletions(-) delete mode 100644 tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 2518423565..db7fdea8d9 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -1418,7 +1418,8 @@ def VKDecorateExt : InheritableAttr { def VKDecorateIdExt : InheritableAttr { let Spellings = [CXX11<"vk", "ext_decorate_id">]; - let Subjects = SubjectList<[Function, Var, ParmVar, TypedefName], ErrorDiag>; + let Subjects = + SubjectList<[Function, Var, ParmVar, Field, TypedefName], ErrorDiag>; let Args = [UnsignedArgument<"decorate">, VariadicExprArgument<"arguments">]; let LangOpts = [SPIRV]; let Documentation = [Undocumented]; @@ -1426,7 +1427,8 @@ def VKDecorateIdExt : InheritableAttr { def VKDecorateStringExt : InheritableAttr { let Spellings = [CXX11<"vk", "ext_decorate_string">]; - let Subjects = SubjectList<[Function, Var, ParmVar, TypedefName], ErrorDiag>; + let Subjects = + SubjectList<[Function, Var, ParmVar, Field, TypedefName], ErrorDiag>; let Args = [UnsignedArgument<"decorate">, VariadicStringArgument<"arguments">]; let LangOpts = [SPIRV]; let Documentation = [Undocumented]; diff --git a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl index bb4c2efde1..88a902d326 100644 --- a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl @@ -4,9 +4,9 @@ template [[vk::ext_instruction(/*spv::OpBitcast*/124)]] T Bitcast(U); -// CHECK: OpMemberDecorate %S 0 Offset 0 -// CHECK: OpMemberDecorate %S 1 Offset 16 -// CHECK: %S = OpTypeStruct %v4float %v4float +// CHECK-DAG: OpMemberDecorate %S 0 Offset 0 +// CHECK-DAG: OpMemberDecorate %S 1 Offset 16 +// CHECK-DAG: %S = OpTypeStruct %v4float %v4float struct S { @@ -14,6 +14,12 @@ struct S [[vk::ext_decorate(/*offset*/ 35, 16)]] float4 f2; }; +// CHECK-DAG: OpDecorateString %out_var_SV_TARGET UserSemantic "raster_order_group_0" +struct PixelOutput +{ + [[vk::location(0), vk::ext_decorate_string(5635, "raster_order_group_0")]] float4 rt0 : SV_TARGET; +}; + using PointerType = vk::SpirvOpaqueType< /* OpTypePointer */ 32, /* PhysicalStorageBuffer */ vk::Literal >, @@ -27,14 +33,16 @@ S Load(PointerType pointer, uint64_t address; -float4 main() : SV_TARGET +PixelOutput main() { // CHECK: [[BC:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_S {{%[0-9]+}} PointerType ptr = Bitcast(address); +PixelOutput output; // CHECK: [[LD:%[0-9]+]] = OpLoad %S [[BC]] Aligned 32 // CHECK: [[RET:%[0-9]+]] = OpCompositeExtract %v4float [[LD]] 0 // CHECK: OpStore %out_var_SV_TARGET [[RET]] - return Load(ptr).f1; +output.rt0 = Load(ptr).f1; + return output; } diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl deleted file mode 100644 index ece7e3f2f4..0000000000 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -// REQUIRES: spirv -// RUN: %dxc -T ps_6_0 -E main -verify -spirv %s - -struct S -{ - [[vk::ext_decorate_id(/*offset*/ 35, 0)]] float4 f1; /* expected-error{{'ext_decorate_id' attribute only applies to functions, variables, parameters, and types}} */ - [[vk::ext_decorate_string(/*offset*/ 35, "16")]] float4 f2; /* 
expected-error{{'ext_decorate_string' attribute only applies to functions, variables, parameters, and types}} */ -}; - -float4 main() : SV_TARGET -{ - -} From 2a6bacd8712b040c5ab490c80dfa7553a355d2e0 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 15 May 2025 14:39:03 -0400 Subject: [PATCH 38/93] [SPIRV] Treat vk::Spirv*Type as opaque when reconstructing (#7454) It is possible to have two struct types in spir-v that are the same except for the decorations. Sometimes we have to reconstruct the value from one type to another. In the case of a vk::SpirvType, we do not know anything about the type, so this should not happen. When trying to reconstruct the value, we should simply return the original value. Fixes #6963 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 11 +++++++---- .../CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 4da8584eee..dc2b332d31 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -7081,14 +7081,17 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, // Structs if (const auto *recordType = valType->getAs()) { - assert(recordType->isStructureType()); - if (isTypeInVkNamespace(recordType) && - recordType->getDecl()->getName().equals("BufferPointer")) { - // Uniquely among structs, vk::BufferPointer lowers to a pointer type. + (recordType->getDecl()->getName().equals("BufferPointer") || + recordType->getDecl()->getName().equals("SpirvType") || + recordType->getDecl()->getName().equals("SpirvOpaqueType"))) { + // vk::BufferPointer lowers to a pointer type. No need to reconstruct + // the value. The vk::Spirv*Type should be treated an opaque type. All we + // can do is leave it the same.
return srcVal; } + assert(recordType->isStructureType()); LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, spvBuilder); const StructType *spirvStructType = diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl index 7be0713e48..c2892cfc29 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl @@ -12,7 +12,16 @@ struct BufferData { float3 v; }; +using MyInt = vk::SpirvType< + /*spv::OpTypeInt*/21, + 1,1, // size and alignment + vk::Literal >, // bits + vk::Literal > // signed +>; + uint64_t Address; + +[[vk::ext_capability(/* Int16 */ 22)]] float4 main() : SV_Target0 { // CHECK: [[addr:%[0-9]+]] = OpLoad %ulong // CHECK-NEXT: [[buf:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_float [[addr]] @@ -50,5 +59,10 @@ float4 main() : SV_Target0 { // CHECK-NEXT: [[load:%[0-9]+]] = OpLoad %BufferData_0 [[buf]] Aligned 4 d = vk::RawBufferLoad(0); + // CHECK: [[buf:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_spirvIntrinsicType %ulong_0 + // CHECK-NEXT: [[load:%[0-9]+]] = OpLoad %spirvIntrinsicType [[buf]] Aligned 4 + // CHECK-NEXT: OpStore %mi [[load]] + MyInt mi = vk::RawBufferLoad(0); + return float4(w.x, x, y, z); } From 242196438a1fad25da901b6a095b33929ca19ee3 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 15 May 2025 17:52:56 -0400 Subject: [PATCH 39/93] [SPIRV] Add warning for initialized globals (#7448) To be consistent with DXIL, we will start emitting a warning for externally visible variables that have an initializer. Until now, these were silently ignored.
Fixes #3950 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 4 ++++ .../groupshared.init.warning.hlsl | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index dc2b332d31..575597352d 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -2021,6 +2021,10 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { // variables) belongs to the Function storage class. if (isExternalVar(decl)) { var = declIdMapper.createExternVar(decl); + if (decl->hasInit()) { + emitWarning("Initializer of external global will be ignored", + decl->getLocation()); + } } else { // We already know the variable is not externally visible here. If it does // not have local storage, it should be file scope variable. diff --git a/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl b/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl new file mode 100644 index 0000000000..c49534948b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T cs_6_0 -E main -spirv %s 2>&1 | FileCheck %s + +groupshared uint testing = 0; + +[numthreads(64, 1, 1)] +void main(uint local_thread_id_flat : SV_GroupIndex) { + + InterlockedAdd(testing, 1); + GroupMemoryBarrierWithGroupSync(); + + if (local_thread_id_flat == 0) { + if (testing > 64) { + printf("testing is %u wtf", testing); + } + } +} + +// CHECK: warning: Initializer of external global will be ignored +// CHECK-NEXT: groupshared uint testing = 0; \ No newline at end of file From 7054e5207ae9cb573e02068aac29ddf6299d2c2e Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 16 May 2025 06:11:59 -0400 Subject: [PATCH 40/93] [SEMA] Don't emit an error for sizeof an enum. 
(#7449) Fixes #7416 --- tools/clang/lib/AST/HlslTypes.cpp | 2 ++ .../clang/test/CodeGenSPIRV/enum_sizeof.hlsl | 31 +++++++++++++++++++ tools/clang/test/SemaHLSL/enum_sizeof.hlsl | 31 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl create mode 100644 tools/clang/test/SemaHLSL/enum_sizeof.hlsl diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 07efb53c8c..e081362ebf 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -95,6 +95,8 @@ bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type) { } else if (type->isArrayType()) { return IsHLSLNumericOrAggregateOfNumericType( QualType(type->getArrayElementTypeNoTypeQual(), 0)); + } else if (type->isEnumeralType()) { + return true; } // Chars can only appear as part of strings, which we don't consider numeric. diff --git a/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl b/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl new file mode 100644 index 0000000000..f596a2db50 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T cs_6_0 -E main -fcgl %s -spirv | FileCheck %s + +enum E1 : uint64_t +{ + v1 = 0, +}; + +enum E2 : uint32_t +{ + v2 = 0, +}; + +struct S { + E1 e1; + E2 e2; +}; + +RWBuffer b; + +[numthreads(128, 1, 1)] +void main() +{ +// CHECK: OpImageWrite {{%.*}} %uint_0 %int_8 None + b[0] = sizeof(E1); + +// CHECK: OpImageWrite {{%.*}} %uint_1 %int_4 None + b[1] = sizeof(E2); + +// CHECK: OpImageWrite {{%.*}} %uint_2 %int_16 None + b[2] = sizeof(S); +} diff --git a/tools/clang/test/SemaHLSL/enum_sizeof.hlsl b/tools/clang/test/SemaHLSL/enum_sizeof.hlsl new file mode 100644 index 0000000000..71723976a9 --- /dev/null +++ b/tools/clang/test/SemaHLSL/enum_sizeof.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T cs_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +enum E1 : uint64_t +{ + v1 = 0, +}; + +enum E2 : uint32_t 
+{ + v2 = 0, +}; + +struct S { + E1 e1; + E2 e2; +}; + +RWBuffer b; + +[numthreads(128, 1, 1)] +void main() +{ +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'E1' + b[0] = sizeof(E1); + +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'E2' + b[1] = sizeof(E2); + +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'S' + b[2] = sizeof(S); +} From fef2f94250793edd2358bc44f5aab8d6251a80e1 Mon Sep 17 00:00:00 2001 From: Nielsbishere Date: Fri, 16 May 2025 16:10:20 +0200 Subject: [PATCH 41/93] Fix -fvk-invert-y (#7447) https://github.com/microsoft/DirectXShaderCompiler/issues/7446 This fixes some outdated documentation as well as a compile error when enabling fvk-invert-y on lib files and makes sure that it only gets enabled on SV_POSITION that is used in VS/GS/DS/MS (so PS doesn't get caught in the crossfire). Also tested the dx-position-w one and that one already has correct behavior here. --------- Co-authored-by: NielsbishereAlt --- docs/SPIR-V.rst | 2 +- include/dxc/Support/HLSLOptions.td | 2 +- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 12 ++++++++---- .../test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl | 12 ++++++++++++ 4 files changed, 22 insertions(+), 6 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index b5e9c05079..f3981ba854 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -4227,7 +4227,7 @@ codegen for Vulkan: - ``-fvk-use-dx-layout``: Uses DirectX layout rules for resources. - ``-fvk-invert-y``: Negates (additively inverts) SV_Position.y before writing to stage output. Used to accommodate the difference between Vulkan's - coordinate system and DirectX's. Only allowed in VS/DS/GS. + coordinate system and DirectX's. Only allowed in VS/DS/GS/MS/Lib. - ``-fvk-use-dx-position-w``: Reciprocates (multiplicatively inverts) SV_Position.w after reading from stage input. 
Used to accommodate the difference between Vulkan DirectX: the w component of SV_Position in PS is diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 4d72cb2312..58f6bdfbf3 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -368,7 +368,7 @@ def fvk_bind_register : MultiArg<["-"], "fvk-bind-register", 4>, MetaVarName<"; def vkbr : MultiArg<["-"], "vkbr", 4>, Flags<[CoreOption, DriverOption]>, Alias; def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group, Flags<[CoreOption, DriverOption]>, - HelpText<"Negate SV_Position.y before writing to stage output in VS/DS/GS to accommodate Vulkan's coordinate system">; + HelpText<"Negate SV_Position.y before writing to stage output in VS/DS/GS/MS/Lib to accommodate Vulkan's coordinate system">; def fvk_use_dx_position_w: Flag<["-"], "fvk-use-dx-position-w">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Reciprocate SV_Position.w after reading from stage input in PS to accommodate the difference between Vulkan and DirectX">; def fvk_support_nonzero_base_instance: Flag<["-"], "fvk-support-nonzero-base-instance">, Group, Flags<[CoreOption, DriverOption]>, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 575597352d..9ffa978511 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -604,8 +604,8 @@ SpirvEmitter::SpirvEmitter(CompilerInstance &ci) emitError("unknown shader module: %0", {}) << shaderModel->GetName(); if (spirvOptions.invertY && !shaderModel->IsVS() && !shaderModel->IsDS() && - !shaderModel->IsGS() && !shaderModel->IsMS()) - emitError("-fvk-invert-y can only be used in VS/DS/GS/MS", {}); + !shaderModel->IsGS() && !shaderModel->IsMS() && !shaderModel->IsLib()) + emitError("-fvk-invert-y can only be used in VS/DS/GS/MS/Lib", {}); if (spirvOptions.useGlLayout && spirvOptions.useDxLayout) emitError("cannot specify both -fvk-use-dx-layout and 
-fvk-use-gl-layout", @@ -14964,8 +14964,12 @@ SpirvEmitter::createSpirvIntrInstExt(llvm::ArrayRef attrs, SpirvInstruction *SpirvEmitter::invertYIfRequested(SpirvInstruction *position, SourceLocation loc, SourceRange range) { - // Negate SV_Position.y if requested - if (spirvOptions.invertY) { + // Negate SV_Position.y if requested and supported + + bool supportsInvertY = spvContext.isVS() || spvContext.isGS() || + spvContext.isDS() || spvContext.isMS(); + + if (spirvOptions.invertY && supportsInvertY) { const auto oldY = spvBuilder.createCompositeExtract( astContext.FloatTy, position, {1}, loc, range); const auto newY = spvBuilder.createUnaryOp( diff --git a/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl b/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl new file mode 100644 index 0000000000..6dac20fc6f --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_3 -fvk-invert-y -fcgl %s -spirv | FileCheck %s + +[shader("vertex")] +float4 main(float4 a : A) : SV_Position { + return a; +} + +// CHECK: [[a:%[0-9]+]] = OpFunctionCall %v4float %src_main %param_var_a +// CHECK-NEXT: [[oldY:%[0-9]+]] = OpCompositeExtract %float [[a]] 1 +// CHECK-NEXT: [[newY:%[0-9]+]] = OpFNegate %float [[oldY]] +// CHECK-NEXT: [[pos:%[0-9]+]] = OpCompositeInsert %v4float [[newY]] [[a]] 1 +// CHECK-NEXT: OpStore %gl_Position [[pos]] From 053e7ac656e01d90aa9931c4d8b8a89c14741027 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Fri, 16 May 2025 08:42:57 -0700 Subject: [PATCH 42/93] Refactor udt intrinsic arg copy to before SROA, flatten RayDesc (#7440) Intrinsics that take UDT arguments need copy-in/copy-out. Other aggregate args are flattened for intrinsic calls. Previously, these operations were intermingled, driven by SROA on alloca/GV values. There were RayDesc arguments that weren't treated consistently, and weren't copied in when necessary, leading to problems. 
They should be flattened into the intrinsic arguments, but TraceRay calls didn't do this. This change: - flattens RayDesc args for all intrinsics that use them. - separates the copy-in/copy-out generation into a separate operation before SROA. Ideally, this copy-in/copy-out would have been generated by CodeGen based on by-value passing, but that's a deeper intrinsic AST issue potentially. - Updated and added tests. Fixes #7434. --- include/dxc/DXIL/DxilConstants.h | 4 + include/dxc/HLSL/HLOperations.h | 23 +- lib/HLSL/HLOperationLower.cpp | 200 ++++++------- .../Scalar/ScalarReplAggregatesHLSL.cpp | 266 +++++++++++------- .../hlsl/objects/RayQuery/tracerayinline.hlsl | 0 .../RayQuery/tracerayinline_cb_raydesc.hlsl | 14 + .../DxilGen/hitobject_traceinvoke_dxilgen.ll | 201 ++++++------- .../tracerayinline_cb_raydesc_dxilgen.ll | 160 +++++++++++ .../Passes/DxilGen/tracerayinline_dxilgen.ll | 134 +++++++++ .../hitobject_fromrayquery_scalarrepl.ll | 8 +- .../hitobject_make_scalarrepl.ll | 13 +- .../hitobject_traceinvoke_scalarrepl.ll | 198 +++++++++++++ .../ScalarReplHLSL/traceray_scalarrepl.ll | 182 ++++++++++++ .../tracerayinline_cb_raydesc_scalarrepl.ll | 154 ++++++++++ .../tracerayinline_scalarrepl.ll | 155 ++++++++++ .../pix/AnnotateVirtualRegs-Raygen.hlsl | 36 --- ...raytracing_intersection_geometryIndex.hlsl | 8 +- tools/clang/unittests/HLSL/PixTest.cpp | 100 ------- 18 files changed, 1375 insertions(+), 481 deletions(-) rename tools/clang/test/{HLSLFileCheck => CodeGenDXIL}/hlsl/objects/RayQuery/tracerayinline.hlsl (100%) create mode 100644 tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl create mode 100644 tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll create mode 100644 
tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll create mode 100644 tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll delete mode 100644 tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index bf6de7ed3b..0f28edbc39 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -1583,6 +1583,10 @@ const unsigned kMSStoreOutputColOpIdx = 3; const unsigned kMSStoreOutputVIdxOpIdx = 4; const unsigned kMSStoreOutputValOpIdx = 5; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 3; +const unsigned kHitObjectMakeMiss_NumOp = 11; + // HitObject::TraceRay const unsigned kHitObjectTraceRay_RayDescOpIdx = 7; const unsigned kHitObjectTraceRay_PayloadOpIdx = 15; diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index c75318da99..0da9804ecb 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -396,7 +396,12 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; // TraceRay. const unsigned kTraceRayRayDescOpIdx = 7; -const unsigned kTraceRayPayLoadOpIdx = 8; +// kTraceRayPayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayPayloadPreOpIdx = 8; +// kTraceRayPayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayPayloadOpIdx = 11; +const unsigned kTraceRay_PreNumOp = 9; +const unsigned kTraceRay_NumOp = 12; // AllocateRayQuery const unsigned kAllocateRayQueryRayFlagsIdx = 1; @@ -407,6 +412,10 @@ const unsigned kCallShaderPayloadOpIdx = 2; // TraceRayInline. 
const unsigned kTraceRayInlineRayDescOpIdx = 5; +// kTraceRayInlinePayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayInlinePayloadPreOpIdx = 6; +// kTraceRayInlinePayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayInlinePayloadOpIdx = 9; // ReportIntersection. const unsigned kReportIntersectionAttributeOpIdx = 3; @@ -435,11 +444,19 @@ const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; // HitObject::MakeMiss const unsigned kHitObjectMakeMiss_NumOp = 8; -const unsigned kHitObjectMakeMissRayDescOpIdx = 4; +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 4; // HitObject::TraceRay const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; -const unsigned kHitObjectTraceRay_NumOp = 10; +// kHitObjectTraceRay_PayloadPreOpIdx is before flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadPreOpIdx = 9; +// kHitObjectTraceRay_PayloadOpIdx is after flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadOpIdx = 12; +const unsigned kHitObjectTraceRay_PreNumOp = 10; +const unsigned kHitObjectTraceRay_NumOp = 13; + +// HitObject::Invoke +const unsigned kHitObjectInvoke_PayloadOpIdx = 2; // HitObject::FromRayQuery const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 18d003a764..58c1de3941 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -5720,37 +5720,24 @@ Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); } -static unsigned LoadRayDescElementsIntoArgs(Value **Args, hlsl::OP *OP, - IRBuilder<> &Builder, - Value *RayDescPtr, unsigned Index) { - // struct RayDesc - //{ - // float3 Origin; - // float TMin; - // float3 Direction; - // float TMax; - //}; - Value *ZeroIdx = OP->GetU32Const(0); - Value *Origin = Builder.CreateGEP(RayDescPtr, {ZeroIdx, ZeroIdx}); - Origin = Builder.CreateLoad(Origin); - 
Args[Index++] = Builder.CreateExtractElement(Origin, (uint64_t)0); - Args[Index++] = Builder.CreateExtractElement(Origin, 1); - Args[Index++] = Builder.CreateExtractElement(Origin, 2); - - Value *TMinPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(1)}); - Args[Index++] = Builder.CreateLoad(TMinPtr); - - Value *DirectionPtr = - Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(2)}); - Value *Direction = Builder.CreateLoad(DirectionPtr); - - Args[Index++] = Builder.CreateExtractElement(Direction, (uint64_t)0); - Args[Index++] = Builder.CreateExtractElement(Direction, 1); - Args[Index++] = Builder.CreateExtractElement(Direction, 2); - - Value *TMaxPtr = Builder.CreateGEP(RayDescPtr, {ZeroIdx, OP->GetU32Const(3)}); - Args[Index++] = Builder.CreateLoad(TMaxPtr); - return Index; +static void TransferRayDescArgs(Value **Args, hlsl::OP *OP, + IRBuilder<> &Builder, CallInst *CI, + unsigned &Index, unsigned &HLIndex) { + // Extract elements from flattened ray desc arguments in HL op. 
+ // float3 Origin; + Value *origin = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(origin, 1); + Args[Index++] = Builder.CreateExtractElement(origin, 2); + // float TMin; + Args[Index++] = CI->getArgOperand(HLIndex++); + // float3 Direction; + Value *direction = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(direction, 1); + Args[Index++] = Builder.CreateExtractElement(direction, 2); + // float TMax; + Args[Index++] = CI->getArgOperand(HLIndex++); } Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, @@ -5759,21 +5746,24 @@ Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, bool &Translated) { hlsl::OP *OP = &Helper.hlslOP; - Value *RayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); - Value *PayLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx); - Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; Args[0] = OP->GetU32Const(static_cast(OpCode)); - for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) - Args[i] = CI->getArgOperand(i); + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); IRBuilder<> Builder(CI); - LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDesc, - DXIL::OperandIndex::kTraceRayRayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx); + + Value *Payload = CI->getArgOperand(HLIndex++); + Args[Index++] = Payload; - Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = PayLoad; + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp); - Type *Ty = PayLoad->getType(); + Type *Ty 
= Payload->getType(); Function *F = OP->GetOpFunc(OpCode, Ty); return Builder.CreateCall(F, Args); @@ -5817,33 +5807,16 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; Args[0] = opArg; - for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) { - Args[i] = CI->getArgOperand(i); - } + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); IRBuilder<> Builder(CI); - unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx; - unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx; - - // struct RayDesc - //{ - // float3 Origin; - Value *origin = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(origin, 1); - Args[index++] = Builder.CreateExtractElement(origin, 2); - // float TMin; - Args[index++] = CI->getArgOperand(hlIndex++); - // float3 Direction; - Value *direction = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(direction, 1); - Args[index++] = Builder.CreateExtractElement(direction, 2); - // float TMax; - Args[index++] = CI->getArgOperand(hlIndex++); - //}; - - DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp); + DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx); + TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp); Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); @@ -6197,55 +6170,49 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. 
namespace { -Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, - HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { +Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { hlsl::OP *HlslOP = &Helper.hlslOP; IRBuilder<> Builder(CI); - unsigned SrcIdx = 1; - Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); - if (Opcode == OP::OpCode::HitObject_MakeNop) { - Value *HitObject = TrivialDxilOperation( - Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); - Builder.CreateStore(HitObject, HitObjectPtr); - DXASSERT( - CI->use_empty(), - "Default ctor return type is a Clang artifact. Value must not be used"); - return nullptr; - } + Value *HitObjectPtr = CI->getArgOperand(1); + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. 
Value must not be used"); + return nullptr; +} +Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectMakeMiss_NumOp); - Value *RayFlags = CI->getArgOperand(SrcIdx++); - Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); - DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); - Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); - Value *RayDescOriginX = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); - Value *RayDescOriginY = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); - Value *RayDescOriginZ = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); - - Value *RayDescTMin = CI->getArgOperand(SrcIdx++); - Value *RayDescDirection = CI->getArgOperand(SrcIdx++); - Value *RayDescDirectionX = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); - Value *RayDescDirectionY = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); - Value *RayDescDirectionZ = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); - - Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp]; + Args[0] = nullptr; // Filled in by TrivialDxilOperation + + unsigned DestIdx = 1, SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx + + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp); 
- Value *OutHitObject = TrivialDxilOperation( - Opcode, - {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, - RayDescOriginZ, RayDescTMin, RayDescDirectionX, RayDescDirectionY, - RayDescDirectionZ, RayDescTMax}, - Helper.voidTy, CI, HlslOP); + Value *OutHitObject = + TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP); Builder.CreateStore(OutHitObject, HitObjectPtr); return nullptr; } @@ -6348,10 +6315,9 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, hlsl::OP *OP = &Helper.hlslOP; IRBuilder<> Builder(CI); - const unsigned DxilNumArgs = DxilInst_HitObject_TraceRay::arg_payload + 1; DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectTraceRay_NumOp); - Value *Args[DxilNumArgs]; + Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp]; Value *OpArg = OP->GetU32Const(static_cast(OpCode)); Args[0] = OpArg; @@ -6363,13 +6329,19 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, Args[DestIdx] = CI->getArgOperand(SrcIdx); } - Value *RayDescPtr = CI->getArgOperand(SrcIdx++); - DestIdx = LoadRayDescElementsIntoArgs(Args, OP, Builder, RayDescPtr, DestIdx); + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx); + Value *Payload = CI->getArgOperand(SrcIdx++); Args[DestIdx++] = Payload; DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); - DXASSERT_NOMSG(DestIdx == DxilNumArgs); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp); Function *F = OP->GetOpFunc(OpCode, Payload->getType()); @@ -7402,7 +7374,7 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, - 
{IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop, DXIL::OpCode::HitObject_MakeNop}, {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::MaybeReorderThread}, @@ -7462,7 +7434,7 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, TranslateHitObjectLoadLocalRootTableConstant, DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, - {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss, DXIL::OpCode::HitObject_MakeMiss}, {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, TranslateHitObjectSetShaderTableIndex, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index 8bd78dd9a6..d8746862bc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -129,7 +129,6 @@ class SROA_Helper { void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV); void RewriteCall(CallInst *CI); void RewriteBitCast(BitCastInst *BCI); - void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut); }; } // namespace @@ -1478,6 +1477,53 @@ void isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, } } +// Returns whether the `OpIdx` argument of HL intrinsic call `CI` is expected to +// be a user-defined-type. 
+static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { + if (HLOpcodeGroup::HLIntrinsic != GetHLOpcodeGroup(CI->getCalledFunction())) + return false; + const unsigned NumOps = CI->getNumArgOperands(); + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + if (NumOps == HLOperandIndex::kTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadPreOpIdx) + return true; + else if (NumOps == HLOperandIndex::kTraceRay_NumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadOpIdx) + return true; + break; + case IntrinsicOp::IOP_ReportHit: + if (OpIdx == HLOperandIndex::kReportIntersectionAttributeOpIdx) + return true; + break; + case IntrinsicOp::IOP_CallShader: + if (OpIdx == HLOperandIndex::kCallShaderPayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (NumOps == HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp && + OpIdx == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + if (NumOps == HLOperandIndex::kHitObjectTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx) + return true; + else if (NumOps == HLOperandIndex::kHitObjectTraceRay_NumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + if (OpIdx == HLOperandIndex::kHitObjectInvoke_PayloadOpIdx) + return true; + break; + default: + break; + } + return false; +} + /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to /// performing scalar replacement of alloca AI. The results are flagged in /// the Info parameter. Offset indicates the position within AI that is @@ -1535,18 +1581,9 @@ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { // Most HL functions are safe for scalar repl. 
if (HLOpcodeGroup::NotHL == group) return MarkUnsafe(Info, User); - else if (HLOpcodeGroup::HLIntrinsic == group) { - // TODO: should we check HL parameter type for UDT overload instead of - // basing on IOP? - IntrinsicOp opcode = static_cast(GetHLOpcode(CI)); - if (IntrinsicOp::IOP_TraceRay == opcode || - IntrinsicOp::MOP_DxHitObject_TraceRay == opcode || - IntrinsicOp::MOP_DxHitObject_Invoke == opcode || - IntrinsicOp::IOP_ReportHit == opcode || - IntrinsicOp::IOP_CallShader == opcode) { - return MarkUnsafe(Info, User); - } - } + else if (HLOpcodeGroup::HLIntrinsic == group && + isUDTIntrinsicArg(CI, U.getOperandNo())) + return MarkUnsafe(Info, User); } else { return MarkUnsafe(Info, User); } @@ -2662,12 +2699,11 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) { RewriteForGEP(cast(GEP), GEPBuilder); } -/// RewriteCallArg - For Functions which don't flat, -/// replace OldVal with alloca and -/// copy in copy out data between alloca and flattened NewElts -/// in CallInst. -void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, - bool bOut) { +/// memcpyAggCallArg - For an aggregate call argument, this replaces the +/// argument with an alloca and inserts a memcpy for input (if CopyIn) and +/// output (if CopyOut). 
+static void memcpyAggCallArg(CallInst *CI, unsigned ArgIdx, bool CopyIn, + bool CopyOut) { Function *F = CI->getParent()->getParent(); IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F)); const DataLayout &DL = F->getParent()->getDataLayout(); @@ -2677,17 +2713,79 @@ void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, Type *userTyElt = userTy->getElementType(); Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); IRBuilder<> Builder(CI); - if (bIn) { - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); - } + if (CopyIn) + Builder.CreateMemCpy(Alloca, userTyV, DL.getTypeAllocSize(userTyElt), + false); CI->setArgOperand(ArgIdx, Alloca); - if (bOut) { + if (CopyOut) { Builder.SetInsertPoint(CI->getNextNode()); - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); + Builder.CreateMemCpy(userTyV, Alloca, DL.getTypeAllocSize(userTyElt), + false); + } +} + +static void copyIntrinsicAggArgs(HLModule &HLM) { + // Iterate HLIntrinsic function users + // For specific intrinsics, use memcpyAggCallArg on aggregate args + // This ensures that the call does not directly use the pointer supplied, + // allowing certain arguments to be flattened, and UDT args to be correctly + // lowered. 
+ for (Function &F : HLM.GetModule()->functions()) { + if (F.isIntrinsic() || !F.isDeclaration()) + continue; + if (GetHLOpcodeGroup(&F) != HLOpcodeGroup::HLIntrinsic) + continue; + // Iterate users + for (User *U : F.users()) { + if (CallInst *CI = dyn_cast(U)) { + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayPayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::IOP_ReportHit: + memcpyAggCallArg(CI, + HLOperandIndex::kReportIntersectionAttributeOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::IOP_CallShader: + memcpyAggCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::MOP_TraceRayInline: + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayInlineRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) + memcpyAggCallArg( + CI, + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, + HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectInvoke_PayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + default: + break; + } + } + } } } @@ -2741,13 +2839,26 @@ static CallInst 
*RewriteWithFlattenedHLIntrinsicCall(CallInst *CI, /// RewriteCall - Replace OldVal with flattened NewElts in CallInst. void SROA_Helper::RewriteCall(CallInst *CI) { - HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); - if (group != HLOpcodeGroup::NotHL) { + HLOpcodeGroup Group = GetHLOpcodeGroupByName(CI->getCalledFunction()); + if (Group != HLOpcodeGroup::NotHL) { unsigned opcode = GetHLOpcode(CI); - if (group == HLOpcodeGroup::HLIntrinsic) { + if (Group == HLOpcodeGroup::HLIntrinsic) { + // RayQuery this pointer replacement. + if (OldVal->getType()->isPointerTy() && + dxilutil::IsHLSLRayQueryType( + OldVal->getType()->getPointerElementType())) { + // For RayQuery methods, we want to replace the RayQuery this pointer + // with a load and use of the underlying handle value. + // This will allow elimination of RayQuery types earlier. + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; + } + IntrinsicOp IOP = static_cast(opcode); switch (IOP) { - case IntrinsicOp::MOP_Append: { + case IntrinsicOp::MOP_Append: // Buffer Append already expand in code gen. // Must be OutputStream Append here. // Every Elt has a pointer type. 
@@ -2755,87 +2866,47 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ false); DeadInsts.push_back(CI); - } break; - case IntrinsicOp::IOP_TraceRay: { + return; + case IntrinsicOp::IOP_TraceRay: if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { - RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, - /*bIn*/ true, /*bOut*/ false); - } else { - DXASSERT(OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx), - "else invalid TraceRay"); - RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx, - /*bIn*/ true, /*bOut*/ true); - } - } break; - case IntrinsicOp::IOP_ReportHit: { - RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); - } break; - case IntrinsicOp::IOP_CallShader: { - RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, - /*bIn*/ true, /*bOut*/ true); - } break; - case IntrinsicOp::MOP_DxHitObject_MakeMiss: { - if (OldVal == - CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); + return; } - } break; - case IntrinsicOp::MOP_TraceRayInline: { - if (OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); - break; + return; } - } - LLVM_FALLTHROUGH; - case IntrinsicOp::MOP_DxHitObject_FromRayQuery: { - const bool IsWithAttrs = - CI->getNumArgOperands() == - HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp; - if (IsWithAttrs && - (OldVal == - CI->getArgOperand( - HLOperandIndex:: - kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx))) { - RewriteCallArg( - CI, - 
HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); - break; + break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx)) { + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; } - - // For RayQuery methods, we want to replace the RayQuery this pointer - // with a load and use of the underlying handle value. - // This will allow elimination of RayQuery types earlier. - RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, - /*loadElts*/ true); - DeadInsts.push_back(CI); break; - } - default: - // RayQuery this pointer replacement. - if (OldVal->getType()->isPointerTy() && - CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx && - OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) && - dxilutil::IsHLSLRayQueryType( - OldVal->getType()->getPointerElementType())) { - // For RayQuery methods, we want to replace the RayQuery this pointer - // with a load and use of the underlying handle value. - // This will allow elimination of RayQuery types earlier. + case IntrinsicOp::MOP_TraceRayInline: + if (OldVal == + CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); - break; + return; } - DXASSERT(0, "cannot flatten hlsl intrinsic."); + break; + default: + break; } + DXASSERT(0, "cannot flatten hlsl intrinsic."); } // TODO: check other high level dx operations if need to. } else { @@ -4416,6 +4487,9 @@ class SROA_Parameter_HLSL : public ModulePass { F->eraseFromParent(); } + // Expand flattened copy-in/copy-out for intrinsic UDT args: + copyIntrinsicAggArgs(*m_pHLModule); + // SROA globals and allocas. 
SROAGlobalAndAllocas(*m_pHLModule, m_HasDbgInfo); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl similarity index 100% rename from tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl rename to tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl new file mode 100644 index 0000000000..256b6a04e8 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s + +// CHECK-DAG: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false) +// CHECK-DAG: %[[RQ:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) +// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, + +RaytracingAccelerationStructure RTAS; + +RayDesc rayDesc; + +void main() { + RayQuery rayQuery; + rayQuery.TraceRayInline(RTAS, 1, 2, rayDesc); +} diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll index 6f364a0161..03bb0716ce 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll @@ -1,26 +1,16 @@ ; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s ; REQUIRES: dxil-1-9 -; -; Buffer Definitions: -; -; -; Resource Bindings: -; -; Name Type Format Dim ID HLSL Bind Count -; ------------------------------ ---------- ------- ----------- ------- -------------- ------ -; RTAS texture i32 ras T0t4294967295,space4294967295 1 -; target datalayout = 
"e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" target triple = "dxil-ms-dx" %struct.RaytracingAccelerationStructure = type { i32 } -%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } %struct.Payload = type { <3 x float> } %dx.types.HitObject = type { i8* } %dx.types.Handle = type { i8* } %dx.types.ResourceProperties = type { i32, i32 } %"class.RWStructuredBuffer" = type { float } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } %"class.dx::HitObject" = type { i32 } @"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 @@ -28,55 +18,37 @@ target triple = "dxil-ms-dx" ; Function Attrs: nounwind define void @"\01?main@@YAXXZ"() #0 { entry: - %rayDesc = alloca %struct.RayDesc, align 4 - %pld = alloca %struct.Payload, align 4 + %pld_invoke = alloca %struct.Payload + %pld_trace = alloca %struct.Payload %hit = alloca %dx.types.HitObject, align 4 - %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !31 ; line:80 col:3 - call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !31 ; line:80 col:3 - %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !35 ; line:81 col:11 - store <3 x float> , <3 x float>* %Origin, align 4, !dbg !36, !tbaa !37 ; line:81 col:18 - %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !40 ; line:82 col:11 - store float 3.000000e+00, float* %TMin, align 4, !dbg !41, !tbaa !42 ; line:82 col:16 - %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !44 ; line:83 col:11 - store <3 x float> , <3 x float>* %Direction, align 4, !dbg !45, !tbaa !37 ; line:83 col:21 - %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !46 ; line:84 col:11 - store float 7.000000e+00, float* %TMax, align 4, !dbg !47, !tbaa !42 ; line:84 col:16 - %1 = bitcast %struct.Payload* 
%pld to i8*, !dbg !48 ; line:86 col:3 - call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !48 ; line:86 col:3 - %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !49 ; line:87 col:7 - store <3 x float> , <3 x float>* %dummy, align 4, !dbg !50, !tbaa !37 ; line:87 col:13 - %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !51 ; line:89 col:3 - call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !51 ; line:89 col:3 - %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !52 ; line:89 col:23 - %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !52 ; line:89 col:23 - %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !52 ; line:89 col:23 - ; CHECK: %[[ORIGINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR:[^ ]+]], i32 0, i32 0 - ; CHECK: %[[ORIGIN:[^ ]+]] = load <3 x float>, <3 x float>* %[[ORIGINPTR]] - ; CHECK: %[[O0:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 0 - ; CHECK: %[[O1:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 1 - ; CHECK: %[[O2:[^ ]+]] = extractelement <3 x float> %[[ORIGIN]], i64 2 - ; CHECK: %[[TMINPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 1 - ; CHECK: %[[TMIN:[^ ]+]] = load float, float* %[[TMINPTR]] - ; CHECK: %[[DIRPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 2 - ; CHECK: %[[DIR:[^ ]+]] = load <3 x float>, <3 x float>* %[[DIRPTR]] - ; CHECK: %[[D0:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 0 - ; 
CHECK: %[[D1:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 1 - ; CHECK: %[[D2:[^ ]+]] = extractelement <3 x float> %[[DIR]], i64 2 - ; CHECK: %[[TMAXPTR:[^ ]+]] = getelementptr %struct.RayDesc, %struct.RayDesc* %[[RAYDESCPTR]], i32 0, i32 3 - ; CHECK: %[[TMAX:[^ ]+]] = load float, float* %[[TMAXPTR]] - ; CHECK: %[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, float %[[O0]], float %[[O1]], float %[[O2]], float %[[TMIN]], float %[[D0]], float %[[D1]], float %[[D2]], float %[[TMAX]], %struct.Payload* %pld) - call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !52 ; line:89 col:23 - ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %[[HOPTR:[^ ]+]] - ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOPTR]] - ; CHECK: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld) - call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !53 ; line:99 col:3 - %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !54 ; line:100 col:1 - %7 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !54 ; line:100 col:1 - %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !54 ; line:100 col:1 - call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !54 ; line:100 col:1 - ret void, !dbg !54 ; line:100 col:1 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, 
i8* %0) #0, !dbg !32 ; line:91 col:3 + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !36 ; line:91 col:23 + %rtas = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %1), !dbg !36 ; line:91 col:23 + + ; Capture the handle for the RTAS + ; CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + %2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %rtas, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !36 ; line:91 col:23 + + %3 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !36 ; line:91 col:23 + store <3 x float> , <3 x float>* %3, !dbg !36 ; line:91 col:23 + + ; CHECK: %[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* %pld_trace), !dbg !3 ; line:91 col:23 + ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %hit + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %2, i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> , float 3.000000e+00, <3 x float> , float 7.000000e+00, %struct.Payload* %pld_trace), !dbg !36 ; line:91 col:23 + + %4 = getelementptr inbounds 
%struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !37 ; line:101 col:3 + %5 = load <3 x float>, <3 x float>* %4, !dbg !37 ; line:101 col:3 + %6 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_invoke, i32 0, i32 0, !dbg !37 ; line:101 col:3 + store <3 x float> %5, <3 x float>* %6, !dbg !37 ; line:101 col:3 + + ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %hit + ; CHECK: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld_invoke) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld_invoke), !dbg !37 ; line:101 col:3 + + %7 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !38 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %7) #0, !dbg !38 ; line:102 col:1 + ret void, !dbg !38 ; line:102 col:1 } ; Function Attrs: nounwind @@ -85,9 +57,6 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0 ; Function Attrs: nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) #0 -; Function Attrs: nounwind -declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 - ; Function Attrs: nounwind readnone declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 @@ -97,71 +66,59 @@ declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.type ; Function Attrs: nounwind declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, 
float, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*) #0 + attributes #0 = { nounwind } attributes #1 = { nounwind readnone } !llvm.module.flags = !{!0} !pauseresume = !{!1} -!dx.version = !{!2} -!dx.valver = !{!2} -!dx.shaderModel = !{!3} -!dx.typeAnnotations = !{!4, !19} -!dx.entryPoints = !{!23} -!dx.fnprops = !{!28} -!dx.options = !{!29, !30} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!29} +!dx.options = !{!30, !31} !0 = !{i32 2, !"Debug Info Version", i32 3} !1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} -!2 = !{i32 1, i32 9} -!3 = !{!"lib", i32 6, i32 9} -!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %struct.Payload undef, !15, %"class.dx::HitObject" undef, !17} -!5 = !{i32 4, !6, !7} -!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} -!7 = !{i32 0, !8} -!8 = !{!9} -!9 = !{i32 0, float undef} -!10 = !{i32 32, !11, !12, !13, !14} -!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} -!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} -!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} -!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} -!15 = !{i32 12, !16} -!16 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} -!17 = !{i32 4, !18} -!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} -!19 = !{i32 1, void ()* @"\01?main@@YAXXZ", !20} -!20 = !{!21} -!21 = !{i32 1, !22, !22} -!22 = !{} -!23 = !{null, !"", null, !24, null} -!24 = !{!25, null, null, null} -!25 = !{!26} -!26 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !27} -!27 = !{i32 0, i32 4} -!28 = !{void ()* @"\01?main@@YAXXZ", i32 7} -!29 = !{i32 -2147483584} -!30 = !{i32 -1} -!31 = !DILocation(line: 80, 
column: 3, scope: !32) -!32 = !DISubprogram(name: "main", scope: !33, file: !33, line: 79, type: !34, isLocal: false, isDefinition: true, scopeLine: 79, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") -!33 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") -!34 = !DISubroutineType(types: !22) -!35 = !DILocation(line: 81, column: 11, scope: !32) -!36 = !DILocation(line: 81, column: 18, scope: !32) -!37 = !{!38, !38, i64 0} -!38 = !{!"omnipotent char", !39, i64 0} -!39 = !{!"Simple C/C++ TBAA"} -!40 = !DILocation(line: 82, column: 11, scope: !32) -!41 = !DILocation(line: 82, column: 16, scope: !32) -!42 = !{!43, !43, i64 0} -!43 = !{!"float", !38, i64 0} -!44 = !DILocation(line: 83, column: 11, scope: !32) -!45 = !DILocation(line: 83, column: 21, scope: !32) -!46 = !DILocation(line: 84, column: 11, scope: !32) -!47 = !DILocation(line: 84, column: 16, scope: !32) -!48 = !DILocation(line: 86, column: 3, scope: !32) -!49 = !DILocation(line: 87, column: 7, scope: !32) -!50 = !DILocation(line: 87, column: 13, scope: !32) -!51 = !DILocation(line: 89, column: 3, scope: !32) -!52 = !DILocation(line: 89, column: 23, scope: !32) -!53 = !DILocation(line: 99, column: 3, scope: !32) -!54 = !DILocation(line: 100, column: 1, scope: !32) +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, 
!"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = !{!26, null, null, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!30 = !{i32 -2147483584} +!31 = !{i32 -1} +!32 = !DILocation(line: 91, column: 3, scope: !33) +!33 = !DISubprogram(name: "main", scope: !34, file: !34, line: 81, type: !35, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!34 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") +!35 = !DISubroutineType(types: !23) +!36 = !DILocation(line: 91, column: 23, scope: !33) +!37 = !DILocation(line: 101, column: 3, scope: !33) +!38 = !DILocation(line: 102, column: 1, scope: !33) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll new file mode 100644 index 0000000000..b969a63f12 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll @@ -0,0 +1,160 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { %struct.RayDesc } +%struct.RayDesc = type { <3 x float>, 
float, <3 x float>, float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + + ; Capture CB, RTAS, and RayQuery + ; CHECK-DAG: %[[CB:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %"$Globals", %dx.types.ResourceProperties { i32 13, i32 32 }) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !38 ; line:13 col:3 + + ; Load RayDesc.Origin + ; CHECK: %[[ORIG_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[ORIG_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 0 + ; CHECK: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIG_EX0]], i64 0 + ; CHECK: %[[ORIG_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 1 + ; CHECK: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIG_EX1]], i64 1 + ; CHECK: %[[ORIG_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 2 + ; CHECK: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIG_EX2]], i64 2 + + ; Load RayDesc.TMin + ; CHECK: %[[TMIN_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[TMIN:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMIN_CB_LD]], 3 + + ; Load RayDesc.Direction + ; CHECK: %[[DIR_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %[[DIR_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 0 + ; CHECK: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIR_EX0]], i64 0 + ; CHECK: %[[DIR_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 1 + ; CHECK: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIR_EX1]], i64 1 + ; CHECK: %[[DIR_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 2 + ; CHECK: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIR_EX2]], i64 2 + + ; Load RayDesc.TMax (captured by name rather than by SSA number) + ; CHECK: %[[TMAX_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 
@dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %[[TMAX:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMAX_CB_LD]], 3 + + ; Extract RayDesc vector fields + ; CHECK: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 + ; CHECK: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 0, !dbg !38 ; line:13 col:3 + %8 = load <3 x float>, <3 x float>* %7, !dbg !38 ; line:13 col:3 + %9 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 1, !dbg !38 ; line:13 col:3 + %10 = load float, float* %9, !dbg !38 ; line:13 col:3 + %11 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 2, !dbg !38 ; line:13 col:3 + %12 = load <3 x float>, <3 x float>* %11, !dbg !38 ; line:13 col:3 + %13 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 3, !dbg !38 ; line:13 col:3 + %14 = load float, float* %13, !dbg !38 ; line:13 col:3 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %6, i32 1, i32 2, <3 x float> %8, float %10, <3 x float> %12, float %14), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, 
i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, !"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll new file mode 100644 index 0000000000..0d97d8782d --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll @@ -0,0 +1,134 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #1 + +; Function Attrs: nounwind +define void @main(float* noalias, <3 x float>, float, <3 x float>, float) #1 { +entry: + + ; Load RayDesc fields from input + ; CHECK-DAG: %[[ORIGX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIGY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[ORIGZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef) + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[DIRZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 
undef) + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIGX_LI]], i64 0 + ; CHECK-DAG: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIGY_LI]], i64 1 + ; CHECK-DAG: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIGZ_LI]], i64 2 + ; CHECK-DAG: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIRX_LI]], i64 0 + ; CHECK-DAG: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIRY_LI]], i64 1 + ; CHECK-DAG: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIRZ_LI]], i64 2 + + ; Capture RayQuery and RTAS + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !41 ; line:15 col:71 + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !45 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !45 ; line:17 col:3 + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !45 ; line:17 col:3 + + ; Extract RayDesc vector fields + ; CHECK-DAG: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK-DAG: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], 
i64 1 + ; CHECK-DAG: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK-DAG: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK-DAG: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK-DAG: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %7, i32 1, i32 2, <3 x float> %1, float %2, <3 x float> %3, float %4), !dbg !45 ; line:17 col:3 + store float 0.000000e+00, float* %0, !dbg !46 ; line:18 col:3 + ret void, !dbg !46 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!33} +!dx.fnprops = !{!38} +!dx.options = !{!39, !40} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 
7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, void (float*, <3 x float>, float, <3 x float>, float)* @main, !19} +!19 = !{!20, !22, !25, !27, !29, !31} +!20 = !{i32 0, !21, !21} +!21 = !{} +!22 = !{i32 1, !23, !24} +!23 = !{i32 4, !"OUT", i32 7, i32 9} +!24 = !{i32 0} +!25 = !{i32 0, !26, !24} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0, !26, !28} +!28 = !{i32 1} +!29 = !{i32 0, !26, !30} +!30 = !{i32 2} +!31 = !{i32 0, !26, !32} +!32 = !{i32 3} +!33 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, !"main", null, !34, null} +!34 = !{!35, null, null, null} +!35 = !{!36} +!36 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !37} +!37 = !{i32 0, i32 4} +!38 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, i32 1} +!39 = !{i32 64} +!40 = !{i32 -1} +!41 = !DILocation(line: 15, column: 71, scope: !42) +!42 = !DISubprogram(name: "main", scope: !43, file: !43, line: 14, type: !44, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: void (float*, <3 x float>, float, <3 x float>, float)* @main) +!43 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!44 = !DISubroutineType(types: !21) +!45 = !DILocation(line: 17, column: 3, scope: !42) +!46 = !DILocation(line: 18, column: 3, scope: !42) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll index 5afd30b524..85c3a34eb9 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll +++ 
b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll @@ -95,10 +95,10 @@ target triple = "dxil-ms-dx" @"$Globals" = external constant %ConstantBuffer ; CHECK: %[[RQA:[^ ]+]] = alloca i32 -; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs -; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs ; CHECK: %[[XATTRA:[^ ]+]] = alloca float ; CHECK: %[[YATTRA:[^ ]+]] = alloca float +; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs ; COM: Check same query handle used for TraceRayInline and the FromRayQuery calls ; CHECK: %[[RQH:[^ ]+]] = load i32, i32* %[[RQA]] @@ -122,7 +122,7 @@ target triple = "dxil-ms-dx" ; CHECK: store float %[[XF1]], float* %[[XPTR0]] ; CHECK: %[[YPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 1 ; CHECK: %[[YF1:[^ ]+]] = load float, float* %[[YATTRA]] -; CHECK: store float %[[YF1]], float* %[[YPTR0]], align 4 +; CHECK: store float %[[YF1]], float* %[[YPTR0]] ; CHECK: %[[RQH1:[^ ]+]] = load i32, i32* %[[RQA]] ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH1]], i32 16, %struct.CustomAttrs* %[[ATTRA0]]) @@ -140,7 +140,7 @@ target triple = "dxil-ms-dx" ; CHECK: store float %[[XF2]], float* %[[XPTR1]] ; CHECK: %[[YPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 1 ; CHECK: %[[YF2:[^ ]+]] = load float, float* %[[YATTRA]] -; CHECK: store float %[[YF2]], float* %[[YPTR1]], align 4 +; CHECK: store float %[[YF2]], float* %[[YPTR1]] ; CHECK: %[[RQH2:[^ ]+]] = load i32, i32* %[[RQA]] ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH2]], i32 17, %struct.CustomAttrs* %[[ATTRA1]]) diff --git 
a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll index 89ee886c2e..78f7271e94 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -33,7 +33,7 @@ entry: %hit = alloca %dx.types.HitObject, align 4 %tmp = alloca %dx.types.HitObject, align 4 %ray = alloca %struct.RayDesc, align 4 -; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc +; CHECK-NOT: alloca %struct.RayDesc %tmp2 = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 @@ -69,7 +69,16 @@ entry: ; CHECK-DAG: %[[RDTMIN:[^ ]+]] = load float, float* %[[pRDTMIN]], ; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], ; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) +; Copy introduced for RayDesc argument +; CHECK-DAG: store <3 x float> %[[RDO]], <3 x float>* %[[pRDO2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMIN]], float* %[[pRDTMIN2:[^ ]+]], +; CHECK-DAG: store <3 x float> %[[RDD]], <3 x float>* %[[pRDD2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMAX]], float* %[[pRDTMAX2:[^ ]+]], +; CHECK-DAG: %[[RDO2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO2]], +; CHECK-DAG: %[[RDTMIN2:[^ ]+]] = load float, float* %[[pRDTMIN2]], +; CHECK-DAG: %[[RDD2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD2]], +; CHECK-DAG: %[[RDTMAX2:[^ ]+]] = load float, float* %[[pRDTMAX2]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 
0, i32 1, <3 x float> %[[RDO2]], float %[[RDTMIN2]], <3 x float> %[[RDD2]], float %[[RDTMAX2]]) call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll new file mode 100644 index 0000000000..fa22ee5744 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll @@ -0,0 +1,198 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { float } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?UAV@@3V?$RWStructuredBuffer@M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %rayDesc = alloca %struct.RayDesc, align 4 + %pld = alloca %struct.Payload, align 4 + + ; CHECK: %[[HITOBJ:[^ ,]+]] = alloca %dx.types.HitObject, align 4 + + %hit = alloca 
%dx.types.HitObject, align 4 + + %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !37 ; line:82 col:3 + call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !37 ; line:82 col:3 + + ; Init RayDesc. + ; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ,]+]], align 4 + + %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !41 ; line:83 col:11 + store <3 x float> , <3 x float>* %Origin, align 4, !dbg !42, !tbaa !43 ; line:83 col:18 + %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !46 ; line:84 col:11 + store float 3.000000e+00, float* %TMin, align 4, !dbg !47, !tbaa !48 ; line:84 col:16 + %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !50 ; line:85 col:11 + store <3 x float> , <3 x float>* %Direction, align 4, !dbg !51, !tbaa !43 ; line:85 col:21 + %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !52 ; line:86 col:11 + store float 7.000000e+00, float* %TMax, align 4, !dbg !53, !tbaa !48 ; line:86 col:16 + + %1 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:88 col:3 + call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !54 ; line:88 col:3 + %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !55 ; line:89 col:7 + store <3 x float> , <3 x float>* %dummy, align 4, !dbg !56, !tbaa !43 ; line:89 col:13 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !57 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !57 ; line:91 col:3 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* 
@"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !58 ; line:91 col:23 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !58 ; line:91 col:23 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !58 ; line:91 col:23 + + ; Copy RayDesc. + ; CHECK-DAG: %[[ORIGIN_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] + ; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_L0:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[DIRECTION_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] + ; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_L0:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ,]+]] + + ; Load RayDesc. + ; CHECK-DAG: %[[ORIGIN_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] + ; CHECK-DAG: %[[TMIN_L1:[^ ,]+]] = load float, float* %[[TMIN_P1]] + ; CHECK-DAG: %[[DIRECTION_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] + ; CHECK-DAG: %[[TMAX_L1:[^ ,]+]] = load float, float* %[[TMAX_P1]] + + ; RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. 
+ ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ,]+]]) + + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !58 ; line:91 col:23 + + ; Copy payload from the TraceRay payload struct into the struct passed to Invoke. + ; CHECK: %[[GEP_PLD_P0:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 + ; CHECK: %[[PLD_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] + ; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ,]+]] + ; CHECK: %[[GEP_PLD_P1:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ,]+]], i32 0, i32 0 + ; CHECK: %[[PLD_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] + ; CHECK: store <3 x float> %[[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] + + ; dx::HitObject::Invoke + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %[[HITOBJ]], %struct.Payload* %[[PLD_P1]]) + + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !59 ; line:101 col:3 + + %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !60 ; line:102 col:1 + %7 = bitcast %struct.Payload* %pld to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !60 ; line:102 col:1 + %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !60 
; line:102 col:1 + call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !60 ; line:102 col:1 + ret void, !dbg !60 ; line:102 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!34} +!dx.options = !{!35, !36} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = 
!{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = !{!26, !29, !32, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"class.RWStructuredBuffer"* @"\01?UAV@@3V?$RWStructuredBuffer@M@@A", !"UAV", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !31} +!31 = !{i32 1, i32 4} +!32 = !{!33} +!33 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!34 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!35 = !{i32 -2147483584} +!36 = !{i32 -1} +!37 = !DILocation(line: 82, column: 3, scope: !38) +!38 = !DISubprogram(name: "main", scope: !39, file: !39, line: 81, type: !40, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!39 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CHitObject\5Chitobject_traceinvoke.hlsl", directory: "") +!40 = !DISubroutineType(types: !23) +!41 = !DILocation(line: 83, column: 11, scope: !38) +!42 = !DILocation(line: 83, column: 18, scope: !38) +!43 = !{!44, !44, i64 0} +!44 = !{!"omnipotent char", !45, i64 0} +!45 = !{!"Simple C/C++ TBAA"} +!46 = !DILocation(line: 84, column: 11, scope: !38) +!47 = 
!DILocation(line: 84, column: 16, scope: !38) +!48 = !{!49, !49, i64 0} +!49 = !{!"float", !44, i64 0} +!50 = !DILocation(line: 85, column: 11, scope: !38) +!51 = !DILocation(line: 85, column: 21, scope: !38) +!52 = !DILocation(line: 86, column: 11, scope: !38) +!53 = !DILocation(line: 86, column: 16, scope: !38) +!54 = !DILocation(line: 88, column: 3, scope: !38) +!55 = !DILocation(line: 89, column: 7, scope: !38) +!56 = !DILocation(line: 89, column: 13, scope: !38) +!57 = !DILocation(line: 91, column: 3, scope: !38) +!58 = !DILocation(line: 91, column: 23, scope: !38) +!59 = !DILocation(line: 101, column: 3, scope: !38) +!60 = !DILocation(line: 102, column: 1, scope: !38) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll new file mode 100644 index 0000000000..59551a7eb4 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -0,0 +1,182 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { i32, i32, i32, i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <2 x float>, <3 x i32> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?Acc@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?RayFlags@@3IB" = external constant i32, align 4 +@"\01?InstanceInclusionMask@@3IB" = external constant i32, align 4 +@"\01?RayContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MultiplierForGeometryContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MissShaderIndex@@3IB" = external constant i32, align 4 +@"$Globals" = 
external constant %"$Globals" + +; CHECK: define <4 x float> @" +; CHECK-SAME: ?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) + +; Function Attrs: nounwind +define <4 x float> @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) #0 { +entry: + + ; Copy Payload fields (PLD_F0, PLD_F1) to local allocas: + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 0 + ; CHECK: %[[LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[GEP]] + ; CHECK: store <2 x float> %[[LOAD]], <2 x float>* %[[PLD_F0:[^ ,]+]] + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 1 + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] + ; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] + + %0 = alloca %struct.RayDesc, !dbg !39 ; line:22 col:61 + %1 = bitcast %struct.RayDesc* %0 to i8*, !dbg !39 ; line:22 col:61 + %2 = bitcast %struct.RayDesc* %Ray to i8*, !dbg !39 ; line:22 col:61 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false), !dbg !39 ; line:22 col:61 + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0), !dbg !39 ; line:22 col:61 + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 13, i32 20 }, %"$Globals" undef), !dbg !39 ; line:22 col:61 + %5 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %4, i32 0), !dbg !39 ; line:22 col:61 + %6 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 0, !dbg 
!39 ; line:22 col:61 + %7 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 1, !dbg !39 ; line:22 col:61 + %8 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 2, !dbg !39 ; line:22 col:61 + %9 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 3, !dbg !39 ; line:22 col:61 + %10 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 4, !dbg !39 ; line:22 col:61 + %11 = load i32, i32* %10, align 4, !dbg !39, !tbaa !43 ; line:22 col:61 + %12 = load i32, i32* %9, align 4, !dbg !47, !tbaa !43 ; line:22 col:12 + %13 = load i32, i32* %8, align 4, !dbg !48, !tbaa !43 ; line:21 col:12 + %14 = load i32, i32* %7, align 4, !dbg !49, !tbaa !43 ; line:20 col:25 + %15 = load i32, i32* %6, align 4, !dbg !50, !tbaa !43 ; line:20 col:16 + %16 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !dbg !51 ; line:20 col:3 + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %16), !dbg !51 ; line:20 col:3 + + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !51 ; line:20 col:3 + + ; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: + ; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 + ; CHECK: 
%[[RAY_ORIGIN_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_GEP]] + ; CHECK: store <3 x float> %[[RAY_ORIGIN_LOAD]], <3 x float>* %[[RAY_ORIGIN_P0:[^ ,]+]] + ; CHECK: %[[TMIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 1 + ; CHECK: %[[TMIN_LOAD:[^ ,]+]] = load float, float* %[[TMIN_GEP]] + ; CHECK: store float %[[TMIN_LOAD]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK: %[[DIRECTION_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 2 + ; CHECK: %[[DIRECTION_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_GEP]] + ; CHECK: store <3 x float> %[[DIRECTION_LOAD]], <3 x float>* %[[DIRECTION_P0:[^ ,]+]] + ; CHECK: %[[TMAX_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 3 + ; CHECK: %[[TMAX_LOAD:[^ ,]+]] = load float, float* %[[TMAX_GEP]] + ; CHECK: store float %[[TMAX_LOAD]], float* %[[TMAX_P0:[^ ,]+]] + + ; Copy Payload fields into payload struct for call: + ; CHECK: %[[PLD_F0_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0:[^ ,]+]], i32 0, i32 0 + ; CHECK: %[[PLD_F0_LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[PLD_F0]] + ; CHECK: store <2 x float> %[[PLD_F0_LOAD]], <2 x float>* %[[PLD_F0_GEP]] + ; CHECK: %[[PLD_F1_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 1 + ; CHECK: %[[PLD_F1_LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[PLD_F1]] + ; CHECK: store <3 x i32> %[[PLD_F1_LOAD]], <3 x i32>* %[[PLD_F1_GEP]] + + ; Load RayDesc fields: + ; CHECK: %[[RAY_ORIGIN_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_P0]] + ; CHECK: %[[TMIN_LOAD2:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK: %[[DIRECTION_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] + ; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] + + ; call TraceRay with the local allocas: + ; CHECK: call void @"dx.hl.op..void (i32, 
%dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) + + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 69, %dx.types.Handle %18, i32 %15, i32 %14, i32 %13, i32 %12, i32 %11, %struct.RayDesc* %0, %struct.Payload* %p), !dbg !51 ; line:20 col:3 + + ret <4 x float> , !dbg !52 ; line:24 col:4 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function 
Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !21} +!dx.entryPoints = !{!30} +!dx.fnprops = !{} +!dx.options = !{!37, !38} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 3} +!4 = !{i32 1, i32 9} +!5 = !{!"lib", i32 6, i32 3} +!6 = !{i32 0, %struct.RayDesc undef, !7, %struct.Payload undef, !12, %"$Globals" undef, !15} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 28, !13, !14} +!13 = !{i32 6, !"t", i32 3, i32 0, i32 7, i32 9} +!14 = !{i32 6, !"t2", i32 3, i32 16, i32 7, i32 4} +!15 = !{i32 20, !16, !17, !18, !19, !20} +!16 = !{i32 6, !"RayFlags", i32 3, i32 0, i32 7, i32 5} +!17 = !{i32 6, !"InstanceInclusionMask", i32 3, i32 4, i32 7, i32 5} +!18 = !{i32 6, !"RayContributionToHitGroupIndex", i32 3, i32 8, i32 7, i32 5} +!19 = !{i32 6, !"MultiplierForGeometryContributionToHitGroupIndex", i32 3, i32 12, i32 7, i32 5} +!20 = !{i32 6, !"MissShaderIndex", i32 3, i32 16, i32 7, i32 5} +!21 = !{i32 1, <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z", !22} +!22 = !{!23, !26, !27, !29} +!23 = !{i32 1, !24, !25} +!24 = !{i32 7, i32 9} +!25 = !{} +!26 = !{i32 2, !24, !25} +!27 = !{i32 0, !28, !25} +!28 = !{i32 4, !"R"} +!29 = !{i32 2, !25, !25} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, null, !35, null} +!32 = !{!33} +!33 = !{i32 
0, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !"Acc", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{!36} +!36 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 20, null} +!37 = !{i32 -2147483584} +!38 = !{i32 11} +!39 = !DILocation(line: 22, column: 61, scope: !40) +!40 = !DISubprogram(name: "emit", scope: !41, file: !41, line: 19, type: !42, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, function: <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z") +!41 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CHLSLFileCheck\5Cshader_targets\5Craytracing\5Craytracing_traceray.hlsl", directory: "") +!42 = !DISubroutineType(types: !25) +!43 = !{!44, !44, i64 0} +!44 = !{!"int", !45, i64 0} +!45 = !{!"omnipotent char", !46, i64 0} +!46 = !{!"Simple C/C++ TBAA"} +!47 = !DILocation(line: 22, column: 12, scope: !40) +!48 = !DILocation(line: 21, column: 12, scope: !40) +!49 = !DILocation(line: 20, column: 25, scope: !40) +!50 = !DILocation(line: 20, column: 16, scope: !40) +!51 = !DILocation(line: 20, column: 3, scope: !40) +!52 = !DILocation(line: 24, column: 4, scope: !40) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll new file mode 100644 index 0000000000..c01ec797bb --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -0,0 +1,154 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + 
+%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"$Globals" = type { %struct.RayDesc } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?rayDesc@@3URayDesc@@B" = external constant %struct.RayDesc, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + + ; Capture CB, RayDesc ptr from CB, RTAS, and init RayQuery + ; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + + ; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) + + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + + ; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 + + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + + ; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) + ; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + + %rayQuery = alloca 
%"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !34 ; line:12 col:71 + store i32 %rayQuery1, i32* %4, !dbg !34 ; line:12 col:71 + + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !38 ; line:13 col:3 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !38 ; line:13 col:3 + + ; Load RayDesc fields from CB to local copy + ; CHECK-DAG: %[[ORIG_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 0 + ; CHECK-DAG: %[[ORIG_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_CBP]] + ; CHECK-DAG: store <3 x float> %[[ORIG_LD_CB]], <3 x float>* %[[ORIG_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 1 + ; CHECK-DAG: %[[TMIN_LD_CB:[^ ,]+]] = load float, float* %[[TMIN_CBP]] + ; CHECK-DAG: store float %[[TMIN_LD_CB]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK-DAG: %[[DIR_CBP:[^ ,]+]] = 
getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 2 + ; CHECK-DAG: %[[DIR_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_CBP]] + ; CHECK-DAG: store <3 x float> %[[DIR_LD_CB]], <3 x float>* %[[DIR_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 3 + ; CHECK-DAG: %[[TMAX_LD_CB:[^ ,]+]] = load float, float* %[[TMAX_CBP]] + ; CHECK-DAG: store float %[[TMAX_LD_CB]], float* %[[TMAX_P0:[^ ,]+]] + + ; Load RayDesc fields from local copy + ; CHECK-DAG: %[[ORIG:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_P0]] + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: %[[DIR:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_P0]] + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; Call TraceRayInline + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[ORIG]], float %[[TMIN]], <3 x float> %[[DIR]], float %[[TMAX]]) + + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %7, i32 1, i32 2, %struct.RayDesc* %3), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, 
!"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "/home/texr/git/dxc/main/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll new file mode 100644 index 0000000000..ee76872441 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll @@ -0,0 +1,155 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %ConstantBuffer + +; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) + +; Function Attrs: nounwind +define float @main(%struct.RayDesc* %rayDesc) #0 { +entry: + %0 = alloca %struct.RayDesc + + ; Copy flattened RayDesc input to main function + ; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax + ; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] + ; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] + ; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] + ; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] + + %1 = bitcast %struct.RayDesc* %0 to i8* + %2 = bitcast %struct.RayDesc* %rayDesc to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) + + ; Capture RayQuery ptr and RTAS handle + ; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 
4, i32 513, i32 0) + ; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 + %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 + store i32 %rayQuery1, i32* %3, !dbg !35 ; line:15 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] + 
+ ; Load RayDesc fields for TraceRayInline + ; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] + ; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] + ; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] + ; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] + + ; Load RayQuery + ; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; TraceRayInline call + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) + + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 + ret float 0.000000e+00, !dbg !40 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + 
+!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!25} +!dx.fnprops = !{!32} +!dx.options = !{!33, !34} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, float (%struct.RayDesc*)* @main, !19} +!19 = !{!20, !23} +!20 = !{i32 1, !21, !22} +!21 = !{i32 4, !"OUT", i32 7, i32 9} +!22 = !{} +!23 = !{i32 0, !24, !22} +!24 = !{i32 4, !"RAYDESC"} +!25 = !{float (%struct.RayDesc*)* @main, !"main", null, !26, null} +!26 = !{!27, null, !30, null} +!27 = !{!28} +!28 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !29} +!29 = !{i32 0, i32 4} +!30 = !{!31} +!31 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!32 = !{float (%struct.RayDesc*)* @main, i32 1} +!33 = !{i32 64} +!34 = !{i32 -1} +!35 = !DILocation(line: 15, column: 71, scope: !36) +!36 = !DISubprogram(name: "main", scope: !37, file: !37, line: 14, type: !38, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: float (%struct.RayDesc*)* @main) +!37 = !DIFile(filename: 
"tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!38 = !DISubroutineType(types: !22) +!39 = !DILocation(line: 17, column: 3, scope: !36) +!40 = !DILocation(line: 18, column: 3, scope: !36) diff --git a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl b/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl deleted file mode 100644 index b9670bdaba..0000000000 --- a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %dxc -Od -T lib_6_6 %s | %opt -S -dxil-annotate-with-virtual-regs | FileCheck %s - - -/* To run locally run: -%dxc -Od -T lib_6_6 %s -Fc %t.ll -%opt %t.ll -S -dxil-annotate-with-virtual-regs | FileCheck %s -*/ - -RaytracingAccelerationStructure scene : register(t0); - -struct RayPayload -{ - int3 color; -}; - -[shader("raygeneration")] -void ENTRY() -{ - RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; - RayPayload pld; - TraceRay(scene, 0 /*rayFlags*/, 0xFF /*rayMask*/, 0 /*sbtRecordOffset*/, 1 /*sbtRecordStride*/, 0 /*missIndex*/, ray, pld); -} - -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 0, !pix-dxil-reg [[RDGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 1, !pix-dxil-reg [[RDGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 2, !pix-dxil-reg [[RDGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x 
i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} - -// CHECK-DAG: [[RDGEP]] = !{i32 0, i32 0} -// CHECK-DAG: [[NothGEP]] = !{i32 0, i32 11} -// CHECK-DAG: [[RDGEP2]] = !{i32 0, i32 3} -// CHECK-DAG: [[NothGEP2]] = !{i32 0, i32 12} -// CHECK-DAG: [[RDGEP3]] = !{i32 0, i32 4} -// CHECK-DAG: [[NothGEP3]] = !{i32 0, i32 13} diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl index 12df1ecbcf..98997a52b1 100644 --- a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl +++ b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl @@ -1,10 +1,10 @@ // RUN: %dxc -T lib_6_5 -auto-binding-space 11 %s | FileCheck %s // CHECK: define void [[intersection1:@"\\01\?intersection1@[^\"]+"]]() #0 { -// CHECK: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) -// CHECK: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) -// CHECK: icmp eq i32 [[GeometryIndex]], 0 -// CHECK: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) +// CHECK-DAG: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) +// CHECK-DAG: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) +// CHECK-DAG: icmp eq i32 [[GeometryIndex]], 0 +// CHECK-DAG: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) // CHECK: ret void struct MyAttributes { diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index e337d2951c..af7801c7bf 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -119,7 +119,6 @@ class PixTest : public 
::testing::Test { TEST_METHOD(AccessTracking_ModificationReport_SM66) TEST_METHOD(PixStructAnnotation_Lib_DualRaygen) - TEST_METHOD(PixStructAnnotation_Lib_RaygenAllocaStructAlignment) TEST_METHOD(PixStructAnnotation_Simple) TEST_METHOD(PixStructAnnotation_CopiedStruct) @@ -1455,100 +1454,6 @@ void Raygen1() } } -TEST_F(PixTest, PixStructAnnotation_Lib_RaygenAllocaStructAlignment) { - if (m_ver.SkipDxilVersion(1, 5)) - return; - - const char *hlsl = R"( - -RaytracingAccelerationStructure Scene : register(t0, space0); -RWTexture2D RenderTarget : register(u0); - -struct SceneConstantBuffer -{ - float4x4 projectionToWorld; - float4 cameraPosition; - float4 lightPosition; - float4 lightAmbientColor; - float4 lightDiffuseColor; -}; - -ConstantBuffer g_sceneCB : register(b0); - -struct RayPayload -{ - float4 color; -}; - -inline void GenerateCameraRay(uint2 index, out float3 origin, out float3 direction) -{ - float2 xy = index + 0.5f; // center in the middle of the pixel. - float2 screenPos = xy;// / DispatchRaysDimensions().xy * 2.0 - 1.0; - - // Invert Y for DirectX-style coordinates. - screenPos.y = -screenPos.y; - - // Unproject the pixel coordinate into a ray. - float4 world = /*mul(*/float4(screenPos, 0, 1)/*, g_sceneCB.projectionToWorld)*/; - - //world.xyz /= world.w; - origin = world.xyz; //g_sceneCB.cameraPosition.xyz; - direction = float3(1,0,0);//normalize(world.xyz - origin); -} - -void RaygenCommon() -{ - float3 rayDir; - float3 origin; - - // Generate a ray for a camera pixel corresponding to an index from the dispatched 2D grid. - GenerateCameraRay(DispatchRaysIndex().xy, origin, rayDir); - - // Trace the ray. - // Set the ray's extents. - RayDesc ray; - ray.Origin = origin; - ray.Direction = rayDir; - // Set TMin to a non-zero small value to avoid aliasing issues due to floating - point errors. - // TMin should be kept small to prevent missing geometry at close contact areas. 
- ray.TMin = 0.001; - ray.TMax = 10000.0; - RayPayload payload = { float4(0, 0, 0, 0) }; - TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload); - - // Write the raytraced color to the output texture. - // RenderTarget[DispatchRaysIndex().xy] = payload.color; -} - -[shader("raygeneration")] -void Raygen() -{ - RaygenCommon(); -} -)"; - - auto Testables = TestStructAnnotationCase(hlsl, L"-Od", true, L"lib_6_6"); - - // Built-in type "RayDesc" has this structure: struct { float3 Origin; float - // TMin; float3 Direction; float TMax; } This is 8 floats, with members at - // offsets 0,3,4,7 respectively. - - auto FindAtLeastOneOf = [=](char const *name, uint32_t index) { - VERIFY_IS_TRUE(std::find_if(Testables.AllocaWrites.begin(), - Testables.AllocaWrites.end(), - [&name, &index](AllocaWrite const &aw) { - return 0 == strcmp(aw.memberName.c_str(), - name) && - aw.index == index; - }) != Testables.AllocaWrites.end()); - }; - - FindAtLeastOneOf("Origin.x", 0); - FindAtLeastOneOf("TMin", 3); - FindAtLeastOneOf("Direction.x", 4); - FindAtLeastOneOf("TMax", 7); -} - TEST_F(PixTest, PixStructAnnotation_Simple) { if (m_ver.SkipDxilVersion(1, 5)) return; @@ -3441,7 +3346,6 @@ void RaygenInternalName() // check that there are alloca writes that cover all of them. RayPayload // has four elements, and RayDesc has eight. 
std::array RayPayloadElementCoverage; - std::array RayDescElementCoverage; for (auto const &write : metaDataKeyToValue.allocaWrites) { // the whole point of the changes with this test is to separate vector @@ -3452,14 +3356,10 @@ void RaygenInternalName() if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) { if (findAlloca->second.count == 4) { RayPayloadElementCoverage[write.second.offset] = true; - } else if (findAlloca->second.count == 8) { - RayDescElementCoverage[write.second.offset] = true; } } } // Check that coverage for every element was emitted: for (auto const &b : RayPayloadElementCoverage) VERIFY_IS_TRUE(b); - for (auto const &b : RayDescElementCoverage) - VERIFY_IS_TRUE(b); } From f9c2d5de38cd37b42de07fe5b986bb424df38be5 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Fri, 16 May 2025 18:03:51 +0200 Subject: [PATCH 43/93] [SER] Diagnose HitObject in unsupported declaration contexts (#7376) - Generalize long vector diagnostics code to HitObjects. - Diagnose unsupported use of HitObject in globals, entry params/returns and various other shader-kind-specific contexts. - Create HitObject variants from the invalid-longvec-decls*.hlsl tests to make sure all cases are covered. 
Specification: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md Closes #7234 [SER] Diagnose and validate illegal use of HitObject in unsupported contexts (discussed offline) --- tools/clang/include/clang/AST/DeclCXX.h | 11 - .../clang/Basic/DiagnosticSemaKinds.td | 13 +- tools/clang/include/clang/Sema/SemaHLSL.h | 32 ++ tools/clang/lib/AST/DeclCXX.cpp | 14 +- tools/clang/lib/AST/HlslTypes.cpp | 7 + tools/clang/lib/Sema/SemaDXR.cpp | 13 +- tools/clang/lib/Sema/SemaHLSL.cpp | 241 +++++++----- tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp | 24 +- .../lib/Sema/SemaTemplateInstantiate.cpp | 12 - .../HitObject/hitobject-in-buffer.hlsl | 2 +- .../hitobject_traceinvoke_payload_udt.hlsl | 17 +- .../types/invalid-hitobject-decls-hs.hlsl | 32 ++ .../types/invalid-hitobject-decls-struct.hlsl | 344 ++++++++++++++++++ .../invalid-hitobject-decls-templated.hlsl | 340 +++++++++++++++++ .../workgraph/invalid_node_record_type.hlsl | 14 +- .../clang/test/SemaHLSL/template-checks.hlsl | 6 +- .../test/SemaHLSL/template-udt-load.hlsl | 4 +- 17 files changed, 964 insertions(+), 162 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 36e0f99c82..3b07576545 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,10 +465,6 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; - /// \brief Whether this class contains at least one member or base - /// class containing an HLSL vector longer than 4 elements. 
- bool HasHLSLLongVector : 1; - /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1022,13 +1018,6 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } - // HLSL Change add HLSL Long vector bit. - /// \brief Determine whether this class contains an HLSL long vector - /// of over 4 elements. - bool hasHLSLLongVector() { return data().HasHLSLLongVector; } - /// \brief Set that this class contains an HLSL long vector of over 4 elements - bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } - /// \brief Determine whether this class describes a lambda function object. bool isLambda() const { // An update record can't turn a non-lambda into a lambda. diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index ae7e777180..003aa50795 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7558,8 +7558,6 @@ def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_typ "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< "multiple concrete base types specified">; -def err_hlsl_objectintemplateargument : Error< - "%0 is an object and cannot be used as a type parameter">; def err_hlsl_packoffset_requires_cbuffer : Error< "packoffset is only allowed in a constant buffer">; def warn_hlsl_packoffset_mix : Warning< @@ -7886,6 +7884,15 @@ def err_hlsl_unsupported_long_vector "entry function parameters|entry function return type|" "patch constant function parameters|patch constant function return type|" "payload parameters|attributes}0 are not supported">; +// First %select options must match err_hlsl_unsupported_long_vector (same index used) +def err_hlsl_unsupported_object_context + : Error<"object %0 is not allowed in " + "%select{ConstantBuffers or TextureBuffers|" + 
"tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function parameters|patch constant function return type|" + "payload parameters|attributes|builtin template parameters|structured buffers|global variables|groupshared variables}1">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< @@ -7970,8 +7977,6 @@ def err_hlsl_too_many_node_inputs : Error< "Node shader '%0' may not have more than one input record">; def err_hlsl_node_record_type : Error< "%0 is not valid as a node record type - struct/class required">; -def err_hlsl_node_record_object : Error< - "object %0 may not appear in a node record">; def err_hlsl_array_disallowed : Error< "%select{entry parameter|declaration}1 of type %0 may not be an array">; def err_hlsl_inputpatch_size: Error< diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 59d99ab4c5..80ce8ddd7d 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -59,6 +59,38 @@ bool DiagnoseNodeStructArgument(clang::Sema *self, clang::QualType ArgTy, bool &Empty, const clang::FieldDecl *FD = nullptr); +// Keep this in sync with err_hlsl_unsupported_object in DiagnosticSemaKinds.td +enum class TypeDiagContext { + // Indices that the type context is valid and no diagnostics should be emitted + // for this type category. 
+ Valid = -1, + // Supported indices for both `err_hlsl_unsupported_object_context` and + // `err_hlsl_unsupported_long_vector` + ConstantBuffersOrTextureBuffers = 0, + TessellationPatches = 1, + GeometryStreams = 2, + NodeRecords = 3, + CBuffersOrTBuffers = 4, + UserDefinedStructParameter = 5, + EntryFunctionParameters = 6, + EntryFunctionReturnType = 7, + PatchConstantFunctionParameters = 8, + PatchConstantFunctionReturnType = 9, + PayloadParameters = 10, + Attributes = 11, + TypeParameter = 12, + LongVecDiagMaxSelectIndex = TypeParameter, + // Below only supported for `err_hlsl_unsupported_object_context` + StructuredBuffers = 13, + GlobalVariables = 14, + GroupShared = 15, + DiagMaxSelectIndex = 15, +}; +bool DiagnoseTypeElements(clang::Sema &S, clang::SourceLocation Loc, + clang::QualType Ty, TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + const clang::FieldDecl *FD = nullptr); + void DiagnoseControlFlowConditionForHLSL(clang::Sema *self, clang::Expr *condExpr, llvm::StringRef StmtName); diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index baed44667f..8023a0a588 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -72,8 +72,8 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) ImplicitCopyAssignmentHasConstParam(true), HasDeclaredCopyConstructorWithConstParam(false), HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), - IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), - NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} + IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), Bases(), + VBases(), Definition(D), FirstFriend() {} // HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { @@ -203,11 +203,6 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout())
data().IsStandardLayout = false; - // HLSL Change Begin - Propagate presence of long vector to child classes. - if (BaseClassDecl->hasHLSLLongVector()) - data().HasHLSLLongVector = true; - // HLSL Change End - // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -389,11 +384,6 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } - - // HLSL Change Begin - Propagate presence of long vector to child classes. - if (Subobj->hasHLSLLongVector()) - data().HasHLSLLongVector = true; - // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index e081362ebf..05386ddaa5 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -120,6 +120,13 @@ bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { if (!IsHLSLNumericOrAggregateOfNumericType(Member->getType())) return false; } + if (auto *CXXRD = dyn_cast(RD)) { + // Walk up the inheritance chain and check base class fields + for (const auto &Base : CXXRD->bases()) { + if (!IsHLSLCopyableAnnotatableRecord(Base.getType())) + return false; + } + } return true; } return false; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index f0102f9e3f..04e1582513 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -827,19 +827,16 @@ void DiagnoseBuiltinCallWithPayload(Sema &S, const VarDecl *Payload, } // Verify that the payload type is legal - if (!hlsl::IsHLSLCopyableAnnotatableRecord(Payload->getType())) { + if (!hlsl::IsHLSLCopyableAnnotatableRecord(Payload->getType())) S.Diag(Payload->getLocation(), diag::err_payload_attrs_must_be_udt) << 
/*payload|attributes|callable*/ 0 << /*parameter %2|type*/ 0 << Payload; - return; - } - if (ContainsLongVector(Payload->getType())) { - const unsigned PayloadParametersIdx = 10; - S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << PayloadParametersIdx; + // This will produce more details, but also catch disallowed long vectors + const TypeDiagContext DiagContext = TypeDiagContext::PayloadParameters; + if (DiagnoseTypeElements(S, Payload->getLocation(), Payload->getType(), + DiagContext, DiagContext)) return; - } CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index b15068638d..e5424ecdde 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -46,6 +46,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -5394,7 +5395,8 @@ class HLSLExternalSource : public ExternalSemaSource { objectKind = ClassifyRecordType(recordType); switch (objectKind) { case AR_TOBJ_OBJECT: - m_sema->Diag(argLoc, diag::err_hlsl_objectintemplateargument) << type; + m_sema->Diag(argLoc, diag::err_hlsl_unsupported_object_context) + << type << static_cast(TypeDiagContext::TypeParameter); return false; case AR_TOBJ_COMPOUND: { const RecordDecl *recordDecl = recordType->getDecl(); @@ -5533,14 +5535,27 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (ContainsLongVector(argType)) { - const unsigned ConstantBuffersOrTextureBuffersIdx = 0; - m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << ConstantBuffersOrTextureBuffersIdx; + TypeDiagContext DiagContext = + 
TypeDiagContext::ConstantBuffersOrTextureBuffers; + if (DiagnoseTypeElements(*m_sema, argSrcLoc, argType, DiagContext, + DiagContext)) return true; - } } return false; + } else if (ResAttr && DXIL::IsStructuredBuffer(ResAttr->getResKind())) { + if (TemplateArgList.size() == 1) { + const TemplateArgumentLoc &ArgLoc = TemplateArgList[0]; + const TemplateArgument &Arg = ArgLoc.getArgument(); + if (Arg.getKind() == TemplateArgument::ArgKind::Type) { + QualType ArgType = Arg.getAsType(); + SourceLocation ArgSrcLoc = ArgLoc.getLocation(); + if (DiagnoseTypeElements( + *m_sema, ArgSrcLoc, ArgType, + TypeDiagContext::StructuredBuffers /*ObjDiagContext*/, + TypeDiagContext::Valid /*LongVecDiagContext*/)) + return true; + } + } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5641,13 +5656,10 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { - const unsigned TessellationPatchesIDx = 1; - m_sema->Diag(argLoc.getLocation(), - diag::err_hlsl_unsupported_long_vector) - << TessellationPatchesIDx; + const TypeDiagContext DiagContext = TypeDiagContext::TessellationPatches; + if (DiagnoseTypeElements(*m_sema, argLoc.getLocation(), arg.getAsType(), + DiagContext, DiagContext)) return true; - } } else if (Template->getTemplatedDecl()->hasAttr()) { DXASSERT(TemplateArgList.size() > 0, "Geometry streams should have at least one template args"); @@ -5660,13 +5672,10 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { - const unsigned GeometryStreamsIdx = 2; - m_sema->Diag(argLoc.getLocation(), - diag::err_hlsl_unsupported_long_vector) - << GeometryStreamsIdx; + const TypeDiagContext DiagContext = TypeDiagContext::GeometryStreams; + if 
(DiagnoseTypeElements(*m_sema, argLoc.getLocation(), arg.getAsType(), + DiagContext, DiagContext)) return true; - } } bool isMatrix = Template->getCanonicalDecl() == @@ -10784,11 +10793,9 @@ bool DiagnoseIntersectionAttributes(Sema &S, SourceLocation Loc, QualType Ty) { return false; } - if (ContainsLongVector(Ty)) { - const unsigned AttributesIdx = 11; - S.Diag(Loc, diag::err_hlsl_unsupported_long_vector) << AttributesIdx; + const TypeDiagContext DiagContext = TypeDiagContext::Attributes; + if (DiagnoseTypeElements(S, Loc, Ty, DiagContext, DiagContext)) return false; - } return true; } @@ -10940,6 +10947,10 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( if (!IsLegalTemplate) { getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_numeric) << intrinsicName; + DiagnoseTypeElements( + *getSema(), Loc, functionTemplateTypeArg, + TypeDiagContext::TypeParameter /*ObjDiagContext*/, + TypeDiagContext::Valid /*LongVecDiagContext*/); return Sema::TemplateDeductionResult::TDK_Invalid; } } @@ -12128,34 +12139,73 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, ///////////////////////////////////////////////////////////////////////////// -bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, - QualType ArgTy, bool &Empty, - const FieldDecl *FD) { - DXASSERT_NOMSG(!ArgTy.isNull()); +static bool AllowObjectInContext(QualType Ty, TypeDiagContext DiagContext) { + // Disallow all object in template type parameters (former + // err_hlsl_objectintemplateargument) + if (DiagContext == TypeDiagContext::TypeParameter) + return false; + // Disallow all objects in node records (former + // err_hlsl_node_record_object) + if (DiagContext == TypeDiagContext::NodeRecords) + return false; + // TODO: Extend this list for other object types. 
+ if (IsHLSLHitObjectType(Ty)) + return false; + return true; +} - HLSLExternalSource *source = HLSLExternalSource::FromSema(self); - ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); - switch (shapeKind) { +// Check the element types of `Ty` for types not allowed in the given context. +// Returns true if a disallowed object type is found in `Ty`; Sema `S`, +// location `Loc`, error index `ObjDiagContext`, and FieldDecl `FD` are used to +// emit the diagnostic. If `LongVecDiagContext` is not `Valid`, long vectors in +// `Ty` are diagnosed as well. If the type is not empty, `Empty` is set to +// false. `CheckedDecls` is used to prevent redundant recursive type checks. +static bool +DiagnoseElementTypes(Sema &S, SourceLocation Loc, QualType Ty, bool &Empty, + TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + llvm::SmallPtrSet &CheckedDecls, + const clang::FieldDecl *FD) { + if (Ty.isNull() || Ty->isDependentType()) + return false; + + const bool CheckLongVec = LongVecDiagContext != TypeDiagContext::Valid; + const bool CheckObjects = ObjDiagContext != TypeDiagContext::Valid; + + while (const ArrayType *Arr = Ty->getAsArrayTypeUnsafe()) + Ty = Arr->getElementType(); + + const int ObjDiagContextIdx = static_cast(ObjDiagContext); + const int LongVecDiagContextIdx = static_cast(LongVecDiagContext); + DXASSERT_NOMSG( + LongVecDiagContext == TypeDiagContext::Valid || + (0 <= LongVecDiagContextIdx && + LongVecDiagContextIdx <= + static_cast(TypeDiagContext::LongVecDiagMaxSelectIndex))); + + HLSLExternalSource *Source = HLSLExternalSource::FromSema(&S); + ArTypeObjectKind ShapeKind = Source->GetTypeObjectKind(Ty); + switch (ShapeKind) { case AR_TOBJ_VECTOR: - if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { - const unsigned NodeRecordsIdx = 3; - self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << NodeRecordsIdx; + if (CheckLongVec && GetHLSLVecSize(Ty) > DXIL::kDefaultMaxVectorLength) { + S.Diag(Loc, diag::err_hlsl_unsupported_long_vector) + <<
LongVecDiagContextIdx; Empty = false; return false; } LLVM_FALLTHROUGH; - case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: Empty = false; return false; case AR_TOBJ_OBJECT: Empty = false; - self->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_object) - << ArgTy << ArgLoc.getSourceRange(); + if (!CheckObjects || AllowObjectInContext(Ty, ObjDiagContext)) + return false; + S.Diag(Loc, diag::err_hlsl_unsupported_object_context) + << Ty << ObjDiagContextIdx; if (FD) - self->Diag(FD->getLocation(), diag::note_field_declared_here) + S.Diag(FD->getLocation(), diag::note_field_declared_here) << FD->getType() << FD->getSourceRange(); return true; case AR_TOBJ_DEPENDENT: @@ -12164,25 +12214,55 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, return true; case AR_TOBJ_COMPOUND: { bool ErrorFound = false; - const RecordDecl *RD = ArgTy->getAs()->getDecl(); + const RecordDecl *RD = Ty->getAs()->getDecl(); + // Never recurse redundantly into related subtypes that have already been + // checked. 
+ if (!CheckedDecls.insert(RD).second) + return false; + // Check the fields of the RecordDecl - for (auto *FD : RD->fields()) + for (auto *ElemFD : RD->fields()) { ErrorFound |= - DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - if (RD->isCompleteDefinition()) - if (auto *Child = dyn_cast(RD)) - // Walk up the inheritance chain and check base class fields - for (auto &B : Child->bases()) - ErrorFound |= - DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); + DiagnoseElementTypes(S, Loc, ElemFD->getType(), Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, ElemFD); + } + if (!RD->isCompleteDefinition()) + return ErrorFound; + + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseElementTypes(S, Loc, B.getType(), Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, nullptr); return ErrorFound; } default: - DXASSERT(false, "unreachable"); + // Not a recursive type, no element types to check here + Empty = false; return false; } } +bool hlsl::DiagnoseTypeElements(Sema &S, SourceLocation Loc, QualType Ty, + TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + const clang::FieldDecl *FD) { + bool Empty = false; + llvm::SmallPtrSet CheckedDecls; + return DiagnoseElementTypes(S, Loc, Ty, Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, FD); +} + +bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, + QualType ArgTy, bool &Empty, + const FieldDecl *FD) { + llvm::SmallPtrSet CheckedDecls; + return DiagnoseElementTypes(*self, ArgLoc.getLocation(), ArgTy, Empty, + TypeDiagContext::NodeRecords, + TypeDiagContext::NodeRecords, CheckedDecls, FD); +} + // This function diagnoses whether or not all entry-point attributes // should exist on this shader stage void DiagnoseEntryAttrAllowedOnStage(clang::Sema *self, @@ -12610,21 +12690,6 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, 
clang::QualType returnType, return false; } -bool hlsl::ContainsLongVector(QualType QT) { - if (QT.isNull() || QT->isDependentType()) - return false; - - while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) - QT = Arr->getElementType(); - - if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { - if (!Decl->isCompleteDefinition()) - return false; - return Decl->hasHLSLLongVector(); - } - return false; -} - bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -15295,8 +15360,8 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } - // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { + // Disallow intangible HLSL objects in the global scope. + if (isGlobal) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. struct SilentDiagnoser : public TypeDiagnoser { @@ -15304,12 +15369,22 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (ContainsLongVector(qt)) { - unsigned CbuffersOrTbuffersIdx = 4; - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << CbuffersOrTbuffersIdx; + + // Disallow objects in the global context + TypeDiagContext ObjDiagContext = TypeDiagContext::CBuffersOrTBuffers; + if (isGroupShared) + ObjDiagContext = TypeDiagContext::GroupShared; + else if (isStatic) + ObjDiagContext = TypeDiagContext::GlobalVariables; + + TypeDiagContext LongVecDiagContext = TypeDiagContext::Valid; + + // Disallow long vecs from $Global cbuffers. 
+ if (!isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) + LongVecDiagContext = TypeDiagContext::CBuffersOrTBuffers; + if (DiagnoseTypeElements(*this, D.getLocStart(), qt, ObjDiagContext, + LongVecDiagContext)) result = false; - } } // SPIRV change starts @@ -16214,13 +16289,10 @@ static bool isRelatedDeclMarkedNointerpolation(Expr *E) { // Verify that user-defined intrinsic struct args contain no long vectors static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsLongVector(Arg->getType())) { - const unsigned UserDefinedStructParameterIdx = 5; - S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << UserDefinedStructParameterIdx; - return true; - } - return false; + const TypeDiagContext DiagContext = + TypeDiagContext::UserDefinedStructParameter; + return DiagnoseTypeElements(*S, Arg->getExprLoc(), Arg->getType(), + DiagContext, DiagContext); } static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, @@ -16957,18 +17029,15 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. 
for (const auto *param : FD->params()) { - if (ContainsLongVector(param->getType())) { - const unsigned EntryFunctionParametersIdx = 6; - S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << EntryFunctionParametersIdx; - } + const TypeDiagContext DiagContext = + TypeDiagContext::EntryFunctionParameters; + hlsl::DiagnoseTypeElements(S, param->getLocation(), param->getType(), + DiagContext, DiagContext); } - if (ContainsLongVector(FD->getReturnType())) { - const unsigned EntryFunctionReturnIdx = 7; - S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << EntryFunctionReturnIdx; - } + const TypeDiagContext DiagContext = TypeDiagContext::EntryFunctionReturnType; + DiagnoseTypeElements(S, FD->getLocation(), FD->getReturnType(), DiagContext, + DiagContext); DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index abca7cbf86..a3ca955802 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -709,20 +709,18 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } - for (const auto *param : pPatchFnDecl->params()) - if (ContainsLongVector(param->getType())) { - const unsigned PatchConstantFunctionParametersIdx = 8; - self->Diag(param->getLocation(), - diag::err_hlsl_unsupported_long_vector) - << PatchConstantFunctionParametersIdx; - } - - if (ContainsLongVector(pPatchFnDecl->getReturnType())) { - const unsigned PatchConstantFunctionReturnIdx = 9; - self->Diag(pPatchFnDecl->getLocation(), - diag::err_hlsl_unsupported_long_vector) - << PatchConstantFunctionReturnIdx; + for (const auto *param : pPatchFnDecl->params()) { + const TypeDiagContext ParamDiagContext = + TypeDiagContext::PatchConstantFunctionParameters; + DiagnoseTypeElements(*self, param->getLocation(), param->getType(), + ParamDiagContext, ParamDiagContext); } + 
+ const TypeDiagContext ReturnDiagContext = + TypeDiagContext::PatchConstantFunctionReturnType; + DiagnoseTypeElements(*self, pPatchFnDecl->getLocation(), + pPatchFnDecl->getReturnType(), ReturnDiagContext, + ReturnDiagContext); } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index 1eacedbb0b..a6ae05faa5 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,18 +2139,6 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); - // HLSL Change Begin - set longvec bit for vectors of over 4 elements - ClassTemplateSpecializationDecl *Spec = - dyn_cast(Instantiation); - if (Spec && Spec->hasAttr()) { - const TemplateArgumentList &argList = Spec->getTemplateArgs(); - const TemplateArgument &arg1 = argList[1]; - llvm::APSInt vecSize = arg1.getAsIntegral(); - if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) - Instantiation->setHasHLSLLongVector(); - } - // HLSL Change End - set longvec bit for vectors of over 4 elements - // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. 
ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl index baa3a07a5b..b091bd2ac5 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -1,4 +1,4 @@ // RUN: %dxc -T lib_6_9 %s -verify -// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +// expected-error@+1{{object 'dx::HitObject' is not allowed in structured buffers}} RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl index e89e33a78f..ee4ff8c020 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl @@ -4,19 +4,28 @@ struct [raypayload] Payload { - int a : read(caller, closesthit, miss) : write(caller, closesthit, miss); + int a : read(closesthit, miss) : write(anyhit); dx::HitObject hit; }; -struct Attribs +struct +[raypayload] +PayloadLV { - float2 barys; + int a : read(closesthit, miss) : write(anyhit); + vector b : read(closesthit, miss) : write(anyhit); }; [shader("raygeneration")] void RayGen() { - // expected-error@+1{{payload parameter 'payload_in_rg' must be a user-defined type composed of only numeric types}} + // expected-error@+3{{payload parameter 'payload_in_rg' must be a user-defined type composed of only numeric types}} + // expected-error@+2{{object 'dx::HitObject' is not allowed in payload parameters}} + // expected-note@8{{'dx::HitObject' field declared here}} Payload payload_in_rg; dx::HitObject::Invoke( dx::HitObject(), payload_in_rg ); + + // expected-error@+1{{vectors of 
over 4 elements in payload parameters are not supported}} + PayloadLV payload_with_lv; + dx::HitObject::Invoke( dx::HitObject(), payload_with_lv ); } \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl new file mode 100644 index 0000000000..3a4457bd5f --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -T hs_6_9 -verify %s + +struct HsConstantData { + float Edges[3] : SV_TessFactor; + dx::HitObject hit; +}; + +struct LongVec { + float4 f; + dx::HitObject hit; +}; + +HsConstantData +PatchConstantFunction( + // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function return type}} + // expected-note@5{{'dx::HitObject' field declared here}} + dx::HitObject hit : V, + // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function parameters}} + LongVec lv : L) + // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function parameters}} + // expected-note@10{{'dx::HitObject' field declared here}} +{ + HsConstantData empty; + return empty; +} + +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("PatchConstantFunction")] +void main() { +} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl new file mode 100644 index 0000000000..b6b28700a9 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl @@ -0,0 +1,344 @@ +// RUN: %dxc -T lib_6_9 -DTYPE=HitStruct -verify %s +// RUN: %dxc -T lib_6_9 -DTYPE=HitStructSub -verify %s + + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#ifndef TYPE +#define TYPE HitTpl +#endif + +// Add tests for base types and instantiated template classes with HitObjects + +struct 
HitStruct { + float4 f; + dx::HitObject hit; +}; + +struct HitStructSub : HitStruct { + int3 is; +}; + +template +struct HitTpl { + float4 f; + T val; +}; + +TYPE global_type; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +dx::HitObject global_hit; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +dx::HitObject global_hit_arr[10]; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + +static TYPE static_gv; +// expected-error@-1{{object 'dx::HitObject' is not allowed in global variables}} +// expected-note@16{{'dx::HitObject' field declared here}} + +cbuffer BadBuffy { + dx::HitObject cb_hit; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject cb_hit_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +}; + +tbuffer BadTuffy { + dx::HitObject tb_vec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject tb_vec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + TYPE tb_vec_rec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@16{{'dx::HitObject' field declared here}} + TYPE tb_vec_rec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@16{{'dx::HitObject' field declared here}} +}; + +StructuredBuffer struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +RWStructuredBuffer rw_struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +ConstantBuffer const_buf; +// 
expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +TextureBuffer tex_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@16{{'dx::HitObject' field declared here}} + +ByteAddressBuffer bab; +RWByteAddressBuffer rw_bab; + +[Shader("raygeneration")] +void main() +{ + bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + rw_bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + TYPE val; + rw_bab.Store(0, val); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Store must be a single numeric type}} +} + +[shader("pixel")] +TYPE ps_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@16{{'dx::HitObject' field declared here}} + TYPE vec : V) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@16{{'dx::HitObject' field declared here}} + TYPE parm : P) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // 
expected-note@16{{'dx::HitObject' field declared here}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point( + line TYPE e, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout PointStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout LineStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_tri( + triangle TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout TriangleStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("domain")] +[domain("tri")] +void ds_main( + OutputPatch TrianglePatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +void patch_const( + InputPatch inpatch, + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} + OutputPatch outpatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("hull")] +[domain("tri")] 
+[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} +// expected-note@16{{'dx::HitObject' field declared here}} + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRHitStruct { + float4 f : write(closesthit) : read(caller); + TYPE hit : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRHitStructSub : DXRHitStruct { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRHitTpl { + float4 f : write(closesthit) : read(caller); + T hit : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + + +TYPE userFunc(TYPE arg) { + return arg; +} + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + TYPE val; + TYPE res = userFunc(val); +} + +[shader("closesthit")] +void closesthit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + in RTTYPE attribs) { + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 
0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("anyhit")] +void AnyHit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + in RTTYPE attribs) + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} +{ +} + +[shader("miss")] +void Miss( + inout RTTYPE payload){ + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + 
// expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("callable")] +void callable1( + inout RTTYPE p) { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{callable parameter 'p' must be a user-defined type composed of only numeric types}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +static groupshared TYPE gs_var; +// expected-error@-1{{object 'dx::HitObject' is not allowed in groupshared variables}} +// expected-note@16{{'dx::HitObject' field declared here}} + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + TYPE as_pld; + DispatchMesh(1,1,1,as_pld); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +struct NodeHitStruct { + uint3 grid : SV_DispatchGrid; + TYPE hit; +}; + +struct NodeHitStructSub : NodeHitStruct { + int3 is; +}; + +template +struct NodeHitTpl { + uint3 grid : SV_DispatchGrid; + T hit; +}; + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8, 1, 1)] +void broadcast( +// expected-error@-1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} + DispatchNodeInputRecord input, + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} + NodeOutput output) + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} +{ + ThreadNodeOutputRecords touts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in 
node records}} + // expected-note@16{{'dx::HitObject' field declared here}} + GroupNodeOutputRecords gouts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@16{{'dx::HitObject' field declared here}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@16{{'dx::HitObject' field declared here}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl new file mode 100644 index 0000000000..4ffd53878d --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl @@ -0,0 +1,340 @@ +// RUN: %dxc -T lib_6_9 -verify %s + + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#define TYPE HitTpl + +// Add tests for base types and instantiated template classes with HitObjects + +struct HitStruct { + float4 f; + dx::HitObject hit; +}; + +struct HitStructSub : HitStruct { + int3 is; +}; + +template +struct HitTpl { + float4 f; + T val; +}; + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRHitStruct { + float4 f : write(closesthit) : read(caller); + TYPE hit : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRHitStructSub : DXRHitStruct { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRHitTpl { + float4 f : write(closesthit) : read(caller); + T hit : write(closesthit) : read(caller); +}; + +struct NodeHitStruct { + uint3 grid : SV_DispatchGrid; + TYPE hit; +}; + +struct NodeHitStructSub : NodeHitStruct { + int3 is; +}; + 
+template +struct NodeHitTpl { + uint3 grid : SV_DispatchGrid; + T hit; +}; + +TYPE global_type; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +dx::HitObject global_hit; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +dx::HitObject global_hit_arr[10]; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + +static TYPE static_gv; +// expected-error@-1{{object 'dx::HitObject' is not allowed in global variables}} +// expected-note@23{{'dx::HitObject' field declared here}} + +cbuffer BadBuffy { + dx::HitObject cb_hit; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject cb_hit_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +}; + +tbuffer BadTuffy { + dx::HitObject tb_vec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject tb_vec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + TYPE tb_vec_rec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@23{{'dx::HitObject' field declared here}} + TYPE tb_vec_rec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@23{{'dx::HitObject' field declared here}} +}; + +StructuredBuffer struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +RWStructuredBuffer rw_struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +ConstantBuffer const_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// 
expected-note@23{{'dx::HitObject' field declared here}} +TextureBuffer tex_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@23{{'dx::HitObject' field declared here}} + +ByteAddressBuffer bab; +RWByteAddressBuffer rw_bab; + +[Shader("raygeneration")] +void main() +{ + bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + rw_bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + TYPE val; + rw_bab.Store(0, val); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Store must be a single numeric type}} +} + +[shader("pixel")] +TYPE ps_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@23{{'dx::HitObject' field declared here}} + TYPE vec : V) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@23{{'dx::HitObject' field declared here}} + TYPE parm : P) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + parm.f = 0; + return parm; +} + + 
+[shader("geometry")] +[maxvertexcount(3)] +void gs_point( + line TYPE e, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout PointStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout LineStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout TriangleStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("domain")] +[domain("tri")] +void ds_main( + OutputPatch TrianglePatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +void patch_const( + InputPatch inpatch, + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} + OutputPatch outpatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void 
hs_main(InputPatch TrianglePatch) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} +// expected-note@23{{'dx::HitObject' field declared here}} + +#define RTTYPE PASTE(DXR,TYPE) + +TYPE userFunc(TYPE arg) { + return arg; +} + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + TYPE val; + TYPE res = userFunc(val); +} + +[shader("closesthit")] +void closesthit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + in RTTYPE attribs) { + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("anyhit")] +void AnyHit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of 
only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + in RTTYPE attribs) + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} +{ +} + +[shader("miss")] +void Miss( + inout RTTYPE payload){ + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("callable")] +void callable1( + inout RTTYPE p) { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + // expected-error@-3{{callable parameter 'p' must be a user-defined type composed of only numeric types}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // 
expected-note@40{{'dx::HitObject' field declared here}} +} + +static groupshared TYPE gs_var; +// expected-error@-1{{object 'dx::HitObject' is not allowed in groupshared variables}} +// expected-note@23{{'dx::HitObject' field declared here}} + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + TYPE as_pld; + DispatchMesh(1,1,1,as_pld); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@23{{'dx::HitObject' field declared here}} +} + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8, 1, 1)] +void broadcast( +// expected-error@-1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} + DispatchNodeInputRecord input, + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@55{{'dx::HitObject' field declared here}} + NodeOutput output) + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} +{ + ThreadNodeOutputRecords touts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} + GroupNodeOutputRecords gouts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@23{{'dx::HitObject' field declared here}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@23{{'dx::HitObject' field 
declared here}} diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl index 40b820a1b4..de523d51d1 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl @@ -76,7 +76,7 @@ void node07(RWThreadNodeInputRecord input) // expected-error {{'f2x2' (aka [Shader("node")] [NodeLaunch("thread")] -void node08(ThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node08(ThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] @@ -86,17 +86,17 @@ void node09(ThreadNodeInputRecord input) // expected-error {{'BAD [Shader("node")] [NodeLaunch("thread")] -void node10(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node10(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node11(NodeOutput input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node11(NodeOutput input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node12(NodeOutputArray output) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node12(NodeOutputArray output) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] @@ -129,7 +129,7 @@ void node16() ThreadNodeOutputRecords outrec2; // expected-error {{'f2x2' (aka 'matrix') is not valid as a node record type - struct/class required}} - GroupNodeOutputRecords outrec3; // expected-error {{object 'SamplerState' may not appear in a node record}} + GroupNodeOutputRecords outrec3; // expected-error 
{{object 'SamplerState' is not allowed in node records}} ThreadNodeOutputRecords outrec4; // expected-error {{'SamplerState' is not valid as a node record type - struct/class required}} } @@ -151,10 +151,10 @@ void node17(ThreadNodeInputRecord > input) [Shader("node")] [NodeLaunch("thread")] -void node18(ThreadNodeInputRecord > input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node18(ThreadNodeInputRecord > input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node19(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node19(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } diff --git a/tools/clang/test/SemaHLSL/template-checks.hlsl b/tools/clang/test/SemaHLSL/template-checks.hlsl index d0d736fc1f..751e89b652 100644 --- a/tools/clang/test/SemaHLSL/template-checks.hlsl +++ b/tools/clang/test/SemaHLSL/template-checks.hlsl @@ -1,8 +1,8 @@ // RUN: %dxc -Tlib_6_3 -verify %s Texture2D t_float4; -Texture2D t_obj_sampler; /* expected-error {{'SamplerState' is an object and cannot be used as a type parameter}} fxc-error {{X3124: object element type cannot be an object type}} */ -Texture2D > t_obj_tex; /* expected-error {{'Texture2D' is an object and cannot be used as a type parameter}} fxc-error {{X3124: object element type cannot be an object type}} */ +Texture2D t_obj_sampler; /* expected-error {{object 'SamplerState' is not allowed in builtin template parameters}} fxc-error {{X3124: object element type cannot be an object type}} */ +Texture2D > t_obj_tex; /* expected-error {{object 'Texture2D' is not allowed in builtin template parameters}} fxc-error {{X3124: object element type cannot be an object type}} */ matrix m_obj_sampler; /* expected-error {{'SamplerState' cannot be used as a type parameter where a scalar is required}} fxc-error 
{{X3123: matrix element type must be a scalar type}} */ matrix m_bool; @@ -15,7 +15,7 @@ matrix m_bool; RWBuffer rwb_struct; /* expected-error {{elements of typed buffers and textures must fit in four 32-bit quantities}} fxc-error {{X3037: elements of typed buffers and textures must fit in four 32-bit quantities}} */ -RWBuffer rwb_struct_objs; /* expected-error {{'SamplerState' is an object and cannot be used as a type parameter}} */ +RWBuffer rwb_struct_objs; /* expected-error {{object 'SamplerState' is not allowed in builtin template parameters}} */ void vain() { // Nothing to do here. diff --git a/tools/clang/test/SemaHLSL/template-udt-load.hlsl b/tools/clang/test/SemaHLSL/template-udt-load.hlsl index 591f27b384..f666297bb9 100644 --- a/tools/clang/test/SemaHLSL/template-udt-load.hlsl +++ b/tools/clang/test/SemaHLSL/template-udt-load.hlsl @@ -8,6 +8,8 @@ RWBuffer Out; [numthreads(1,1,1)] void main() { - RWBuffer FB = In.Load >(0); // expected-error {{Explicit template arguments on intrinsic Load must be a single numeric type}} + RWBuffer FB = In.Load >(0); + // expected-error@-1{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@-2{{object 'RWBuffer' is not allowed in builtin template parameters}} Out[0] = FB[0]; } From adffd31eb02f690d9d1afe86c7fa6f12b4e70aa1 Mon Sep 17 00:00:00 2001 From: Anupama Chandrasekhar Date: Fri, 16 May 2025 15:24:57 -0700 Subject: [PATCH 44/93] Implement HLSL Diagnostics for LinAlg operations (#7430) This PR implements checks to validate the linalg builtin functions: __builtin_MatVecMul, __builtin_MatVecMulAdd, __builtin_OuterProductAccumulate and __builtin_VectorAccumulate. 
This includes: - verify valid types for input and output vectors - const checks for compile-time const parameters - value checks for interpretation and layout (enum) parameters - min/max checks for matrix dimensions - verify input and output vectors are the right dimensions for the given matrix (dimM and dimK): packed and unpacked cases - verify matrix layout, transpose and stride rules - incorrect shader model warning Adds tests for all the above error checks. Implements https://github.com/microsoft/DirectXShaderCompiler/issues/7336 --- include/dxc/dxcapi.internal.h | 10 +- .../clang/Basic/DiagnosticSemaKinds.td | 37 + tools/clang/lib/Headers/hlsl/dx/linalg.h | 44 +- tools/clang/lib/Sema/SemaHLSL.cpp | 562 ++++++- .../mat-vec-mul-add_multioverload.hlsl | 86 +- .../mat-vec-mul_multioverload.hlsl | 86 +- ...uter-product-accumulate-multioverload.hlsl | 7 +- .../CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl | 60 +- .../hlsl/linalg/builtins/mul_add_invalid.hlsl | 1398 +++++++++++++++++ .../hlsl/linalg/builtins/mul_add_valid.hlsl | 244 +++ .../hlsl/linalg/builtins/mul_invalid.hlsl | 1156 ++++++++++++++ .../hlsl/linalg/builtins/mul_valid.hlsl | 344 ++++ .../outer_product_accumulate_invalid.hlsl | 256 +++ .../outer_product_accumulate_valid.hlsl | 66 + .../hlsl/linalg/make-interp-vec-errors.hlsl | 4 +- .../hlsl/linalg/mat-vec-mul-errors.hlsl | 2 +- .../hlsl/linalg/mat-vec-muladd-errors.hlsl | 2 +- .../linalg/outerproductaccumulate-errors.hlsl | 6 +- .../hlsl/linalg/unavailable-pre-sm69.hlsl | 14 +- utils/hct/gen_intrin_main.txt | 8 +- utils/hct/hctdb.py | 5 +- 21 files changed, 4271 insertions(+), 126 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl create mode 100644 
tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl create mode 100644 tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 28bd3e7066..41891338e6 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -133,11 +133,15 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, LICOMPTYPE_RAY_QUERY = 52, + LICOMPTYPE_LINALG = 53, // f32, partial-precision-f32, f16, + // i32, i16, u32, u16, + // int8_4packed, uint8_4packed + #ifdef ENABLE_SPIRV_CODEGEN - LICOMPTYPE_VK_BUFFER_POINTER = 53, - LICOMPTYPE_COUNT = 54 + LICOMPTYPE_VK_BUFFER_POINTER = 54, + LICOMPTYPE_COUNT = 55 #else - LICOMPTYPE_COUNT = 53 + LICOMPTYPE_COUNT = 54 #endif }; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 003aa50795..48412facad 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8018,6 +8018,43 @@ def err_hlsl_reorder_unsupported_stage : Error< "dx::MaybeReorderThread is unavailable in shader stage '%0' (requires 'raygeneration')">; def err_hlsl_hitobject_unsupported_stage : Error< "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; + +// Linear Algebra Operations +def err_hlsl_linalg_isunsigned_incorrect_for_given_type : Error< + "%0 must be %select{false|true}1 for vector of " + "%select{floating point|signed integer|unsigned integer}2 type">; +def err_hlsl_linalg_interpretation_value_incorrect : Error< + "%0 is an invalid %select{memory|register}1 interpretation value">; +def err_hlsl_linalg_matrix_layout_is_not_transposable : Error< + "RowMajor and ColumnMajor matrices are not transposable">; +def err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero : Error< + "for optimal matrix layout, 
matrix stride must be 0">; +def err_hlsl_linalg_matrix_dim_must_be_greater_than_zero: Error< + "matrix dimension must be greater than 0">; +def err_hlsl_linalg_matrix_layout_invalid : Error< + "matrix layout %0 is not valid, must be in the range [%1, %2]">; + +def err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M : Error< + "output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation">; +def err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K : Error< + "unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation">; +def err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect : Error< + "packed input vector length must be the smallest number that can hold matrix dim K values of the " + "packed(smaller) type in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true : Error< + "IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint : Error< + "packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_invalid_dim: Error< + "matrix dimension %select{M|K when using unpacked input vectors|K " + "when using packed input vectors}0 must be less than %1, in a linalg " + "Mul/MulAdd operation">; + +def err_hlsl_linalg_outer_prod_acc_vector_type_mismatch : Error< + "input vectors of outerproductaccumulate must have the same element type">; +def err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal : Error< + "matrix layout for outerproductaccumulate must be %0">; + // HLSL Change Ends // SPIRV Change Starts diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h index 51e662bbc9..4f5e62070d 100644 --- a/tools/clang/lib/Headers/hlsl/dx/linalg.h +++ 
b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -43,14 +43,30 @@ enum MatrixLayout { // Helper for signedness // namespace details { -template bool IsUnsigned() { return false; } + +template struct IsUnsigned {}; + +#define _SPECIALIZE_ISUNSIGNED(type, value) \ + template <> struct IsUnsigned { \ + static const bool Value = value; \ + } + +_SPECIALIZE_ISUNSIGNED(uint8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(int8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(uint32_t, true); +_SPECIALIZE_ISUNSIGNED(int32_t, false); +_SPECIALIZE_ISUNSIGNED(float32_t, false); #ifdef __HLSL_ENABLE_16_BIT -template <> bool IsUnsigned() { return true; } -#endif +_SPECIALIZE_ISUNSIGNED(uint16_t, true); +_SPECIALIZE_ISUNSIGNED(int16_t, false); +_SPECIALIZE_ISUNSIGNED(float16_t, false); +#else // //__HLSL_ENABLE_16_BIT +_SPECIALIZE_ISUNSIGNED(half, false); +#endif //__HLSL_ENABLE_16_BIT + +#undef _SPECIALIZE_ISUNSIGNED -template <> bool IsUnsigned() { return true; } -template <> bool IsUnsigned() { return true; } } // namespace details // @@ -116,10 +132,10 @@ Mul(MatrixRefImpl OutputVector; __builtin_MatVecMul( - /*out*/ OutputVector, details::IsUnsigned(), InputVector.Data, - details::IsUnsigned(), InputDT, Matrix.Buffer, - Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, MatrixLayout, - MatrixTranspose, Matrix.Stride); + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride); return OutputVector; } @@ -143,11 +159,11 @@ MulAdd(MatrixRefImpl OutputVector; __builtin_MatVecMulAdd( - /*out*/ OutputVector, details::IsUnsigned(), InputVector.Data, - details::IsUnsigned(), InputDT, Matrix.Buffer, - Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, MatrixLayout, - MatrixTranspose, Matrix.Stride, BiasVector.Buffer, BiasVector.StartOffset, - BiasVectorDT); + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, 
details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride, BiasVector.Buffer, + BiasVector.StartOffset, BiasVectorDT); return OutputVector; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index e5424ecdde..fa59aa6ef7 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -15,6 +15,7 @@ #include "clang/Sema/SemaHLSL.h" #include "VkConstantsTables.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" #include "dxc/DXIL/DxilUtil.h" @@ -1139,6 +1140,14 @@ static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_LinAlgCT[] = { + AR_BASIC_FLOAT32, AR_BASIC_FLOAT32_PARTIAL_PRECISION, + AR_BASIC_FLOAT16, AR_BASIC_INT32, + AR_BASIC_INT16, AR_BASIC_UINT32, + AR_BASIC_UINT16, AR_BASIC_INT8_4PACKED, + AR_BASIC_UINT8_4PACKED, AR_BASIC_NOCAST, + AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1302,6 +1311,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT g_RayQueryCT, // LICOMPTYPE_RAY_QUERY + g_LinAlgCT, // LICOMPTYPE_LINALG #ifdef ENABLE_SPIRV_CODEGEN g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER #endif @@ -11674,6 +11684,537 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, return false; } +// MatVec Ops +static const unsigned kMatVecMulOutputVectorIdx = 0; +static const unsigned kMatVecMulOutputIsUnsignedIdx = 1; +static const unsigned kMatVecMulInputVectorIdx = 2; +static const unsigned kMatVecMulIsInputUnsignedIdx = 3; +static const unsigned kMatVecMulInputInterpretationIdx = 4; +// static 
const unsigned kMatVecMulMatrixBufferIdx = 5; +// static const unsigned kMatVecMulMatrixOffsetIdx = 6; +static const unsigned kMatVecMulMatrixInterpretationIdx = 7; +static const unsigned kMatVecMulMatrixMIdx = 8; +static const unsigned kMatVecMulMatrixKIdx = 9; +static const unsigned kMatVecMulMatrixLayoutIdx = 10; +static const unsigned kMatVecMulMatrixTransposeIdx = 11; +static const unsigned kMatVecMulMatrixStrideIdx = 12; + +// MatVecAdd +const unsigned kMatVecMulAddBiasInterpretation = 15; + +static bool IsValidMatrixLayoutForMulAndMulAddOps(unsigned Layout) { + return Layout <= + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal); +} + +static bool IsOptimalTypeMatrixLayout(unsigned Layout) { + return ( + Layout == (static_cast(DXIL::LinalgMatrixLayout::MulOptimal)) || + (Layout == + (static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal)))); +} + +static bool IsValidTransposeForMatrixLayout(unsigned Layout, bool Transposed) { + switch (static_cast(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + case DXIL::LinalgMatrixLayout::ColumnMajor: + return !Transposed; + + default: + return true; + } +} + +static bool IsPackedType(unsigned type) { + return (type == static_cast(DXIL::ComponentType::PackedS8x32) || + type == static_cast(DXIL::ComponentType::PackedU8x32)); +} + +static bool IsValidLinalgTypeInterpretation(uint32_t Input, bool InRegister) { + + switch (static_cast(Input)) { + case DXIL::ComponentType::I16: + case DXIL::ComponentType::U16: + case DXIL::ComponentType::I32: + case DXIL::ComponentType::U32: + case DXIL::ComponentType::F16: + case DXIL::ComponentType::F32: + case DXIL::ComponentType::U8: + case DXIL::ComponentType::I8: + case DXIL::ComponentType::F8_E4M3: + case DXIL::ComponentType::F8_E5M2: + return true; + case DXIL::ComponentType::PackedS8x32: + case DXIL::ComponentType::PackedU8x32: + return InRegister; + default: + return false; + } +} + +static bool IsValidVectorAndMatrixDimensions(Sema &S, CallExpr *CE, + unsigned 
InputVectorSize, + unsigned OutputVectorSize, + unsigned MatrixK, unsigned MatrixM, + bool isInputPacked) { + // Check if output vector size equals to matrix dimension M + if (OutputVectorSize != MatrixM) { + Expr *OutputVector = CE->getArg(kMatVecMulOutputVectorIdx); + S.Diags.Report( + OutputVector->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M); + return false; + } + + // Check if input vector size equals to matrix dimension K in the unpacked + // case. + // Check if input vector size equals the smallest number that can hold + // matrix dimension K values + const unsigned PackingFactor = isInputPacked ? 4 : 1; + unsigned MinInputVectorSize = (MatrixK + PackingFactor - 1) / PackingFactor; + if (InputVectorSize != MinInputVectorSize) { + Expr *InputVector = CE->getArg(kMatVecMulInputVectorIdx); + if (isInputPacked) { + S.Diags.Report( + InputVector->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect); + return false; + } else { + S.Diags.Report( + InputVector->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K); + return false; + } + } + + return true; +} + +static void CheckCommonMulAndMulAddParameters(Sema &S, CallExpr *CE, + const hlsl::ShaderModel *SM) { + // Check if IsOutputUnsigned is a const parameter + bool IsOutputUnsignedFlagValue = false; + Expr *IsOutputUnsignedExpr = CE->getArg(kMatVecMulOutputIsUnsignedIdx); + llvm::APSInt IsOutputUnsignedExprVal; + if (IsOutputUnsignedExpr->isIntegerConstantExpr(IsOutputUnsignedExprVal, + S.Context)) { + IsOutputUnsignedFlagValue = IsOutputUnsignedExprVal.getBoolValue(); + } else { + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + Expr *OutputVectorExpr = CE->getArg(kMatVecMulOutputVectorIdx); + unsigned OutputVectorSizeValue = 0; + if (IsHLSLVecType(OutputVectorExpr->getType())) { + OutputVectorSizeValue = 
GetHLSLVecSize(OutputVectorExpr->getType()); + QualType OutputVectorType = + GetHLSLVecElementType(OutputVectorExpr->getType()); + const Type *OutputVectorTypePtr = OutputVectorType.getTypePtr(); + + // Check if IsOutputUnsigned flag matches output vector type. + // Must be true for unsigned int outputs, false for signed int/float + // outputs. + if (IsOutputUnsignedFlagValue && + !OutputVectorTypePtr->isUnsignedIntegerType()) { + DXASSERT_NOMSG(OutputVectorTypePtr->isSignedIntegerType() || + OutputVectorTypePtr->isFloatingType()); + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsOuputUnsigned" << false + << (OutputVectorTypePtr->isSignedIntegerType() ? 1 : 0); + return; + } else if (!IsOutputUnsignedFlagValue && + OutputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsOuputUnsigned" << true << 2; + return; + } + } + + // Check if isInputUnsigned parameter is a constant + bool IsInputUnsignedFlagValue = false; + Expr *IsInputUnsignedExpr = CE->getArg(kMatVecMulIsInputUnsignedIdx); + llvm::APSInt IsInputUnsignedExprVal; + if (IsInputUnsignedExpr->isIntegerConstantExpr(IsInputUnsignedExprVal, + S.Context)) { + IsInputUnsignedFlagValue = IsInputUnsignedExprVal.getBoolValue(); + } else { + S.Diags.Report(IsInputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + // Get InputInterpretation, check if it is constant + Expr *InputInterpretationExpr = CE->getArg(kMatVecMulInputInterpretationIdx); + llvm::APSInt InputInterpretationExprVal; + unsigned InputInterpretationValue = 0; + if (InputInterpretationExpr->isIntegerConstantExpr(InputInterpretationExprVal, + S.Context)) { + InputInterpretationValue = InputInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = true; + if (!IsValidLinalgTypeInterpretation(InputInterpretationValue, + 
InRegisterInterpretation)) { + S.Diags.Report(InputInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(InputInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(InputInterpretationExpr->getExprLoc(), + diag::err_expr_not_ice) + << 0; + return; + } + + bool IsInputVectorPacked = IsPackedType(InputInterpretationValue); + + // For packed types input vector type must be uint and isUnsigned must be + // true. The signedness is determined from the InputInterpretation + Expr *InputVectorExpr = CE->getArg(kMatVecMulInputVectorIdx); + unsigned InputVectorSizeValue = 0; + if (IsHLSLVecType(InputVectorExpr->getType())) { + InputVectorSizeValue = GetHLSLVecSize(InputVectorExpr->getType()); + QualType InputVectorType = + GetHLSLVecElementType(InputVectorExpr->getType()); + unsigned BitWidth = S.Context.getTypeSize(InputVectorType); + bool Is32Bit = (BitWidth == 32); + const Type *InputVectorTypePtr = InputVectorType.getTypePtr(); + + // Check if the isUnsigned flag setting + if (IsInputVectorPacked) { + // Check that the input vector element type is "32bit" + if (!Is32Bit) { + S.Diags.Report( + InputVectorExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint); + return; + } + + // Check that the input vector element type is an unsigned int + if (!InputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report( + InputVectorExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint); + return; + } + + // Check that isInputUnsigned is always true + // Actual signedness is inferred from the InputInterpretation + if (!IsInputUnsignedFlagValue) { + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true); + return; + } + } else { + if (IsInputUnsignedFlagValue && + !InputVectorTypePtr->isUnsignedIntegerType()) { + 
DXASSERT_NOMSG(InputVectorTypePtr->isSignedIntegerType() || + InputVectorTypePtr->isFloatingType()); + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsInputUnsigned" << false + << (InputVectorTypePtr->isSignedIntegerType() ? 1 : 0); + return; + } else if (!IsInputUnsignedFlagValue && + InputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsInputUnsigned" << true << 2; + return; + } + } + } + + // Get Matrix Dimensions M and K, check if they are constants + Expr *MatrixKExpr = CE->getArg(kMatVecMulMatrixKIdx); + llvm::APSInt MatrixKExprVal; + unsigned MatrixKValue = 0; + if (MatrixKExpr->isIntegerConstantExpr(MatrixKExprVal, S.Context)) { + MatrixKValue = MatrixKExprVal.getLimitedValue(); + } else { + S.Diags.Report(MatrixKExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + Expr *MatrixMExpr = CE->getArg(kMatVecMulMatrixMIdx); + llvm::APSInt MatrixMExprVal; + unsigned MatrixMValue = 0; + if (MatrixMExpr->isIntegerConstantExpr(MatrixMExprVal, S.Context)) { + MatrixMValue = MatrixMExprVal.getLimitedValue(); + } else { + S.Diags.Report(MatrixMExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Check MatrixM and MatrixK values are non-zero + if (MatrixMValue == 0) { + S.Diags.Report(MatrixMExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero) + << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + if (MatrixKValue == 0) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero) + << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + // Check MatrixM and MatrixK values are less than max + // Matrix dimension cannot exceed largest vector length in a Mul/MulAdd + // operation. 
+ if (MatrixMValue > DXIL::kSM69MaxVectorLength) { + S.Diags.Report(MatrixMExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 0 << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + // For packed input vectors 4 values are packed in a uint, so max Matrix K + // can be 4096 + if (IsInputVectorPacked) { + const unsigned PackingFactor = + 4; // Only supported packed formats: DATA_TYPE_(U)SINT8_T4_PACKED + if (MatrixKValue > DXIL::kSM69MaxVectorLength * PackingFactor) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 2 << std::to_string(DXIL::kSM69MaxVectorLength * PackingFactor); + return; + } + } else { + if (MatrixKValue > DXIL::kSM69MaxVectorLength) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 1 << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + } + + if (!IsValidVectorAndMatrixDimensions(S, CE, InputVectorSizeValue, + OutputVectorSizeValue, MatrixKValue, + MatrixMValue, IsInputVectorPacked)) { + return; + } + + // Get MatrixInterpretation, check if it is constant + // Make sure it is a valid value + Expr *MatrixInterpretationExpr = + CE->getArg(kMatVecMulMatrixInterpretationIdx); + llvm::APSInt MatrixInterpretationExprVal; + unsigned MatrixInterpretationValue = 0; + if (MatrixInterpretationExpr->isIntegerConstantExpr( + MatrixInterpretationExprVal, S.Context)) { + MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(MatrixInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_expr_not_ice) + << 0; + return; + } + + // Get MatrixLayout, 
check if it is constant and valid value + Expr *MatrixLayoutExpr = CE->getArg(kMatVecMulMatrixLayoutIdx); + llvm::APSInt MatrixLayoutExprVal; + unsigned MatrixLayoutValue = 0; + if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) { + MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue(); + if (!IsValidMatrixLayoutForMulAndMulAddOps(MatrixLayoutValue)) { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_layout_invalid) + << std::to_string(MatrixLayoutValue) + << std::to_string( + static_cast(DXIL::LinalgMatrixLayout::RowMajor)) + << std::to_string(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal)); + return; + } + } else { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Get MatrixTranspose, check if it is constant + Expr *MatrixTransposeExpr = CE->getArg(kMatVecMulMatrixTransposeIdx); + llvm::APSInt MatrixTransposeExprVal; + unsigned MatrixTransposeValue = 0; + if (MatrixTransposeExpr->isIntegerConstantExpr(MatrixTransposeExprVal, + S.Context)) { + MatrixTransposeValue = MatrixTransposeExprVal.getBoolValue(); + if (!IsValidTransposeForMatrixLayout(MatrixLayoutValue, + MatrixTransposeValue)) { + + S.Diags.Report(MatrixTransposeExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_layout_is_not_transposable); + return; + } + } else { + S.Diags.Report(MatrixTransposeExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + // Get MatrixStride, check if it is constant, if yes it should be zero + // for optimal layouts + Expr *MatrixStrideExpr = CE->getArg(kMatVecMulMatrixStrideIdx); + llvm::APSInt MatrixStrideExprVal; + unsigned MatrixStrideValue = 0; + if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) { + MatrixStrideValue = MatrixStrideExprVal.getLimitedValue(); + if (IsOptimalTypeMatrixLayout(MatrixLayoutValue) && + MatrixStrideValue != 0) { + S.Diags.Report( + MatrixStrideExpr->getExprLoc(), + diag:: + 
err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero); + return; + } + } +} + +static void CheckMulCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { + CheckCommonMulAndMulAddParameters(S, CE, SM); +} + +static void CheckMulAddCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { + CheckCommonMulAndMulAddParameters(S, CE, SM); + + // Check if BiasInterpretation is constant and a valid value + Expr *BiasInterpretationExpr = CE->getArg(kMatVecMulAddBiasInterpretation); + llvm::APSInt BiasInterpretationExprVal; + unsigned BiasInterpretationValue = 0; + if (BiasInterpretationExpr->isIntegerConstantExpr(BiasInterpretationExprVal, + S.Context)) { + BiasInterpretationValue = BiasInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(BiasInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(BiasInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(BiasInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(BiasInterpretationExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } +} + +// Linalg Outer Product Accumulate +// OuterProductAccumulate builtin function parameters +static const unsigned kOuterProdAccInputVector1Idx = 0; +static const unsigned kOuterProdAccInputVector2Idx = 1; +// static const unsigned kOuterProdAccMatrixBufferIdx = 2; +// static const unsigned kOuterProdAccMatrixOffsetIdx = 3; +static const unsigned kOuterProdAccMatrixInterpretationIdx = 4; +static const unsigned kOuterProdAccMatrixLayoutIdx = 5; +static const unsigned kOuterProdAccMatrixStrideIdx = 6; + +static void CheckOuterProductAccumulateCall(Sema &S, FunctionDecl *FD, + CallExpr *CE) { + // Check InputVector1 and InputVector2 are the same type + const Expr *InputVector1Expr = CE->getArg(kOuterProdAccInputVector1Idx); + const Expr 
*InputVector2Expr = CE->getArg(kOuterProdAccInputVector2Idx); + QualType InputVector1Type = InputVector1Expr->getType(); + QualType InputVector2Type = InputVector2Expr->getType(); + + // Get the element types of the vectors + const QualType InputVector1ElementType = + GetHLSLVecElementType(InputVector1Type); + const QualType InputVector2ElementType = + GetHLSLVecElementType(InputVector2Type); + + if (!S.Context.hasSameType(InputVector1ElementType, + InputVector2ElementType)) { + S.Diags.Report(InputVector2Expr->getExprLoc(), + diag::err_hlsl_linalg_outer_prod_acc_vector_type_mismatch); + return; + } + + // Check Matrix Interpretation is a constant and a valid value + Expr *MatrixInterpretationExpr = + CE->getArg(kOuterProdAccMatrixInterpretationIdx); + llvm::APSInt MatrixInterpretationExprVal; + unsigned MatrixInterpretationValue = 0; + if (MatrixInterpretationExpr->isIntegerConstantExpr( + MatrixInterpretationExprVal, S.Context)) { + MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(MatrixInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_expr_not_ice) + << 0; + return; + } + + // Check Matrix Layout must be a constant and Training Optimal + Expr *MatrixLayoutExpr = CE->getArg(kOuterProdAccMatrixLayoutIdx); + llvm::APSInt MatrixLayoutExprVal; + unsigned MatrixLayoutValue = 0; + if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) { + MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue(); + if (MatrixLayoutValue != + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal)) { + S.Diags.Report( + MatrixLayoutExpr->getExprLoc(), + diag:: + 
err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal) + << std::to_string(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal)); + return; + } + } else { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Matrix Stride must be zero (Training Optimal matrix layout) + Expr *MatrixStrideExpr = CE->getArg(kOuterProdAccMatrixStrideIdx); + llvm::APSInt MatrixStrideExprVal; + unsigned MatrixStrideValue = 0; + if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) { + MatrixStrideValue = MatrixStrideExprVal.getLimitedValue(); + if (MatrixStrideValue != 0) { + S.Diags.Report( + MatrixStrideExpr->getExprLoc(), + diag:: + err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero); + return; + } + } +} + #ifdef ENABLE_SPIRV_CODEGEN static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, bool isStatic) { @@ -11721,6 +12262,15 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall, SM); break; + case hlsl::IntrinsicOp::IOP___builtin_MatVecMul: + CheckMulCall(*this, FDecl, TheCall, SM); + break; + case hlsl::IntrinsicOp::IOP___builtin_MatVecMulAdd: + CheckMulAddCall(*this, FDecl, TheCall, SM); + break; + case hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate: + CheckOuterProductAccumulateCall(*this, FDecl, TheCall); + break; #ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: CheckVKBufferPointerCast(*this, FDecl, TheCall, false); @@ -12119,18 +12669,6 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, break; case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, true); - break; - case hlsl::IntrinsicOp::IOP___builtin_MatVecMul: - case hlsl::IntrinsicOp::IOP___builtin_MatVecMulAdd: - case 
hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate: - case hlsl::IntrinsicOp::IOP___builtin_VectorAccumulate: - if (!SM->IsSM69Plus()) { - Diags.Report(CE->getExprLoc(), - diag::warn_hlsl_intrinsic_in_wrong_shader_model) - << FD->getNameAsString() << EntryDecl->getNameAsString() << "6.9"; - return; - } - break; default: break; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl index 98a568fa22..de811982d6 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl @@ -1,43 +1,57 @@ -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-0 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-1 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 | FileCheck %s --check-prefixes COMMON,DXIL-2 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 | FileCheck %s --check-prefixes COMMON,DXIL-3 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 | FileCheck %s --check-prefixes COMMON,DXIL-4 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-5 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 
-DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-6 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 | FileCheck %s --check-prefixes COMMON,DXIL-7 - -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 | FileCheck %s --check-prefixes 
COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 
-DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 // COMMON: define void @main() // Test minimum support set of combinations for matVecMul -// HLOP-0: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) -// DXIL-0: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; 
MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-1: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) -// DXIL-1: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-2: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) -// DXIL-2: call <4 x half> @dx.op.matVecMulAdd.v4f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-3: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, 
%dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) -// DXIL-3: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-4: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) -// DXIL-4: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-0: call 
<8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-1: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-2: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; 
MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) + +// DXIL-3: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v2i32(i32 306, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) + +// DXIL-4: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) // Test unsigned variations -// HLOP-5: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, 
<8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) -// DXIL-5: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 true) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-6: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) -// DXIL-6: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) -// HLOP-7: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) 
-// DXIL-7: call <4 x i32> @dx.op.matVecMulAdd.v4i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) +// HLOP-5: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-5: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 true) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-6: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-6: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; 
MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-7: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) ByteAddressBuffer input_vector_buffer; @@ -84,10 +98,10 @@ enum MatLayout { [NumThreads(1,1,1)] void main() { - vector output_vector; + vector output_vector; static const uint is_output_unsigned = OU; - vector input_vector = input_vector_buffer.Load >(0); + vector input_vector = input_vector_buffer.Load >(0); const uint is_input_unsigned = IU; const uint input_interpretation = II; @@ -97,7 +111,7 @@ void main() const uint matrix_dimK = 8; const uint matrix_layout = ML; const bool matrix_is_transposed = (bool) MT; - const uint matrix_stride = 64; + const uint matrix_stride = MST; const uint bias_offset = 0; const uint bias_interpretation = BI; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl index 2ca2648503..8b14fb4cf1 100644 --- 
a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl @@ -1,42 +1,56 @@ -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-0 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-2 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-3 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-4 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-5 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 | FileCheck %s --check-prefixes COMMON,DXIL-6 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 | FileCheck %s --check-prefixes COMMON,DXIL-7 - -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 
-// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=uint -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 -// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0| FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=PackedS8x32 -DINUM=2 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 
-DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DINUM=8 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DINUM=8 -DML=ColumnMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DINUM=8 -DML=MulOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor 
-DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 // COMMON: define void @main() // Test minimum support set of combinations for matVecMul -// HLOP-0: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) -// DXIL-0: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-1: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64) -// DXIL-1: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-2: call void @"dx.hl.op..void (i32, <4 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, 
i32 2, i1 true, i32 64) -// DXIL-2: call <4 x half> @dx.op.matVecMul.v4f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-3: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64) -// DXIL-3: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-4: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64) -// DXIL-4: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, 
%dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-0: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0) + +// DXIL-1: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0) + +// DXIL-2: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* 
%output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0) + +// DXIL-3: call <8 x i32> @dx.op.matVecMul.v8i32.v2i32(i32 305, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-4: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) // Test unsigned variations -// HLOP-5: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) -// DXIL-5: call <4 x i32> @dx.op.matVecMul.v4i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-6: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, 
i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64) -// DXIL-6: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) -// HLOP-7: call void @"dx.hl.op..void (i32, <4 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64) -// DXIL-7: call <4 x i32> @dx.op.matVecMul.v4i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) +// HLOP-5: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-5: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-6: call void 
@"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64) + +// DXIL-6: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0) + +// DXIL-7: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) ByteAddressBuffer input_vector_buffer; @@ -83,10 +97,10 @@ enum MatLayout { [NumThreads(1,1,1)] void main() { - vector output_vector; + vector output_vector; static const uint is_output_unsigned = OU; - vector input_vector = input_vector_buffer.Load >(0); + vector input_vector = input_vector_buffer.Load >(0); const uint is_input_unsigned = IU; const uint input_interpretation = II; @@ -96,7 +110,7 @@ void main() const uint matrix_dimK = 8; const uint matrix_layout = ML; const bool matrix_is_transposed = (bool) MT; - const uint matrix_stride = 64; + const uint matrix_stride = MST; __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, 
is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride); diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl index c40365078f..c53b7d8f21 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl @@ -1,7 +1,6 @@ // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-0 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2 - // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 // RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 @@ -11,11 +10,17 @@ ByteAddressBuffer input_vector_buffer2; RWByteAddressBuffer matrix_buffer; // COMMON: define void @main() + // DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + // HLOP-0: call void 
@"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) + // DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + // HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) + // DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + // HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) enum CompType { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl index 141801c71c..26bcc75da2 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl @@ -15,26 +15,78 @@ export float4 Test1(vector Input) { Matrix, MakeInterpretedVector(Input)); } -export vector Test2(vector Input) { +export vector Test2(vector Input) { using namespace dx::linalg; MatrixRef Matrix = { Buf, 0, 0}; // note the stride argument is dropped. 
- // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6f32(i32 305, <6 x float> %{{.+}}, i1 false, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 2, i1 false, i32 0, i1 false) + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 2, i1 false, i32 0, i1 false) return Mul(Matrix, MakeInterpretedVector(Input)); } // test that "stride" isn't ignored in non-optimal layouts -export vector Test3(vector Input) { +export vector Test3(vector Input) { using namespace dx::linalg; MatrixRef Matrix = { Buf, 0, 6 * 4 * 8}; - // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6f32(i32 305, <6 x float> %{{.+}}, i1 false, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 false) + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 false) return Mul(Matrix, MakeInterpretedVector(Input)); } + +// test that isUnsigned is set correctly for uint16_t +export vector Test4(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i16> @dx.op.matVecMul.v8i16.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + +// test that isUnsigned is set correctly for uint32_t +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + 
+// test that isUnsigned is set correctly for uint8_t4_packed +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + +// test that isUnsigned is set correctly for int8_t4_packed +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 17, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl new file mode 100644 index 0000000000..866fad8225 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl @@ -0,0 +1,1398 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Output vector, isUnsigned mismatch +void test_invalid_output_vector_type() { + + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const 
bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector output_vector_0; + const uint is_output_unsigned_0 = 0; + + // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_1; + const uint is_output_unsigned_1 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_2; + const uint is_output_unsigned_2 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// IsOutputUnsigned is not a constant parameter +void test_invalid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint 
matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint is_output_unsigned_0 = constants_buffer.Load(0); + + // expected-error@+1 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Input vector is incorrect type - 64 bit types +void test_invalid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + 
const uint is_input_unsigned_1 = 1; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Input vector is incorrect type for packed InputInterpretation +void test_invalid_input_vector_type_packed_input_interpretation() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit 
unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_3 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_3 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_3, + is_input_unsigned_3, input_interpretation_3, matrix_buffer, 
+ matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_4 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_4 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_4, + is_input_unsigned_4, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// IsInputUnsigned must be true for packed input vector type +void test_invalid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, 
bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check packed input vector dimension +void test_invalid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-error@+1 {{packed input vector length 
must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_2 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_2, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check is Input vector type/isInputUnsigned matched +void test_invalid_input_vector_type_mismatch() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, 
matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix M dimension is a constant parameter +void test_invalid_matrix_M_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimM = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + 
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix K dimension is a constant parameter +void test_invalid_matrix_K_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimK = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix M dimension is non-zero +void test_invalid_matrix_M_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint 
matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimM = 0; + // expected-error@+3 {{matrix dimension must be greater than 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix K dimension is non-zero +void test_invalid_matrix_K_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimK = 0; + // expected-error@+4 {{matrix dimension must be greater than 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix M dimension is less than Max +void test_invalid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = 
MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 1025; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4097; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = 
DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 1025; + + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4096; + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 1024; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 4096; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = 
DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4097; + + // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +void test_invalid_input_interpretation_non_const() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation = constants_buffer.Load(0); + + // expected-error@+2 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if InputInterpretation is a valid value +void test_invalid_input_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const 
uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = 0; + + // expected-error@+2 {{0 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = 1; + + // expected-error@+2 {{1 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_2 = 6; + + // expected-error@+2 {{6 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_3 = 7; + + // expected-error@+2 {{7 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_4 = 10; + + // expected-error@+2 {{10 is an 
invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_5 = 11; + + // expected-error@+2 {{11 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_5, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_6 = 12; + + // expected-error@+2 {{12 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_6, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_7 = 13; + + // expected-error@+2 {{13 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_7, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_8 = 14; + + // expected-error@+2 {{14 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_8, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, 
bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_9 = 15; + + // expected-error@+2 {{15 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_9, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_10 = 16; + + // expected-error@+2 {{16 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_10, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_11 = 23; + + // expected-error@+2 {{23 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_11, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_12 = 100; + + // expected-error@+2 {{100 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_12, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint 
input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 32; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 16; + const uint matrix_dimK = 32; + const uint matrix_layout = 
MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // Use dimension of Matrix K to trigger error + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_2; + vector input_vector_2 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Use dimension of Matrix M to trigger error + vector output_vector_3; + vector input_vector_3 = + input_vector_buffer.Load >(0); + + // 
expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_3, is_output_unsigned, input_vector_3, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_4; + vector input_vector_4 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_4, is_output_unsigned, input_vector_4, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_5; + vector input_vector_5 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_5, is_output_unsigned, input_vector_5, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Swap dimensions to trigger error + vector output_vector_6; + vector input_vector_6 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_6, is_output_unsigned, input_vector_6, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, 
matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrix interpretation is a constant value +void test_invalid_matrix_interpretation_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check for invalid matrix interpretation value +void test_invalid_matrix_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_interpretation_0 = 0; + + // 
expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_1 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_1, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_2 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_2, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_3 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_3, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_4 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_4, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_5 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_5, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_6 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_6, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_7 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_7, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_8 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_8, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_9 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, 
matrix_buffer, + matrix_offset, matrix_interpretation_9, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_10 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_10, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_11 = 23; + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_11, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_12 = 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_12, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrix Layout is a constant value +void test_invalid_matrix_layout_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const 
uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check invalid matrix layout value +void test_invalid_matrix_layout_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout_0 = 4; + + // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrix is transposed is a constant value +void test_invalid_matrix_transposed_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint 
matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = constants_buffer.Load(0); + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if invalid matrix transpose value is used +void test_invalid_matrix_transpose_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed_0 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed_0, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + const bool matrix_is_transposed_1 = true; + + // 
expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed_1, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + + +// Check invalid matrix stride value for optimal matrix layout +void test_invalid_matrix_stride_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const uint matrix_stride_0 = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride_0, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride_1 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed, + matrix_stride_1, bias_buffer, bias_offset, 
bias_interpretation); +} + +// Check bias interpretation is not a constant value +void test_invalid_bias_interpretation() { + vector output_vector; + const uint is_output_unsigned = 0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const uint matrix_is_transposed = 0; + const uint matrix_stride = 0; + const uint bias_offset = 0; + + const uint bias_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+6 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_0); +} + +// Check bias interpretation is not a valid value +void test_invalid_bias_interpretation_value() { + vector output_vector; + const uint is_output_unsigned = 0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const uint matrix_is_transposed = 0; + const uint matrix_stride = 0; + const uint bias_offset = 0; + + const uint bias_interpretation_0 = 0; + + // expected-error@+6 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, 
input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_0); + + const uint bias_interpretation_1 = 1; + + // expected-error@+6 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_1); + + const uint bias_interpretation_2 = 6; + + // expected-error@+6 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_2); + + const uint bias_interpretation_3 = 7; + + // expected-error@+6 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_3); + + const uint bias_interpretation_4 = 10; + + // expected-error@+6 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_4); + + const uint bias_interpretation_5 = 11; + + // expected-error@+6 {{11 is an invalid memory 
interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_5); + + const uint bias_interpretation_6 = 12; + + // expected-error@+6 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_6); + + const uint bias_interpretation_7 = 13; + + // expected-error@+6 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_7); + + const uint bias_interpretation_8 = 14; + + // expected-error@+6 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_8); + + const uint bias_interpretation_9 = 15; + + // expected-error@+6 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + 
bias_interpretation_9); + + const uint bias_interpretation_10 = 16; + + // expected-error@+6 {{16 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_10); + + const uint bias_interpretation_11 = DataType::DATA_TYPE_SINT8_T4_PACKED; + + // expected-error@+6 {{17 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_11); + + const uint bias_interpretation_12 = DataType::DATA_TYPE_UINT8_T4_PACKED; + + // expected-error@+6 {{18 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_12); + + const uint bias_interpretation_13 = 23; + + // expected-error@+6 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_13); + + const uint bias_interpretation_14 = 100; + + // expected-error@+6 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + 
is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_14); + } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl new file mode 100644 index 0000000000..4b0bd6dd87 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl @@ -0,0 +1,244 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Check valid input vector packed types +void test_valid_input_vector_packed_types() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = 
DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// IsInputUnsigned must be true for packed input vector type +void test_valid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + 
matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check packed input vector dimension +void test_valid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix M dimension is less than Max +void test_valid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 
4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = 
DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4; + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check if Matrix K dimension is less than Max in packed input vector case +void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4096; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, 
bias_buffer, bias_offset, bias_interpretation); +} + + + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl new file mode 100644 index 0000000000..14f34d62c4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl @@ -0,0 +1,1156 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Output vector, isUnsigned mismatch +void test_invalid_output_vector_type() { + + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector output_vector_0; + const uint is_output_unsigned_0 = 0; + + // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector output_vector_1; + const uint is_output_unsigned_1 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride); + + vector output_vector_2; + const uint is_output_unsigned_2 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// IsOutputUnsigned is not a constant parameter +void test_invalid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint is_output_unsigned_0 = constants_buffer.Load(0); + + // expected-error@+1 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type - 64 bit types +void test_invalid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + 
input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type for packed InputInterpretation +void test_invalid_input_vector_type_packed_input_interpretation() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 
4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_3 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_3 = 0; + + // 
expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_3, + is_input_unsigned_3, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_4 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_4 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_4, + is_input_unsigned_4, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// IsInputUnsigned must be true for packed input vector type +void test_invalid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, 
matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check packed input vector dimension +void test_invalid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-error@+1 {{packed input vector 
length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_2 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_2, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Input vector type/isInputUnsigned mismatch +void test_invalid_input_vector_type_mismatch() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + 
// expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is a constant parameter +void test_invalid_matrix_M_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimM = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is a constant parameter +void test_invalid_matrix_K_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + 
const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimK = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is non-zero +void test_invalid_matrix_M_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimM = 0; + // expected-error@+3 {{matrix dimension must be greater than 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is non-zero +void test_invalid_matrix_K_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint 
is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimK = 0; + // expected-error@+4 {{matrix dimension must be greater than 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is less than Max +void test_invalid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 1025; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4097; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, 
input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 1025; + + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4096; + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Check if Matrix K dimension is less than Max in packed input vector case +void 
test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 1024; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 4096; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4097; + + // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +//Check if InputInterpretation is a constant parameter +void test_invalid_input_interpretation_non_const() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation = constants_buffer.Load(0); + + // expected-error@+2 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if 
InputInterpretation is a valid value +void test_invalid_input_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = 0; + + // expected-error@+2 {{0 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = 1; + + // expected-error@+2 {{1 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_2 = 6; + + // expected-error@+2 {{6 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_3 = 7; + + // expected-error@+2 {{7 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, 
matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_4 = 10; + + // expected-error@+2 {{10 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_5 = 11; + + // expected-error@+2 {{11 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_5, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_6 = 12; + + // expected-error@+2 {{12 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_6, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_7 = 13; + + // expected-error@+2 {{13 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_7, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_8 = 14; + + // expected-error@+2 {{14 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_8, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_9 = 15; + + // expected-error@+2 {{15 is an invalid 
register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_9, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_10 = 16; + + // expected-error@+2 {{16 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_10, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_11 = 23; + + // expected-error@+2 {{23 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_11, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_12 = 100; + + // expected-error@+2 {{100 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_12, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 32; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint 
matrix_stride = 64; + + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 16; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + // Use dimension of Matrix K to trigger error + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + 
matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_2; + vector input_vector_2 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_2, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Use dimension of Matrix M to trigger error + vector output_vector_3; + vector input_vector_3 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_3, is_output_unsigned, input_vector_3, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_4; + vector input_vector_4 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_4, is_output_unsigned, input_vector_4, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + 
matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_5; + vector input_vector_5 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_5, is_output_unsigned, input_vector_5, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Swap dimensions to trigger error + vector output_vector_6; + vector input_vector_6 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_6, is_output_unsigned, input_vector_6, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix interpretation is a constant value +void test_invalid_matrix_interpretation_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + 
matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check for invalid matrix interpretation value +void test_invalid_matrix_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_interpretation_0 = 0; + + // expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_1 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_1, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_2 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_2, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_3 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + 
is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_3, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_4 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_4, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_5 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_5, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_6 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_6, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_7 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_7, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_8 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_8, matrix_dimM, + matrix_dimK, 
matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_9 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_9, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_10 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_10, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_11 = 23; + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_11, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_12 = 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_12, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix Layout is a constant value +void test_invalid_matrix_layout_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint 
matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check invalid matrix layout value +void test_invalid_matrix_layout_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_layout_0 = 4; + + // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix is transposed is a constant value +void test_invalid_matrix_transposed_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const 
bool matrix_is_transposed = constants_buffer.Load(0); + const uint matrix_stride = 64; + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if invalid matrix transpose value is used +void test_invalid_matrix_transpose_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_stride = 64; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed_0 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed_0, + matrix_stride); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + const bool matrix_is_transposed_1 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed_1, + matrix_stride); +} + + +// Check invalid matrix stride value for optimal matrix layout +void test_invalid_matrix_stride_constant_value() { + + vector output_vector; + 
const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const uint matrix_stride_0 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride_0); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride_1 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed, + matrix_stride_1); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl new file mode 100644 index 0000000000..5972b22b95 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl @@ -0,0 +1,344 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer const_buffer; + +// Output vector, isUnsigned mismatch +void test_valid_output_vector_type() { + + vector input_vector = input_vector_buffer.Load >(0); + const 
uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector output_vector_0; + const uint is_output_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + vector output_vector_1; + const uint is_output_unsigned_1 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + vector output_vector_2; + const uint is_output_unsigned_2 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); +} + +void test_valid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 
64; + + const uint is_output_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type +void test_valid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride); +} + +// Check valid input vector packed types +void test_valid_input_vector_packed_types() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// IsInputUnsigned must be true for packed input vector type +void test_valid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = 
false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check packed input vector dimension +void test_valid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // 
expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is less than Max +void test_valid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint 
matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4096; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + 
matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl new file mode 100644 index 0000000000..4e15c92a5d --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl @@ -0,0 +1,256 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer accumulate_buffer; +ByteAddressBuffer constants_buffer; + +// Check if input vectors aren't the same component type +void test_invalid_input_vector_component_type() { + + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + vector input_vector_0_0 = input_vector_buffer.Load >(0); + vector input_vector_1_0 = input_vector_buffer.Load >(0); + + // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}} + __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_1 = input_vector_buffer.Load >(0); + vector input_vector_1_1 = input_vector_buffer.Load >(0); + + // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}} + __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for non constant matrix interpretation +void test_non_constant_matrix_interpretation() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector 
input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + const uint matrix_interpretation = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix interpretation is not a valid value +void test_invalid_matrix_interpretation() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + const uint matrix_interpretation = 0; + + // expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_2 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_2, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_3 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_3, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_4 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_4, matrix_layout, + matrix_stride); + + const uint 
matrix_interpretation_5 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_5, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_6 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_6, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_7 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_7, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_8 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_8, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_9 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_9, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_10 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_10, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_11 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_11, matrix_layout, + matrix_stride); + + const uint 
matrix_interpretation_12 = DataType::DATA_TYPE_SINT8_T4_PACKED; + + // expected-error@+3 {{17 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_12, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_13 = DataType::DATA_TYPE_UINT8_T4_PACKED; + + // expected-error@+3 {{18 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_13, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_14 = 23; + + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_14, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_15 = 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_15, matrix_layout, + matrix_stride); + +} + +// Check for matrix layout is not a constant parameter +void test_non_constant_matrix_layout() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_stride = 0; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix layout is not a valid value +void test_invalid_matrix_layout() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector 
input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_stride = 0; + + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + const uint matrix_layout_2 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout_2, + matrix_stride); + + const uint matrix_layout_3 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout_3, + matrix_stride); + +} + +// Check for matrix stride is zero, if constant +void test_zero_matrix_stride() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + + const uint matrix_stride = 16; + + // expected-error@+4 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl new file mode 100644 index 
0000000000..85298e2dbb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl @@ -0,0 +1,66 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer accumulate_buffer; +ByteAddressBuffer constants_buffer; + +// Check for input vectors aren't the same component type +void test_invalid_input_vector_component_type() { + + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + vector input_vector_0_0 = input_vector_buffer.Load >(0); + vector input_vector_1_0 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_1 = input_vector_buffer.Load >(0); + vector input_vector_1_1 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_2 = input_vector_buffer.Load >(0); + vector input_vector_1_2 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_2, input_vector_1_2, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for non constant matrix stride +void test_non_constant_matrix_stride() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = 
MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + + const uint matrix_stride = constants_buffer.Load(0); + + // expected-no-diagnostics@+4 + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix stride is not a valid value + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl index 9f2793d417..be67d92546 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl @@ -10,7 +10,7 @@ export float4 Test1(vector Input) { Buf, 0, 0}; // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} - // expected-note@dx/linalg.h:97{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + // expected-note@dx/linalg.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} return Mul( Matrix, MakeInterpretedVector<2>(Input)); } @@ -26,7 +26,7 @@ export float4 Test2(vector Input) { Buf, 0, 0}; // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} - // expected-note@dx/linalg.h:97{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + // expected-note@dx/linalg.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} return Mul( Matrix, MakeInterpretedVector(Input)); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl index 2d5a11e83e..b911de648e 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl @@ -11,6 +11,6 @@ vector MixUpVectorAndMatrixArguments(vector Input) { 
Buf, 0, 0}; // expected-error@+2{{no matching function for call to 'Mul'}} - // expected-note@dx/linalg.h:111{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + // expected-note@dx/linalg.h:127{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} return Mul(MakeInterpretedVector(Input), Matrix); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl index f444f81c3a..24ad3ef46c 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl @@ -11,6 +11,6 @@ vector MixUpVectorAndMatrixArguments(vector Input) { Buf, 0, 0}; // expected-error@+2{{no matching function for call to 'MulAdd'}} - // expected-note@dx/linalg.h:137{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + // expected-note@dx/linalg.h:153{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} return MulAdd(MakeInterpretedVector(Input), Matrix, MakeInterpretedVector(Input)); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl index 6f503b367b..5759631bcb 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl @@ -12,7 +12,7 @@ export void Test4(vector Input1, vector Input2) { matrix = {RWBuf, 0, 0}; // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} - // expected-note@dx/linalg.h:161{{candidate template ignored: could not match 0 against 1}} + // expected-note@dx/linalg.h:177{{candidate template ignored: could not match 0 against 1}} OuterProductAccumulate(Input1, Input2, matrix); } @@ -25,7 +25,7 @@ export void Test5(vector 
Input1, vector Input2) { matrix = {RWBuf, 0, 0}; // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} - // expected-note@dx/linalg.h:161{{candidate template ignored: could not match 0 against 1}} + // expected-note@dx/linalg.h:177{{candidate template ignored: could not match 0 against 1}} OuterProductAccumulate(Input1, Input2, matrix); } @@ -38,7 +38,7 @@ export void Test4(vector Input1, vector Input2) { matrix = {RWBuf, 0, 0}; // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} - // expected-note@dx/linalg.h:161{{candidate template ignored: deduced conflicting types for parameter 'ElTy' ('int' vs. 'unsigned int')}} + // expected-note@dx/linalg.h:177{{candidate template ignored: deduced conflicting types for parameter 'ElTy' ('int' vs. 'unsigned int')}} OuterProductAccumulate(Input1, Input2, matrix); } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl index d5e251ae8b..57683b9a59 100644 --- a/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl @@ -23,7 +23,7 @@ void cs_main() const bool matrix_is_transposed = false; const uint matrix_stride = 64; - //expected-error@+1{{intrinsic __builtin_MatVecMul potentially used by 'cs_main' requires shader model 6.9 or greater}} + //expected-error@+1{{intrinsic hlsl::__builtin_MatVecMul potentially used by ''cs_main'' requires shader model 6.9 or greater}} __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, @@ -32,7 +32,7 @@ void cs_main() const uint bias_offset = 0; const uint bias_interpretation = 9; /*F32*/ - //expected-error@+1{{intrinsic __builtin_MatVecMulAdd potentially used by 'cs_main' requires shader model 6.9 or greater}} + 
//expected-error@+1{{intrinsic hlsl::__builtin_MatVecMulAdd potentially used by ''cs_main'' requires shader model 6.9 or greater}} __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, @@ -44,16 +44,16 @@ void cs_main() const uint opa_matrix_offset = 0; const uint opa_matrix_interpretation = 5; /*U32*/ const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ - const uint opa_matrix_stride = 64; + const uint opa_matrix_stride = 0; - //expected-error@+1{{intrinsic __builtin_OuterProductAccumulate potentially used by 'cs_main' requires shader model 6.9 or greater}} + //expected-error@+1{{intrinsic hlsl::__builtin_OuterProductAccumulate potentially used by ''cs_main'' requires shader model 6.9 or greater}} __builtin_OuterProductAccumulate(input_vector1, input_vector2, rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, opa_matrix_layout, opa_matrix_stride); const uint va_matrix_offset = 0; - //expected-error@+1{{intrinsic __builtin_VectorAccumulate potentially used by 'cs_main' requires shader model 6.9 or greater}} - __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, - va_matrix_offset); + //expected-error@+1{{intrinsic hlsl::__builtin_VectorAccumulate potentially used by ''cs_main'' requires shader model 6.9 or greater}} + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); } \ No newline at end of file diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index e5e4119330..60bef02f18 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -383,13 +383,13 @@ void [[]] Barrier(in NodeRecordOrUAV o, in uint SemanticFlags); uint [[]] GetRemainingRecursionLevels(); -void [[]] __builtin_MatVecMul(out numeric OutputVector, in bool OutputIsUnsigned, in numeric InputVector, in bool InputIsUnsigned, in uint InputInterpretation, 
in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride); +void [[min_sm=6.9]] __builtin_MatVecMul(out LinAlg OutputVector, in bool OutputIsUnsigned, in LinAlg InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride); -void [[]] __builtin_MatVecMulAdd(out numeric OutputVector, in bool OutputIsUnsigned, in numeric InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride, in ByteAddressBuffer BiasVector, in uint BiasOffset, in uint BiasInterpretation); +void [[min_sm=6.9]] __builtin_MatVecMulAdd(out LinAlg OutputVector, in bool OutputIsUnsigned, in LinAlg InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride, in ByteAddressBuffer BiasVector, in uint BiasOffset, in uint BiasInterpretation); -void [[]] __builtin_OuterProductAccumulate(in numeric InputVector1, in numeric InputVector2, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint MatrixLayout, in uint MatrixStride); +void [[min_sm=6.9]] __builtin_OuterProductAccumulate(in LinAlg InputVector1, in LinAlg InputVector2, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint MatrixLayout, in uint MatrixStride); -void [[]] __builtin_VectorAccumulate(in numeric InputVector, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset); +void 
[[min_sm=6.9]] __builtin_VectorAccumulate(in LinAlg InputVector, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 5567a6a88d..3af3cde949 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -8419,13 +8419,13 @@ def build_valrules(self): self.add_valrule_msg( "Instr.MatVecOpIsUnsignedFlagsAreConst", "In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant.", - "'%1' is not a constant value", + "%0 is not a constant value", ) self.add_valrule_msg( "Instr.LinalgInterpretationParamAreConst", "In Linalg operations, Interpretation value is a constant.", - "'%1' is not a constant value", + "%0 is not a constant value", ) self.add_valrule_msg( @@ -9357,6 +9357,7 @@ def __init__(self, intrinsic_defs, opcode_data): "DxHitObject": "LICOMPTYPE_HIT_OBJECT", "VkBufferPointer": "LICOMPTYPE_VK_BUFFER_POINTER", "RayQuery": "LICOMPTYPE_RAY_QUERY", + "LinAlg": "LICOMPTYPE_LINALG", } self.trans_rowcol = {"r": "IA_R", "c": "IA_C", "r2": "IA_R2", "c2": "IA_C2"} From d72e2b1a15d22fc825e2f3c939f1baac43281ae9 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Thu, 22 May 2025 12:39:06 -0600 Subject: [PATCH 45/93] Removes improper modification copyright notice. (#7477) Modification copyright notices were added in error to files changed in [PR #7163](https://github.com/microsoft/DirectXShaderCompiler/pull/7163). 
--- include/dxc/dxcapi.internal.h | 3 --- lib/HLSL/HLOperationLower.cpp | 3 --- tools/clang/include/clang/AST/HlslTypes.h | 3 --- tools/clang/include/clang/AST/OperationKinds.h | 3 --- tools/clang/include/clang/Basic/Attr.td | 3 --- tools/clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 --- tools/clang/include/clang/SPIRV/SpirvBuilder.h | 3 --- tools/clang/include/clang/SPIRV/SpirvContext.h | 3 --- tools/clang/include/clang/SPIRV/SpirvInstruction.h | 3 --- tools/clang/include/clang/SPIRV/SpirvType.h | 3 --- tools/clang/include/clang/SPIRV/SpirvVisitor.h | 3 --- tools/clang/lib/AST/ASTContextHLSL.cpp | 3 --- tools/clang/lib/AST/Expr.cpp | 3 --- tools/clang/lib/AST/ExprConstant.cpp | 3 --- tools/clang/lib/AST/HlslTypes.cpp | 3 --- tools/clang/lib/Lex/PPMacroExpansion.cpp | 3 --- tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp | 3 --- tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 3 --- tools/clang/lib/SPIRV/EmitVisitor.cpp | 3 --- tools/clang/lib/SPIRV/EmitVisitor.h | 3 --- tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 3 --- tools/clang/lib/SPIRV/LowerTypeVisitor.h | 3 --- tools/clang/lib/SPIRV/SpirvBuilder.cpp | 3 --- tools/clang/lib/SPIRV/SpirvContext.cpp | 3 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 3 --- tools/clang/lib/SPIRV/SpirvEmitter.h | 3 --- tools/clang/lib/SPIRV/SpirvInstruction.cpp | 3 --- tools/clang/lib/Sema/SemaCast.cpp | 3 --- tools/clang/lib/Sema/SemaExprCXX.cpp | 3 --- tools/clang/lib/Sema/SemaHLSL.cpp | 3 --- utils/hct/gen_intrin_main.txt | 3 --- utils/hct/hctdb.py | 2 -- 32 files changed, 95 deletions(-) diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 41891338e6..46a485206e 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,9 +7,6 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. 
// -// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 58c1de3941..7d5eb0edce 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,9 +7,6 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilConstants.h" diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index c14f562101..58d2d916b1 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,9 +6,6 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // /// /// \file // /// \brief Defines the HLSL type system interface. // diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 3909c8b5e8..d19082d699 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index db7fdea8d9..1797597d17 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// class DocumentationCategory { diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 48412facad..0d98792688 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index e4e6ef308f..2da14dab54 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index c18c139642..50ff77d4b4 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 6d95459373..20cd57525c 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 00a00ef238..d39fc6943b 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 95bc46aa5f..fef06da503 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 0a688c03fa..913b28ced8 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,9 +6,6 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // // This file implements the ASTContext interface for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index c6dc21217e..8ed14508af 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 69e0760bce..baa0349cfe 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 05386ddaa5..017f0f7218 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,9 +5,6 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // -// -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. // // /// /// \file // diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index ebfb93df2e..16040d69c7 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index db140f4766..9bb2f1b1fa 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 9ca9cbc6cd..43ab2540b4 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index f58160254a..7d39b0ec1f 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. 
diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index bfa0710998..1cec230e50 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index b31d19b5d8..8238750af9 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 5b26b67e3a..26b6e44f6d 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 689fc0715f..a0dcb5420b 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 47dfc67433..cb44d3a3a8 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 9ffa978511..ea2347edce 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 10694313a8..978e88e4ed 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index 3b5861710d..f6ac29f379 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index f5a864e2b6..dcff6c2461 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index 5113c56205..1e70b95476 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// /// /// \file diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index fa59aa6ef7..2163eef8a3 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,9 +6,6 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // // This file implements the semantic support for HLSL. // // // /////////////////////////////////////////////////////////////////////////////// diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index 60bef02f18..ae8df55a0c 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,9 +1,6 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// // See hctdb.py for the implementation of intrinsic file processing. 
// // Intrinsic declarations are grouped into namespaces that diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 3af3cde949..2b94b13134 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,7 +1,5 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. -# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### From d14d174d9c54845696613e2d00387a67d98f6fb4 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 27 May 2025 13:16:26 -0700 Subject: [PATCH 46/93] Update Release notes for 1.8.2505 (#7481) (#7483) This change updates release notes with notable changes for release 1.8.2505. (cherry picked from commit 9efbb6c3242cbb40c1844a2589171ff1c27cf956) --- docs/ReleaseNotes.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 7788c57726..274164158e 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -23,8 +23,42 @@ Place release notes for the upcoming release below this line and remove this lin ### Version 1.8.2505 +#### Potentially breaking changes + - Typed buffers (including ROV buffers) no longer accept types other than vectors and scalars. Any other types will produce descriptive errors. This removes support for appropriately sized matrices and structs. Though it worked in some contexts, code generated from such types was unreliable. -- By default, the internal validator will be used instead of searching externally for an existing DXIL.dll. + - Load and Store operations have been refactored as a consequence. Behavior should be identical, please file issues if discrepancies are observed. 
+- The compiler will now always use the internal validator instead of searching for an external DXIL.dll. The (hidden) `-select-validator` option has been removed. + +#### Notable SPIR-V updates + +- Fix unnecessary Int64 requirement when loading Float64 +- Added vk::BufferPointer, see [proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md) for more details. +- Implement QuadAny and QuadAll (#7266) +- Fix -fvk-invert-y (#7447) + +#### Shader Model 6.9 Preview + +You can now compile shaders to SM 6.9, but this is a preview, so shader hashes will be set to the PREVIEW_BYPASS pattern. +SM 6.9 shaders will only work with AgilitySDK 1.717.0-preview, a supported preview driver, and use of experimental shader models in developer mode. +Preview shaders will not be compatible with the SM 6.9 release, or likely even later versions of the SM 6.9 preview. + +SM 6.9 Preview Additions: + +- Long vectors are allowed in HLSL when targeting shader model 6.9. Vectors up to 1024 elements in length can be loaded from/stored to raw buffers and used in elementwise operations. See the [long vector proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0026-hlsl-long-vector-type.md) for more details. +- HLSL Vectors are still limited to a maximum of 4 elements when used in certain contexts: + - entry function inputs/outputs + - parameter, payload, attribute, and node record types for mesh, raytracing, and node shaders + - constant buffers (cbuffer), texture buffers (tbuffer), textures and typed buffers + - Note: some HLSL elementwise intrinsics do not yet support long vectors in this preview +- Native vectors of up to 1024 elements are now present in DXIL. This includes vector llvm instructions, load/store, and various elementwise DXIL operations. This may result in smaller DXIL and potentially other performance improvements. 
See the [dxil vectors proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0030-dxil-vectors.md) for more details. +- Cooperative Vector operations, a subset of Linear Algebra (LinAlg). See the [cooperative vectors proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0029-cooperative-vector.md) and the [HLSL header based API proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0031-hlsl-vector-matrix-operations.md) for more details. + - New built-in operations are added for multiplying long vectors with a matrix in a ByteAddressBuffer, optionally with accumulation and bias data, as well as outer product and vector accumulate operations. + - An HLSL header shipped with this release provides a more convenient API for using these built-in operations. +- Support for [Opacity Micromaps](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0024-opacity-micromaps.md) in DXR shaders as well as for RayQuery. + - Unlocks DXR performance improvements using triangle sub-divisions for fast hit/miss detection to reduce the need for anyhit invocations. +- Support for [Shader Execution Reordering](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md) in DXR. + - Introduces `MaybeReorderThread()` to explicitly specify where and how shader execution coherence can be improved. `MaybeReorderThread()` can be used in raygeneration shaders. + - `HitObject` decouples traversal, intersection testing and anyhit shading from closesthit and miss shading for more control and better reordering opportunities. `HitObject` can be used in raygeneration, closesthit and miss shaders. ### Version 1.8.2502 From 66287b27442d0af17a152d024a6deaadb075cd30 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 28 May 2025 19:46:27 -0400 Subject: [PATCH 47/93] Update submodules (#7492) Update the submodules to the latest, and re-enabled the test that is now passing.
Fixes #7160 --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- .../clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index aa6cef192b..c9aad99f92 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit aa6cef192b8e693916eb713e7a9ccadf06062ceb +Subproject commit c9aad99f9276817f18f72a4696239237c83cb775 diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index a62abcb402..da48bb20bd 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit a62abcb402009b9ca5975e6167c09f237f630e0e +Subproject commit da48bb20bdfc8a214d5bffdacca2d1d2ae849009 diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl index cb5d7f771f..2a143afab2 100644 --- a/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl +++ b/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl @@ -1,6 +1,4 @@ // RUN: %dxc -T ms_6_6 -fspv-target-env=vulkan1.1spirv1.4 -E main %s -spirv | FileCheck %s -// XFAIL: * -// FIXME(7160): test disabled until the spirv-val fix is merged. struct MeshletPrimitiveOut { From dc59ed092b17b48436e9220a950eee3d974dbbe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 30 May 2025 12:53:17 +0200 Subject: [PATCH 48/93] [SPIR-V] Fix r-value being used in mul intrinsic (#7489) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dealing with the Load method on buffers, the operator call can emit a pointer instead of an actual load, and the user is then responsible for loading the value if required. The `mul` intrinsic code was not handling this, which caused the pointer to be passed as-is in SPIR-V.
Fixes #7246 Signed-off-by: Nathan Gauër --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 17 ++++++++++--- tools/clang/lib/SPIRV/SpirvEmitter.h | 3 +++ .../test/CodeGenSPIRV/intrinsics.mul.hlsl | 25 +++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index ea2347edce..92e4c687ca 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -1260,6 +1260,15 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, return result; } +SpirvInstruction *SpirvEmitter::doExprEnsuringRValue(const Expr *E, + SourceLocation location, + SourceRange range) { + SpirvInstruction *I = doExpr(E); + if (I->isRValue()) + return I; + return spvBuilder.createLoad(E->getType(), I, location, range); +} + SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, SourceRange rangeOverride) { // We are trying to load the value here, which is what an LValueToRValue @@ -11364,8 +11373,8 @@ SpirvInstruction *SpirvEmitter::processIntrinsicMul(const CallExpr *callExpr) { uint32_t numRows = 0; if (isMxNMatrix(returnType, &elemType, &numRows)) { llvm::SmallVector rows; - auto *arg0Id = doExpr(arg0); - auto *arg1Id = doExpr(arg1); + auto *arg0Id = doExprEnsuringRValue(arg0, loc, range); + auto *arg1Id = doExprEnsuringRValue(arg1, loc, range); for (uint32_t i = 0; i < numRows; ++i) { auto *scalar = spvBuilder.createCompositeExtract(elemType, arg0Id, {i}, loc, range); @@ -11380,8 +11389,8 @@ SpirvInstruction *SpirvEmitter::processIntrinsicMul(const CallExpr *callExpr) { } // All the following cases require handling arg0 and arg1 expressions first. 
- auto *arg0Id = doExpr(arg0); - auto *arg1Id = doExpr(arg1); + auto *arg0Id = doExprEnsuringRValue(arg0, loc, range); + auto *arg1Id = doExprEnsuringRValue(arg1, loc, range); // mul(scalar, scalar) if (isScalarType(arg0Type) && isScalarType(arg1Type)) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 978e88e4ed..e5daed603d 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -80,6 +80,9 @@ class SpirvEmitter : public ASTConsumer { void doDecl(const Decl *decl); void doStmt(const Stmt *stmt, llvm::ArrayRef attrs = {}); SpirvInstruction *doExpr(const Expr *expr, SourceRange rangeOverride = {}); + SpirvInstruction *doExprEnsuringRValue(const Expr *expr, + SourceLocation location, + SourceRange range); /// Processes the given expression and emits SPIR-V instructions. If the /// result is a GLValue, does an additional load. diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl index 4d04896781..629e7527c3 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl @@ -1,5 +1,8 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +StructuredBuffer buffer_vec; +StructuredBuffer buffer_mat; + /* According to HLSL reference, mul() has the following versions: @@ -448,6 +451,7 @@ void main() { // mul( Mat(Mx1) * Mat(1xN) ) --> Mat(MxN) matrix float1x3 mat1x3; float3x2 mat3x2; + float3x3 mat3x3; float3x1 mat3x1; float1x4 mat1x4; @@ -474,4 +478,25 @@ void main() { // CHECK-NEXT: [[result3:%[0-9]+]] = OpCompositeConstruct %mat3v4float [[row0]] [[row1]] [[row2]] // CHECK-NEXT: OpStore %result3 [[result3]] float3x4 result3 = mul( mat3x1, mat1x4 ); // result is float3x4 matrix + + float3 v3; + +// CHECK: [[matp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_mat3v3float %buffer_mat %int_0 %int_0 +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float [[matp]] +// CHECK: 
[[vec:%[0-9]+]] = OpLoad %v3float %v3 +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result4 = mul(buffer_mat.Load(0), v3); + +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float %mat3x3 +// CHECK: [[vecp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v3float %buffer_vec %int_0 %int_1 +// CHECK: [[vec:%[0-9]+]] = OpLoad %v3float [[vecp]] +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result5 = mul(mat3x3, buffer_vec.Load(1)); + +// CHECK: [[matp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_mat3v3float %buffer_mat %int_0 %int_2 +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float [[matp]] +// CHECK: [[vecp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v3float %buffer_vec %int_0 %int_2 +// CHECK: [[vec:%[0-9]+]] = OpLoad %v3float [[vecp]] +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result6 = mul(buffer_mat.Load(2), buffer_vec.Load(2)); } From d8ef89cdcfc356cd983ef918c3fa324e16ba55ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 30 May 2025 12:53:45 +0200 Subject: [PATCH 49/93] [SPIR-V] Add payload to OpEmitMeshTasksEXT (#7485) This commit fixes the missing payload parameter for the OpEmitMeshTasksEXT instruction. Errors such as the passed variable storage class or type are already tested. 
Fixes #7082 Co-Authored-by: baldurk Co-authored-by: baldurk --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 7 ++++--- .../meshshading.ext.amplification.payload.hlsl | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 92e4c687ca..c005f6576c 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -13021,7 +13021,7 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { : spv::StorageClass::Output; auto *payloadArg = doExpr(args[3]); bool isValid = false; - const VarDecl *param = nullptr; + SpirvInstruction *param = nullptr; if (const auto *implCastExpr = dyn_cast(args[3])) { if (const auto *arg = dyn_cast(implCastExpr->getSubExpr())) { if (const auto *paramDecl = dyn_cast(arg->getDecl())) { @@ -13029,7 +13029,8 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { isValid = declIdMapper.createPayloadStageVars( sigPoint, sc, paramDecl, /*asInput=*/false, paramDecl->getType(), "out.var", &payloadArg); - param = paramDecl; + param = + declIdMapper.getDeclEvalInfo(paramDecl, paramDecl->getLocation()); } } } @@ -13046,7 +13047,7 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { if (featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)) { // for EXT_mesh_shader, create opEmitMeshTasksEXT. - spvBuilder.createEmitMeshTasksEXT(threadX, threadY, threadZ, loc, nullptr, + spvBuilder.createEmitMeshTasksEXT(threadX, threadY, threadZ, loc, param, range); } else { // for NV_mesh_shader, set TaskCountNV = threadX * threadY * threadZ. 
diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl new file mode 100644 index 0000000000..c50ef252e9 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -E main -T as_6_8 -spirv %s -E main -fspv-target-env=vulkan1.1spirv1.4 | FileCheck %s + +struct S { + uint a; +}; + +groupshared S s; +// CHECK: %s = OpVariable {{.*}} TaskPayloadWorkgroupEXT + +[numthreads(1, 1, 1)] +void main() +{ +// CHECK: OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %s + DispatchMesh(1, 1, 1, s); +} From 194b57a37ea4a69f947df1d56ac95aece72ed943 Mon Sep 17 00:00:00 2001 From: Vovan675 <31342770+Vovan675@users.noreply.github.com> Date: Fri, 30 May 2025 17:42:13 +0300 Subject: [PATCH 50/93] Fix markdown in SPIR-V.rst (#7112) Fix markdown syntax in a few places. They shown incorrectly on github --- docs/SPIR-V.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index f3981ba854..771cf0e5a2 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -3967,7 +3967,7 @@ RayQuery Mapping to SPIR-V +---------------------------------------------------+-------------------------------------------------------------------------+ |``.WorldRayDirection`` | ``OpRayQueryGetWorldRayDirectionKHR`` | +---------------------------------------------------+-------------------------------------------------------------------------+ -|``.WorldRayOrigin` | ``OpRayQueryGetWorldRayOriginKHR`` | +|``.WorldRayOrigin`` | ``OpRayQueryGetWorldRayOriginKHR`` | +---------------------------------------------------+-------------------------------------------------------------------------+ Shader Model 6.0+ Wave Intrinsics From 20f291eb6f0092ac3bf7b652769cc0396e4335ac Mon Sep 17 00:00:00 2001 From: wszqkzqk Date: Fri, 30 May 2025 22:43:08 +0800 Subject: [PATCH 51/93] Add LoongArch 64 bit (#7020) 
[LoongArch](https://docs.kernel.org/arch/loongarch/introduction.html) is a new RISC ISA developed by Loongson. There is already a lot of [community support and testing](https://www.phoronix.com/search/LoongArch) about it. Like #4894, this PR adds support for LoongArch 64bit. - Add loongarch64 target to config.guess - Update config-ix.cmake to support loongarch64 - Tested on loongarch64, see the [build log](https://github.com/user-attachments/files/17893008/directx-shader-compiler-1.8.2407-1-loong64-build.log) Signed-off-by: Zhou Qiankang --- autoconf/config.guess | 3 +++ cmake/config-ix.cmake | 2 ++ 2 files changed, 5 insertions(+) diff --git a/autoconf/config.guess b/autoconf/config.guess index cf0541d1f1..62df94c187 100755 --- a/autoconf/config.guess +++ b/autoconf/config.guess @@ -929,6 +929,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + loongarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 4541d08162..226881ad30 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -367,6 +367,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "wasm64") set(LLVM_NATIVE_ARCH WebAssembly) elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") set(LLVM_NATIVE_ARCH RISCV) +elseif (LLVM_NATIVE_ARCH MATCHES "loongarch64") + set(LLVM_NATIVE_ARCH LoongArch) elseif (LLVM_NATIVE_ARCH MATCHES "e2k") set(LLVM_NATIVE_ARCH E2K) else () From 085550991a87e0b7ae3ae988b1da87b73f70a29e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Mon, 2 Jun 2025 13:03:54 +0200 Subject: [PATCH 52/93] [SPIR-V] Fix bool cast on buffers with swizzle (#7497) HLSL resources can store booleans. SPIR-V resources can't. We handle this by using integers in resources, and casting at the interface. The swizzle path was handled a bit differently, and was not going through the common load/store path which handles the cast.
Fixes #7475 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 16 ++++++------ tools/clang/lib/SPIRV/SpirvEmitter.h | 3 ++- .../op.vector.swizzle.buffer-store.hlsl | 26 +++++++++++++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index c005f6576c..e62197094f 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -1281,7 +1281,8 @@ SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, } SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, - SpirvInstruction *info) { + SpirvInstruction *info, + SourceRange rangeOverride) { const auto exprType = expr->getType(); // Do nothing if this is already rvalue @@ -1316,9 +1317,11 @@ SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, return info; } + SourceRange range = + (rangeOverride != SourceRange()) ? rangeOverride : expr->getSourceRange(); SpirvInstruction *loadedInstr = nullptr; - loadedInstr = spvBuilder.createLoad(exprType, info, expr->getExprLoc(), - expr->getSourceRange()); + loadedInstr = + spvBuilder.createLoad(exprType, info, expr->getExprLoc(), range); assert(loadedInstr); // Special-case: According to the SPIR-V Spec: There is no physical size or @@ -7969,15 +7972,12 @@ SpirvInstruction *SpirvEmitter::tryToAssignToVectorElements( } auto *vec1 = doExpr(base, range); - auto *vec1Val = - vec1->isRValue() - ? vec1 - : spvBuilder.createLoad(baseType, vec1, base->getLocStart(), range); + auto *vec1Val = vec1->isRValue() ? 
vec1 : loadIfGLValue(base, vec1, range); auto *shuffle = spvBuilder.createVectorShuffle( baseType, vec1Val, rhs, selectors, lhs->getLocStart(), range); if (!tryToAssignToRWBufferRWTexture(base, shuffle)) - spvBuilder.createStore(vec1, shuffle, lhs->getLocStart(), range); + storeValue(vec1, shuffle, base->getType(), lhs->getLocStart(), range); // TODO: OK, this return value is incorrect for compound assignments, for // which cases we should return lvalues. Should at least emit errors if diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index e5daed603d..0c77f2fc24 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -176,7 +176,8 @@ class SpirvEmitter : public ASTConsumer { /// Overload with pre computed SpirvEvalInfo. /// /// The given expr will not be evaluated again. - SpirvInstruction *loadIfGLValue(const Expr *expr, SpirvInstruction *info); + SpirvInstruction *loadIfGLValue(const Expr *expr, SpirvInstruction *info, + SourceRange rangeOverride = {}); /// Loads the pointer of the aliased-to-variable if the given expression is a /// DeclRefExpr referencing an alias variable. 
See DeclResultIdMapper for diff --git a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl new file mode 100644 index 0000000000..5d77d222f9 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T cs_6_0 -E main -fcgl %s -spirv | FileCheck %s + +RWStructuredBuffer buffer; + +// CHECK-DAG: [[v4_0:%[0-9]+]] = OpConstantComposite %v4uint %uint_0 %uint_0 %uint_0 %uint_0 +// CHECK-DAG: [[v4_1:%[0-9]+]] = OpConstantComposite %v4uint %uint_1 %uint_1 %uint_1 %uint_1 + +[numthreads(1, 1, 1)] +void main() +{ +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4uint %buffer %int_0 %uint_0 +// CHECK: [[load:%[0-9]+]] = OpLoad %v4uint [[ptr]] +// CHECK: [[cast:%[0-9]+]] = OpINotEqual %v4bool [[load]] [[v4_0]] +// CHECK: [[shuf:%[0-9]+]] = OpVectorShuffle %v3bool [[cast]] [[cast]] 0 1 2 +// CHECK: OpStore %a [[shuf]] + bool3 a = buffer[0].xyz; + +// CHECK: [[a:%[0-9]+]] = OpLoad %v3bool %a +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4uint %buffer %int_0 %uint_1 +// CHECK: [[load:%[0-9]+]] = OpLoad %v4uint [[ptr]] +// CHECK: [[cast:%[0-9]+]] = OpINotEqual %v4bool [[load]] [[v4_0]] +// CHECK: [[shuf:%[0-9]+]] = OpVectorShuffle %v4bool [[cast]] [[a]] 4 5 6 3 +// CHECK: [[cast:%[0-9]+]] = OpSelect %v4uint [[shuf]] [[v4_1]] [[v4_0]] +// CHECK: OpStore [[ptr]] [[cast]] + buffer[1].xyz = a; +} From 72149fa8debb29054a36b5a5450ace1a875c4ac8 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Mon, 2 Jun 2025 09:42:00 -0600 Subject: [PATCH 53/93] Eliminates layout mismatch when vk::BufferPointer::Get() result returned from called function. (#7500) Fixes #7460. 
--- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 1 + .../vk.buffer-pointer.rvalue.hlsl | 42 ++++++++++++++++--- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index e62197094f..24774875f7 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -11025,6 +11025,7 @@ SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( if (bufferPointer->isRValue()) { bufferPointer->setRValue(false); bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + bufferPointer->setLayoutRule(spirvOptions.sBufferLayoutRule); return bufferPointer; } diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl index 930770cc16..5132c57000 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -1,4 +1,5 @@ -// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s --check-prefix=CHECK --check-prefix=NOFUN +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 -DFUN %s | FileCheck %s --check-prefix=CHECK --check-prefix=FUN // Issue #7302: implicit object argument of Get() evaluates to rvalue @@ -20,16 +21,45 @@ struct Content // CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 // CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] // CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] -// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] -// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] +// FUN: [[PFCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[CONTENT]] +// FUN: [[PFINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[INT]] +// FUN: [[CONTENT0:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// FUN: 
[[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT0]] +// NOFUN: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// NOFUN: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +Content f() { + return bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get(); +} [numthreads(1, 1, 1)] void main() { +#ifdef FUN + Content c = f(); + c.a = 1; +#else bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +#endif } -// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] -// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] -// CHECK: OpStore [[PTR]] [[I1]] Aligned 4 +// NOFUN: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// NOFUN: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// NOFUN: OpStore [[PTR]] [[I1]] Aligned 4 + +// FUN: [[VAR:%[_0-9A-Za-z]*]] = OpVariable [[PFCONTENT]] Function +// FUN: [[CALL:%[0-9]*]] = OpFunctionCall [[CONTENT]] [[F:%[_0-9A-Za-z]*]] +// FUN: OpStore [[VAR]] [[CALL]] +// FUN: [[PTR:%[0-9]*]] = OpAccessChain [[PFINT]] [[VAR]] [[IO]] +// FUN: OpStore [[PTR]] [[I1]] + +// FUN: [[F]] = OpFunction [[CONTENT]] +// FUN: [[VAR:%[_0-9A-Za-z]*]] = OpVariable [[PFCONTENT]] Function +// FUN: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// FUN: [[CVAL0:%[0-9]*]] = OpLoad [[CONTENT0]] [[BITCAST]] Aligned 4 +// FUN: [[IVAL:%[0-9]*]] = OpCompositeExtract [[INT]] [[CVAL0]] 0 +// FUN: [[CVAL1:%[0-9]*]] = OpCompositeConstruct [[CONTENT]] [[IVAL]] +// FUN: OpStore [[VAR]] [[CVAL1]] +// FUN: [[RET:%[0-9]*]] = OpLoad [[CONTENT]] [[VAR]] +// FUN: OpReturnValue [[RET]] From 48d6e3c635f0ab3ae79580c37003e6faeca6c671 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Mon, 2 Jun 2025 13:29:26 -0400 Subject: [PATCH 54/93] [SPIRV] Get alignment from pointee type for vk::BufferPointer store (#7501) A small mistake for stores to vk::BufferPointer when storing directly to the return value of `Get()`.
We were getting the alignment of the pointer itself, which is always 8 instead of the type pointed to. I tested loads, and it does not have the same problem. Fixes #7459 --- tools/clang/lib/SPIRV/SpirvBuilder.cpp | 2 +- .../clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl | 9 +++++++-- .../clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index a0dcb5420b..d776ba65fb 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -313,7 +313,7 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); uint32_t align, size, stride; std::tie(align, size) = alignmentCalc.getAlignmentAndSize( - address->getAstResultType(), address->getLayoutRule(), llvm::None, + source->getAstResultType(), address->getLayoutRule(), llvm::None, &stride); instruction->setAlignment(align); } diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl index c7d6f0ed2b..cc3b1a0209 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -36,7 +36,8 @@ struct TestPushConstant_t float4 MainPs(void) : SV_Target0 { float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; - return vTest; + float f = vk::BufferPointer(0xdeadbeefull).Get(); + return vTest+f; } // CHECK: [[FUN]] = OpFunction @@ -44,5 +45,9 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] // CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 -// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: [[TEMP_PTR:%[_0-9A-Za-z]*]] = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_float 
%ulong_3735928559 +// CHECK: [[LD:%[_0-9A-Za-z]*]] = OpLoad %float [[TEMP_PTR]] Aligned 4 +// CHECK: [[CONSTRUCT:%[_0-9A-Za-z]*]] = OpCompositeConstruct [[V4FLOAT]] [[LD]] [[LD]] [[LD]] [[LD]] +// CHECK: [[ADD:%[_0-9A-Za-z]*]] = OpFAdd [[V4FLOAT]] [[X4]] [[CONSTRUCT]] +// CHECK: OpStore [[OUT]] [[ADD]] // CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl index b2efd02cbd..843815a4a0 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -40,6 +40,7 @@ float4 MainPs(void) : SV_Target0 { float4 vTest = float4(1.0,0.0,0.0,0.0); g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + vk::BufferPointer(0xdeadbeefull).Get() = 4.5f; return vTest; } @@ -48,5 +49,7 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] // CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] // CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: [[TEMP_PTR:%[_0-9A-Za-z]*]] = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_float %ulong_3735928559 +// CHECK: OpStore [[TEMP_PTR]] %float_4_5 Aligned 4 // CHECK: OpStore [[OUT]] [[CV4FLOAT]] // CHECK: OpFunctionEnd From 8a8b29f967b5925a970949984442b3783d730551 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Tue, 3 Jun 2025 10:22:13 -0600 Subject: [PATCH 55/93] [spirv] AMD work graphs extension (#7353) Enables work graphs for SPIR-V target, based on AMD_shader_enqueue extension. Closes #5960. 
--- tools/clang/include/clang/AST/HlslTypes.h | 5 + .../clang/Basic/DiagnosticSemaKinds.td | 2 + .../include/clang/SPIRV/FeatureManager.h | 1 + .../clang/include/clang/SPIRV/SpirvBuilder.h | 20 + .../clang/include/clang/SPIRV/SpirvContext.h | 53 ++ .../include/clang/SPIRV/SpirvInstruction.h | 146 ++++++ tools/clang/include/clang/SPIRV/SpirvType.h | 23 + .../clang/include/clang/SPIRV/SpirvVisitor.h | 6 + tools/clang/lib/AST/HlslTypes.cpp | 40 ++ tools/clang/lib/SPIRV/CapabilityVisitor.cpp | 64 ++- tools/clang/lib/SPIRV/DebugTypeVisitor.cpp | 11 + tools/clang/lib/SPIRV/DeclResultIdMapper.cpp | 35 +- tools/clang/lib/SPIRV/EmitVisitor.cpp | 207 +++++++- tools/clang/lib/SPIRV/EmitVisitor.h | 15 +- tools/clang/lib/SPIRV/FeatureManager.cpp | 3 + tools/clang/lib/SPIRV/GlPerVertex.cpp | 3 + tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 10 + tools/clang/lib/SPIRV/PreciseVisitor.cpp | 3 + tools/clang/lib/SPIRV/SpirvBuilder.cpp | 72 ++- tools/clang/lib/SPIRV/SpirvContext.cpp | 16 + tools/clang/lib/SPIRV/SpirvEmitter.cpp | 452 +++++++++++++++++- tools/clang/lib/SPIRV/SpirvEmitter.h | 44 ++ tools/clang/lib/SPIRV/SpirvInstruction.cpp | 57 ++- tools/clang/lib/SPIRV/SpirvType.cpp | 4 + tools/clang/lib/Sema/SemaHLSL.cpp | 4 + .../test/CodeGenSPIRV/bezier.hull.hlsl2spv | 144 +++--- .../hs.const.output-patch.out.hlsl | 6 +- .../CodeGenSPIRV/node.barrier.compute.hlsl | 15 + .../CodeGenSPIRV/node.barrier.memory-arg.hlsl | 60 +++ .../CodeGenSPIRV/node.barrier.object-arg.hlsl | 213 +++++++++ .../node.broadcasting.no-input.hlsl | 15 + .../node.coalescing.num-threads.hlsl | 16 + .../test/CodeGenSPIRV/node.dispatch-grid.hlsl | 28 ++ .../CodeGenSPIRV/node.empty-node-input.hlsl | 28 ++ .../node.finished-cross-group-sharing.hlsl | 32 ++ .../node.get-input-record-count.hlsl | 25 + .../node.get-node-output-record.multiple.hlsl | 72 +++ .../node.get-remaining-recursion-levels.hlsl | 26 + .../node.group-shared.barrier.hlsl | 18 + .../test/CodeGenSPIRV/node.group-shared.hlsl | 24 + 
.../node.increment-output-count.group.hlsl | 22 + .../node.increment-output-count.thread.hlsl | 22 + ...node.input-record.dispatch-grid.array.hlsl | 26 + ...ode.input-record.dispatch-grid.nested.hlsl | 32 ++ .../CodeGenSPIRV/node.max-dispatch-grid.hlsl | 30 ++ .../test/CodeGenSPIRV/node.max-records.hlsl | 45 ++ .../test/CodeGenSPIRV/node.member.read.hlsl | 150 ++++++ .../CodeGenSPIRV/node.member.read.types.hlsl | 193 ++++++++ .../test/CodeGenSPIRV/node.member.write.hlsl | 88 ++++ .../node.member.write.matrix.hlsl | 123 +++++ .../CodeGenSPIRV/node.member.write.types.hlsl | 150 ++++++ tools/clang/test/CodeGenSPIRV/node.mesh.hlsl | 88 ++++ .../CodeGenSPIRV/node.output-complete.hlsl | 33 ++ .../node.output.is-valid.empty.hlsl | 19 + .../CodeGenSPIRV/node.output.is-valid.hlsl | 24 + .../clang/test/CodeGenSPIRV/node.renamed.hlsl | 23 + .../test/CodeGenSPIRV/node.share-input.hlsl | 42 ++ .../test/CodeGenSPIRV/node.sparse-nodes.hlsl | 141 ++++++ .../CodeGenSPIRV/node.thread.num-threads.hlsl | 15 + .../node.thread.num-threads.none.hlsl | 15 + .../vk.attribute.image-format.hlsl | 6 - 61 files changed, 3193 insertions(+), 112 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/node.barrier.compute.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.barrier.memory-arg.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl create mode 100644 
tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.max-records.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.member.read.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.member.write.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.mesh.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.renamed.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.share-input.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 58d2d916b1..43c1effdb8 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ 
b/tools/clang/include/clang/AST/HlslTypes.h @@ -485,7 +485,10 @@ bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); bool IsHLSLRWNodeInputRecordType(clang::QualType type); bool IsHLSLRONodeInputRecordType(clang::QualType type); +bool IsHLSLDispatchNodeInputRecordType(clang::QualType type); +bool IsHLSLNodeRecordArrayType(clang::QualType type); bool IsHLSLNodeOutputType(clang::QualType type); +bool IsHLSLEmptyNodeRecordType(clang::QualType type); DXIL::NodeIOKind GetNodeIOType(clang::QualType type); @@ -495,6 +498,8 @@ bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT); bool IsHLSLBuiltinRayAttributeStruct(clang::QualType QT); bool IsHLSLAggregateType(clang::QualType type); clang::QualType GetHLSLResourceResultType(clang::QualType type); +clang::QualType GetHLSLNodeIOResultType(clang::ASTContext &astContext, + clang::QualType type); unsigned GetHLSLResourceTemplateUInt(clang::QualType type); bool IsIncompleteHLSLResourceArrayType(clang::ASTContext &context, clang::QualType type); diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 0d98792688..cbd9412566 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8060,6 +8060,8 @@ def err_hlsl_vk_pointer_cast_alignment: Error< "Vulkan buffer pointer cannot be cast to greater alignment">; def err_hlsl_vk_static_pointer_cast_type: Error< "vk::static_pointer_cast() content type must be base class of argument's content type">; +def warn_spirv_node_shaders_experimental : Warning< + "SPIR-V implementation of node shaders is experimental and subject to change">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 3c1871df37..94dc5bf1ab 100644 --- 
a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -57,6 +57,7 @@ enum class Extension { KHR_ray_query, EXT_shader_image_int64, KHR_physical_storage_buffer, + AMD_shader_enqueue, KHR_vulkan_memory_model, NV_compute_shader_derivatives, KHR_compute_shader_derivatives, diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 2da14dab54..465f7313f1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -434,6 +434,25 @@ class SpirvBuilder { QualType resultType, NonSemanticDebugPrintfInstructions instId, llvm::ArrayRef operands, SourceLocation); + SpirvInstruction *createIsNodePayloadValid(SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex, + SourceLocation); + + SpirvInstruction *createNodePayloadArrayLength(SpirvInstruction *payloadArray, + SourceLocation); + + SpirvInstruction *createAllocateNodePayloads(QualType resultType, + spv::Scope allocationScope, + SpirvInstruction *shaderIndex, + SpirvInstruction *recordCount, + SourceLocation); + + void createEnqueueOutputNodePayloads(SpirvInstruction *payload, + SourceLocation); + + SpirvInstruction *createFinishWritingNodePayload(SpirvInstruction *payload, + SourceLocation); + /// \brief Creates an OpMemoryBarrier or OpControlBarrier instruction with the /// given flags. If execution scope (exec) is provided, an OpControlBarrier /// is created; otherwise an OpMemoryBarrier is created. 
@@ -763,6 +782,7 @@ class SpirvBuilder { llvm::ArrayRef constituents, bool specConst = false); SpirvConstant *getConstantNull(QualType); + SpirvConstant *getConstantString(llvm::StringRef str, bool specConst = false); SpirvUndef *getUndef(QualType); SpirvString *createString(llvm::StringRef str); diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index 50ff77d4b4..8e0458e731 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -98,6 +98,21 @@ struct RuntimeArrayTypeMapInfo { } }; +// Provides DenseMapInfo for NodePayloadArrayType so we can create a DenseSet of +// node payload array types. +struct NodePayloadArrayTypeMapInfo { + static inline NodePayloadArrayType *getEmptyKey() { return nullptr; } + static inline NodePayloadArrayType *getTombstoneKey() { return nullptr; } + static unsigned getHashValue(const NodePayloadArrayType *Val) { + return llvm::hash_combine(Val->getElementType(), Val->getNodeDecl()); + } + static bool isEqual(const NodePayloadArrayType *LHS, + const NodePayloadArrayType *RHS) { + // Either both are null, or both should have the same underlying type. + return (LHS == RHS) || (LHS && RHS && *LHS == *RHS); + } +}; + // Provides DenseMapInfo for ImageType so we can create a DenseSet of // image types. 
struct ImageTypeMapInfo { @@ -270,6 +285,9 @@ class SpirvContext { const RuntimeArrayType * getRuntimeArrayType(const SpirvType *elemType, llvm::Optional arrayStride); + const NodePayloadArrayType * + getNodePayloadArrayType(const SpirvType *elemType, + const ParmVarDecl *nodeDecl); const StructType *getStructType( llvm::ArrayRef fields, llvm::StringRef name, @@ -346,6 +364,7 @@ class SpirvContext { bool isDS() const { return curShaderModelKind == ShaderModelKind::Domain; } bool isCS() const { return curShaderModelKind == ShaderModelKind::Compute; } bool isLib() const { return curShaderModelKind == ShaderModelKind::Library; } + bool isNode() const { return curShaderModelKind == ShaderModelKind::Node; } bool isRay() const { return curShaderModelKind >= ShaderModelKind::RayGeneration && curShaderModelKind <= ShaderModelKind::Callable; @@ -437,6 +456,31 @@ class SpirvContext { instructionsWithLoweredType.end(); } + void registerDispatchGridIndex(const RecordDecl *decl, unsigned index) { + auto iter = dispatchGridIndices.find(decl); + if (iter == dispatchGridIndices.end()) { + dispatchGridIndices[decl] = index; + } + } + + llvm::Optional getDispatchGridIndex(const RecordDecl *decl) { + auto iter = dispatchGridIndices.find(decl); + if (iter != dispatchGridIndices.end()) { + return iter->second; + } + return llvm::None; + } + + void registerNodeDeclPayloadType(const NodePayloadArrayType *type, + const ParmVarDecl *decl) { + nodeDecls[decl] = type; + } + + const NodePayloadArrayType *getNodeDeclPayloadType(const ParmVarDecl *decl) { + auto iter = nodeDecls.find(decl); + return iter == nodeDecls.end() ? nullptr : iter->second; + } + private: /// \brief The allocator used to create SPIR-V entity objects. 
/// @@ -481,6 +525,8 @@ class SpirvContext { llvm::DenseSet arrayTypes; llvm::DenseSet runtimeArrayTypes; + llvm::DenseSet + nodePayloadArrayTypes; llvm::SmallVector structTypes; llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; @@ -507,6 +553,9 @@ class SpirvContext { llvm::StringMap debugInfo; SpirvDebugInstruction *currentLexicalScope; + // Mapping from node input record declarations to dispatch grid indices. + llvm::MapVector dispatchGridIndices; + // Mapping from SPIR-V type to debug type instruction. // The purpose is not to generate several DebugType* instructions for the same // type if the type is used for several variables. @@ -538,6 +587,10 @@ class SpirvContext { // Set of instructions that already have lowered SPIR-V types. llvm::DenseSet instructionsWithLoweredType; + + // Mapping from shader entry function parameter declaration to node payload + // array type. + llvm::MapVector nodeDecls; }; } // end namespace spirv diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index 20cd57525c..52f4128a6c 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -67,6 +67,7 @@ class SpirvInstruction { IK_ConstantInteger, IK_ConstantFloat, IK_ConstantComposite, + IK_ConstantString, IK_ConstantNull, // Pointer <-> uint conversions. @@ -165,6 +166,13 @@ class SpirvInstruction { IK_DebugTypeMember, IK_DebugTypeTemplate, IK_DebugTypeTemplateParameter, + + // For workgraph instructions + IK_IsNodePayloadValid, + IK_NodePayloadArrayLength, + IK_AllocateNodePayloads, + IK_EnqueueNodePayloads, + IK_FinishWritingNodePayload, }; // All instruction classes should include a releaseMemory method.
@@ -440,9 +448,13 @@ class SpirvExecutionMode : public SpirvExecutionModeBase { bool invokeVisitor(Visitor *v) override; + SpirvFunction *getEntryPoint() const { return entryPoint; } + spv::ExecutionMode getExecutionMode() const { return execMode; } llvm::ArrayRef getParams() const { return params; } private: + SpirvFunction *entryPoint; + spv::ExecutionMode execMode; llvm::SmallVector params; }; @@ -1056,6 +1068,119 @@ class SpirvBarrier : public SpirvInstruction { llvm::Optional executionScope; }; +/// \brief OpIsNodePayloadValidAMDX instruction +class SpirvIsNodePayloadValid : public SpirvInstruction { +public: + SpirvIsNodePayloadValid(QualType resultType, SourceLocation loc, + SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvIsNodePayloadValid) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_IsNodePayloadValid; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayloadArray() { return payloadArray; } + SpirvInstruction *getNodeIndex() { return nodeIndex; } + +private: + SpirvInstruction *payloadArray; + SpirvInstruction *nodeIndex; +}; + +/// \brief OpNodePayloadArrayLengthAMDX instruction +class SpirvNodePayloadArrayLength : public SpirvInstruction { +public: + SpirvNodePayloadArrayLength(QualType resultType, SourceLocation loc, + SpirvInstruction *payloadArray); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvNodePayloadArrayLength) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_NodePayloadArrayLength; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayloadArray() { return payloadArray; } + +private: + SpirvInstruction *payloadArray; +}; + +/// \brief OpAllocateNodePayloadsAMDX instruction +class SpirvAllocateNodePayloads : public SpirvInstruction { +public: + SpirvAllocateNodePayloads(QualType resultType, SourceLocation loc, + 
spv::Scope allocationScope, + SpirvInstruction *shaderIndex, + SpirvInstruction *recordCount); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvAllocateNodePayloads) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_AllocateNodePayloads; + } + + bool invokeVisitor(Visitor *v) override; + + spv::Scope getAllocationScope() { return allocationScope; } + SpirvInstruction *getShaderIndex() { return shaderIndex; } + SpirvInstruction *getRecordCount() { return recordCount; } + +private: + spv::Scope allocationScope; + SpirvInstruction *shaderIndex; + SpirvInstruction *recordCount; +}; + +/// \brief OpEnqueueNodePayloadsAMDX instruction +class SpirvEnqueueNodePayloads : public SpirvInstruction { +public: + SpirvEnqueueNodePayloads(SourceLocation loc, SpirvInstruction *payload); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvEnqueueNodePayloads) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_EnqueueNodePayloads; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayload() { return payload; } + +private: + SpirvInstruction *payload; +}; + +/// \brief OpFinishWritingNodePayloadAMDX instruction +class SpirvFinishWritingNodePayload : public SpirvInstruction { +public: + SpirvFinishWritingNodePayload(QualType resultType, SourceLocation loc, + SpirvInstruction *payload); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvFinishWritingNodePayload) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_FinishWritingNodePayload; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayload() { return payload; } + +private: + SpirvInstruction *payload; +}; + /// \brief Represents SPIR-V binary operation instructions.
/// /// This class includes: @@ -1352,6 +1477,27 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConstantString : public SpirvConstant { +public: + SpirvConstantString(llvm::StringRef stringLiteral, bool isSpecConst = false); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConstantString) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConstantString; + } + + bool invokeVisitor(Visitor *v) override; + + bool operator==(const SpirvConstantString &that) const; + + llvm::StringRef getString() const { return str; } + +private: + std::string str; +}; + class SpirvConvertPtrToU : public SpirvInstruction { public: SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index d39fc6943b..7966e3e0de 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -51,6 +51,7 @@ class SpirvType { TK_SampledImage, TK_Array, TK_RuntimeArray, + TK_NodePayloadArrayAMD, TK_Struct, TK_Pointer, TK_ForwardPointer, @@ -291,6 +292,26 @@ class RuntimeArrayType : public SpirvType { llvm::Optional stride; }; +class NodePayloadArrayType : public SpirvType { +public: + NodePayloadArrayType(const SpirvType *elemType, const ParmVarDecl *decl) + : SpirvType(TK_NodePayloadArrayAMD), elementType(elemType), + nodeDecl(decl) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_NodePayloadArrayAMD; + } + + bool operator==(const NodePayloadArrayType &that) const; + + const SpirvType *getElementType() const { return elementType; } + const ParmVarDecl *getNodeDecl() const { return nodeDecl; } + +private: + const SpirvType *elementType; + const ParmVarDecl *nodeDecl; +}; + // The StructType is the lowered type that best represents what a structure type // is in SPIR-V. 
Contains all necessary information for properly emitting a // SPIR-V structure type. @@ -627,6 +648,8 @@ bool SpirvType::isOrContainsType(const SpirvType *type) { return isOrContainsType(pointerType->getPointeeType()); if (const auto *raType = dyn_cast(type)) return isOrContainsType(raType->getElementType()); + if (const auto *npaType = dyn_cast(type)) + return isOrContainsType(npaType->getElementType()); if (const auto *imgType = dyn_cast(type)) return isOrContainsType(imgType->getSampledType()); if (const auto *sampledImageType = dyn_cast(type)) diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index fef06da503..a6de26c807 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -82,6 +82,11 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvAccessChain) DEFINE_VISIT_METHOD(SpirvAtomic) DEFINE_VISIT_METHOD(SpirvBarrier) + DEFINE_VISIT_METHOD(SpirvIsNodePayloadValid) + DEFINE_VISIT_METHOD(SpirvNodePayloadArrayLength) + DEFINE_VISIT_METHOD(SpirvAllocateNodePayloads) + DEFINE_VISIT_METHOD(SpirvEnqueueNodePayloads) + DEFINE_VISIT_METHOD(SpirvFinishWritingNodePayload) DEFINE_VISIT_METHOD(SpirvBinaryOp) DEFINE_VISIT_METHOD(SpirvBitFieldExtract) DEFINE_VISIT_METHOD(SpirvBitFieldInsert) @@ -89,6 +94,7 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantInteger) DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) + DEFINE_VISIT_METHOD(SpirvConstantString) DEFINE_VISIT_METHOD(SpirvConstantNull) DEFINE_VISIT_METHOD(SpirvConvertPtrToU) DEFINE_VISIT_METHOD(SpirvConvertUToPtr) diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 017f0f7218..7693c065be 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -587,6 +587,12 @@ bool IsHLSLRONodeInputRecordType(clang::QualType type) { static_cast(DXIL::NodeIOFlags::Input); } +bool 
IsHLSLDispatchNodeInputRecordType(clang::QualType type) { + return IsHLSLNodeInputType(type) && + (static_cast(GetNodeIOType(type)) & + static_cast(DXIL::NodeIOFlags::DispatchRecord)) != 0; +} + bool IsHLSLNodeOutputType(clang::QualType type) { return (static_cast(GetNodeIOType(type)) & (static_cast(DXIL::NodeIOFlags::Output) | @@ -594,6 +600,23 @@ bool IsHLSLNodeOutputType(clang::QualType type) { static_cast(DXIL::NodeIOFlags::Output); } +bool IsHLSLNodeRecordArrayType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "ThreadNodeOutputRecords" || name == "GroupNodeOutputRecords" || + name == "GroupNodeInputRecords" || name == "RWGroupNodeInputRecords" || + name == "EmptyNodeInput") + return true; + } + return false; +} + +bool IsHLSLEmptyNodeRecordType(clang::QualType type) { + return (static_cast(GetNodeIOType(type)) & + static_cast(DXIL::NodeIOFlags::EmptyRecord)) == + static_cast(DXIL::NodeIOFlags::EmptyRecord); +} + bool IsHLSLStructuredBufferType(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; @@ -836,6 +859,23 @@ QualType GetHLSLResourceResultType(QualType type) { return HandleFieldDecl->getType(); } +QualType GetHLSLNodeIOResultType(ASTContext &astContext, QualType type) { + if (hlsl::IsHLSLEmptyNodeRecordType(type)) { + RecordDecl *RD = astContext.buildImplicitRecord(""); + RD->startDefinition(); + RD->completeDefinition(); + return astContext.getRecordType(RD); + } else if (hlsl::IsHLSLNodeType(type)) { + const RecordType *recordType = type->getAs(); + if (const auto *templateDecl = + dyn_cast(recordType->getDecl())) { + const auto &templateArgs = templateDecl->getTemplateArgs(); + return templateArgs[0].getAsType(); + } + } + return type; +} + unsigned GetHLSLResourceTemplateUInt(clang::QualType type) { const ClassTemplateSpecializationDecl *templateDecl = cast( diff --git 
a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 43ab2540b4..c8444a3b81 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -122,6 +122,12 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } addCapabilityForType(raType->getElementType(), loc, sc); } + // Node payload array also requires additional capability. + else if (const auto *npaType = dyn_cast(type)) { + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addCapabilityForType(npaType->getElementType(), loc, sc); + } // Image types else if (const auto *imageType = dyn_cast(type)) { switch (imageType->getDimension()) { @@ -254,6 +260,19 @@ bool CapabilityVisitor::visit(SpirvDecoration *decor) { addCapability(spv::Capability::FragmentBarycentricKHR); break; } + case spv::Decoration::NodeSharesPayloadLimitsWithAMDX: + case spv::Decoration::NodeMaxPayloadsAMDX: + case spv::Decoration::TrackFinishWritingAMDX: + case spv::Decoration::PayloadNodeNameAMDX: + case spv::Decoration::PayloadNodeBaseIndexAMDX: + case spv::Decoration::PayloadNodeSparseArrayAMDX: + case spv::Decoration::PayloadNodeArraySizeAMDX: + case spv::Decoration::PayloadDispatchIndirectAMDX: { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + break; + } // Capabilities needed for built-ins case spv::Decoration::BuiltIn: { AddVulkanMemoryModelForVolatile(decor, loc); @@ -532,8 +551,14 @@ bool CapabilityVisitor::visitInstruction(SpirvInstruction *instr) { addCapability(spv::Capability::GroupNonUniformQuad); break; case spv::Op::OpVariable: { - if (spvOptions.enableReflect && - !cast(instr)->getHlslUserType().empty()) { + auto var = cast(instr); + auto storage = var->getStorageClass(); + if (storage == 
spv::StorageClass::NodePayloadAMDX) { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + } + if (spvOptions.enableReflect && !var->getHlslUserType().empty()) { addExtension(Extension::GOOGLE_user_type, "HLSL User Type", loc); addExtension(Extension::GOOGLE_hlsl_functionality1, "HLSL User Type", loc); @@ -577,6 +602,28 @@ bool CapabilityVisitor::visitInstruction(SpirvInstruction *instr) { } break; } + case spv::Op::OpConstantStringAMDX: + case spv::Op::OpSpecConstantStringAMDX: + case spv::Op::OpAllocateNodePayloadsAMDX: + case spv::Op::OpEnqueueNodePayloadsAMDX: + case spv::Op::OpIsNodePayloadValidAMDX: + case spv::Op::OpFinishWritingNodePayloadAMDX: { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + break; + } + case spv::Op::OpControlBarrier: + case spv::Op::OpMemoryBarrier: { + auto barrier = cast(instr); + if ((bool)(barrier->getMemorySemantics() & + spv::MemorySemanticsMask::OutputMemoryKHR)) { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "NODE_OUTPUT_MEMORY", + loc); + addCapability(spv::Capability::VulkanMemoryModel, loc); + } + break; + } default: break; @@ -642,6 +689,19 @@ bool CapabilityVisitor::visit(SpirvExecutionModeBase *execMode) { SourceLocation entryPointSourceLocation = execMode->getEntryPoint()->getSourceLocation(); switch (executionMode) { + case spv::ExecutionMode::CoalescingAMDX: + case spv::ExecutionMode::MaxNodeRecursionAMDX: + case spv::ExecutionMode::StaticNumWorkgroupsAMDX: + case spv::ExecutionMode::MaxNumWorkgroupsAMDX: + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", + execModeSourceLocation); + addCapability(spv::Capability::ShaderEnqueueAMDX, execModeSourceLocation); + 
addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", + execModeSourceLocation); + break; + case spv::ExecutionMode::SubgroupSize: + addCapability(spv::Capability::SubgroupDispatch, execModeSourceLocation); + break; case spv::ExecutionMode::PostDepthCoverage: addCapability(spv::Capability::SampleMaskPostDepthCoverage, entryPointSourceLocation); diff --git a/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp b/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp index 058e7b6255..24fab092cc 100644 --- a/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp @@ -356,6 +356,17 @@ SpirvDebugType *DebugTypeVisitor::lowerToDebugType(const SpirvType *spirvType) { debugType = spvContext.getDebugTypeArray(spirvType, elemDebugType, counts); break; } + case SpirvType::TK_NodePayloadArrayAMD: { + auto *arrType = dyn_cast(spirvType); + SpirvDebugInstruction *elemDebugType = + lowerToDebugType(arrType->getElementType()); + + llvm::SmallVector counts; + counts.push_back(0u); + + debugType = spvContext.getDebugTypeArray(spirvType, elemDebugType, counts); + break; + } case SpirvType::TK_Vector: { auto *vecType = dyn_cast(spirvType); SpirvDebugInstruction *elemDebugType = diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index de73d5e417..9d0d8f51a3 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -467,6 +467,10 @@ hlsl::DxilParamInputQual deduceParamQual(const DeclaratorDecl *decl, if (decl->hasAttr()) return hlsl::DxilParamInputQual::InPayload; + if (hlsl::IsHLSLNodeType(type)) { + return hlsl::DxilParamInputQual::NodeIO; + } + return asInput ? 
hlsl::DxilParamInputQual::In : hlsl::DxilParamInputQual::Out; } @@ -475,6 +479,9 @@ hlsl::DxilParamInputQual deduceParamQual(const DeclaratorDecl *decl, const hlsl::SigPoint *deduceSigPoint(const DeclaratorDecl *decl, bool asInput, const hlsl::ShaderModel::Kind kind, bool forPCF) { + if (kind == hlsl::ShaderModel::Kind::Node) { + return hlsl::SigPoint::GetSigPoint(hlsl::SigPoint::Kind::CSIn); + } return hlsl::SigPoint::GetSigPoint(hlsl::SigPointFromInputQual( deduceParamQual(decl, asInput), kind, forPCF)); } @@ -2158,6 +2165,8 @@ bool DeclResultIdMapper::assignLocations( llvm::DenseSet *stageVariableLocationInfo) { for (const auto *var : vars) { + if (hlsl::IsHLSLNodeType(var->getAstType())) + continue; auto locCount = var->getLocationCount(); uint32_t location = nextLocs(locCount); spvBuilder.decorateLocation(var->getSpirvInstr(), location); @@ -3489,7 +3498,9 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable( StageVar stageVar( stageVarData.sigPoint, *stageVarData.semantic, builtinAttr, evalType, // For HS/DS/GS, we have already stripped the outmost arrayness on type. - getLocationAndComponentCount(astContext, stageVarData.type)); + hlsl::IsHLSLNodeInputType(stageVarData.type) + ? LocationAndComponent({0, 0, false}) + : getLocationAndComponentCount(astContext, stageVarData.type)); const auto name = stageVarData.namePrefix.str() + "." 
+ stageVar.getSemanticStr(); SpirvVariable *varInstr = createSpirvStageVar( @@ -3708,6 +3719,22 @@ bool DeclResultIdMapper::createStageVars(StageVarDataBundle &stageVarData, stageVarData.semantic = &thisSemantic; } + if (hlsl::IsHLSLNodeType(stageVarData.type)) { + // Hijack the notion of semantic to use createSpirvInterfaceVariable + StringRef str = stageVarData.decl->getName(); + stageVarData.semantic->str = stageVarData.semantic->name = str; + stageVarData.semantic->semantic = hlsl::Semantic::GetArbitrary(); + SpirvVariable *varInstr = createSpirvInterfaceVariable(stageVarData); + if (!varInstr) { + return false; + } + + *value = hlsl::IsHLSLNodeInputType(stageVarData.type) + ? varInstr + : loadShaderInputVariable(varInstr, stageVarData); + return true; + } + if (stageVarData.semantic->isValid() && // Structs with attached semantics will be handled later. !stageVarData.type->isStructureType()) { @@ -4161,6 +4188,8 @@ SpirvVariable *DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn, case spv::BuiltIn::GlobalInvocationId: case spv::BuiltIn::WorkgroupId: case spv::BuiltIn::LocalInvocationIndex: + case spv::BuiltIn::RemainingRecursionLevelsAMDX: + case spv::BuiltIn::ShaderIndexAMDX: sc = spv::StorageClass::Input; break; case spv::BuiltIn::TaskCountNV: @@ -4196,7 +4225,9 @@ SpirvVariable *DeclResultIdMapper::createSpirvStageVar( const auto type = stageVar->getAstType(); const auto isPrecise = decl->hasAttr(); auto isNointerp = decl->hasAttr(); - spv::StorageClass sc = getStorageClassForSigPoint(sigPoint); + spv::StorageClass sc = hlsl::IsHLSLNodeInputType(stageVar->getAstType()) + ? 
spv::StorageClass::NodePayloadAMDX + : getStorageClassForSigPoint(sigPoint); if (sc == spv::StorageClass::Max) return 0; stageVar->setStorageClass(sc); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 7d39b0ec1f..8de0262ae6 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -938,6 +938,73 @@ bool EmitVisitor::visit(SpirvBarrier *inst) { curInst.push_back(memoryScopeId); curInst.push_back(memorySemanticsId); finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvIsNodePayloadValid *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back( + getOrAssignResultId(inst->getPayloadArray())); + curInst.push_back( + getOrAssignResultId(inst->getNodeIndex())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvNodePayloadArrayLength *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back( + getOrAssignResultId(inst->getPayloadArray())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvAllocateNodePayloads *inst) { + const uint32_t allocationScopeId = typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getAllocationScope())), + context.getUIntType(32), /*isSpecConst */ false); + + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(allocationScopeId); + curInst.push_back( + getOrAssignResultId(inst->getRecordCount())); + curInst.push_back( + 
getOrAssignResultId(inst->getShaderIndex())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvEnqueueNodePayloads *inst) { + initInstruction(inst); + curInst.push_back(getOrAssignResultId(inst->getPayload())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvFinishWritingNodePayload *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPayload())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); return true; } @@ -1011,6 +1078,13 @@ bool EmitVisitor::visit(SpirvConstantComposite *inst) { return true; } +bool EmitVisitor::visit(SpirvConstantString *inst) { + typeHandler.getOrCreateConstant(inst); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvConstantNull *inst) { typeHandler.getOrCreateConstant(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -2078,6 +2152,8 @@ uint32_t EmitTypeHandler::getOrCreateConstant(SpirvConstant *inst) { return getOrCreateConstantNull(constNull); } else if (auto *constBool = dyn_cast(inst)) { return getOrCreateConstantBool(constBool); + } else if (auto *constString = dyn_cast(inst)) { + return getOrCreateConstantString(constString); } else if (auto *constUndef = dyn_cast(inst)) { return getOrCreateUndef(constUndef); } @@ -2116,6 +2192,36 @@ uint32_t EmitTypeHandler::getOrCreateConstantBool(SpirvConstantBoolean *inst) { return inst->getResultId(); } +uint32_t EmitTypeHandler::getOrCreateConstantString(SpirvConstantString *inst) { + const StringRef str = inst->getString(); + const bool isSpecConst = 
inst->isSpecConstant(); + + if (!isSpecConst && + emittedConstantStrings.find(str) != emittedConstantStrings.end()) { + // Already emitted this constant value. Reuse. + inst->setResultId(emittedConstantStrings[str]->getResultId()); + } else if (isSpecConst && emittedSpecConstantInstructions.find(inst) != + emittedSpecConstantInstructions.end()) { + // We've already emitted this SpecConstant. Reuse. + return inst->getResultId(); + } else { + // Constant wasn't emitted in the past. + const auto &words = string::encodeSPIRVString(inst->getString()); + initTypeInstruction(inst->getopcode()); + curTypeInst.push_back(getOrAssignResultId(inst)); + curTypeInst.insert(curTypeInst.end(), words.begin(), words.end()); + finalizeTypeInstruction(); + // Remember this constant for the future (if not a spec constant) + if (isSpecConst) { + emittedSpecConstantInstructions.insert(inst); + } else { + emittedConstantStrings[str] = inst; + } + } + + return inst->getResultId(); +} + uint32_t EmitTypeHandler::getOrCreateConstantNull(SpirvConstantNull *inst) { auto found = std::find_if(emittedConstantNulls.begin(), emittedConstantNulls.end(), @@ -2536,6 +2642,84 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { if (stride.hasValue()) emitDecoration(id, spv::Decoration::ArrayStride, {stride.getValue()}); } + // NodePayloadArray types + else if (const auto *npaType = dyn_cast(type)) { + const uint32_t elemTypeId = emitType(npaType->getElementType()); + initTypeInstruction(spv::Op::OpTypeNodePayloadArrayAMDX); + curTypeInst.push_back(id); + curTypeInst.push_back(elemTypeId); + finalizeTypeInstruction(); + + // Emit decorations + const ParmVarDecl *nodeDecl = npaType->getNodeDecl(); + if (hlsl::IsHLSLNodeOutputType(nodeDecl->getType())) { + StringRef name = nodeDecl->getName(); + unsigned index = 0; + if (auto nodeID = nodeDecl->getAttr()) { + name = nodeID->getName(); + index = nodeID->getArrayIndex(); + } + + auto *str = new (context) SpirvConstantString(name); + uint32_t 
nodeName = getOrCreateConstantString(str); + emitDecoration(id, spv::Decoration::PayloadNodeNameAMDX, {nodeName}, + llvm::None, true); + if (index) { + uint32_t baseIndex = getOrCreateConstantInt( + llvm::APInt(32, index), context.getUIntType(32), false); + emitDecoration(id, spv::Decoration::PayloadNodeBaseIndexAMDX, + {baseIndex}, llvm::None, true); + } + } + + uint32_t maxRecords; + if (const auto *attr = nodeDecl->getAttr()) { + maxRecords = getOrCreateConstantInt(llvm::APInt(32, attr->getMaxCount()), + context.getUIntType(32), false); + } else { + maxRecords = getOrCreateConstantInt(llvm::APInt(32, 1), + context.getUIntType(32), false); + } + emitDecoration(id, spv::Decoration::NodeMaxPayloadsAMDX, {maxRecords}, + llvm::None, true); + + if (const auto *attr = nodeDecl->getAttr()) { + const DeclContext *dc = nodeDecl->getParentFunctionOrMethod(); + if (const auto *funDecl = dyn_cast_or_null(dc)) { + IdentifierInfo *ii = attr->getName(); + bool alreadyExists = false; + for (auto *paramDecl : funDecl->params()) { + if (paramDecl->getIdentifier() == ii) { + assert(paramDecl != nodeDecl); + auto otherType = context.getNodeDeclPayloadType(paramDecl); + const uint32_t otherId = + getResultIdForType(otherType, &alreadyExists); + assert(alreadyExists && "forward references not allowed in " + "MaxRecordsSharedWith attribute"); + emitDecoration(id, spv::Decoration::NodeSharesPayloadLimitsWithAMDX, + {otherId}, llvm::None, true); + break; + } + } + assert(alreadyExists && + "invalid reference in MaxRecordsSharedWith attribute"); + } + } + if (const auto *attr = nodeDecl->getAttr()) { + emitDecoration(id, spv::Decoration::PayloadNodeSparseArrayAMDX, {}, + llvm::None); + } + if (const auto *attr = nodeDecl->getAttr()) { + emitDecoration(id, spv::Decoration::PayloadNodeSparseArrayAMDX, {}, + llvm::None); + } + if (const auto *attr = nodeDecl->getAttr()) { + uint32_t arraySize = getOrCreateConstantInt( + llvm::APInt(32, attr->getCount()), context.getUIntType(32), false); + 
emitDecoration(id, spv::Decoration::PayloadNodeArraySizeAMDX, {arraySize}, + llvm::None, true); + } + } // Structure types else if (const auto *structType = dyn_cast(type)) { std::vector> @@ -2549,6 +2733,15 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { } } + if (const auto recordDecl = dyn_cast_or_null( + context.getStructDeclForSpirvType(structType))) { + auto index = context.getDispatchGridIndex(recordDecl); + if (index.hasValue()) { + emitDecoration(id, spv::Decoration::PayloadDispatchIndirectAMDX, {}, + index); + } + } + // Emit OpMemberName for the struct members. for (size_t i = 0; i < fieldsToGenerate.size(); ++i) emitNameForType(fieldsToGenerate[i].get().name, id, i); @@ -2611,6 +2804,13 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { else if (interfaceType == StructInterfaceType::UniformBuffer) emitDecoration(id, spv::Decoration::Block, {}); + // Emit NodeTrackRWInputSharing decoration if attribute is present. + const auto *structDecl = dyn_cast_or_null( + context.getStructDeclForSpirvType(structType)); + if (structDecl && structDecl->hasAttr()) { + emitDecoration(id, spv::Decoration::TrackFinishWritingAMDX, {}); + } + initTypeInstruction(spv::Op::OpTypeStruct); curTypeInst.push_back(id); for (auto fieldTypeId : fieldTypeIds) @@ -2753,14 +2953,17 @@ void EmitTypeHandler::emitLiteral(const SpirvConstant *literal, void EmitTypeHandler::emitDecoration(uint32_t typeResultId, spv::Decoration decoration, llvm::ArrayRef decorationParams, - llvm::Optional memberIndex) { - + llvm::Optional memberIndex, + bool usesIdParams) { spv::Op op = memberIndex.hasValue() ? spv::Op::OpMemberDecorate : spv::Op::OpDecorate; if (decoration == spv::Decoration::UserTypeGOOGLE) { op = memberIndex.hasValue() ? 
spv::Op::OpMemberDecorateString : spv::Op::OpDecorateString; } + if (usesIdParams) { + op = spv::Op::OpDecorateId; + } assert(curDecorationInst.empty()); curDecorationInst.push_back(static_cast(op)); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 1cec230e50..fb4b22e52b 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -83,7 +83,8 @@ class EmitTypeHandler { // parameters. void emitDecoration(uint32_t typeResultId, spv::Decoration, llvm::ArrayRef decorationParams, - llvm::Optional memberIndex = llvm::None); + llvm::Optional memberIndex = llvm::None, + bool usesIdParams = false); uint32_t getOrCreateConstant(SpirvConstant *); @@ -110,6 +111,7 @@ class EmitTypeHandler { uint32_t getOrCreateConstantNull(SpirvConstantNull *); uint32_t getOrCreateUndef(SpirvUndef *); uint32_t getOrCreateConstantBool(SpirvConstantBoolean *); + uint32_t getOrCreateConstantString(SpirvConstantString *); template void emitLiteral(const SpirvConstant *, vecType &outInst); template @@ -173,6 +175,7 @@ class EmitTypeHandler { emittedConstantInts; llvm::DenseMap, uint32_t> emittedConstantFloats; + llvm::DenseMap emittedConstantStrings; llvm::SmallVector emittedConstantComposites; llvm::SmallVector emittedConstantNulls; llvm::SmallVector emittedUndef; @@ -248,6 +251,11 @@ class EmitVisitor : public Visitor { bool visit(SpirvAccessChain *) override; bool visit(SpirvAtomic *) override; bool visit(SpirvBarrier *) override; + bool visit(SpirvIsNodePayloadValid *inst) override; + bool visit(SpirvNodePayloadArrayLength *inst) override; + bool visit(SpirvAllocateNodePayloads *inst) override; + bool visit(SpirvEnqueueNodePayloads *inst) override; + bool visit(SpirvFinishWritingNodePayload *inst) override; bool visit(SpirvBinaryOp *) override; bool visit(SpirvBitFieldExtract *) override; bool visit(SpirvBitFieldInsert *) override; @@ -255,6 +263,7 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantInteger 
*) override; bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; + bool visit(SpirvConstantString *) override; bool visit(SpirvConstantNull *) override; bool visit(SpirvConvertPtrToU *) override; bool visit(SpirvConvertUToPtr *) override; @@ -455,6 +464,10 @@ class EmitVisitor : public Visitor { std::vector mainBinary; // String literals to SpirvString objects llvm::StringMap stringIdMap; + // String literals to SpirvConstantString objects + llvm::StringMap stringConstantIdMap; + // String spec constants + llvm::DenseSet stringSpecConstantInstructions; // Main file information for debugging that will be used by OpLine. uint32_t debugMainFileId; // Id for Vulkan DebugInfo extended instruction set. Used when generating diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 7fb449fee9..b6aed4d8b6 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -214,6 +214,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_EXT_shader_image_int64", Extension::EXT_shader_image_int64) .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) + .Case("SPV_AMDX_shader_enqueue", Extension::AMD_shader_enqueue) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) .Case("SPV_KHR_compute_shader_derivatives", Extension::KHR_compute_shader_derivatives) @@ -284,6 +285,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_EXT_shader_image_int64"; case Extension::KHR_physical_storage_buffer: return "SPV_KHR_physical_storage_buffer"; + case Extension::AMD_shader_enqueue: + return "SPV_AMDX_shader_enqueue"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; case Extension::KHR_compute_shader_derivatives: diff --git a/tools/clang/lib/SPIRV/GlPerVertex.cpp b/tools/clang/lib/SPIRV/GlPerVertex.cpp index 09b09236b4..aa5a40d008 100644 --- 
a/tools/clang/lib/SPIRV/GlPerVertex.cpp +++ b/tools/clang/lib/SPIRV/GlPerVertex.cpp @@ -324,6 +324,9 @@ bool GlPerVertex::setClipCullDistanceType(SemanticIndexToTypeMap *typeMap, bool GlPerVertex::doGlPerVertexFacts(const NamedDecl *decl, QualType baseType, bool asInput) { + if (hlsl::IsHLSLNodeType(baseType)) { + return true; + } llvm::StringRef semanticStr; const hlsl::Semantic *semantic = {}; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 8238750af9..1869983ae3 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -362,6 +362,16 @@ const SpirvType *LowerTypeVisitor::lowerType(const SpirvType *type, return raType; return spvContext.getRuntimeArrayType(loweredElemType, raType->getStride()); } + // Node payload arrays could contain a hybrid type + else if (const auto *npaType = dyn_cast(type)) { + const auto *loweredElemType = + lowerType(npaType->getElementType(), rule, loc); + // If runtime array didn't contain any hybrid types, return itself. + if (npaType->getElementType() == loweredElemType) + return npaType; + return spvContext.getNodePayloadArrayType(loweredElemType, + npaType->getNodeDecl()); + } // Pointer types could point to a hybrid type. 
else if (const auto *ptrType = dyn_cast(type)) { const auto *loweredPointee = diff --git a/tools/clang/lib/SPIRV/PreciseVisitor.cpp b/tools/clang/lib/SPIRV/PreciseVisitor.cpp index 34e6087990..f1869318a4 100644 --- a/tools/clang/lib/SPIRV/PreciseVisitor.cpp +++ b/tools/clang/lib/SPIRV/PreciseVisitor.cpp @@ -60,6 +60,9 @@ bool isAccessingPrecise(clang::spirv::SpirvAccessChain *inst) { } else if (auto *raType = llvm::dyn_cast(baseType)) { indexes.pop(); baseType = raType->getElementType(); + } else if (auto *npaType = llvm::dyn_cast(baseType)) { + indexes.pop(); + baseType = npaType->getElementType(); } else if (auto *structType = llvm::dyn_cast(baseType)) { SpirvInstruction *index = indexes.top(); if (auto *constInt = llvm::dyn_cast(index)) { diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index d776ba65fb..e085603b21 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -81,7 +81,9 @@ SpirvBuilder::addFnParam(QualType ptrType, bool isPrecise, bool isNointerp, param = new (context) SpirvFunctionParameter(ptrType, isPrecise, isNointerp, loc); } - param->setStorageClass(spv::StorageClass::Function); + param->setStorageClass(hlsl::IsHLSLNodeInputType(ptrType) + ? 
spv::StorageClass::NodePayloadAMDX + : spv::StorageClass::Function); param->setDebugName(name); function->addParameter(param); return param; @@ -230,6 +232,13 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, createEndInvocationInterlockEXT(loc, range); } + if (context.hasLoweredType(pointer)) { + // preserve distinct node payload array types + auto *ptrType = dyn_cast(pointer->getResultType()); + instruction->setResultType(ptrType->getPointeeType()); + context.addToInstructionsWithLoweredType(instruction); + } + const auto &bitfieldInfo = pointer->getBitfieldInfo(); if (!bitfieldInfo.hasValue()) return instruction; @@ -306,6 +315,12 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, auto *instruction = new (context) SpirvStore(loc, address, source, llvm::None, range); + if (context.hasLoweredType(source)) { + // preserve distinct node payload array types + address->setResultType(context.getPointerType(source->getResultType(), + address->getStorageClass())); + context.addToInstructionsWithLoweredType(address); + } insertPoint->addInstruction(instruction); if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && @@ -872,6 +887,53 @@ SpirvInstruction *SpirvBuilder::createNonSemanticDebugPrintfExtInst( return extInst; } +SpirvInstruction * +SpirvBuilder::createIsNodePayloadValid(SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex, + SourceLocation loc) { + auto *inst = new (context) + SpirvIsNodePayloadValid(astContext.BoolTy, loc, payloadArray, nodeIndex); + insertPoint->addInstruction(inst); + return inst; +} + +SpirvInstruction * +SpirvBuilder::createNodePayloadArrayLength(SpirvInstruction *payloadArray, + SourceLocation loc) { + auto *inst = new (context) + SpirvNodePayloadArrayLength(astContext.UnsignedIntTy, loc, payloadArray); + insertPoint->addInstruction(inst); + return inst; +} + +SpirvInstruction *SpirvBuilder::createAllocateNodePayloads( + QualType resultType, spv::Scope allocationScope, + 
SpirvInstruction *shaderIndex, SpirvInstruction *recordCount, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) SpirvAllocateNodePayloads( + resultType, loc, allocationScope, shaderIndex, recordCount); + insertPoint->addInstruction(inst); + return inst; +} + +void SpirvBuilder::createEnqueueOutputNodePayloads(SpirvInstruction *payload, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) SpirvEnqueueNodePayloads(loc, payload); + insertPoint->addInstruction(inst); +} + +SpirvInstruction * +SpirvBuilder::createFinishWritingNodePayload(SpirvInstruction *payload, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) + SpirvFinishWritingNodePayload(astContext.BoolTy, loc, payload); + insertPoint->addInstruction(inst); + return inst; +} + void SpirvBuilder::createBarrier(spv::Scope memoryScope, spv::MemorySemanticsMask memorySemantics, llvm::Optional exec, @@ -1866,6 +1928,14 @@ SpirvConstant *SpirvBuilder::getConstantNull(QualType type) { return nullConst; } +SpirvConstant *SpirvBuilder::getConstantString(llvm::StringRef str, + bool specConst) { + // We do not care about making unique constants at this point. + auto *stringConst = new (context) SpirvConstantString(str, specConst); + mod->addConstant(stringConst); + return stringConst; +} + SpirvUndef *SpirvBuilder::getUndef(QualType type) { // We do not care about making unique constants at this point. 
auto *undef = new (context) SpirvUndef(type); diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index cb44d3a3a8..88716dddde 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -62,6 +62,9 @@ SpirvContext::~SpirvContext() { for (auto *raType : runtimeArrayTypes) raType->~RuntimeArrayType(); + for (auto *npaType : nodePayloadArrayTypes) + npaType->~NodePayloadArrayType(); + for (auto *fnType : functionTypes) fnType->~FunctionType(); @@ -273,6 +276,19 @@ SpirvContext::getRuntimeArrayType(const SpirvType *elemType, return *(inserted.first); } +const NodePayloadArrayType * +SpirvContext::getNodePayloadArrayType(const SpirvType *elemType, + const ParmVarDecl *nodeDecl) { + NodePayloadArrayType type(elemType, nodeDecl); + auto found = nodePayloadArrayTypes.find(&type); + if (found != nodePayloadArrayTypes.end()) + return *found; + + auto inserted = nodePayloadArrayTypes.insert( + new (this) NodePayloadArrayType(elemType, nodeDecl)); + return *(inserted.first); +} + const StructType * SpirvContext::getStructType(llvm::ArrayRef fields, llvm::StringRef name, bool isReadOnly, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 24774875f7..f3d10537e1 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -265,7 +265,8 @@ inline bool canActAsInParmVar(const ParmVarDecl *param) { return !param->hasAttr() && // GS output streams are marked as inout, but it should not be // used as in parameter. 
- !hlsl::IsHLSLStreamOutputType(param->getType()); + !hlsl::IsHLSLStreamOutputType(param->getType()) && + !hlsl::IsHLSLNodeOutputType(param->getType()); } /// Returns true if the given function parameter can act as shader stage @@ -1424,6 +1425,83 @@ SpirvInstruction *SpirvEmitter::castToType(SpirvInstruction *value, return nullptr; } +static bool handleDispatchGrid(SpirvContext &spvContext, + const RecordDecl *recordDecl) { + unsigned index = 0; + for (auto fieldDecl : recordDecl->fields()) { + QualType fieldType = fieldDecl->getType(); + for (const hlsl::UnusualAnnotation *it : + fieldDecl->getUnusualAnnotations()) { + if (it->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { + const hlsl::SemanticDecl *sd = cast(it); + if (sd->SemanticName.equals("SV_DispatchGrid")) { + spvContext.registerDispatchGridIndex(recordDecl, index); + return true; + } + } + } + if (const auto *innerType = fieldType->getAs()) { + if (handleDispatchGrid(spvContext, innerType->getDecl())) + return true; + } + ++index; + } + return false; +} + +bool SpirvEmitter::handleNodePayloadArrayType(const ParmVarDecl *decl, + SpirvInstruction *instr) { + // Because SPIR-V node payload array types are node-specific, propagate + // lowered types + switch (instr->getKind()) { + case SpirvInstruction::Kind::IK_Load: { + SpirvInstruction *ptr = dyn_cast(instr)->getPointer(); + if (handleNodePayloadArrayType(decl, ptr)) { + const SpirvPointerType *ptrType = + dyn_cast(ptr->getResultType()); + instr->setResultType(ptrType->getPointeeType()); + spvContext.addToInstructionsWithLoweredType(instr); + return true; + } + return false; + } + case SpirvInstruction::Kind::IK_FunctionParameter: + case SpirvInstruction::Kind::IK_Variable: { + QualType varType = decl->getType(); + if (hlsl::IsHLSLNodeType(varType)) { + if (auto *type = spvContext.getNodeDeclPayloadType(decl)) { + instr->setResultType( + spvContext.getPointerType(type, instr->getStorageClass())); + } else { + LowerTypeVisitor 
lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + QualType resultType = + hlsl::GetHLSLNodeIOResultType(astContext, varType); + const auto *recordType = resultType->getAs(); + assert(recordType); + if (hlsl::IsHLSLDispatchNodeInputRecordType(varType)) { + handleDispatchGrid(spvContext, recordType->getDecl()); + } + const SpirvType *elemType = lowerTypeVisitor.lowerType( + resultType, clang::spirv::SpirvLayoutRule::Scalar, llvm::None, + decl->getLocation()); + const NodePayloadArrayType *arrType = + spvContext.getNodePayloadArrayType(elemType, decl); + const SpirvType *ptrType = + spvContext.getPointerType(arrType, instr->getStorageClass()); + instr->setResultType(ptrType); + spvContext.registerNodeDeclPayloadType(arrType, decl); + } + spvContext.addToInstructionsWithLoweredType(instr); + return true; + } + return false; + } + default: + return false; + } +} + void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Forward declaration of a function inside another. if (!decl->isThisDeclarationADefinition()) { @@ -1565,6 +1643,9 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { QualType paramType = paramDecl->getType(); auto *param = declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + if (isEntry) { + handleNodePayloadArrayType(paramDecl, param); + } #ifdef ENABLE_SPIRV_CODEGEN if (hlsl::IsVKBufferPointerType(paramType)) { Optional isRowMajor = llvm::None; @@ -4318,7 +4399,7 @@ SpirvEmitter::processTextureLevelOfDetail(const CXXMemberCallExpr *expr, spvBuilder.createImageQuery(spv::Op::OpImageQueryLod, queryResultType, expr->getExprLoc(), sampledImage, coordinate); - if (spvContext.isCS()) { + if (spvContext.isCS() || spvContext.isNode()) { addDerivativeGroupExecutionMode(); } // The first component of the float2 contains the mipmap array layer. 
@@ -5321,6 +5402,9 @@ SpirvEmitter::doCXXMemberCallExpr(const CXXMemberCallExpr *expr) { uint32_t opcode = static_cast(hlsl::IntrinsicOp::Num_Intrinsics); if (hlsl::GetIntrinsicOp(callee, opcode, group)) { + if (group == "subscript") { + return processIntrinsicExtractRecordStruct(expr); + } return processIntrinsicMemberCall(expr, static_cast(opcode)); } @@ -5517,6 +5601,28 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, return processRayQueryIntrinsics(expr, opcode); case IntrinsicOp::MOP_GetBufferContents: return processIntrinsicGetBufferContents(expr); + case hlsl::IntrinsicOp::MOP_GetThreadNodeOutputRecords: + return processIntrinsicGetNodeOutputRecords(expr, false); + case hlsl::IntrinsicOp::MOP_GetGroupNodeOutputRecords: + return processIntrinsicGetNodeOutputRecords(expr, true); + case hlsl::IntrinsicOp::MOP_ThreadIncrementOutputCount: + retVal = processIntrinsicIncrementOutputCount(expr, false); + break; + case hlsl::IntrinsicOp::MOP_GroupIncrementOutputCount: + retVal = processIntrinsicIncrementOutputCount(expr, true); + break; + case hlsl::IntrinsicOp::MOP_IsValid: + retVal = processIntrinsicIsValid(expr); + break; + case hlsl::IntrinsicOp::MOP_Count: + retVal = processIntrinsicGetRecordCount(expr); + break; + case hlsl::IntrinsicOp::MOP_OutputComplete: + processIntrinsicOutputComplete(expr); + break; + case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: + retVal = processIntrinsicFinishedCrossGroupSharing(expr); + break; default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -5568,7 +5674,8 @@ SpirvInstruction *SpirvEmitter::createImageSample( const bool isExplicit = lod || (grad.first && grad.second); // Implicit-lod instructions are only allowed in pixel and compute shaders. 
- if (!spvContext.isPS() && !spvContext.isCS() && !isExplicit) + if (!spvContext.isPS() && !spvContext.isCS() && !spvContext.isNode() && + !isExplicit) emitError("sampling with implicit lod is only allowed in fragment and " "compute shaders", loc); @@ -5673,7 +5780,7 @@ SpirvEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr, const auto retType = expr->getDirectCallee()->getReturnType(); if (isSample) { - if (spvContext.isCS()) { + if (spvContext.isCS() || spvContext.isNode()) { addDerivativeGroupExecutionMode(); } return createImageSample(retType, imageType, image, sampler, coordinate, @@ -5763,7 +5870,7 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, const auto retType = expr->getDirectCallee()->getReturnType(); - if (!lod && spvContext.isCS()) { + if (!lod && (spvContext.isCS() || spvContext.isNode())) { addDerivativeGroupExecutionMode(); } return createImageSample( @@ -8647,9 +8754,10 @@ const Expr *SpirvEmitter::collectArrayStructIndices( } { - // Indexing into ConstantBuffers and TextureBuffers involves an additional - // FlatConversion node which casts the handle to the underlying structure - // type. We can look past the FlatConversion to continue to collect indices. + // Indexing into ConstantBuffers, TextureBuffers, and node input/output + // types involves an additional FlatConversion node which casts the handle + // to the underlying structure type. We can look past the FlatConversion to + // continue to collect indices. 
// For example: MyConstantBufferArray[0].structMember1 // `-MemberExpr .structMember1 // `-ImplicitCastExpr 'const T' lvalue @@ -8658,7 +8766,8 @@ const Expr *SpirvEmitter::collectArrayStructIndices( if (castExpr->getCastKind() == CK_FlatConversion) { const auto *subExpr = castExpr->getSubExpr(); const QualType subExprType = subExpr->getType(); - if (isConstantTextureBuffer(subExprType)) { + if (isConstantTextureBuffer(subExprType) || + hlsl::IsHLSLNodeType(subExprType)) { return collectArrayStructIndices(subExpr, rawIndex, rawIndices, indices, isMSOutAttribute); } @@ -9060,6 +9169,9 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_udot: retVal = processIntrinsicDot(callExpr); break; + case hlsl::IntrinsicOp::IOP_Barrier: + retVal = processIntrinsicBarrier(callExpr); + break; case hlsl::IntrinsicOp::IOP_GroupMemoryBarrier: retVal = processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ false, @@ -9092,6 +9204,9 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { /*groupSync*/ true, /*isAllBarrier*/ true); break; + case hlsl::IntrinsicOp::IOP_GetRemainingRecursionLevels: + retVal = processIntrinsicGetRemainingRecursionLevels(callExpr); + break; case hlsl::IntrinsicOp::IOP_CheckAccessFullyMapped: retVal = spvBuilder.createImageSparseTexelsResident( doExpr(callExpr->getArg(0)), srcLoc, srcRange); @@ -9568,6 +9683,15 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { return retVal; } +SpirvInstruction *SpirvEmitter::processIntrinsicGetRecordCount( + const CXXMemberCallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto obj = callExpr->getImplicitObjectArgument(); + const auto loc = callExpr->getExprLoc(); + SpirvInstruction *payload = doExpr(obj); + return spvBuilder.createNodePayloadArrayLength(payload, loc); +} + SpirvInstruction * SpirvEmitter::processIntrinsicFirstbit(const CallExpr *callExpr, GLSLstd450 glslOpcode) { @@ -11049,6 +11173,194 @@ SpirvInstruction 
*SpirvEmitter::processIntrinsicGetBufferContents( return retVal; } +SpirvInstruction *SpirvEmitter::processIntrinsicExtractRecordStruct( + const CXXMemberCallExpr *callExpr) { + Expr *obj = callExpr->getImplicitObjectArgument(); + QualType objType = obj->getType(); + unsigned n = callExpr->getNumArgs(); + assert(hlsl::IsHLSLNodeType(objType)); + assert(n == 0 || n == 1 && hlsl::IsHLSLNodeRecordArrayType(objType)); + + QualType recordType = hlsl::GetHLSLNodeIOResultType(astContext, objType); + SpirvInstruction *res = doExpr(obj); + SpirvInstruction *index = + n ? doExpr(callExpr->getArg(0)) + : spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, 0)); + res->setLayoutRule(SpirvLayoutRule::Scalar); + + return spvBuilder.createAccessChain(recordType, res, {index}, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetRemainingRecursionLevels( + const CallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto loc = callExpr->getExprLoc(); + const QualType retType = callExpr->getCallReturnType(astContext); + auto *var = declIdMapper.getBuiltinVar( + spv::BuiltIn::RemainingRecursionLevelsAMDX, retType, loc); + return spvBuilder.createLoad(retType, var, loc); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicIsValid(const CXXMemberCallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto loc = callExpr->getExprLoc(); + const Expr *nodeOutputExpr = callExpr->getImplicitObjectArgument(); + Expr *baseExpr = const_cast(nodeOutputExpr); + SpirvInstruction *shaderIndex = nullptr; + + if (const auto subExpr = dyn_cast_or_null( + nodeOutputExpr->IgnoreParenNoopCasts(astContext))) { + if (subExpr->getOperator() == OverloadedOperatorKind::OO_Subscript) { + // special case: offset shader index by the array subscript + shaderIndex = doExpr(subExpr->getArg(1)); + baseExpr = const_cast(subExpr->getArg(0)); + } + } + + const auto *declRefExpr = 
dyn_cast(baseExpr->IgnoreImpCasts()); + const auto *paramDecl = dyn_cast(declRefExpr->getDecl()); + int nodeIndex = 0; + if (HLSLNodeIdAttr *nodeId = paramDecl->getAttr()) { + nodeIndex = nodeId->getArrayIndex(); + } + + SpirvInstruction *payload = doExpr(baseExpr); + if (!shaderIndex) { + shaderIndex = spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, nodeIndex)); + } + + return spvBuilder.createIsNodePayloadValid(payload, shaderIndex, loc); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetNodeOutputRecords( + const CXXMemberCallExpr *callExpr, bool isGroupShared) { + assert(callExpr->getNumArgs() == 1); + const auto loc = callExpr->getExprLoc(); + const Expr *nodeOutputExpr = callExpr->getImplicitObjectArgument(); + Expr *baseExpr = const_cast(nodeOutputExpr); + SpirvInstruction *shaderIndex = nullptr; + + if (const auto subExpr = dyn_cast_or_null( + nodeOutputExpr->IgnoreParenNoopCasts(astContext))) { + if (subExpr->getOperator() == OverloadedOperatorKind::OO_Subscript) { + // special case: offset shader index by the array subscript + shaderIndex = doExpr(subExpr->getArg(1)); + baseExpr = const_cast(subExpr->getArg(0)); + } + } + + const auto *declRefExpr = dyn_cast(baseExpr->IgnoreImpCasts()); + const auto *paramDecl = dyn_cast(declRefExpr->getDecl()); + const auto *nodeID = paramDecl->getAttr(); + StringRef nodeName = paramDecl->getName(); + unsigned nodeIndex = 0; + if (nodeID) { + nodeName = nodeID->getName(); + nodeIndex = nodeID->getArrayIndex(); + } + + if (!shaderIndex) { + shaderIndex = spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, nodeIndex)); + } + + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + const SpirvType *elemType = lowerTypeVisitor.lowerType( + hlsl::GetHLSLNodeIOResultType(astContext, baseExpr->getType()), + clang::spirv::SpirvLayoutRule::Scalar, llvm::None, + paramDecl->getLocation()); + const SpirvType *payloadType = spvContext.getPointerType( + 
spvContext.getNodePayloadArrayType(elemType, paramDecl), + spv::StorageClass::NodePayloadAMDX); + + spv::Scope scope = + isGroupShared ? spv::Scope::Workgroup : spv::Scope::Invocation; + SpirvInstruction *recordCount = doExpr(callExpr->getArg(0)); + SpirvInstruction *result = spvBuilder.createAllocateNodePayloads( + callExpr->getType(), scope, shaderIndex, recordCount, loc); + result->setResultType(payloadType); + spvContext.addToInstructionsWithLoweredType(result); + return result; +} + +SpirvInstruction *SpirvEmitter::processIntrinsicIncrementOutputCount( + const CXXMemberCallExpr *callExpr, bool isGroupShared) { + return processIntrinsicGetNodeOutputRecords(callExpr, isGroupShared); +} + +void SpirvEmitter::processIntrinsicOutputComplete( + const CXXMemberCallExpr *callExpr) { + Expr *payloadExpr = + callExpr->getImplicitObjectArgument()->IgnoreParenNoopCasts(astContext); + SpirvInstruction *payload = doExpr(payloadExpr); + spvBuilder.createEnqueueOutputNodePayloads(payload, callExpr->getExprLoc()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicFinishedCrossGroupSharing( + const CXXMemberCallExpr *callExpr) { + Expr *payloadExpr = callExpr->getImplicitObjectArgument(); + SpirvInstruction *payload = doExpr(payloadExpr); + return spvBuilder.createFinishWritingNodePayload(payload, + callExpr->getExprLoc()); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicBarrier(const CallExpr *callExpr) { + llvm::APSInt a1(32, true), a2(32, true); + int64_t i1, i2; + const Expr *e1 = callExpr->getArg(0), *e2 = callExpr->getArg(1); + + // object as first argument + if (!e1->EvaluateAsInt(a1, astContext)) { + assert(e1->getType()->isStructureOrClassType()); + a1.setAllBits(); + } + + if (e2->EvaluateAsInt(a2, astContext) && (i1 = a1.getExtValue()) >= 0 && + (i2 = a2.getExtValue()) >= 0) { + } else { + emitError("Barrier arguments must be non-negative integer constants", + callExpr->getExprLoc()); + return nullptr; + } + + if (!(i1 | i2)) { // all zero -> no-op + 
return nullptr; + } + + spv::Scope memScope = + (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::DeviceScope) + ? spv::Scope::Device + : (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::GroupScope) + ? spv::Scope::Workgroup + : spv::Scope::Invocation; + spv::MemorySemanticsMask memSemaMask = + spv::MemorySemanticsMask::AcquireRelease | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::UavMemory) + ? spv::MemorySemanticsMask::UniformMemory + : spv::MemorySemanticsMask::MaskNone) | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::GroupSharedMemory) + ? spv::MemorySemanticsMask::WorkgroupMemory + : spv::MemorySemanticsMask::MaskNone) | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::NodeOutputMemory) + ? spv::MemorySemanticsMask::OutputMemory + : spv::MemorySemanticsMask::MaskNone); + Optional execScope = + (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::GroupSync) + ? Optional(spv::Scope::Workgroup) + : None; + + spvBuilder.createBarrier(memScope, memSemaMask, execScope, + callExpr->getExprLoc()); + return nullptr; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, @@ -12186,7 +12498,7 @@ SpirvInstruction *SpirvEmitter::processIntrinsicUsingSpirvInst( case spv::Op::OpFwidth: case spv::Op::OpFwidthFine: case spv::Op::OpFwidthCoarse: - if (spvContext.isCS()) + if (spvContext.isCS() || spvContext.isNode()) addDerivativeGroupExecutionMode(); needsLegalization = true; break; @@ -13272,6 +13584,7 @@ hlsl::ShaderModel::Kind SpirvEmitter::getShaderModelKind(StringRef stageName) { .Case("callable", hlsl::ShaderModel::Kind::Callable) .Case("mesh", hlsl::ShaderModel::Kind::Mesh) .Case("amplification", hlsl::ShaderModel::Kind::Amplification) + .Case("node", hlsl::ShaderModel::Kind::Node) .Default(hlsl::ShaderModel::Kind::Invalid); assert(SMK != hlsl::ShaderModel::Kind::Invalid); return SMK; @@ -13292,6 +13605,7 @@ SpirvEmitter::getSpirvShaderStage(hlsl::ShaderModel::Kind smk, case 
hlsl::ShaderModel::Kind::Pixel: return spv::ExecutionModel::Fragment; case hlsl::ShaderModel::Kind::Compute: + case hlsl::ShaderModel::Kind::Node: return spv::ExecutionModel::GLCompute; case hlsl::ShaderModel::Kind::RayGeneration: return spv::ExecutionModel::RayGenerationNV; @@ -13512,6 +13826,21 @@ void SpirvEmitter::processPixelShaderAttributes(const FunctionDecl *decl) { } } +void SpirvEmitter::checkForWaveSizeAttr(const FunctionDecl *decl) { + if (auto *waveSizeAttr = decl->getAttr()) { + // Not supported in Vulkan SPIR-V, warn and ignore. + + // SPIR-V SubgroupSize execution mode would work but it is Kernel only + // (requires the SubgroupDispatch capability, which implies the + // DeviceEnqueue capability, which is Kernel only). Subgroup sizes can be + // specified in Vulkan on the application side via + // VK_EXT_subgroup_size_control. + emitWarning("Wave size is not supported by Vulkan SPIR-V. Consider using " + "VK_EXT_subgroup_size_control.", + waveSizeAttr->getLocation()); + } +} + void SpirvEmitter::processComputeShaderAttributes(const FunctionDecl *decl) { auto *numThreadsAttr = decl->getAttr(); assert(numThreadsAttr && "thread group size missing from entry-point"); @@ -13523,19 +13852,82 @@ void SpirvEmitter::processComputeShaderAttributes(const FunctionDecl *decl) { spvBuilder.addExecutionMode(entryFunction, spv::ExecutionMode::LocalSize, {x, y, z}, decl->getLocation()); - auto *waveSizeAttr = decl->getAttr(); - if (waveSizeAttr) { - // Not supported in Vulkan SPIR-V, warn and ignore. + checkForWaveSizeAttr(decl); +} - // SPIR-V SubgroupSize execution mode would work but it is Kernel only - // (requires the SubgroupDispatch capability, which implies the - // DeviceEnqueue capability, which is Kernel only). Subgroup sizes can be - // specified in Vulkan on the application side via - // VK_EXT_subgroup_size_control. - emitWarning("Wave size is not supported by Vulkan SPIR-V. 
Consider using " - "VK_EXT_subgroup_size_control.", - waveSizeAttr->getLocation()); +void SpirvEmitter::processNodeShaderAttributes(const FunctionDecl *decl) { + uint32_t x = 1, y = 1, z = 1; + if (auto *numThreadsAttr = decl->getAttr()) { + x = static_cast(numThreadsAttr->getX()); + y = static_cast(numThreadsAttr->getY()); + z = static_cast(numThreadsAttr->getZ()); + } + spvBuilder.addExecutionMode(entryFunction, spv::ExecutionMode::LocalSize, + {x, y, z}, decl->getLocation()); + + auto *nodeLaunchAttr = decl->getAttr(); + StringRef launchType = nodeLaunchAttr ? nodeLaunchAttr->getLaunchType() : ""; + if (launchType.equals("coalescing") || launchType.equals("thread")) { + spvBuilder.addExecutionMode(entryFunction, + spv::ExecutionMode::CoalescingAMDX, {}, + decl->getLocation()); } + + uint64_t nodeId = 0; + if (const auto nodeIdAttr = decl->getAttr()) + nodeId = static_cast(nodeIdAttr->getArrayIndex()); + spvBuilder.addExecutionModeId( + entryFunction, spv::ExecutionMode::ShaderIndexAMDX, + {spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, nodeId))}, + decl->getLocation()); + + if (const auto *nodeMaxRecursionDepthAttr = + decl->getAttr()) { + SpirvInstruction *count = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, + llvm::APInt(32, nodeMaxRecursionDepthAttr->getCount())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::MaxNodeRecursionAMDX, + {count}, decl->getLocation()); + } + + if (const auto *nodeShareInputOfAttr = + decl->getAttr()) { + SpirvInstruction *name = + spvBuilder.getConstantString(nodeShareInputOfAttr->getName()); + SpirvInstruction *index = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, + llvm::APInt(32, nodeShareInputOfAttr->getArrayIndex())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::SharesInputWithAMDX, + {name, index}, decl->getLocation()); + } + + if (const auto *dispatchGrid = decl->getAttr()) { + SpirvInstruction *gridX = spvBuilder.getConstantInt( + 
astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getX())); + SpirvInstruction *gridY = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getY())); + SpirvInstruction *gridZ = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getZ())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::StaticNumWorkgroupsAMDX, + {gridX, gridY, gridZ}, decl->getLocation()); + } else if (const auto *maxDispatchGrid = + decl->getAttr()) { + SpirvInstruction *gridX = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getX())); + SpirvInstruction *gridY = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getY())); + SpirvInstruction *gridZ = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getZ())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::MaxNumWorkgroupsAMDX, + {gridX, gridY, gridZ}, decl->getLocation()); + } + + checkForWaveSizeAttr(decl); } bool SpirvEmitter::processTessellationShaderAttributes( @@ -13978,6 +14370,8 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( processPixelShaderAttributes(decl); } else if (spvContext.isCS()) { processComputeShaderAttributes(decl); + } else if (spvContext.isNode()) { + processNodeShaderAttributes(decl); } else if (spvContext.isHS()) { if (!processTessellationShaderAttributes(decl, &numOutputControlPoints)) return nullptr; @@ -14086,12 +14480,23 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( llvm::SmallVector params; for (const auto *param : decl->params()) { const auto paramType = param->getType(); + if (hlsl::IsHLSLNodeInputType(paramType)) { + SpirvInstruction *value = nullptr; + if (!declIdMapper.createStageInputVar(param, &value, false)) + return nullptr; + if (value && value->getKind() == SpirvInstruction::Kind::IK_Variable) { + handleNodePayloadArrayType(param, value); + 
params.push_back(value); + } + continue; + } + std::string tempVarName = "param.var." + param->getNameAsString(); auto *tempVar = spvBuilder.addFnVar(paramType, param->getLocation(), tempVarName, param->hasAttr(), param->hasAttr()); - + handleNodePayloadArrayType(param, tempVar); params.push_back(tempVar); // Create the stage input variable for parameter not marked as pure out and @@ -14109,6 +14514,9 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( if (!declIdMapper.createStageInputVar(param, &loadedValue, false)) return nullptr; + if (loadedValue) { + handleNodePayloadArrayType(param, loadedValue); + } // Only initialize the temporary variable if the parameter is indeed used, // or if it is an inout parameter. diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 0c77f2fc24..954b2c5dd3 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -126,6 +126,8 @@ class SpirvEmitter : public ASTConsumer { SourceRange range = {}); private: + bool handleNodePayloadArrayType(const ParmVarDecl *decl, + SpirvInstruction *instr); void doFunctionDecl(const FunctionDecl *decl); void doVarDecl(const VarDecl *decl); void doRecordDecl(const RecordDecl *decl); @@ -505,6 +507,9 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction * processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'Barrier' intrinsic function. + SpirvInstruction *processIntrinsicBarrier(const CallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic @@ -513,6 +518,40 @@ class SpirvEmitter : public ASTConsumer { bool isDevice, bool groupSync, bool isAllBarrier); + /// Processes the 'GetRemainingRecursionLevels' intrinsic function. 
+ SpirvInstruction * + processIntrinsicGetRemainingRecursionLevels(const CallExpr *callExpr); + + /// Processes the 'IsValid' intrinsic function. + SpirvInstruction *processIntrinsicIsValid(const CXXMemberCallExpr *callExpr); + + /// Processes the 'Get' intrinsic function for (arrays of) node records and + /// the array subscript operator for node record arrays. + SpirvInstruction * + processIntrinsicExtractRecordStruct(const CXXMemberCallExpr *callExpr); + + /// Processes the 'GetGroupNodeOutputRecords' and 'GetThreadNodeOutputRecords' + /// intrinsic functions. + SpirvInstruction * + processIntrinsicGetNodeOutputRecords(const CXXMemberCallExpr *callExpr, + bool isGroupShared); + + /// Processes the 'IncrementOutputCount' intrinsic function. + SpirvInstruction * + processIntrinsicIncrementOutputCount(const CXXMemberCallExpr *callExpr, + bool isGroupShared); + + /// Processes the 'Count' intrinsic function for node input record arrays. + SpirvInstruction * + processIntrinsicGetRecordCount(const CXXMemberCallExpr *callExpr); + + /// Processes the 'OutputComplete' intrinsic function. + void processIntrinsicOutputComplete(const CXXMemberCallExpr *callExpr); + + /// Processes the 'FinishedCrossGroupSharing' intrinsic function. + SpirvInstruction * + processIntrinsicFinishedCrossGroupSharing(const CXXMemberCallExpr *callExpr); + /// Processes the 'mad' intrinsic function. SpirvInstruction *processIntrinsicMad(const CallExpr *); @@ -850,6 +889,7 @@ class SpirvEmitter : public ASTConsumer { static hlsl::ShaderModel::Kind getShaderModelKind(StringRef stageName); static spv::ExecutionModel getSpirvShaderStage(hlsl::ShaderModel::Kind smk, bool); + void checkForWaveSizeAttr(const FunctionDecl *decl); /// \brief Handle inline SPIR-V attributes for the entry function. void processInlineSpirvAttributes(const FunctionDecl *entryFunction); @@ -876,6 +916,10 @@ class SpirvEmitter : public ASTConsumer { /// HLSL attributes of the entry point function. 
void processComputeShaderAttributes(const FunctionDecl *entryFunction); + /// \brief Adds necessary execution modes for the node shader based on the + /// HLSL attributes of the entry point function. + void processNodeShaderAttributes(const FunctionDecl *entryFunction); + /// \brief Adds necessary execution modes for the mesh/amplification shader /// based on the HLSL attributes of the entry point function. bool diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index f6ac29f379..88d669d397 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -52,6 +52,11 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUnreachable) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAccessChain) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAtomic) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBarrier) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvIsNodePayloadValid) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvNodePayloadArrayLength) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAllocateNodePayloads) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvEnqueueNodePayloads) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvFinishWritingNodePayload) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBinaryOp) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBitFieldExtract) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBitFieldInsert) @@ -59,6 +64,7 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantBoolean) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantString) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) @@ -469,6 +475,41 @@ SpirvBarrier::SpirvBarrier(SourceLocation loc, spv::Scope memScope, memoryScope(memScope), memorySemantics(memSemantics), executionScope(execScope) {} +SpirvIsNodePayloadValid::SpirvIsNodePayloadValid(QualType 
resultType, + SourceLocation loc, + SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex) + : SpirvInstruction(IK_IsNodePayloadValid, spv::Op::OpIsNodePayloadValidAMDX, + resultType, loc), + payloadArray(payloadArray), nodeIndex(nodeIndex) {} + +SpirvNodePayloadArrayLength::SpirvNodePayloadArrayLength( + QualType resultType, SourceLocation loc, SpirvInstruction *payloadArray) + : SpirvInstruction(IK_NodePayloadArrayLength, + spv::Op::OpNodePayloadArrayLengthAMDX, resultType, loc), + payloadArray(payloadArray) {} + +SpirvAllocateNodePayloads::SpirvAllocateNodePayloads( + QualType resultType, SourceLocation loc, spv::Scope allocationScope, + SpirvInstruction *shaderIndex, SpirvInstruction *recordCount) + : SpirvInstruction(IK_AllocateNodePayloads, + spv::Op::OpAllocateNodePayloadsAMDX, resultType, loc), + allocationScope(allocationScope), shaderIndex(shaderIndex), + recordCount(recordCount) {} + +SpirvEnqueueNodePayloads::SpirvEnqueueNodePayloads(SourceLocation loc, + SpirvInstruction *payload) + : SpirvInstruction(IK_EnqueueNodePayloads, + spv::Op::OpEnqueueNodePayloadsAMDX, QualType(), loc), + payload(payload) {} + +SpirvFinishWritingNodePayload::SpirvFinishWritingNodePayload( + QualType resultType, SourceLocation loc, SpirvInstruction *payload) + : SpirvInstruction(IK_FinishWritingNodePayload, + spv::Op::OpFinishWritingNodePayloadAMDX, resultType, + loc), + payload(payload) {} + SpirvBinaryOp::SpirvBinaryOp(spv::Op opcode, QualType resultType, SourceLocation loc, SpirvInstruction *op1, SpirvInstruction *op2, SourceRange range) @@ -565,7 +606,8 @@ bool SpirvConstant::isSpecConstant() const { return opcode == spv::Op::OpSpecConstant || opcode == spv::Op::OpSpecConstantTrue || opcode == spv::Op::OpSpecConstantFalse || - opcode == spv::Op::OpSpecConstantComposite; + opcode == spv::Op::OpSpecConstantComposite || + opcode == spv::Op::OpSpecConstantStringAMDX; } SpirvConstantBoolean::SpirvConstantBoolean(QualType type, bool val, @@ -620,6 +662,19 @@ 
SpirvConstantComposite::SpirvConstantComposite( type), constituents(constituentsVec.begin(), constituentsVec.end()) {} +SpirvConstantString::SpirvConstantString(llvm::StringRef stringLiteral, + bool isSpecConst) + : SpirvConstant(IK_ConstantString, + isSpecConst ? spv::Op::OpSpecConstantStringAMDX + : spv::Op::OpConstantStringAMDX, + QualType()), + str(stringLiteral) {} + +bool SpirvConstantString::operator==(const SpirvConstantString &that) const { + return opcode == that.opcode && resultType == that.resultType && + str == that.str; +} + SpirvConstantNull::SpirvConstantNull(QualType type) : SpirvConstant(IK_ConstantNull, spv::Op::OpConstantNull, type) {} diff --git a/tools/clang/lib/SPIRV/SpirvType.cpp b/tools/clang/lib/SPIRV/SpirvType.cpp index cabeba4cda..286e6224a4 100644 --- a/tools/clang/lib/SPIRV/SpirvType.cpp +++ b/tools/clang/lib/SPIRV/SpirvType.cpp @@ -167,6 +167,10 @@ bool RuntimeArrayType::operator==(const RuntimeArrayType &that) const { (!stride.hasValue() || stride.getValue() == that.stride.getValue()); } +bool NodePayloadArrayType::operator==(const NodePayloadArrayType &that) const { + return elementType == that.elementType && nodeDecl == that.nodeDecl; +} + bool SpvIntrinsicTypeOperand::operator==( const SpvIntrinsicTypeOperand &that) const { if (isTypeOperand != that.isTypeOperand) diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 2163eef8a3..744b06b8d0 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -17137,6 +17137,10 @@ void DiagnoseNodeEntry(Sema &S, FunctionDecl *FD, llvm::StringRef StageName, DXIL::ShaderKind shaderKind = ShaderModel::KindFromFullName(StageName); if (shaderKind == DXIL::ShaderKind::Node) { NodeLoc = pAttr->getLocation(); + // SPIR-V node shader support is experimental + if (S.getLangOpts().SPIRV) { + S.Diag(NodeLoc, diag::warn_spirv_node_shaders_experimental); + } } if (NodeLoc.isInvalid()) { return; diff --git 
a/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv index 30565394b4..1425137c68 100644 --- a/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv +++ b/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv @@ -161,7 +161,7 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + DEVICE_SCOPE); + + // AllMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + GROUP_SYNC|DEVICE_SCOPE); + + // DeviceMemoryBarrier() -> + Barrier(UAV_MEMORY, + DEVICE_SCOPE); + + // DeviceMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY, + GROUP_SYNC|DEVICE_SCOPE); + + // GroupMemoryBarrier() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_SCOPE); + + // GroupMemoryBarrierWithGroupSync() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_SYNC|GROUP_SCOPE); +} + + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant %uint 2 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant %uint 5 +// CHECK-DAG: [[U72:%[^ ]*]] = OpConstant %uint 72 +// CHECK-DAG: [[U264:%[^ ]*]] = OpConstant %uint 264 +// CHECK-DAG: [[U328:%[^ ]*]] = OpConstant %uint 328 +// CHECK-DAG: [[U4424:%[^ ]*]] = OpConstant %uint 4424 + +// CHECK: OpControlBarrier [[U2]] [[U2]] [[U72]] +// CHECK: OpMemoryBarrier [[U2]] [[U328]] +// CHECK: OpMemoryBarrier [[U5]] [[U4424]] +// CHECK: OpControlBarrier [[U2]] [[U5]] [[U4424]] +// CHECK: OpMemoryBarrier [[U5]] [[U72]] +// CHECK: OpControlBarrier [[U2]] [[U5]] [[U72]] +// CHECK: OpMemoryBarrier [[U2]] [[U264]] +// CHECK: OpControlBarrier [[U2]] [[U2]] [[U264]] diff --git a/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl b/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl new file mode 100644 index 0000000000..215acf7bfd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl @@ -0,0 +1,213 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 
-fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Barrier is called with each node record and UAV type + +struct RECORD +{ + uint value; +}; + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U256:%[^ ]*]] = OpConstant [[UINT]] 256 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4424:%[^ ]*]] = OpConstant [[UINT]] 4424 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,1)] +[NodeDispatchGrid(256,1,1)] +void node01(DispatchNodeInputRecord input) +{ + Barrier(input, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,1)] +void node02([MaxRecords(8)] GroupNodeInputRecords input) +{ + Barrier(input, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +[Shader("node")] +[NodeLaunch("thread")] +void node03(RWThreadNodeInputRecord input) +{ + Barrier(input, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,1)] +void node04([MaxRecords(6)] RWGroupNodeInputRecords input) +{ + Barrier(input, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,1)] +[NodeDispatchGrid(256,1,1)] +void node05([MaxRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = outputs.GetThreadNodeOutputRecords(1); + Barrier(outrec, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("thread")] +void node06([MaxRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = 
outputs.GetThreadNodeOutputRecords(3); + Barrier(outrec, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,3)] +void node07([MaxRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(1); + Barrier(outrec, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node08([MaxRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(4); + Barrier(outrec, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +RWBuffer obj09; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node09() +{ + Barrier(obj09, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture1D obj10; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node10() +{ + Barrier(obj10, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture1DArray obj11; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node11() +{ + Barrier(obj11, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture2D obj12; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node12() +{ + Barrier(obj12, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture2DArray obj13; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node13() +{ + Barrier(obj13, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture3D obj14; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node14() +{ + Barrier(obj14, 5); +} + +// CHECK: 
OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWStructuredBuffer obj15; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node15() +{ + Barrier(obj15, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWByteAddressBuffer obj16; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node16() +{ + Barrier(obj16, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +AppendStructuredBuffer obj17; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node17() +{ + Barrier(obj17, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 diff --git a/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl new file mode 100644 index 0000000000..a3c369b252 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Broadcasting launch node with no input + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(3,4,5)] +[NumThreads(6,7,1)] +[NodeIsProgramEntry] +void node070_broadcasting_noinput() +{ +} + +// CHECK: OpReturn + diff --git a/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl b/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl new file mode 100644 index 0000000000..14e899da02 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s + +// Coalescing launch node with thread group defined in the shader + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node008_coalescing_numthreads_shader() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// 
CHECK-DAG: OpExecutionMode [[SHADER]] CoalescingAMDX +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl b/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl new file mode 100644 index 0000000000..302c8ea698 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Broadcasting launch node with dispatch grid defined in shader + +struct INPUT_NOGRID +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node001_dispatchgrid_shader(DispatchNodeInputRecord input) +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK-DAG: OpExecutionModeId [[SHADER]] StaticNumWorkgroupsAMDX [[U2:%[0-9A-Za-z_]*]] +// CHECK-SAME: [[U3:%[^ ]*]] [[U2]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U3]] = OpConstant [[UINT]] 3 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl new file mode 100644 index 0000000000..fa16429a1b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Coalescing launch node declares EmptyNodeInput + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("coalescing")] +[NodeIsProgramEntry] +[NumThreads(2,1,1)] +void emptynodeinput(EmptyNodeInput input) +{ + // input.Count should 
always return 1 here, so there is + // an opportunity for an optimization. + buf0[0] = input.Count(); +} + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 R32ui +// CHECK-DAG: [[IMGPTR:%[^ ]*]] = OpTypePointer UniformConstant [[IMG]] +// CHECK-DAG: [[BUF:%[^ ]*]] = OpVariable [[IMGPTR]] UniformConstant + +// CHECK: [[COUNT:%[^ ]*]] = OpNodePayloadArrayLengthAMDX [[UINT]] +// CHECK: [[IMAGE:%[^ ]*]] = OpLoad [[IMG]] [[BUF]] +// CHECK: OpImageWrite [[IMAGE]] [[U0]] [[COUNT]] None diff --git a/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl b/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl new file mode 100644 index 0000000000..8e1ce56307 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// FinishedCrossGroupSharing() is called with RWDispatchNodeInputRecord + +RWBuffer buf0; + +struct [NodeTrackRWInputSharing] INPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node037_finishedcrossgroupsharing(RWDispatchNodeInputRecord input) +{ + bool b = input.FinishedCrossGroupSharing(); + buf0[0] = 0 ? 
b : 1; +} + +// CHECK: OpName [[INPUT:%[^ ]*]] "input" +// CHECK: OpDecorate [[STRUCT:%[^ ]*]] TrackFinishWritingAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[STRUCT]] = OpTypeStruct [[UINT]] +// CHECK: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: [[INPUT]] = OpFunctionParameter [[PTR]] +// CHECK: OpFinishWritingNodePayloadAMDX [[BOOL]] [[INPUT]] diff --git a/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl new file mode 100644 index 0000000000..a3af668c46 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl @@ -0,0 +1,25 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// GetInputRecordCount() called with NodeInputRecordArray + +RWBuffer buf0; + +struct INPUT_RECORD +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node014_getinputrecordcount([MaxRecords(256)] GroupNodeInputRecords inputs) +{ + uint numRecords = inputs.Count(); + buf0[0] = numRecords; +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: OpNodePayloadArrayLengthAMDX [[UINT]] diff --git a/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl new file mode 100644 index 0000000000..d029bd20bb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Multiple calls to Get*NodeOutputRecords(array) + +struct RECORD { + int i; + float3 foo; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] 
+[NumThreads(64, 1, 1)] +[NodeDispatchGrid(8, 1, 1)] +void node150_a(NodeOutput output) +{ + GroupNodeOutputRecords outRec1 = output.GetGroupNodeOutputRecords(1); + GroupNodeOutputRecords outRec2 = output.GetGroupNodeOutputRecords(4); + outRec1.OutputComplete(); + outRec2.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(64, 1, 1)] +[NodeDispatchGrid(8, 1, 1)] +void node150_b(NodeOutput output) +{ + ThreadNodeOutputRecords outRec1 = output.GetThreadNodeOutputRecords(5); + ThreadNodeOutputRecords outRec2 = output.GetThreadNodeOutputRecords(1); + outRec1.OutputComplete(); + outRec1 = outRec2; + outRec1.OutputComplete(); +} + +// CHECK: OpDecorateId [[ARR_A:%[^ ]*]] PayloadNodeNameAMDX [[STR:%[0-9A-Za-z_]*]] +// CHECK: OpDecorateId [[ARR_B:%[^ ]*]] PayloadNodeNameAMDX [[STR]] + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[STR]] = OpConstantStringAMDX "output" +// CHECK-DAG: [[ARR_A]] = OpTypeNodePayloadArrayAMDX +// CHECK-DAG: [[ARR_B]] = OpTypeNodePayloadArrayAMDX +// CHECK-DAG: [[FPTR_A:%[^ ]*]] = OpTypePointer Function [[ARR_A]] +// CHECK-DAG: [[NPTR_A:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR_A]] +// CHECK-DAG: [[FPTR_B:%[^ ]*]] = OpTypePointer Function [[ARR_B]] +// CHECK-DAG: [[NPTR_B:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR_B]] + +// checking for OpFunctionCall skips over the entry function wrapper and +// thereby avoids matching wrapper variables +// CHECK: OpFunctionCall +// CHECK: [[OUT1:%[^ ]*]] = OpVariable [[FPTR_A]] +// CHECK: [[OUT2:%[^ ]*]] = OpVariable [[FPTR_A]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_A]] [[U2]] [[U1]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_A]] [[PAY]] +// CHECK: OpStore [[OUT1]] 
[[VAL]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_A]] [[U2]] [[U4]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_A]] [[PAY]] +// CHECK: OpStore [[OUT2]] [[VAL]] +// CHECK: OpFunctionCall +// CHECK: [[OUT1:%[^ ]*]] = OpVariable [[FPTR_B]] +// CHECK: [[OUT2:%[^ ]*]] = OpVariable [[FPTR_B]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_B]] [[U4]] [[U5]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_B]] [[PAY]] +// CHECK: OpStore [[OUT1]] [[VAL]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_B]] [[U4]] [[U1]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_B]] [[PAY]] +// CHECK: OpStore [[OUT2]] [[VAL]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl new file mode 100644 index 0000000000..f981282748 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -spirv -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// GetRemainingRecursionLevels() called + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeDispatchGrid(32,2,2)] +[NodeMaxRecursionDepth(16)] +void node133_getremainingrecursionlevels() +{ + uint remaining = GetRemainingRecursionLevels(); + // Use resource as a way of preventing DCE + buf0[0] = remaining; +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[^ ]*]] "node133_getremainingrecursionlevels" [[RRL:%[^ ]*]] +// CHECK: OpExecutionModeId [[SHADER]] MaxNodeRecursionAMDX [[U16:%[^ ]*]] +// CHECK: OpDecorate [[RRL]] BuiltIn RemainingRecursionLevelsAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U16]] = OpConstant [[UINT]] 16 +// CHECK: [[PTR:%[^ ]*]] = OpTypePointer Input [[UINT]] +// CHECK: [[RRL]] = OpVariable [[PTR]] Input +// CHECK: OpLoad [[UINT]] [[RRL]] diff --git a/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl 
b/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl new file mode 100644 index 0000000000..cf1638d75c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Check that a barrier can be used on a groupshared object from a +// work graph node + +groupshared uint Test; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode() +{ + Test = 1; + AllMemoryBarrierWithGroupSync(); +} + +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl b/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl new file mode 100644 index 0000000000..81fc0e39a2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that group shared memory is allowed from a work graph node + +struct Record +{ + uint index; +}; + +groupshared uint testLds[512]; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1,1,1)] +void firstNode(DispatchNodeInputRecord inputData) +{ + testLds[inputData.Get().index] = 99; +} + +// CHECK: OpReturn + diff --git a/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl new file mode 100644 index 0000000000..d6a2ea759e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Node with EmptyNodeOutput calls GroupIncrementOutputCount + + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void 
node028_incrementoutputcount([MaxRecords(32)] EmptyNodeOutput empty) +{ + empty.GroupIncrementOutputCount(1); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[STRUCT:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK: OpAllocateNodePayloadsAMDX [[PTR]] [[U2]] [[U1]] [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl new file mode 100644 index 0000000000..6cd984fe69 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Node with EmptyNodeOutput calls ThreadIncrementOutputCount + + +[Shader("node")] +[NodeLaunch("thread")] +[NodeIsProgramEntry] +void node028_incrementoutputcount([MaxRecords(32)] EmptyNodeOutput empty) +{ + empty.ThreadIncrementOutputCount(1); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[STRUCT:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: OpConstantStringAMDX "empty" +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK: OpAllocateNodePayloadsAMDX [[PTR]] [[U4]] [[U1]] [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl new file mode 100644 index 0000000000..bae3f759b8 --- /dev/null +++ 
b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that SV_DispatchGrid supports array + +struct RECORD +{ + uint a[3] : SV_DispatchGrid; + uint b[3]; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[numthreads(4,4,4)] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = input.Get().b; +} + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpMemberDecorate [[RECORD]] 0 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U3:%[^ ]*]] = OpConstant %uint 3 +// CHECK: [[ARRAY:%[^ ]*]] = OpTypeArray [[UINT]] [[U3]] +// CHECK: [[RECORD]] = OpTypeStruct [[ARRAY]] [[ARRAY]] diff --git a/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl new file mode 100644 index 0000000000..aee7e0d014 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that SV_DispatchGrid in nested struct is recognized + +struct INNER { + uint c; + uint3 grid : SV_DispatchGrid; +}; + +struct RECORD +{ + uint a; + INNER b; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[numthreads(4,4,4)] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = input.Get().b.grid.x; +} + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpName [[INNER:%[^ ]*]] "INNER" +// CHECK: OpMemberDecorate [[INNER]] 1 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[VECTOR:%[^ ]*]] = OpTypeVector %uint 3 +// CHECK: [[INNER]] = OpTypeStruct 
[[UINT]] [[VECTOR]] +// CHECK: [[RECORD]] = OpTypeStruct [[UINT]] [[INNER]] diff --git a/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl b/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl new file mode 100644 index 0000000000..e2440a31c0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl @@ -0,0 +1,30 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Broadcasting launch node with dispatch grid defined in input +// and max dispatch grid defined in the shader + +struct INPUT_GRID +{ + uint3 DispatchGrid : SV_DispatchGrid; + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(2,3,4)] +[NumThreads(1024,1,1)] +void node002_dispatchgrid_input_maxdispatchgrid_shader(DispatchNodeInputRecord input) +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[^ ]*]] "node002_dispatchgrid_input_maxdispatchgrid_shader" +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK-DAG: OpExecutionModeId [[SHADER]] MaxNumWorkgroupsAMDX [[U2:%[^ ]*]] [[U3:%[^ ]*]] [[U4:%[0-9A-Za-z_]*]] +// CHECK: OpMemberDecorate %{{[^ ]*}} 0 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U3]] = OpConstant [[UINT]] 3 +// CHECK-DAG: [[U4]] = OpConstant [[UINT]] 4 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl b/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl new file mode 100644 index 0000000000..7d8449afab --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl @@ -0,0 +1,45 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Test referencing params with MaxOutputRecordsSharedWith + +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] 
+[NodeLaunch("thread")] +void BackwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecords(5)] NodeOutput Output1, + [MaxRecordsSharedWith(Output1)] NodeOutput Output2) +{ +} + +// CHECK: OpDecorateId [[TYPE1:%[^ ]*]] PayloadNodeNameAMDX [[STR1:%[^ ]*]] +// CHECK: OpDecorateId [[TYPE1]] NodeMaxPayloadsAMDX [[U5:%[^ ]*]] +// CHECK: OpDecorateId [[TYPE2:%[^ ]*]] PayloadNodeNameAMDX [[STR2:%[^ ]*]] +// CHECK: OpDecorateId [[TYPE2]] NodeSharesPayloadLimitsWithAMDX [[TYPE1]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U5]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[STR1]] = OpConstantStringAMDX "Output1" +// CHECK-DAG: [[STR2]] = OpConstantStringAMDX "Output2" + +#if 0 +// copied from DXIL test but doesn't seem to conform to spec +[Shader("node")] +[NodeLaunch("thread")] +void ForwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecordsSharedWith(Output2)] NodeOutput Output1, + [MaxRecords(5)] NodeOutput Output2) +{ +} +#endif diff --git a/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl new file mode 100644 index 0000000000..ac2474b29b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl @@ -0,0 +1,150 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Read access to members of node input/output records + +RWBuffer buf0; + +struct RECORD +{ + uint a; + uint b; + uint c; +}; + +// CHECK: OpName [[BUF0:%[^ ]*]] "buf0" +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[U16:%[^ ]*]] = OpConstant [[UINT]] 16 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// 
CHECK-DAG: [[S2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 +// CHECK-DAG: [[TBI:%[^ ]*]] = OpTypeImage [[UINT]] Buffer + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(16,1,1)] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().a; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(16,1,1)] +void node02(RWDispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S1]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024, 1, 1)] +[NodeLaunch("coalescing")] +void node03([MaxRecords(3)] GroupNodeInputRecords input) +{ + buf0[0] = input[1].c; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U1]] [[S2]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(4)] RWGroupNodeInputRecords input) +{ + buf0[0] = input[2].c; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U2]] [[S2]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + 
+[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node05(NodeOutput output) +{ + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(1); + buf0[0] = outrec.Get().a; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR1]] [[S0]] +// CHECK-DAG: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR2]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node06(NodeOutput output) +{ + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(7); + buf0[0] = outrec[2].b; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U7]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U2]] [[S1]] +// CHECK-DAG: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node07(NodeOutput output) +{ + GroupNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(1); + buf0[0] = outrec.Get().c; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U1]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR1]] [[S2]] +// CHECK-DAG: [[VAL:%[^ ]*]] = 
OpLoad [[UINT]] [[PTR2]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl new file mode 100644 index 0000000000..5f7d434bd2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl @@ -0,0 +1,193 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 -enable-16bit-types %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Read access of members of input/output record with different type +// sizes - we check the function specializations generated + +RWBuffer buf0; + +struct RECORD +{ + half h; + float f; + double d; + bool b; + uint16_t i16; + int i; + int64_t i64; + uint64_t u64; +}; + +// CHECK: OpName [[BUF0:%[^ ]*]] "buf0" +// CHECK-DAG: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK-DAG: OpMemberName [[RECORD]] 0 "h" +// CHECK-DAG: OpMemberName [[RECORD]] 1 "f" +// CHECK-DAG: OpMemberName [[RECORD]] 2 "d" +// CHECK-DAG: OpMemberName [[RECORD]] 3 "b" +// CHECK-DAG: OpMemberName [[RECORD]] 4 "i16" +// CHECK-DAG: OpMemberName [[RECORD]] 5 "i" +// CHECK-DAG: OpMemberName [[RECORD]] 6 "i64" +// CHECK-DAG: OpMemberName [[RECORD]] 7 "u64" + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[S2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[S3:%[^ ]*]] = OpConstant [[INT]] 3 +// CHECK-DAG: [[S4:%[^ ]*]] = OpConstant [[INT]] 4 +// CHECK-DAG: [[S5:%[^ ]*]] = OpConstant [[INT]] 5 +// CHECK-DAG: [[S6:%[^ ]*]] = OpConstant [[INT]] 6 +// CHECK-DAG: [[S7:%[^ ]*]] = OpConstant [[INT]] 7 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: 
[[TBI:%[^ ]*]] = OpTypeImage [[UINT]] Buffer + +// CHECK-DAG: [[HALF:%[^ ]*]] = OpTypeFloat 16 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64 +// CHECK-DAG: [[USHORT:%[^ ]*]] = OpTypeInt 16 0 +// CHECK-DAG: [[LONG:%[^ ]*]] = OpTypeInt 64 1 +// CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0 +// CHECK: [[RECORD]] = OpTypeStruct [[HALF]] [[FLOAT]] [[DOUBLE]] [[UINT]] [[USHORT]] [[INT]] [[LONG]] [[ULONG]] +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().h; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[HALF]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node02(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().f; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S1]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[FLOAT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node03(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().d; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S2]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[DOUBLE]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: 
OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node04(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S3]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpINotEqual [[BOOL]] [[VAL0]] [[U0]] +// CHECK: [[VAL2:%[^ ]*]] = OpSelect [[UINT]] [[VAL1]] [[U1]] [[U0]] +// CHECK: [[VAL3:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL3]] [[U0]] [[VAL2]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node05(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i16; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S4]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[USHORT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpUConvert [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node06(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S5]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[INT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpBitcast [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node07(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i64; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S6]] +// CHECK: [[VAL0:%[^ 
]*]] = OpLoad [[LONG]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpSConvert [[INT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpBitcast [[UINT]] [[VAL1]] +// CHECK: [[VAL3:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL3]] [[U0]] [[VAL2]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node08(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().u64; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S7]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[ULONG]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpUConvert [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl new file mode 100644 index 0000000000..33fc2dd9ff --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl @@ -0,0 +1,88 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Writes to members of the various read-write node records + +struct RECORD +{ + uint a; + uint b; +}; + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 +// CHECK-DAG: [[U8:%[^ ]*]] = OpConstant [[UINT]] 8 +// CHECK-DAG: [[U9:%[^ ]*]] = OpConstant [[UINT]] 9 +// CHECK-DAG: 
[[U11:%[^ ]*]] = OpConstant [[UINT]] 11 + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(RWDispatchNodeInputRecord input1) +{ + input1.Get().a = 5; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: OpStore [[PTR]] [[U5]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(2,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)] RWGroupNodeInputRecords input2) +{ + input2[1].b = 7; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U1]] [[S1]] +// CHECK: OpStore [[PTR]] [[U7]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(3,1,1)] +[NodeLaunch("coalescing")] +void node03(NodeOutput output) +{ + ThreadNodeOutputRecords output3 = output.GetThreadNodeOutputRecords(2); + output3.Get().b = 9; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U2]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[VAL]] +// CHECK: [[PTR0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR0]] [[S1]] +// CHECK: OpStore [[PTR1]] [[U9]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(4,1,1)] +[NodeLaunch("coalescing")] +void node04(NodeOutput output) +{ + GroupNodeOutputRecords output4 = output.GetGroupNodeOutputRecords(8); + output4[0].a = 11; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U8]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[VAL]] +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] [[S0]] +// CHECK: OpStore [[PTR]] [[U11]] +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl new file mode 100644 
index 0000000000..d875f27d4e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl @@ -0,0 +1,123 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments +// ================================================================== +// Test writing to matrix members of node records +// ================================================================== + +// CHECK: OpName [[NODE01:%[^ ]*]] "node01" +// CHECK: OpName [[INPUT1:%[^ ]*]] "input1" +// CHECK: OpName [[NODE02:%[^ ]*]] "node02" +// CHECK: OpName [[INPUT2:%[^ ]*]] "input2" +// CHECK: OpName [[NODE03:%[^ ]*]] "node03" +// CHECK: OpName [[OUTPUT3:%[^ ]*]] "output3" +// CHECK: OpName [[NODE04:%[^ ]*]] "node04" +// CHECK: OpName [[OUTPUTS4:%[^ ]*]] "outputs4" + +struct RECORD +{ + row_major float2x2 m0; + row_major float2x2 m1; + column_major float2x2 m2; +}; + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U64:%[^ ]*]] = OpConstant [[UINT]] 64 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F111:%[^ ]*]] = OpConstant [[FLOAT]] 111 +// CHECK-DAG: [[V2FLOAT:%[^ ]*]] = OpTypeVector [[FLOAT]] 2 +// CHECK-DAG: [[C1:%[^ ]*]] = OpConstantComposite [[V2FLOAT]] [[F111]] [[F111]] +// CHECK-DAG: [[MAT2V2FLOAT:[^ ]*]] = OpTypeMatrix [[V2FLOAT]] 2 +// CHECK-DAG: [[M1:%[^ ]*]] = OpConstantComposite [[MAT2V2FLOAT]] [[C1]] [[C1]] +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[I0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[F222:%[^ ]*]] = OpConstant [[FLOAT]] 222 +// CHECK-DAG: [[C2:%[^ ]*]] = OpConstantComposite [[V2FLOAT]] [[F222]] [[F222]] +// CHECK-DAG: [[M2:%[^ ]*]] = OpConstantComposite [[MAT2V2FLOAT]] [[C2]] 
[[C2]] +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(RWDispatchNodeInputRecord input1) +{ + // CHECK: [[NODE01]] = OpFunction + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad [[MAT2V2FLOAT]] [[P2]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P3:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P3]] [[VAL]] + // CHECK: OpFunctionEnd + input1.Get().m1 = 111; + input1.Get().m2 = input1.Get().m0; +} + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)] RWGroupNodeInputRecords input2) +{ + // CHECK: [[NODE02]] = OpFunction + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U1]] [[I0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad [[MAT2V2FLOAT]] [[P2]] + // CHECK: [[P3:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U1]] [[I2]] + // CHECK: OpStore [[P3]] [[VAL]] + // CHECK: OpFunctionEnd + input2[0].m1 = 111; + input2[1].m2 = input2[1].m0; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeDispatchGrid(64,1,1)] +[NodeLaunch("broadcasting")] +void node03(NodeOutput output3) +{ + // CHECK: [[NODE03]] = OpFunction + // CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] + // CHECK: OpStore [[OUTREC3:%[^ ]*]] [[VAL]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC3]] [[U0]] + // CHECK: 
[[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC3]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P2]] [[M2]] + // CHECK: OpFunctionEnd + ThreadNodeOutputRecords outrec = output3.GetThreadNodeOutputRecords(1); + outrec.Get().m1 = 111; + outrec.Get().m2 = 222; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(5)] NodeOutput outputs4) +{ + // CHECK: [[NODE04]] = OpFunction + // CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U1]] [[U0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] + // CHECK: OpStore [[OUTREC4:%[^ ]*]] [[VAL]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC4]] [[U0]] + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC4]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P2]] [[M2]] + // CHECK: OpFunctionEnd + GroupNodeOutputRecords outrec = outputs4.GetGroupNodeOutputRecords(1); + outrec.Get().m1 = 111; + outrec.Get().m2 = 222; +} diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl new file mode 100644 index 0000000000..ec95c3d758 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl @@ -0,0 +1,150 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 -enable-16bit-types %s | FileCheck %s + +// Writes to node record members of various types + + +struct RECORD +{ + half h; + float f; + double d; + bool b; + int16_t i16; + uint16_t u16; + int i; + int64_t i64; + uint64_t u64; + float3 f3; + int ia[7]; +}; + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpMemberName [[RECORD]] 0 "h" +// CHECK: OpMemberName [[RECORD]] 1 "f" +// CHECK: 
OpMemberName [[RECORD]] 2 "d" +// CHECK: OpMemberName [[RECORD]] 3 "b" +// CHECK: OpMemberName [[RECORD]] 4 "i16" +// CHECK: OpMemberName [[RECORD]] 5 "u16" +// CHECK: OpMemberName [[RECORD]] 6 "i" +// CHECK: OpMemberName [[RECORD]] 7 "i64" +// CHECK: OpMemberName [[RECORD]] 8 "u64" +// CHECK: OpMemberName [[RECORD]] 9 "f3" +// CHECK: OpMemberName [[RECORD]] 10 "ia" + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[HALF:%[^ ]*]] = OpTypeFloat 16 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64 +// CHECK-DAG: [[SHORT:%[^ ]*]] = OpTypeInt 16 1 +// CHECK-DAG: [[USHORT:%[^ ]*]] = OpTypeInt 16 0 +// CHECK-DAG: [[LONG:%[^ ]*]] = OpTypeInt 64 1 +// CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0 +// CHECK-DAG: [[V3FLOAT:%[^ ]*]] = OpTypeVector [[FLOAT]] 3 + +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[HALF_0X1_8P_1:%[^ ]*]] = OpConstant [[HALF]] 0x1.8p+1 +// CHECK-DAG: [[I0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[FN5:%[^ ]*]] = OpConstant [[FLOAT]] -5 +// CHECK-DAG: [[I1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[D7:%[^ ]*]] = OpConstant [[DOUBLE]] 7 +// CHECK-DAG: [[I2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[I3:%[^ ]*]] = OpConstant [[INT]] 3 +// CHECK-DAG: [[S11:%[^ ]*]] = OpConstant [[SHORT]] 11 +// CHECK-DAG: [[I4:%[^ ]*]] = OpConstant [[INT]] 4 +// CHECK-DAG: [[US13:%[^ ]*]] = OpConstant [[USHORT]] 13 +// CHECK-DAG: [[I5:%[^ ]*]] = OpConstant [[INT]] 5 +// CHECK-DAG: [[I17:%[^ ]*]] = OpConstant [[INT]] 17 +// CHECK-DAG: [[I6:%[^ ]*]] = OpConstant [[INT]] 6 +// CHECK-DAG: [[LN19:%[^ ]*]] = OpConstant [[LONG]] -19 +// CHECK-DAG: [[I7:%[^ ]*]] = OpConstant [[INT]] 7 +// CHECK-DAG: [[UL21:%[^ ]*]] = OpConstant [[ULONG]] 21 +// CHECK-DAG: [[I8:%[^ ]*]] = OpConstant [[INT]] 8 +// CHECK-DAG: [[F23:%[^ ]*]] = OpConstant [[FLOAT]] 23 +// CHECK-DAG: [[I9:%[^ ]*]] = 
OpConstant [[INT]] 9 +// CHECK-DAG: [[I29:%[^ ]*]] = OpConstant [[INT]] 29 +// CHECK-DAG: [[I10:%[^ ]*]] = OpConstant [[INT]] 10 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 + +// CHECK-DAG: [[AI7:%[^ ]*]] = OpTypeArray [[INT]] [[U7]] +// CHECK-DAG: [[RECORD]] = OpTypeStruct [[HALF]] [[FLOAT]] [[DOUBLE]] [[UINT]] [[SHORT]] [[USHORT]] [[INT]] [[LONG]] [[ULONG]] [[V3FLOAT]] [[AI7]] +// CHECK-DAG: [[RAR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX %RECORD +// CHECK-DAG: [[RARP:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[RAR]] +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[HALFP:%[^ ]*]] = OpTypePointer Function [[HALF]] +// CHECK-DAG: [[FLOATP:%[^ ]*]] = OpTypePointer Function [[FLOAT]] +// CHECK-DAG: [[DOUBLEP:%[^ ]*]] = OpTypePointer Function [[DOUBLE]] +// CHECK-DAG: [[UINTP:%[^ ]*]] = OpTypePointer Function [[UINT]] +// CHECK-DAG: [[SHORTP:%[^ ]*]] = OpTypePointer Function [[SHORT]] +// CHECK-DAG: [[USHORTP:%[^ ]*]] = OpTypePointer Function [[USHORT]] +// CHECK-DAG: [[INTP:%[^ ]*]] = OpTypePointer Function [[INT]] +// CHECK-DAG: [[LONGP:%[^ ]*]] = OpTypePointer Function [[LONG]] +// CHECK-DAG: [[ULONGP:%[^ ]*]] = OpTypePointer Function [[ULONG]] + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(512,1,1)] +void node125(NodeOutput output) +{ + GroupNodeOutputRecords output01 = output.GetGroupNodeOutputRecords(1); + // CHECK: OpAllocateNodePayloadsAMDX [[RARP]] [[U2]] [[U1]] [[U0]] + + output01.Get().h = 3.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[HALFP]] + // CHECK-SAME: [[I0]] + // CHECK: OpStore [[PTR]] [[HALF_0X1_8P_1]] + + output01.Get().f = -5.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[FLOATP]] + // CHECK-SAME: [[I1]] + // CHECK: OpStore [[PTR]] [[FN5]] + + output01.Get().d = 7.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[DOUBLEP]] + // CHECK-SAME: [[I2]] + // CHECK: OpStore [[PTR]] [[D7]] + + output01.Get().b = true; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[UINTP]] + // 
CHECK-SAME: [[I3]] + // CHECK: OpStore [[PTR]] [[U1]] + + output01.Get().i16 = 11; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[SHORTP]] + // CHECK-SAME: [[I4]] + // CHECK: OpStore [[PTR]] [[S11]] + + output01.Get().u16 = 13; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[USHORTP]] + // CHECK-SAME: [[I5]] + // CHECK: OpStore [[PTR]] [[US13]] + + output01.Get().i = 17; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[INTP]] + // CHECK-SAME: [[I6]] + // CHECK: OpStore [[PTR]] [[I17]] + + output01.Get().i64 = -19; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[LONGP]] + // CHECK-SAME: [[I7]] + // CHECK: OpStore [[PTR]] [[LN19]] + + output01.Get().u64 = 21; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[ULONGP]] + // CHECK-SAME: [[I8]] + // CHECK: OpStore [[PTR]] [[UL21]] + + output01.Get().f3.y = 23; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[FLOATP]] + // CHECK-SAME: [[I9]] + // CHECK-SAME: [[I1]] + // CHECK: OpStore [[PTR]] [[F23]] + + output01.Get().ia[5] = 29; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[INTP]] + // CHECK-SAME: [[I10]] + // CHECK-SAME: [[I5]] + // CHECK: OpStore [[PTR]] [[I29]] +} diff --git a/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl b/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl new file mode 100644 index 0000000000..4d1726abb2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl @@ -0,0 +1,88 @@ +// RUN: %dxc -spirv -T lib_6_9 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// XFAIL: * +// disabled until mesh nodes are implemented + +// Test loading of node input and funneling into mesh outputs +// Essentially an end-to-end mesh node test. 
+ + +RWBuffer buf0; + +#define MAX_VERT 32 +#define MAX_PRIM 16 + +struct MeshPerVertex { + float4 position : SV_Position; + float color[4] : COLOR; +}; + +struct MeshPerPrimitive { + float normal : NORMAL; + float malnor : MALNOR; + float alnorm : ALNORM; + float ormaln : ORMALN; + int layer[6] : LAYER; +}; + +struct MeshPayload { + float normal; + float malnor; + float alnorm; + float ormaln; + int layer[6]; +}; + +groupshared float gsMem[MAX_PRIM]; + +[Shader("node")] +[NodeLaunch("mesh")] +[outputtopology("triangle")] +[numthreads(128, 1, 1)] +[NodeDispatchGrid(64,1,1)] +void node_setmeshoutputcounts(DispatchNodeInputRecord mpl, + out indices uint3 primIndices[MAX_PRIM], + out vertices MeshPerVertex verts[MAX_VERT], + out primitives MeshPerPrimitive prims[MAX_PRIM], + in uint tig : SV_GroupIndex) { + SetMeshOutputCounts(32, 16); + + // create mpl + + MeshPerVertex ov; + ov.position = float4(14.0,15.0,16.0,17.0); + ov.color[0] = 14.0; + ov.color[1] = 15.0; + ov.color[2] = 16.0; + ov.color[3] = 17.0; + + if (tig % 3) { + primIndices[tig / 3] = uint3(tig, tig + 1, tig + 2); + + MeshPerPrimitive op; + op.normal = mpl.Get().normal; + op.malnor = gsMem[tig / 3 + 1]; + op.alnorm = mpl.Get().alnorm; + op.ormaln = mpl.Get().ormaln; + op.layer[0] = mpl.Get().layer[0]; + op.layer[1] = mpl.Get().layer[1]; + op.layer[2] = mpl.Get().layer[2]; + op.layer[3] = mpl.Get().layer[3]; + op.layer[4] = mpl.Get().layer[4]; + op.layer[5] = mpl.Get().layer[5]; + + gsMem[tig / 3] = op.normal; + prims[tig / 3] = op; + } + verts[tig] = ov; +} + +// CHECK: OpEntryPoint MeshExt [[ENTRY:%[^ ]*]] +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputVertices 32 +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputPrimitivesNV 16 +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputTrianglesNV +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U16:%[^ ]*]] = OpConstant [[UINT]] 16 +// CHECK-DAG: [[U32:%[^ ]*]] = OpConstant [[UINT]] 32 +// CHECK: [[ENTRY]] = OpFunction +// CHECK: OpSetMeshOutputsEXT 
[[U32]] [[U16]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl b/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl new file mode 100644 index 0000000000..17db15e7db --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// OutputComplete() is called with NodeOutput + +struct OUTPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +void outputcomplete([MaxRecords(256)] NodeOutput output) +{ + ThreadNodeOutputRecords outputrecords = output.GetThreadNodeOutputRecords(1); + // ... + outputrecords.OutputComplete(); +} + +// CHECK: OpName [[RECORDS:%[^ ]*]] "outputrecords" +// CHECK: OpDecorateId [[ARR:%[^ ]*]] PayloadNodeNameAMDX [[STR:%[0-9A-Za-z_]*]] +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[REC:%[^ ]*]] = OpTypeStruct [[UINT]] +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[REC]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: [[U4:[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK: [[V0:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[PTR]] [[U4]] [[U1]] [[U0]] +// CHECK: [[V1:%[^ ]*]] = OpLoad [[ARR]] [[V0]] +// CHECK: OpStore [[RECORDS]] [[V1]] +// CHECK: OpEnqueueNodePayloadsAMDX [[RECORDS]] diff --git a/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl new file mode 100644 index 0000000000..08a103cf5e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// NodeOutputIsValid() is called with EmptyNodeOutput + +RWBuffer buf0; + +[Shader("node")] 
+[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node131_nodeoutputisvalid_emptynodeoutput(EmptyNodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: OpIsNodePayloadValidAMDX [[BOOL]] %{{[^ ]*}} [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl new file mode 100644 index 0000000000..40e3a74fcb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// IsValid() is invoked on NodeOutput + +RWBuffer buf0; + +struct RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node129_nodeoutputisvalid_nodeoutput(NodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: OpIsNodePayloadValidAMDX [[BOOL]] %{{[^ ]*}} [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl new file mode 100644 index 0000000000..953288929d --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s + +// Renamed node, unnamed index defaults to 0 + +struct RECORD { + uint i; +}; + +[Shader("node")] +[NodeLaunch("thread")] +[NodeID("new_node_name")] +[NodeIsProgramEntry] +void node017_renamed_node([NodeID("output_node_name", 2)] NodeOutput r) +{ + r.GetThreadNodeOutputRecords(1); +} + +// CHECK: OpEntryPoint GLCompute %{{[^ ]*}} "node017_renamed_node" +// CHECK-DAG: OpDecorateId [[TYPE:%[^ ]*]] PayloadNodeNameAMDX 
[[STR:%[0-9A-Za-z_]*]] +// CHECK-DAG: OpDecorateId [[TYPE]] PayloadNodeBaseIndexAMDX [[U2:%[0-9A-Za-z_]*]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[STR]] = OpConstantStringAMDX "output_node_name" +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 diff --git a/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl new file mode 100644 index 0000000000..c439bef017 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl @@ -0,0 +1,42 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Check that the NodeShareInputOf metadata entry is populated correctly + +struct entryRecord +{ + int data0; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode")] +void secondNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode", 3)] +void thirdNode(DispatchNodeInputRecord inputData) +{ } + + +// CHECK: OpEntryPoint GLCompute %firstNode "firstNode" +// CHECK: OpEntryPoint GLCompute %secondNode "secondNode" +// CHECK: OpEntryPoint GLCompute %thirdNode "thirdNode" +// CHECK-NOT: OpExecutionModeId %firstNode SharesInputWithAMDX +// CHECK: OpExecutionModeId %secondNode SharesInputWithAMDX [[STR:%[^ ]*]] [[U0:%[^ ]*]] +// CHECK: OpExecutionModeId %thirdNode SharesInputWithAMDX [[STR]] [[U3:%[^ ]*]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3 diff --git a/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl b/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl new file mode 100644 index 0000000000..ca3c14b8da --- 
/dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl @@ -0,0 +1,141 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +struct RECORD1 +{ + uint value; + uint value2; +}; + +// CHECK: OpEntryPoint GLCompute [[NODE10:%[^ ]*]] "node_1_0" +// CHECK: OpEntryPoint GLCompute [[NODE11:%[^ ]*]] "node_1_1" +// CHECK: OpEntryPoint GLCompute [[NODE12:%[^ ]*]] "node_1_2" +// CHECK: OpEntryPoint GLCompute [[NODE20:%[^ ]*]] "node_2_0" +// CHECK: OpEntryPoint GLCompute [[NODE21:%[^ ]*]] "node_2_1" +// CHECK: OpEntryPoint GLCompute [[NODE22:%[^ ]*]] "node_2_2" +// CHECK: OpDecorateId [[A10:%[^ ]*]] PayloadNodeNameAMDX [[S10:%[^ ]*]] +// CHECK: OpDecorateId [[A10]] NodeMaxPayloadsAMDX [[U31:%[^ ]*]] +// CHECK: OpDecorate [[A10]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A10]] PayloadNodeArraySizeAMDX [[U129:%[^ ]*]] +// CHECK: OpDecorateId [[A11:%[^ ]*]] PayloadNodeNameAMDX [[S11:%[^ ]*]] +// CHECK: OpDecorateId [[A11]] NodeMaxPayloadsAMDX [[U37:%[^ ]*]] +// CHECK: OpDecorate [[A11]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A12:%[^ ]*]] PayloadNodeNameAMDX [[S12:%[^ ]*]] +// CHECK: OpDecorateId [[A12]] NodeMaxPayloadsAMDX [[U47:%[^ ]*]] +// CHECK: OpDecorate [[A12]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A20:%[^ ]*]] PayloadNodeNameAMDX [[S20:%[^ ]*]] +// CHECK: OpDecorateId [[A20]] NodeMaxPayloadsAMDX [[U41:%[^ ]*]] +// CHECK: OpDecorate [[A20]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A20]] PayloadNodeArraySizeAMDX [[U131:%[^ ]*]] +// CHECK: OpDecorateId [[A21:%[^ ]*]] PayloadNodeNameAMDX [[S21:%[^ ]*]] +// CHECK: OpDecorateId [[A21]] NodeMaxPayloadsAMDX [[U43:%[^ ]*]] +// CHECK: OpDecorate [[A21]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A22:%[^ ]*]] PayloadNodeNameAMDX [[S22:%[^ ]*]] +// CHECK: OpDecorateId [[A22]] NodeMaxPayloadsAMDX [[U53:%[^ ]*]] +// CHECK: OpDecorate [[A22]] PayloadNodeSparseArrayAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ 
]*]] = OpConstant [[UINT]] 0 +// CHECK: [[RECORD:%[^ ]*]] = OpTypeStruct [[UINT]] [[UINT]] +// CHECK-DAG: [[A10]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S10]] = OpConstantStringAMDX "OutputArray_1_0" +// CHECK-DAG: [[U31]] = OpConstant [[UINT]] 31 +// CHECK-DAG: [[U129]] = OpConstant [[UINT]] 129 +// CHECK-DAG: [[A11]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S11]] = OpConstantStringAMDX "OutputArray_1_1" +// CHECK-DAG: [[U37]] = OpConstant [[UINT]] 37 +// CHECK-DAG: [[A12]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S12]] = OpConstantStringAMDX "Output_1_2" +// CHECK-DAG: [[U47]] = OpConstant [[UINT]] 47 +// CHECK-DAG: [[EMPTY:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[A20]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S20]] = OpConstantStringAMDX "OutputArray_2_0" +// CHECK-DAG: [[U41]] = OpConstant [[UINT]] 41 +// CHECK-DAG: [[U131]] = OpConstant [[UINT]] 131 +// CHECK-DAG: [[A21]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S21]] = OpConstantStringAMDX "OutputArray_2_1" +// CHECK-DAG: [[U43]] = OpConstant [[UINT]] 43 +// CHECK-DAG: [[A22]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S22]] = OpConstantStringAMDX "Output_2_2" +// CHECK-DAG: [[U53]] = OpConstant [[UINT]] 53 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_0( + [AllowSparseNodes] [NodeArraySize(129)] [MaxRecords(31)] + NodeOutputArray OutputArray_1_0) { + ThreadNodeOutputRecords outRec = OutputArray_1_0[1].GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// CHECK: [[NODE10]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_1( + [UnboundedSparseNodes] [MaxRecords(37)] + NodeOutputArray OutputArray_1_1) { + ThreadNodeOutputRecords outRec = OutputArray_1_1[1].GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// 
CHECK: [[NODE11]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_2( + [AllowSparseNodes] [MaxRecords(47)] + NodeOutput Output_1_2) { + ThreadNodeOutputRecords outRec = Output_1_2.GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// CHECK: [[NODE12]] = OpFunction %void None +// CHECK: %{{[^ ]*}} = OpAllocateNodePayloadsAMDX %{{[^ ]*}} %{{[^ ]*}} %{{[^ ]*}} [[U0]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_0( + [AllowSparseNodes] [NodeArraySize(131)] [MaxRecords(41)] + EmptyNodeOutputArray OutputArray_2_0) { + OutputArray_2_0[1].GroupIncrementOutputCount(10); +} + +// CHECK: [[NODE20]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_1( + [UnboundedSparseNodes] [MaxRecords(43)] + EmptyNodeOutputArray OutputArray_2_1) { + OutputArray_2_1[1].GroupIncrementOutputCount(10); +} + +// CHECK: [[NODE21]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_2( + [AllowSparseNodes] [MaxRecords(53)] + EmptyNodeOutput Output_2_2) { + Output_2_2.GroupIncrementOutputCount(10); +} + +// CHECK: [[NODE22]] = OpFunction %void None +// CHECK: %{{[^ ]*}} = OpAllocateNodePayloadsAMDX %{{[^ ]*}} %{{[^ ]*}} %{{[^ ]*}} [[U0]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl new file mode 100644 index 0000000000..8732cf3478 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// NumThreads + +[Shader("node")] 
+[NodeLaunch("thread")] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void node010_thread_numthreads_shader() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK: OpExecutionMode [[SHADER]] LocalSize 1 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl new file mode 100644 index 0000000000..0b230479c4 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Thread launch node without NumThreads specified should use a +// default of (1,1,1) + +[Shader("node")] +[NodeLaunch("thread")] +[NodeIsProgramEntry] +void node011_thread_numthreads_none() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK: OpExecutionMode [[SHADER]] LocalSize 1 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl b/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl index 12b03fffda..4d10dc446b 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl @@ -60,12 +60,6 @@ RWBuffer Buf_r64i; [[vk::image_format("r64ui")]] RWBuffer Buf_r64ui; -[[vk::image_format("r16f")]] -// CHECK: [[ImgType:%[0-9a-zA-Z_]+]] = OpTypeImage %float 2D 2 0 0 2 R16f -// CHECK: [[ArrayType:%[0-9a-zA-Z_]+]] = OpTypeRuntimeArray [[ImgType]] -// CHECK: [[PtrType:%[0-9a-zA-Z_]+]] = OpTypePointer UniformConstant [[ArrayType]] -RWTexture2D Buf_r16f_bindless[]; - struct S { RWBuffer b; }; From 296d4aee97308cbadbc03851b3457cebf7ced13b Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Tue, 3 Jun 2025 19:55:57 -0700 Subject: [PATCH 56/93] NFC: Add LSAN leak suppression to ASAN pipeline (#7503) Address-sanitizer reports a leak coming from the dynamic shared library loading code, though dlopen() is paired 
with dlclose(). This leak doesn't manifest on main yet because dxc adds dxcompiler directly to target_link_libraries (which it shouldn't have to), and dxil is loaded within that library. A PR to remove dxil loading from dxcompiler exposes the leak when dxc loads dxil dynamically. This change adds a suppressions file for LSAN to suppress the leak in the loader code that happens under call_init. This also changes the Linux_Clang_Release build to RelWithDebInfo so symbols are present. --- azure-pipelines.yml | 10 ++++++---- utils/asan/x86_64-pc-linux-gnu.lsan.supp | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 utils/asan/x86_64-pc-linux-gnu.lsan.supp diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7967fa03e3..dee579287c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -56,13 +56,13 @@ stages: strategy: matrix: - Linux_Clang_Release: + Linux_Clang_RelWithDebInfo: image: ${{ variables.linux }} - configuration: Release + configuration: RelWithDebInfo CC: clang-18 CXX: clang++-18 - CMAKE_OPTS: -DLLVM_ENABLE_WERROR=On -DLLVM_USE_SANITIZER='Address;Undefined' -DLLVM_ENABLE_LIBCXX=On -DLLVM_USE_LINKER=lld - CHECK_ALL_ENV: ASAN_OPTIONS=alloc_dealloc_mismatch=0 + CMAKE_OPTS: -DLLVM_ENABLE_WERROR=On -DLLVM_USE_SANITIZER='Address;Undefined' -DLLVM_ENABLE_LIBCXX=On -DLLVM_USE_LINKER=lld-18 + CHECK_ALL_ENV: ASAN_OPTIONS=alloc_dealloc_mismatch=0 LSAN_OPTIONS=suppressions=$BUILD_SOURCESDIRECTORY/utils/asan/x86_64-pc-linux-gnu.lsan.supp:print_suppressions=0 ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-18 LSAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-18 OS: Linux Linux_Clang_Debug: image: ${{ variables.linux }} @@ -107,6 +107,8 @@ stages: versionSpec: '3.x' - bash: | + sudo apt-get update + sudo apt-get upgrade libc6 libc6-dbg sudo apt-get install ninja-build wget https://apt.llvm.org/llvm.sh chmod u+x llvm.sh diff --git a/utils/asan/x86_64-pc-linux-gnu.lsan.supp b/utils/asan/x86_64-pc-linux-gnu.lsan.supp new file mode 100644 index 
0000000000..3a7725f535 --- /dev/null +++ b/utils/asan/x86_64-pc-linux-gnu.lsan.supp @@ -0,0 +1 @@ +leak:^call_init$ \ No newline at end of file From 77dcbb61f7efdee92a19e4e289f03f2c77365222 Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 5 Jun 2025 13:37:04 -0500 Subject: [PATCH 57/93] Require complete types in some missing places (#7511) This change adds two additional calls to Sema::RequireCompleteType, one when evaluating unary `sizeof`, which fixes a reported issue with templates. The second is in the DXR diagnostics where complete type should be required but not diagnosed because ordering in the compiler is a bit wonky when diagnosing DXR entry points. Fixes #7510 --------- Co-authored-by: Tex Riddell --- tools/clang/lib/AST/HlslTypes.cpp | 1 + tools/clang/lib/Sema/SemaDXR.cpp | 15 ++++++- tools/clang/lib/Sema/SemaExpr.cpp | 16 ++++++-- tools/clang/lib/Sema/SemaHLSL.cpp | 12 +++--- .../test/SemaHLSL/raytracing-entry-diags.hlsl | 21 ++++++++++ .../sizeof-requires-complete-type.hlsl | 27 +++++++++++++ .../test/SemaHLSL/template-udt-load.hlsl | 39 +++++++++++++++++++ 7 files changed, 121 insertions(+), 10 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 7693c065be..00c18a81a9 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -108,6 +108,7 @@ bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type) { // which can't be annotated. But includes UDTs of trivially copyable data and // the builtin trivially copyable raytracing structs. 
bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { + assert(!QT->isIncompleteType() && "Type must be complete!"); const clang::Type *Ty = QT.getCanonicalType().getTypePtr(); if (const RecordType *RT = dyn_cast(Ty)) { const RecordDecl *RD = RT->getDecl(); diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 04e1582513..0ccb21fb2b 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -1190,7 +1190,10 @@ void DiagnoseCallableEntry(Sema &S, FunctionDecl *FD, << /*payload|callable*/ 1 << Param; QualType Ty = Param->getType().getNonReferenceType(); - if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) + // Don't diagnose incomplete type here. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. + if (!S.RequireCompleteType(Param->getLocation(), Ty, 0) && + !(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) << /*payload|attributes|callable*/ 2 << /*parameter %2|type*/ 0 << Param; @@ -1232,6 +1235,11 @@ void DiagnoseMissOrAnyHitEntry(Sema &S, FunctionDecl *FD, QualType Ty = Param->getType().getNonReferenceType(); + // Don't diagnose here, just continue if this fails. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. + if (S.RequireCompleteType(Param->getLocation(), Ty, 0)) + continue; + if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 @@ -1286,6 +1294,11 @@ void DiagnoseClosestHitEntry(Sema &S, FunctionDecl *FD, QualType Ty = Param->getType().getNonReferenceType(); + // Don't diagnose here, just continue if this fails. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. 
+ if (S.RequireCompleteType(Param->getLocation(), Ty, 0)) + continue; + if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index 507b6a7508..389fcfc3ff 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -3798,13 +3798,21 @@ static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, } // HLSL Change Begins -bool Sema::CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, +bool Sema::CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, + SourceLocation Loc, UnaryExprOrTypeTrait ExprKind) { assert(ExprKind == UnaryExprOrTypeTrait::UETT_SizeOf); - // "sizeof 42" is ill-defined because HLSL has literal int type which can decay to an int of any size. - const BuiltinType* BuiltinTy = ExprType->getAs(); - if (BuiltinTy != nullptr && (BuiltinTy->getKind() == BuiltinType::LitInt || BuiltinTy->getKind() == BuiltinType::LitFloat)) { + if (RequireCompleteType(Loc, ExprType, + diag::err_sizeof_alignof_incomplete_type, ExprKind, + ExprType)) + return true; + + // "sizeof 42" is ill-defined because HLSL has literal int type which can + // decay to an int of any size. 
+ const BuiltinType *BuiltinTy = ExprType->getAs(); + if (BuiltinTy != nullptr && (BuiltinTy->getKind() == BuiltinType::LitInt || + BuiltinTy->getKind() == BuiltinType::LitFloat)) { Diag(Loc, diag::err_hlsl_sizeof_literal) << ExprType; return true; } diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 744b06b8d0..3d9de1804d 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -10947,11 +10947,13 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( } if (IsBABLoad || IsBABStore) { - const bool IsLegalTemplate = - !functionTemplateTypeArg.isNull() && - hlsl::IsHLSLNumericOrAggregateOfNumericType( - functionTemplateTypeArg); - if (!IsLegalTemplate) { + const bool IsNull = functionTemplateTypeArg.isNull(); + // Incomplete type is diagnosed elsewhere, so just fail if incomplete. + if (!IsNull && + getSema()->RequireCompleteType(Loc, functionTemplateTypeArg, 0)) + return Sema::TemplateDeductionResult::TDK_Invalid; + if (IsNull || !hlsl::IsHLSLNumericOrAggregateOfNumericType( + functionTemplateTypeArg)) { getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_numeric) << intrinsicName; DiagnoseTypeElements( diff --git a/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl b/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl index e41c6a2f4f..8dfc927e11 100644 --- a/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl +++ b/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl @@ -181,3 +181,24 @@ void callable7(inout MyPayload payload, float F) {} [shader("callable")] float callable8(inout MyPayload payload) {} // expected-error{{return type for 'callable' shaders must be void}} + +// expected-note@+1 6 {{forward declaration of 'Incomplete'}} +struct Incomplete; + +// expected-error@+3{{variable has incomplete type 'Incomplete'}} +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("anyhit")] +void anyhit_incomplete( inout Incomplete A1, Incomplete A2) { 
} + +// expected-error@+3{{variable has incomplete type 'Incomplete'}} +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("closesthit")] +void closesthit_incomplete( inout Incomplete payload, Incomplete attr ) {} + +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("miss")] +void miss_incomplete( inout Incomplete payload) { } + +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("callable")] +void callable_incomplete(inout Incomplete payload) {} diff --git a/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl b/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl new file mode 100644 index 0000000000..31d4898efe --- /dev/null +++ b/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_3 -verify %s + +struct Complete {}; + +struct Incomplete; // expected-note{{forward declaration of 'Incomplete'}} +template struct CompleteTemplate {}; + +void fn() { + uint s; + // Complete types are easy. They are complete before we get to the expression. + s = sizeof(Complete); // This works! + + // A type may be incomplete for several reasons. + + // It may be incomplete because there is only a forward declaration, which + // should produce an error since we can't materialize a definition. + s = sizeof(Incomplete); // expected-error{{invalid application of 'sizeof' to an incomplete type 'Incomplete'}} + + // It may be incomplete because it is an un-instantiated template, which + // should work because we can just instantiate it. + s = sizeof(CompleteTemplate); // This works! + + // It may be incomplete because it is a lazy-initialized type from HLSL, + // which can be completed, and then will report a non-numeric type error. 
+ // expected-error@+1{{invalid application of 'sizeof' to non-numeric type 'Buffer'}} + s = sizeof(Buffer); +} diff --git a/tools/clang/test/SemaHLSL/template-udt-load.hlsl b/tools/clang/test/SemaHLSL/template-udt-load.hlsl index f666297bb9..dd7cf8bd16 100644 --- a/tools/clang/test/SemaHLSL/template-udt-load.hlsl +++ b/tools/clang/test/SemaHLSL/template-udt-load.hlsl @@ -4,6 +4,34 @@ ByteAddressBuffer In; RWBuffer Out; +template +struct Foo { + // expected-note@+1{{'RWBuffer' field declared here}} + T Member; +}; + +template +struct MyTemplate { + T GetValue(ByteAddressBuffer srv, uint offset) { + // expected-error@+2{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@+1{{object 'RWBuffer' is not allowed in builtin template parameters}} + return srv.Load(offset); + } +}; +template +T GetValue(uint offset) { + MyTemplate myTemplate; + // expected-error@+2{{scalar, vector, or matrix expected}} + // expected-note@+1{{in instantiation of member function 'MyTemplate >::GetValue' requested here}} + return myTemplate.GetValue(In, offset) + + // expected-error@+2{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@+1{{object 'RWBuffer' is not allowed in builtin template parameters}} + In.Load >(offset + 4).Member; +} + +// expected-note@+1{{forward declaration of 'Incomplete'}} +struct Incomplete; + [shader("compute")] [numthreads(1,1,1)] void main() @@ -11,5 +39,16 @@ void main() RWBuffer FB = In.Load >(0); // expected-error@-1{{Explicit template arguments on intrinsic Load must be a single numeric type}} // expected-error@-2{{object 'RWBuffer' is not allowed in builtin template parameters}} + Out[0] = FB[0]; + + // Ok: + Out[4] = GetValue(4); + + // expected-note@?{{'Load' declared here}} + // expected-error@+1{{calling 'Load' with incomplete return type 'Incomplete'}} + Out[8] = In.Load(8); + + // expected-note@+1 2 {{in instantiation of function template specialization 
'GetValue >' requested here}} + RWBuffer FB2 = GetValue >(16); } From 77b2ff676070aa5d34bcfe2ea0bbd4f435427e0b Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Thu, 5 Jun 2025 13:45:30 -0700 Subject: [PATCH 58/93] NFC: remove dead external validation code paths from dxcompiler (#7451) DXC has now been changed to use the internal validator (loaded by dxcompiler.dll) by default. This PR removes the ability for dxc.exe to load dxil.dll in preparation for a series of changes to fix external validation handling. --------- Co-authored-by: github-actions[bot] --- .../dxc/DxilContainer/DxcContainerBuilder.h | 4 +- include/dxc/Support/HLSLOptions.h | 9 -- lib/DxilContainer/DxcContainerBuilder.cpp | 12 +- tools/clang/tools/dxcompiler/CMakeLists.txt | 2 - tools/clang/tools/dxcompiler/DXCompiler.cpp | 8 -- tools/clang/tools/dxcompiler/dxcapi.cpp | 12 +- tools/clang/tools/dxcompiler/dxcassembler.cpp | 1 - tools/clang/tools/dxcompiler/dxclinker.cpp | 1 - .../clang/tools/dxcompiler/dxcompilerobj.cpp | 20 +-- tools/clang/tools/dxcompiler/dxcutil.cpp | 116 +++--------------- tools/clang/tools/dxcompiler/dxcutil.h | 17 +-- tools/clang/tools/dxcompiler/dxillib.cpp | 73 ----------- tools/clang/tools/dxcompiler/dxillib.h | 42 ------- .../unittests/HLSLTestLib/FileCheckerTest.cpp | 25 ++-- 14 files changed, 39 insertions(+), 303 deletions(-) delete mode 100644 tools/clang/tools/dxcompiler/dxillib.cpp delete mode 100644 tools/clang/tools/dxcompiler/dxillib.h diff --git a/include/dxc/DxilContainer/DxcContainerBuilder.h b/include/dxc/DxilContainer/DxcContainerBuilder.h index 9a3241525c..e79fec18c8 100644 --- a/include/dxc/DxilContainer/DxcContainerBuilder.h +++ b/include/dxc/DxilContainer/DxcContainerBuilder.h @@ -45,8 +45,7 @@ class DxcContainerBuilder : public IDxcContainerBuilder { return DoBasicQueryInterface(this, riid, ppvObject); } - void Init(const char *warning = nullptr) { - m_warning = warning; + void Init() { m_RequireValidation = false; m_HasPrivateData = false; 
m_HashFunction = nullptr; @@ -67,7 +66,6 @@ class DxcContainerBuilder : public IDxcContainerBuilder { PartList m_parts; CComPtr m_pContainer; - const char *m_warning; bool m_RequireValidation; bool m_HasPrivateData; // Function to compute hash when valid dxil container is built diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index bad330747b..31ca3d1c14 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -114,13 +114,6 @@ struct RewriterOpts { bool DeclGlobalCB = false; // OPT_rw_decl_global_cb }; -enum class ValidatorSelection : int { - Auto, // Force internal validator (even if DXIL.dll is present) - Internal, // Force internal validator (even if DXIL.dll is present) - External, // Use DXIL.dll, failing compilation if not available - Invalid = -1 // Invalid -}; - /// Use this class to capture all options. class DxcOpts { public: @@ -225,8 +218,6 @@ class DxcOpts { bool ResMayAlias = false; // OPT_res_may_alias unsigned long ValVerMajor = UINT_MAX, ValVerMinor = UINT_MAX; // OPT_validator_version - ValidatorSelection SelectValidator = - ValidatorSelection::Auto; // OPT_select_validator unsigned ScanLimit = 0; // OPT_memdep_block_scan_limit bool ForceZeroStoreLifetimes = false; // OPT_force_zero_store_lifetimes bool EnableLifetimeMarkers = false; // OPT_enable_lifetime_markers diff --git a/lib/DxilContainer/DxcContainerBuilder.cpp b/lib/DxilContainer/DxcContainerBuilder.cpp index 770aa910a4..be182328dd 100644 --- a/lib/DxilContainer/DxcContainerBuilder.cpp +++ b/lib/DxilContainer/DxcContainerBuilder.cpp @@ -146,18 +146,14 @@ DxcContainerBuilder::SerializeContainer(IDxcOperationResult **ppResult) { // Combine existing warnings and errors from validation CComPtr pErrorBlob; CDxcMallocHeapPtr errorHeap(m_pMalloc); - SIZE_T warningLength = m_warning ? strlen(m_warning) : 0; - SIZE_T valErrorLength = + SIZE_T totalErrorLength = pValErrorUtf8 ? 
pValErrorUtf8->GetStringLength() : 0; - SIZE_T totalErrorLength = warningLength + valErrorLength; if (totalErrorLength) { SIZE_T errorSizeInBytes = totalErrorLength + 1; errorHeap.AllocateBytes(errorSizeInBytes); - if (warningLength) - memcpy(errorHeap.m_pData, m_warning, warningLength); - if (valErrorLength) - memcpy(errorHeap.m_pData + warningLength, - pValErrorUtf8->GetStringPointer(), valErrorLength); + + memcpy(errorHeap.m_pData, pValErrorUtf8->GetStringPointer(), + totalErrorLength); errorHeap.m_pData[totalErrorLength] = L'\0'; IFT(hlsl::DxcCreateBlobWithEncodingOnMalloc(errorHeap.m_pData, m_pMalloc, errorSizeInBytes, DXC_CP_UTF8, diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index c69e276194..26bf0e5d98 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -57,7 +57,6 @@ set(SOURCES DXCompiler.rc DXCompiler.def dxcfilesystem.cpp - dxillib.cpp dxcutil.cpp dxcdisassembler.cpp dxcpdbutils.cpp @@ -75,7 +74,6 @@ set(SOURCES dxcutil.cpp dxcdisassembler.cpp dxcpdbutils.cpp - dxillib.cpp dxcvalidator.cpp dxclinker.cpp dxcshadersourceinfo.cpp diff --git a/tools/clang/tools/dxcompiler/DXCompiler.cpp b/tools/clang/tools/dxcompiler/DXCompiler.cpp index c548441449..c7ffcbffa1 100644 --- a/tools/clang/tools/dxcompiler/DXCompiler.cpp +++ b/tools/clang/tools/dxcompiler/DXCompiler.cpp @@ -19,7 +19,6 @@ #ifdef LLVM_ON_WIN32 #include "dxcetw.h" #endif -#include "dxillib.h" namespace hlsl { HRESULT SetupRegistryPassForHLSL(); @@ -65,7 +64,6 @@ static HRESULT InitMaybeFail() throw() { fsSetup = true; IFC(hlsl::SetupRegistryPassForHLSL()); IFC(hlsl::SetupRegistryPassForPIX()); - IFC(DxilLibInitialize()); if (hlsl::options::initHlslOptTable()) { hr = E_FAIL; goto Cleanup; @@ -110,12 +108,6 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID reserved) { ::hlsl::options::cleanupHlslOptTable(); ::llvm::sys::fs::CleanupPerThreadFileSystem(); 
::llvm::llvm_shutdown(); - if (reserved == - NULL) { // FreeLibrary has been called or the DLL load failed - DxilLibCleanup(DxilLibCleanUpType::UnloadLibrary); - } else { // Process termination. We should not call FreeLibrary() - DxilLibCleanup(DxilLibCleanUpType::ProcessTermination); - } DxcClearThreadMalloc(); DxcCleanupThreadMalloc(); DxcEtw_DXCompilerShutdown_Stop(S_OK); diff --git a/tools/clang/tools/dxcompiler/dxcapi.cpp b/tools/clang/tools/dxcompiler/dxcapi.cpp index ab2cf1f40e..d4e85bc35c 100644 --- a/tools/clang/tools/dxcompiler/dxcapi.cpp +++ b/tools/clang/tools/dxcompiler/dxcapi.cpp @@ -25,7 +25,6 @@ #include "dxcetw.h" #endif #include "dxc/DxilContainer/DxcContainerBuilder.h" -#include "dxillib.h" #include HRESULT CreateDxcCompiler(REFIID riid, _Out_ LPVOID *ppv); @@ -59,20 +58,11 @@ HRESULT CreateDxcContainerReflection(REFIID riid, _Out_ LPVOID *ppv) { HRESULT CreateDxcContainerBuilder(REFIID riid, _Out_ LPVOID *ppv) { // Call dxil.dll's containerbuilder *ppv = nullptr; - const char *warning; - HRESULT hr = DxilLibCreateInstance(CLSID_DxcContainerBuilder, - (IDxcContainerBuilder **)ppv); - if (FAILED(hr)) { - warning = "Unable to create container builder from dxil.dll. 
Resulting " - "container will not be signed.\n"; - } else { - return hr; - } CComPtr Result = DxcContainerBuilder::Alloc(DxcGetThreadMallocNoRef()); IFROOM(Result.p); - Result->Init(warning); + Result->Init(); return Result->QueryInterface(riid, ppv); } diff --git a/tools/clang/tools/dxcompiler/dxcassembler.cpp b/tools/clang/tools/dxcompiler/dxcassembler.cpp index 0ff2abe26c..6622e93cbc 100644 --- a/tools/clang/tools/dxcompiler/dxcassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcassembler.cpp @@ -19,7 +19,6 @@ #include "dxc/Support/dxcfilesystem.h" #include "dxc/Support/microcom.h" #include "dxcutil.h" -#include "dxillib.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" diff --git a/tools/clang/tools/dxcompiler/dxclinker.cpp b/tools/clang/tools/dxcompiler/dxclinker.cpp index 82c9b8e96b..f5427ccc08 100644 --- a/tools/clang/tools/dxcompiler/dxclinker.cpp +++ b/tools/clang/tools/dxcompiler/dxclinker.cpp @@ -18,7 +18,6 @@ #include "dxc/Support/dxcapi.impl.h" #include "dxc/Support/microcom.h" #include "dxc/dxcapi.h" -#include "dxillib.h" #include "llvm/ADT/SmallVector.h" #include diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index ebeee380ef..84b568df9c 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -56,7 +56,6 @@ #include "dxcompileradapter.h" #include "dxcshadersourceinfo.h" #include "dxcversion.inc" -#include "dxillib.h" #include #include @@ -850,11 +849,9 @@ class DxcCompiler : public IDxcCompiler3, compiler.getCodeGenOpts().HLSLValidatorMajorVer = opts.ValVerMajor; compiler.getCodeGenOpts().HLSLValidatorMinorVer = opts.ValVerMinor; } else { - // Version from dxil.dll, or internal validator if unavailable dxcutil::GetValidatorVersion( &compiler.getCodeGenOpts().HLSLValidatorMajorVer, - &compiler.getCodeGenOpts().HLSLValidatorMinorVer, - opts.SelectValidator); + 
&compiler.getCodeGenOpts().HLSLValidatorMinorVer); } // Root signature-only container validation is only supported on 1.5 and @@ -934,7 +931,7 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pValErrors; // Validation failure communicated through diagnostic error dxcutil::ValidateRootSignatureInContainer( - pOutputBlob, &compiler.getDiagnostics(), opts.SelectValidator); + pOutputBlob, &compiler.getDiagnostics()); } } } else if (opts.VerifyDiagnostics) { @@ -1054,8 +1051,7 @@ class DxcCompiler : public IDxcCompiler3, std::move(serializeModule), pOutputBlob, m_pMalloc, SerializeFlags, pOutputStream, 0, opts.GetPDBName(), &compiler.getDiagnostics(), &ShaderHashContent, pReflectionStream, - pRootSigStream, pRootSignatureBlob, pPrivateBlob, - opts.SelectValidator); + pRootSigStream, pRootSignatureBlob, pPrivateBlob); inputs.pVersionInfo = static_cast(this); @@ -1108,8 +1104,7 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pValErrors; // Validation failure communicated through diagnostic error dxcutil::ValidateRootSignatureInContainer( - pRootSignature, &compiler.getDiagnostics(), - opts.SelectValidator); + pRootSignature, &compiler.getDiagnostics()); } IFT(pResult->SetOutputObject(DXC_OUT_ROOT_SIGNATURE, pRootSignature)); @@ -1324,13 +1319,6 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pResult; hr = e.hr; std::string msg("Internal Compiler error: "); - switch (hr) { - case DXC_E_VALIDATOR_MISSING: - msg = "Error: external validator selected, but DXIL.dll not found."; - break; - default: - break; - } msg += e.msg; if (SUCCEEDED(DxcResult::Create( e.hr, DXC_OUT_NONE, diff --git a/tools/clang/tools/dxcompiler/dxcutil.cpp b/tools/clang/tools/dxcompiler/dxcutil.cpp index ea3f72dcb4..4e5c5c95e8 100644 --- a/tools/clang/tools/dxcompiler/dxcutil.cpp +++ b/tools/clang/tools/dxcompiler/dxcutil.cpp @@ -19,7 +19,6 @@ #include "dxc/Support/WinIncludes.h" #include "dxc/Support/dxcapi.impl.h" #include "dxc/dxcapi.h" -#include "dxillib.h" #include 
"clang/Basic/Diagnostic.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/DebugInfo.h" @@ -50,32 +49,8 @@ namespace { // AssembleToContainer helper functions. // return true if the internal validator was used, false otherwise -bool CreateValidator(CComPtr &pValidator, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto) { - bool bInternal = - SelectValidator == hlsl::options::ValidatorSelection::Internal; - bool bExternal = - SelectValidator == hlsl::options::ValidatorSelection::External; - bool bAuto = SelectValidator == hlsl::options::ValidatorSelection::Auto; - - // default behavior uses internal validator, as well as - // explicitly specifying internal - if (bInternal || bAuto) { - IFT(CreateDxcValidator(IID_PPV_ARGS(&pValidator))); - return true; - } - - if (bExternal) { - // if external was explicitly specified, but no - // external validator could be found (no DXIL.dll), then error - IFTBOOL(DxilLibIsEnabled(), DXC_E_VALIDATOR_MISSING); - IFT(DxilLibCreateInstance(CLSID_DxcValidator, &pValidator)); - - return false; - } - - return false; +void CreateValidator(CComPtr &pValidator) { + IFT(CreateDxcValidator(IID_PPV_ARGS(&pValidator))); } } // namespace @@ -89,23 +64,20 @@ AssembleInputs::AssembleInputs( uint32_t ValidationFlags, llvm::StringRef DebugName, clang::DiagnosticsEngine *pDiag, hlsl::DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionOut, AbstractMemoryStream *pRootSigOut, - CComPtr pRootSigBlob, CComPtr pPrivateBlob, - hlsl::options::ValidatorSelection SelectValidator) + CComPtr pRootSigBlob, CComPtr pPrivateBlob) : pM(std::move(pM)), pOutputContainerBlob(pOutputContainerBlob), pMalloc(pMalloc), SerializeFlags(SerializeFlags), ValidationFlags(ValidationFlags), pModuleBitcode(pModuleBitcode), DebugName(DebugName), pDiag(pDiag), pShaderHashOut(pShaderHashOut), pReflectionOut(pReflectionOut), pRootSigOut(pRootSigOut), - pRootSigBlob(pRootSigBlob), pPrivateBlob(pPrivateBlob), - 
SelectValidator(SelectValidator) {} + pRootSigBlob(pRootSigBlob), pPrivateBlob(pPrivateBlob) {} -void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor, - hlsl::options::ValidatorSelection SelectValidator) { +void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor) { if (pMajor == nullptr || pMinor == nullptr) return; CComPtr pValidator; - CreateValidator(pValidator, SelectValidator); + CreateValidator(pValidator); CComPtr pVersionInfo; if (SUCCEEDED(pValidator.QueryInterface(&pVersionInfo))) { @@ -177,76 +149,19 @@ HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs) { std::unique_ptr llvmModuleWithDebugInfo; CComPtr pValidator; - bool bInternalValidator = CreateValidator(pValidator, inputs.SelectValidator); - // Warning on internal Validator - - CComPtr pValidator2; - if (!bInternalValidator) { - pValidator.QueryInterface(&pValidator2); - } - - if (bInternalValidator || pValidator2) { - // If using the internal validator or external validator supports - // IDxcValidator2, we'll use the modules directly. In this case, we'll want - // to make a clone to avoid SerializeDxilContainerForModule stripping all - // the debug info. The debug info will be stripped from the orginal module, - // but preserved in the cloned module. - if (llvm::getDebugMetadataVersionFromModule(*inputs.pM) != 0) { - llvmModuleWithDebugInfo.reset(llvm::CloneModule(inputs.pM.get())); - } - } + CreateValidator(pValidator); - // Verify validator version can validate this module - CComPtr pValidatorVersion; - IFT(pValidator->QueryInterface(&pValidatorVersion)); - UINT32 ValMajor, ValMinor; - IFT(pValidatorVersion->GetVersion(&ValMajor, &ValMinor)); - DxilModule &DM = inputs.pM.get()->GetOrCreateDxilModule(); - unsigned ReqValMajor, ReqValMinor; - DM.GetValidatorVersion(ReqValMajor, ReqValMinor); - if (DXIL::CompareVersions(ValMajor, ValMinor, ReqValMajor, ReqValMinor) < 0) { - // Module is expecting to be validated by a newer validator. 
- if (inputs.pDiag) { - unsigned diagID = inputs.pDiag->getCustomDiagID( - clang::DiagnosticsEngine::Level::Error, - "The module cannot be validated by the version of the validator " - "currently attached."); - inputs.pDiag->Report(diagID); - } - return E_FAIL; - } + if (llvm::getDebugMetadataVersionFromModule(*inputs.pM) != 0) + llvmModuleWithDebugInfo.reset(llvm::CloneModule(inputs.pM.get())); AssembleToContainer(inputs); CComPtr pValResult; - // Important: in-place edit is required so the blob is reused and thus - // dxil.dll can be released. + // In-place edit to avoid an extra copy inputs.ValidationFlags |= DxcValidatorFlags_InPlaceEdit; - if (bInternalValidator) { - IFT(RunInternalValidator(pValidator, llvmModuleWithDebugInfo.get(), - inputs.pOutputContainerBlob, - inputs.ValidationFlags, &pValResult)); - } else { - if (pValidator2 && llvmModuleWithDebugInfo) { - // If metadata was stripped, re-serialize the input module. - CComPtr pDebugModuleStream; - IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pDebugModuleStream)); - raw_stream_ostream outStream(pDebugModuleStream.p); - WriteBitcodeToFile(llvmModuleWithDebugInfo.get(), outStream, true); - outStream.flush(); - - DxcBuffer debugModule = {}; - debugModule.Ptr = pDebugModuleStream->GetPtr(); - debugModule.Size = pDebugModuleStream->GetPtrSize(); - - IFT(pValidator2->ValidateWithDebug(inputs.pOutputContainerBlob, - inputs.ValidationFlags, &debugModule, - &pValResult)); - } else { - IFT(pValidator->Validate(inputs.pOutputContainerBlob, - inputs.ValidationFlags, &pValResult)); - } - } + IFT(RunInternalValidator(pValidator, llvmModuleWithDebugInfo.get(), + inputs.pOutputContainerBlob, inputs.ValidationFlags, + &pValResult)); IFT(pValResult->GetStatus(&valHR)); if (inputs.pDiag) { if (FAILED(valHR)) { @@ -271,9 +186,8 @@ HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs) { return valHR; } -HRESULT ValidateRootSignatureInContainer( - IDxcBlob *pRootSigContainer, clang::DiagnosticsEngine *pDiag, - 
hlsl::options::ValidatorSelection SelectValidator) { +HRESULT ValidateRootSignatureInContainer(IDxcBlob *pRootSigContainer, + clang::DiagnosticsEngine *pDiag) { HRESULT valHR = S_OK; CComPtr pValidator; CComPtr pValResult; diff --git a/tools/clang/tools/dxcompiler/dxcutil.h b/tools/clang/tools/dxcompiler/dxcutil.h index 45b3d4dc1a..8612353561 100644 --- a/tools/clang/tools/dxcompiler/dxcutil.h +++ b/tools/clang/tools/dxcompiler/dxcutil.h @@ -54,9 +54,7 @@ struct AssembleInputs { hlsl::AbstractMemoryStream *pReflectionOut = nullptr, hlsl::AbstractMemoryStream *pRootSigOut = nullptr, CComPtr pRootSigBlob = nullptr, - CComPtr pPrivateBlob = nullptr, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); + CComPtr pPrivateBlob = nullptr); std::unique_ptr pM; CComPtr &pOutputContainerBlob; IDxcVersionInfo *pVersionInfo = nullptr; @@ -71,18 +69,13 @@ struct AssembleInputs { hlsl::AbstractMemoryStream *pRootSigOut = nullptr; CComPtr pRootSigBlob = nullptr; CComPtr pPrivateBlob = nullptr; - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto; }; HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs); -HRESULT ValidateRootSignatureInContainer( - IDxcBlob *pRootSigContainer, clang::DiagnosticsEngine *pDiag = nullptr, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); +HRESULT +ValidateRootSignatureInContainer(IDxcBlob *pRootSigContainer, + clang::DiagnosticsEngine *pDiag = nullptr); HRESULT SetRootSignature(hlsl::DxilModule *pModule, CComPtr pSource); -void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); +void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor); void AssembleToContainer(AssembleInputs &inputs); HRESULT Disassemble(IDxcBlob *pProgram, llvm::raw_string_ostream &Stream); void 
ReadOptsAndValidate(hlsl::options::MainArgs &mainArgs, diff --git a/tools/clang/tools/dxcompiler/dxillib.cpp b/tools/clang/tools/dxcompiler/dxillib.cpp deleted file mode 100644 index 72abc869da..0000000000 --- a/tools/clang/tools/dxcompiler/dxillib.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// // -// dxillib.cpp // -// Copyright (C) Microsoft Corporation. All rights reserved. // -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides access to dxil.dll // -// // -/////////////////////////////////////////////////////////////////////////////// - -#include "dxillib.h" -#include "dxc/Support/Global.h" // For DXASSERT -#include "dxc/Support/dxcapi.use.h" -#include "llvm/Support/Mutex.h" - -using namespace dxc; - -static DxcDllSupport g_DllSupport; -static HRESULT g_DllLibResult = S_OK; - -static llvm::sys::Mutex *cs = nullptr; - -// Check if we can successfully get IDxcValidator from dxil.dll -// This function is to prevent multiple attempts to load dxil.dll -HRESULT DxilLibInitialize() { - cs = new llvm::sys::Mutex; - cs->lock(); - g_DllLibResult = g_DllSupport.InitializeForDll(kDxilLib, "DxcCreateInstance"); - cs->unlock(); - return S_OK; -} - -HRESULT DxilLibCleanup(DxilLibCleanUpType type) { - HRESULT hr = S_OK; - if (type == DxilLibCleanUpType::ProcessTermination) { - g_DllSupport.Detach(); - } else if (type == DxilLibCleanUpType::UnloadLibrary) { - g_DllSupport.Cleanup(); - } else { - hr = E_INVALIDARG; - } - delete cs; - cs = nullptr; - return hr; -} - -// g_DllLibResult is S_OK by default, check again to see if dxil.dll is loaded -// If we fail to load dxil.dll, set g_DllLibResult to E_FAIL so that we don't -// have multiple attempts to load dxil.dll -bool DxilLibIsEnabled() { - cs->lock(); - if (SUCCEEDED(g_DllLibResult)) { - if (!g_DllSupport.IsEnabled()) { - g_DllLibResult = - 
g_DllSupport.InitializeForDll(kDxilLib, "DxcCreateInstance"); - } - } - cs->unlock(); - return SUCCEEDED(g_DllLibResult); -} - -HRESULT DxilLibCreateInstance(REFCLSID rclsid, REFIID riid, - IUnknown **ppInterface) { - DXASSERT_NOMSG(ppInterface != nullptr); - HRESULT hr = E_FAIL; - if (DxilLibIsEnabled()) { - cs->lock(); - hr = g_DllSupport.CreateInstance(rclsid, riid, ppInterface); - cs->unlock(); - } - return hr; -} diff --git a/tools/clang/tools/dxcompiler/dxillib.h b/tools/clang/tools/dxcompiler/dxillib.h deleted file mode 100644 index 879d023459..0000000000 --- a/tools/clang/tools/dxcompiler/dxillib.h +++ /dev/null @@ -1,42 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// // -// dxillib.h // -// Copyright (C) Microsoft Corporation. All rights reserved. // -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides wrappers to handle calls to dxil.dll // -// // -/////////////////////////////////////////////////////////////////////////////// - -#pragma once -#ifndef __DXC_DXILLIB__ -#define __DXC_DXILLIB__ - -#include "dxc/Support/WinIncludes.h" -#include "dxc/WinAdapter.h" - -// Initialize Dxil library. -HRESULT DxilLibInitialize(); - -// When dxcompiler is detached from process, -// we should not call FreeLibrary on process termination. 
-// So the caller has to specify if cleaning is from FreeLibrary or process -// termination -enum class DxilLibCleanUpType { UnloadLibrary, ProcessTermination }; - -HRESULT DxilLibCleanup(DxilLibCleanUpType type); - -// Check if can access dxil.dll -bool DxilLibIsEnabled(); - -HRESULT DxilLibCreateInstance(REFCLSID rclsid, REFIID riid, - IUnknown **ppInterface); - -template -HRESULT DxilLibCreateInstance(REFCLSID rclsid, TInterface **ppInterface) { - return DxilLibCreateInstance(rclsid, __uuidof(TInterface), - (IUnknown **)ppInterface); -} - -#endif // __DXC_DXILLIB__ diff --git a/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp b/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp index 2c75d45e5e..2d9ee7315d 100644 --- a/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp +++ b/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp @@ -519,28 +519,21 @@ FileRunCommandPart::RunDxc(dxc::DxcDllSupport &DllSupport, // Convert stage to minimum dxil/validator version: RequiredDxilMajor = std::max(RequiredDxilMajor, (unsigned)6) - 5; - bool bInternalValidator = - opts.SelectValidator == hlsl::options::ValidatorSelection::Internal; bool bValVerExplicit = opts.ValVerMajor != UINT_MAX; - // Normally we must check the validator version as well, but there are - // two scenarios where the validator version doesn't need to be checked - // against the version based on the shader model: - // 1. The test selects internal validator. - // 2. The test explicitly requests a specific validator version. - FileRunCommandResult result = - CheckDxilVer(DllSupport, RequiredDxilMajor, RequiredDxilMinor, - !(bInternalValidator || bValVerExplicit)); + // If validator version set explicitly, skip validator version check when + // checking required version for shader model. 
+ FileRunCommandResult result = CheckDxilVer( + DllSupport, RequiredDxilMajor, RequiredDxilMinor, !bValVerExplicit); if (result.AbortPipeline) return result; // Additionally, if the test explicitly requests a specific non-zero - // validator version, and doesn't select internal validator or disable - // validation, we must check that the validator version is at least as - // high as the requested version. - // When ValVerMajor is 0, validation cannot be run against the module. - if (bValVerExplicit && opts.ValVerMajor != 0 && - !(bInternalValidator || opts.DisableValidation)) + // validator version, and doesn't disable validation, we must check + // that the validator version is at least as high as the requested + // version. When ValVerMajor is 0, validation cannot be run against + // the module. + if (bValVerExplicit && opts.ValVerMajor != 0 && !opts.DisableValidation) result = CheckDxilVer(DllSupport, opts.ValVerMajor, opts.ValVerMinor); if (result.AbortPipeline) return result; From 50f53c6c200fd6b53f65268912e6f9e444ce9242 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 5 Jun 2025 16:52:05 -0700 Subject: [PATCH 59/93] NFC: Update indentation in latest-release.json for clang-format (#7515) This avoids other unrelated changes which didn't intend to change this file from having to update the file just to make clang-format happy. 
--- utils/version/latest-release.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/version/latest-release.json b/utils/version/latest-release.json index 146acf0708..40d50a28ba 100644 --- a/utils/version/latest-release.json +++ b/utils/version/latest-release.json @@ -1,8 +1,8 @@ { - "version": { - "major": "1", - "minor": "8", - "rev": "2505" - }, - "sha": "0fd79eba6bb23f50ec21a7a7daeee3614bebe12b" + "version": { + "major": "1", + "minor": "8", + "rev": "2505" + }, + "sha": "0fd79eba6bb23f50ec21a7a7daeee3614bebe12b" } From 2084643e7ae44f23c80eb74d0cd549a8179c7443 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Fri, 6 Jun 2025 13:04:30 -0600 Subject: [PATCH 60/93] [spirv] Corrects output node index parameter. (#7517) The node index parameter of `OpAllocateNodePayloadsAMDX` was being set to the value of the NodeId index argument (which is captured in the `PayloadNodeBaseIndexAMDX` decoration). Instead, it should be set to the node's index in the node array, if any, or zero for a single node. 
--- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 12 ++---------- tools/clang/test/CodeGenSPIRV/node.renamed.hlsl | 7 ++++++- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index f3d10537e1..36c9b33ad5 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -11256,17 +11256,9 @@ SpirvInstruction *SpirvEmitter::processIntrinsicGetNodeOutputRecords( const auto *declRefExpr = dyn_cast(baseExpr->IgnoreImpCasts()); const auto *paramDecl = dyn_cast(declRefExpr->getDecl()); - const auto *nodeID = paramDecl->getAttr(); - StringRef nodeName = paramDecl->getName(); - unsigned nodeIndex = 0; - if (nodeID) { - nodeName = nodeID->getName(); - nodeIndex = nodeID->getArrayIndex(); - } - if (!shaderIndex) { - shaderIndex = spvBuilder.getConstantInt(astContext.UnsignedIntTy, - llvm::APInt(32, nodeIndex)); + shaderIndex = + spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 0)); } LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, diff --git a/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl index 953288929d..265fd6c17f 100644 --- a/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl +++ b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl @@ -12,7 +12,8 @@ struct RECORD { [NodeIsProgramEntry] void node017_renamed_node([NodeID("output_node_name", 2)] NodeOutput r) { - r.GetThreadNodeOutputRecords(1); + ThreadNodeOutputRecords records = r.GetThreadNodeOutputRecords(1); + records.OutputComplete(); } // CHECK: OpEntryPoint GLCompute %{{[^ ]*}} "node017_renamed_node" @@ -20,4 +21,8 @@ void node017_renamed_node([NodeID("output_node_name", 2)] NodeOutput r) // CHECK-DAG: OpDecorateId [[TYPE]] PayloadNodeBaseIndexAMDX [[U2:%[0-9A-Za-z_]*]] // CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 // CHECK-DAG: [[STR]] = OpConstantStringAMDX "output_node_name" +// CHECK-DAG: 
[[U0:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 // CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 4 +// CHECK: OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] From 9b5f5c9fb239ae83a7d318b0b45632f343866583 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Thu, 12 Jun 2025 09:14:55 -0400 Subject: [PATCH 61/93] [SPIRV] Use OpCopyLogical to reconstruct values (#7530) When DXC needs to change the layout of a value, it currently has to extract each individual scalar, and then reconstruct using the type with the different layout. If you have a large array or struct with many members, this generates a lot of extra code. Starting in SPIR-V 1.4, the OpCopyLogical instruction is available to do the reconstruction. This should help generate less code, which will lead to improved compile time and maybe smaller binary sizes. Fixes #7493 --- .../clang/include/clang/SPIRV/AstTypeProbe.h | 4 ++ tools/clang/lib/SPIRV/AstTypeProbe.cpp | 21 ++++++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 39 +++++++++++ tools/clang/lib/SPIRV/SpirvEmitter.h | 2 + .../clang/test/CodeGenSPIRV/logical_copy.hlsl | 67 +++++++++++++++++++ 5 files changed, 133 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/logical_copy.hlsl diff --git a/tools/clang/include/clang/SPIRV/AstTypeProbe.h b/tools/clang/include/clang/SPIRV/AstTypeProbe.h index 6302d43a88..9abea972c6 100644 --- a/tools/clang/include/clang/SPIRV/AstTypeProbe.h +++ b/tools/clang/include/clang/SPIRV/AstTypeProbe.h @@ -337,6 +337,10 @@ bool isOrContainsNonFpColMajorMatrix(const ASTContext &, const SpirvCodeGenOptions &, QualType type, const Decl *decl); +/// \brief Returns true if the type is a boolean type or an aggregate type that +/// contains a boolean type. +bool isOrContainsBoolType(QualType type); + /// \brief Returns true if the given type is `vk::ext_result_id`.
bool isExtResultIdType(QualType type); diff --git a/tools/clang/lib/SPIRV/AstTypeProbe.cpp b/tools/clang/lib/SPIRV/AstTypeProbe.cpp index 31a9bd8f7d..b6ca1f60ae 100644 --- a/tools/clang/lib/SPIRV/AstTypeProbe.cpp +++ b/tools/clang/lib/SPIRV/AstTypeProbe.cpp @@ -1353,6 +1353,27 @@ bool isOrContainsNonFpColMajorMatrix(const ASTContext &astContext, return false; } +bool isOrContainsBoolType(QualType type) { + if (isBoolOrVecMatOfBoolType(type)) { + return true; + } + + if (const auto *arrayType = type->getAsArrayTypeUnsafe()) { + return isOrContainsBoolType(arrayType->getElementType()); + } + + if (const auto *recordType = type->getAs()) { + for (auto field : recordType->getDecl()->fields()) { + if (isOrContainsBoolType(field->getType())) { + return true; + } + } + return false; + } + + return false; +} + bool isTypeInVkNamespace(const RecordType *type) { if (const auto *nameSpaceDecl = dyn_cast(type->getDecl()->getDeclContext())) { diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 36c9b33ad5..cc7016b594 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -7108,6 +7108,38 @@ void SpirvEmitter::storeValue(SpirvInstruction *lhsPtr, } } +bool SpirvEmitter::canUseOpCopyLogical(QualType type) const { + if (featureManager.getSpirvVersion(featureManager.getTargetEnv()) < + VersionTuple(1, 4)) { + return false; + } + + if (!type->isArrayType() && !type->isRecordType()) { + return false; + } + + if (const auto *recordType = type->getAs()) { + if (isTypeInVkNamespace(recordType) && + (recordType->getDecl()->getName().equals("BufferPointer") || + recordType->getDecl()->getName().equals("SpirvType") || + recordType->getDecl()->getName().equals("SpirvOpaqueType"))) { + // vk::BufferPointer lowers to a pointer type. No need to reconstruct + // the value. The vk::Spirv*Type should be treated as an opaque type. All + // we can do is leave it the same.
+ return false; + } + } + + if (hlsl::IsHLSLVecMatType(type) || hlsl::IsHLSLResourceType(type)) { + return false; + } + + // If the type contains a bool it is possible that one type represents it with + // a bool and the other with an int. If that happens, OpCopyLogical is not + // valid. + return !isOrContainsBoolType(type); +} + SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, const QualType valType, SpirvLayoutRule dstLR, @@ -7171,6 +7203,13 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal, return result; }; + if (canUseOpCopyLogical(valType)) { + SpirvInstruction *copy = spvBuilder.createUnaryOp( + spv::Op::OpCopyLogical, valType, srcVal, srcVal->getSourceLocation()); + copy->setLayoutRule(dstLR); + return copy; + } + // Constant arrays if (const auto *arrayType = astContext.getAsConstantArrayType(valType)) { const auto elemType = arrayType->getElementType(); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 954b2c5dd3..14401c6418 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -228,6 +228,8 @@ class SpirvEmitter : public ASTConsumer { QualType lhsValType, SourceLocation loc, SourceRange range = {}); + bool canUseOpCopyLogical(QualType type) const; + /// Decomposes and reconstructs the given srcVal of the given valType to meet /// the requirements of the dstLR layout rule. 
SpirvInstruction *reconstructValue(SpirvInstruction *srcVal, QualType valType, diff --git a/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl b/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl new file mode 100644 index 0000000000..eb4a803548 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl @@ -0,0 +1,67 @@ +// RUN: %dxc %s -fcgl -spirv -T ps_6_8 -fspv-target-env=vulkan1.1spirv1.4 | FileCheck %s + + + +struct WithBool { + bool b; +}; + +struct StructWithBool { + WithBool wb; +}; + +struct StructWithoutBool { + int a; +}; + +struct OuterStruct { + StructWithBool a[2]; + WithBool b; + StructWithoutBool c; + StructWithoutBool d[2]; +} S; + + +// CHECK: %GetStruct = OpFunction %OuterStruct_0 None %34 +// CHECK: %bb_entry_0 = OpLabel +// CHECK: [[ld:%[0-9]+]] = OpLoad %OuterStruct %39 + +// The array `a` must be split up because it contains a bool that needs a +// conversion from int to bool. +// CHECK: [[arr_with_bool:%[0-9]+]] = OpCompositeExtract %_arr_StructWithBool_uint_2 [[ld]] 0 +// CHECK: [[struct_with_bool:%[0-9]+]] = OpCompositeExtract %StructWithBool [[arr_with_bool]] 0 +// CHECK: [[with_bool:%[0-9]+]] = OpCompositeExtract %WithBool [[struct_with_bool]] 0 +// CHECK: [[int:%[0-9]+]] = OpCompositeExtract %uint [[with_bool]] 0 +// CHECK: [[bool:%[0-9]+]] = OpINotEqual %bool [[int]] %uint_0 +// CHECK: [[with_bool:%[0-9]+]] = OpCompositeConstruct %WithBool_0 [[bool]] +// CHECK: [[struct_with_bool:%[0-9]+]] = OpCompositeConstruct %StructWithBool_0 [[with_bool]] + +// Skip second element of the array. It is more of the same. +// CHECK: [[a:%[0-9]+]] = OpCompositeConstruct %_arr_StructWithBool_0_uint_2 [[struct_with_bool]] {{%.*}} + +// The struct `b` must be split up for the same reason. 
+// CHECK: [[with_bool:%[0-9]+]] = OpCompositeExtract %WithBool [[ld]] 1 +// CHECK: [[int:%[0-9]+]] = OpCompositeExtract %uint [[with_bool]] 0 +// CHECK: [[bool:%[0-9]+]] = OpINotEqual %bool [[int]] %uint_0 +// CHECK: [[b:%[0-9]+]] = OpCompositeConstruct %WithBool_0 [[bool]] + +// The struct `c` can use OpCopyLogical. +// CHECK: %59 = OpCompositeExtract %StructWithoutBool [[ld]] 2 +// CHECK: [[c:%[0-9]+]] = OpCopyLogical %StructWithoutBool_0 %59 + +// The array `d` can use OpCopyLogical. +// CHECK: %61 = OpCompositeExtract %_arr_StructWithoutBool_uint_2 [[ld]] 3 +// CHECK: [[d:%[0-9]+]] = OpCopyLogical %_arr_StructWithoutBool_0_uint_2 %61 + +// CHECK: [[r:%[0-9]+]] = OpCompositeConstruct %OuterStruct_0 [[a]] [[b]] [[c]] [[d]] +// CHECK: OpStore {{%.*}} [[r]] +// CHECK: OpFunctionEnd + +OuterStruct GetStruct() { return S; } + +uint main() : SV_TARGET +{ + GetStruct(); + return 0; +} + From 57177f77a4dc6996400ac97a0d618799c82374e8 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 13 Jun 2025 07:05:13 -0400 Subject: [PATCH 62/93] [SPIRV] Use unknown image format in vk1.3 and later (#7528) We have had many requests to use the `unknown` image format for storage images (OpTypeImage with sampled=2). We did not want to do that when targeting earlier versions of Vulkan because it could break existing code. The capability StorageImageWriteWithoutFormat is not guaranteed to be available on Vulkan 1.1 devices. This means the application will stop working. However, Vulkan 1.3 guarantees that StorageImageWriteWithoutFormat and StorageImageReadWithoutFormat are available. We can make this change for VK1.3 and later without breaking existing code.
Fixes #7484 --- .../clang/include/clang/SPIRV/SpirvBuilder.h | 2 + tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 7 + .../CodeGenSPIRV/node.empty-node-input.hlsl | 2 +- .../clang/test/CodeGenSPIRV/type.buffer.hlsl | 176 +++++++++++------- .../type.rasterizer-ordered-buffer.hlsl | 92 +++++---- .../type.rasterizer-ordered-texture.hlsl | 36 ++-- .../test/CodeGenSPIRV/type.rwtexture.hlsl | 56 ++++-- 7 files changed, 234 insertions(+), 137 deletions(-) diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 465f7313f1..4fe31c6d62 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -812,6 +812,8 @@ class SpirvBuilder { /// the given target at the given source location. inline void requireExtension(llvm::StringRef extension, SourceLocation); + FeatureManager &getFeatureManager() { return featureManager; } + private: /// \brief If not added already, adds an OpExtInstImport (import of extended /// instruction set) for the given instruction set. Returns the imported diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 1869983ae3..0309d56840 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -1156,6 +1156,13 @@ LowerTypeVisitor::lowerStructFields(const RecordDecl *decl, spv::ImageFormat LowerTypeVisitor::translateSampledTypeToImageFormat(QualType sampledType, SourceLocation srcLoc) { + + // In Vulkan 1.3, all image types can be Unknown. 
+ FeatureManager &featureManager = spvBuilder.getFeatureManager(); + if (!featureManager.isTargetEnvVulkan() || + featureManager.isTargetEnvVulkan1p3OrAbove()) + return spv::ImageFormat::Unknown; + uint32_t elemCount = 1; QualType ty = {}; if (!isScalarType(sampledType, &ty) && diff --git a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl index fa16429a1b..da6a1d32df 100644 --- a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl +++ b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl @@ -19,7 +19,7 @@ void emptynodeinput(EmptyNodeInput input) // CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 // CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 -// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 R32ui +// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 Unknown // CHECK-DAG: [[IMGPTR:%[^ ]*]] = OpTypePointer UniformConstant [[IMG]] // CHECK-DAG: [[BUF:%[^ ]*]] = OpVariable [[IMGPTR]] UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl index 35d1b868a8..3e7bb73bcb 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl @@ -1,109 +1,149 @@ -// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN +// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN + +// Before vulkan1.3, we should be trying to infer the image type because +// we cannot necessarily use Unknown. However in VK1.3 and later, we can use +// Unknown.
// CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image Buffer intbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 Buffer uintbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 Buffer floatbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RWBuffer intrwbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RWBuffer uintrwbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// 
UNKNOWN: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RWBuffer floatrwbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// If the `Unknown` image format is used, then the images below will reuse the types above. +// UNKNOWN-NOT: OpTypeImage + +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 Buffer int2buf; -// CHECK: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 Buffer uint2buf; -// CHECK: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 Buffer float2buf; -// CHECK: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RWBuffer int2rwbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant
%type_buffer_image_9 +// INFER: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 RWBuffer uint2rwbuf; -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RWBuffer float2rwbuf; -// CHECK: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 -// CHECK: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 +// INFER: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 +// INFER: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 Buffer int3buf; Buffer int4buf; -// CHECK: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 -// CHECK: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 +// INFER: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 +// INFER: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 
Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 Buffer uint3buf; Buffer uint4buf; -// CHECK: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 -// CHECK: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 +// INFER: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 +// INFER: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 Buffer float3buf; Buffer float4buf; -// CHECK: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 -// CHECK: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 +// INFER: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 +// INFER: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 RWBuffer int3rwbuf; RWBuffer int4rwbuf; -// CHECK: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 -// CHECK: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: 
%_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 +// INFER: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 +// INFER: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 RWBuffer uint3rwbuf; RWBuffer uint4rwbuf; -// CHECK: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 -// CHECK: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 +// INFER: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 +// INFER: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 RWBuffer float3rwbuf; RWBuffer float4rwbuf; -// CHECK: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %uint2buf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant -// CHECK: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant -// CHECK: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant -// CHECK: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant -// CHECK: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant -// CHECK: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant -// CHECK: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant -// CHECK: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant -// CHECK: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant -// CHECK: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant -// CHECK: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant -// CHECK: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant -// CHECK: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant +// INFER: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uintrwbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant +// INFER: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant +// INFER: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant +// INFER: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant +// INFER: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant +// INFER: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant +// INFER: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant +// INFER: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant +// INFER: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant +// INFER: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant +// INFER: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant +// INFER: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant + +// UNKNOWN: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uintbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float3rwbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl index c616f65bb9..0b576fc5e9 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl @@ -1,59 +1,81 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN +// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN + +// Before vulkan1.3, we should be trying to infer the image type because +// we cannot necessarily use Unknown. However in VK1.3 and later, we can use +// Unknown.
// CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image RasterizerOrderedBuffer introvbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 RasterizerOrderedBuffer uintrovbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 RasterizerOrderedBuffer floatrovbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RasterizerOrderedBuffer int2rovbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RasterizerOrderedBuffer 
uint2rovbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RasterizerOrderedBuffer float2rovbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 -// CHECK: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// INFER: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 RasterizerOrderedBuffer int3rovbuf; RasterizerOrderedBuffer int4rovbuf; -// CHECK: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 -// CHECK: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RasterizerOrderedBuffer uint3rovbuf; RasterizerOrderedBuffer 
uint4rovbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RasterizerOrderedBuffer float3rovbuf; RasterizerOrderedBuffer float4rovbuf; -// CHECK: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %introvbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant + +// UNKNOWN: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4rovbuf = 
OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl index 32dd76e6f1..21bff421a0 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl @@ -1,23 +1,28 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN +// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: 
%_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant @@ -33,7 +38,8 @@ RasterizerOrderedTexture3D t3 ; [[vk::image_format("rgba32f")]] RasterizerOrderedTexture3D t4 ; -// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable 
%_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RasterizerOrderedTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl index f901d44cfa..884957210a 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl @@ -1,24 +1,43 @@ -// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-target-env=vulkan1.3 -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant 
%type_1d_image_array_0 -// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image = 
OpTypePointer UniformConstant %type_3d_image +// UNKNOWN: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// UNKNOWN: %type_3d_image_1 = OpTypeImage %float 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_1 = OpTypePointer UniformConstant %type_3d_image_1 +// UNKNOWN: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// UNKNOWN: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// UNKNOWN: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// UNKNOWN: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant RWTexture1D t1 ; @@ -33,7 +52,8 @@ RWTexture3D t3 ; [[vk::image_format("rgba32f")]] RWTexture3D t4 ; -// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RWTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant From f94396ddffa8562a00d64a1db58d3f73f33b655a Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Mon, 16 Jun 2025 15:02:00 -0700 Subject: [PATCH 63/93] Long Vector Execution tests preliminary work to merge a minor refactor. (#7532) This resolves issue #7531. 
This is a preliminary step to merging in some of the Long Vector Execution tests currently sitting in the staging-sm6.9 branch. There are no functional changes here, but given that this is a refactor I do not want to add the [NFC] tag to the PR title. - Moves functions used by existing exec tests and incoming long vector tests to a common HlslExecTestUtils.h. - Updates naming to adhere to LLVM coding standards for newly created files (even though the functions aren't new) - Move a few other shared functions to files that make more sense than ExecutionTest.cpp. - TableParameterHandler class moved to its own header/cpp files. No naming updates as nothing else was touched. Can update in a subsequent PR if it is preferred. - Add the LLVM coding guidelines preferred include guard to HlslTestUtils.h to mitigate redefinition issues exposed by this refactor. - Updated the D3D shader model 'redefines' in ExecutionTest.cpp as they were also factored out into a common header. constexpr required because they are enum values. - BigObj added to the cmake file as I was hitting issues locally for this. 
--- include/dxc/Test/HlslTestUtils.h | 4 + tools/clang/unittests/HLSLExec/CMakeLists.txt | 3 + .../unittests/HLSLExec/ExecutionTest.cpp | 1504 ++++------------- .../unittests/HLSLExec/HlslExecTestUtils.h | 405 +++++ .../clang/unittests/HLSLExec/ShaderOpTest.cpp | 73 + tools/clang/unittests/HLSLExec/ShaderOpTest.h | 32 +- .../HLSLExec/TableParameterHandler.cpp | 376 +++++ .../HLSLExec/TableParameterHandler.h | 205 +++ 8 files changed, 1458 insertions(+), 1144 deletions(-) create mode 100644 tools/clang/unittests/HLSLExec/HlslExecTestUtils.h create mode 100644 tools/clang/unittests/HLSLExec/TableParameterHandler.cpp create mode 100644 tools/clang/unittests/HLSLExec/TableParameterHandler.h diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h index 0e37ccdcff..44f3f6148a 100644 --- a/include/dxc/Test/HlslTestUtils.h +++ b/include/dxc/Test/HlslTestUtils.h @@ -10,6 +10,8 @@ /////////////////////////////////////////////////////////////////////////////// // *** THIS FILE CANNOT TAKE ANY LLVM DEPENDENCIES *** // +#ifndef HLSLTESTUTILS_H +#define HLSLTESTUTILS_H #include #include @@ -735,3 +737,5 @@ inline UINT GetByteSizeForFormat(DXGI_FORMAT value) { } } #endif + +#endif // HLSLTESTUTILS_H diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 3878fa3f34..df61aad854 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -3,9 +3,12 @@ find_package(TAEF REQUIRED) find_package(D3D12 REQUIRED) # Used for ExecutionTest.cpp. 
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") + add_clang_library(ExecHLSLTests SHARED ExecutionTest.cpp ShaderOpTest.cpp + TableParameterHandler.cpp ExecHLSLTests.rc ) diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 6db27d7a41..c26b9a1b5b 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -60,6 +60,8 @@ #include "ShaderOpTest.h" #include #include +#include "TableParameterHandler.h" +#include "HlslExecTestUtils.h" // clang-format on #pragma comment(lib, "d3dcompiler.lib") @@ -67,47 +69,6 @@ #pragma comment(lib, "dxguid.lib") #pragma comment(lib, "version.lib") -// A more recent Windows SDK than currently required is needed for these. -typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)( - UINT NumFeatures, __in_ecount(NumFeatures) const IID *pIIDs, - __in_ecount_opt(NumFeatures) void *pConfigurationStructs, - __in_ecount_opt(NumFeatures) UINT *pConfigurationStructSizes); - -static const GUID D3D12ExperimentalShaderModelsID = - {/* 76f5573e-f13a-40f5-b297-81ce9e18933f */ - 0x76f5573e, - 0xf13a, - 0x40f5, - {0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f}}; - -// Used to create D3D12SDKConfiguration to enable AgilitySDK programmatically. -typedef HRESULT(WINAPI *D3D12GetInterfaceFn)(REFCLSID rclsid, REFIID riid, - void **ppvDebug); - -#ifndef __ID3D12SDKConfiguration_INTERFACE_DEFINED__ -// Copied from AgilitySDK D3D12.h to programmatically enable when in developer -// mode. 
-#define __ID3D12SDKConfiguration_INTERFACE_DEFINED__ - -EXTERN_C const GUID DECLSPEC_SELECTANY IID_ID3D12SDKConfiguration = { - 0xe9eb5314, - 0x33aa, - 0x42b2, - {0xa7, 0x18, 0xd7, 0x7f, 0x58, 0xb1, 0xf1, 0xc7}}; -EXTERN_C const GUID DECLSPEC_SELECTANY CLSID_D3D12SDKConfiguration = { - 0x7cda6aca, - 0xa03e, - 0x49c8, - {0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce}}; - -MIDL_INTERFACE("e9eb5314-33aa-42b2-a718-d77f58b1f1c7") -ID3D12SDKConfiguration : public IUnknown { -public: - virtual HRESULT STDMETHODCALLTYPE SetSDKVersion(UINT SDKVersion, - LPCSTR SDKPath) = 0; -}; -#endif /* __ID3D12SDKConfiguration_INTERFACE_DEFINED__ */ - using namespace DirectX; using namespace hlsl_test; @@ -271,9 +232,6 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS4 { #endif -// Virtual class to compute the expected result given a set of inputs -struct TableParameter; - class ExecutionTest { public: BEGIN_TEST_CLASS(ExecutionTest) @@ -519,10 +477,10 @@ class ExecutionTest { return false; // Do not: FreeLibrary(hRuntime); // If we actually free the library, it defeats the purpose of - // EnableAgilitySDK and EnableExperimentalMode. + // enableAgilitySDK and enableExperimentalMode. 
HRESULT hr; - hr = EnableAgilitySDK(hRuntime); + hr = enableAgilitySDK(hRuntime); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable Agility SDK - 0x%08x.", hr); } else if (hr == S_FALSE) { @@ -531,7 +489,7 @@ class ExecutionTest { LogCommentFmt(L"Agility SDK enabled."); } - hr = EnableExperimentalMode(hRuntime); + hr = enableExperimentalMode(hRuntime); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable shader experimental mode - 0x%08x.", hr); @@ -541,7 +499,7 @@ class ExecutionTest { LogCommentFmt(L"Experimental mode enabled."); } - hr = EnableDebugLayer(); + hr = enableDebugLayer(); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable debug layer - 0x%08x.", hr); } else if (hr == S_FALSE) { @@ -602,41 +560,31 @@ class ExecutionTest { // Do not remove the following line - it is used by TranslateExecutionTest.py // MARKER: ExecutionTest/DxilConf Shared Implementation Start - // This is defined in d3d.h for Windows 10 Anniversary Edition SDK, but we - // only require the Windows 10 SDK. - typedef enum D3D_SHADER_MODEL { - D3D_SHADER_MODEL_5_1 = 0x51, - D3D_SHADER_MODEL_6_0 = 0x60, - D3D_SHADER_MODEL_6_1 = 0x61, - D3D_SHADER_MODEL_6_2 = 0x62, - D3D_SHADER_MODEL_6_3 = 0x63, - D3D_SHADER_MODEL_6_4 = 0x64, - D3D_SHADER_MODEL_6_5 = 0x65, - D3D_SHADER_MODEL_6_6 = 0x66, - D3D_SHADER_MODEL_6_7 = 0x67, - D3D_SHADER_MODEL_6_8 = 0x68, - D3D_SHADER_MODEL_6_9 = 0x69, - } D3D_SHADER_MODEL; - - static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_9; - - bool UseDxbc() { -#ifdef _HLK_CONF - return false; -#else - return GetTestParamBool(L"DXBC"); -#endif - } - - bool UseWarpByDefault() { -#ifdef _HLK_CONF - return false; -#else - return true; -#endif - } - - bool UseDebugIfaces() { return true; } + // We define D3D_SHADER_MODEL enum values as we don't generally have access to + // the latest D3D headers when adding tests for a new SM being added. 
+ using D3D_SHADER_MODEL = ExecTestUtils::D3D_SHADER_MODEL; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_0 = + ExecTestUtils::D3D_SHADER_MODEL_6_0; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_1 = + ExecTestUtils::D3D_SHADER_MODEL_6_1; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_2 = + ExecTestUtils::D3D_SHADER_MODEL_6_2; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_3 = + ExecTestUtils::D3D_SHADER_MODEL_6_3; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_4 = + ExecTestUtils::D3D_SHADER_MODEL_6_4; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_5 = + ExecTestUtils::D3D_SHADER_MODEL_6_5; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_6 = + ExecTestUtils::D3D_SHADER_MODEL_6_6; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_7 = + ExecTestUtils::D3D_SHADER_MODEL_6_7; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_8 = + ExecTestUtils::D3D_SHADER_MODEL_6_8; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_9 = + ExecTestUtils::D3D_SHADER_MODEL_6_9; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_HIGHEST_SHADER_MODEL = + ExecTestUtils::D3D_HIGHEST_SHADER_MODEL; bool SaveImages() { return GetTestParamBool(L"SaveImages"); } @@ -766,7 +714,7 @@ class ExecutionTest { CComPtr pComputeShader; // Load and compile shaders. 
- if (UseDxbc()) { + if (useDxbc()) { #ifndef _HLK_CONF DXBCFromText(pShader, L"main", pTargetProfile, &pComputeShader); #endif @@ -784,112 +732,6 @@ class ExecutionTest { &computePsoDesc, IID_PPV_ARGS(ppComputeState))); } - bool CreateDevice(ID3D12Device **ppDevice, - D3D_SHADER_MODEL testModel = D3D_SHADER_MODEL_6_0, - bool skipUnsupported = true) { - if (testModel > HIGHEST_SHADER_MODEL) { - UINT minor = (UINT)testModel & 0x0f; - LogCommentFmt(L"Installed SDK does not support " - L"shader model 6.%1u", - minor); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - CComPtr factory; - CComPtr pDevice; - - *ppDevice = nullptr; - - VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&factory))); - if (GetTestParamUseWARP(UseWarpByDefault())) { - CComPtr warpAdapter; - VERIFY_SUCCEEDED(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); - HRESULT createHR = D3D12CreateDevice(warpAdapter, D3D_FEATURE_LEVEL_11_0, - IID_PPV_ARGS(&pDevice)); - if (FAILED(createHR)) { - LogCommentFmt(L"The available version of WARP does not support d3d12."); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - - if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { - WCHAR szFullModuleFilePath[MAX_PATH] = L""; - GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); - WEX::Logging::Log::Comment(WEX::Common::String().Format( - L"WARP driver loaded from: %S", szFullModuleFilePath)); - } - - } else { - CComPtr hardwareAdapter; - WEX::Common::String AdapterValue; - HRESULT hr = WEX::TestExecution::RuntimeParameters::TryGetValue( - L"Adapter", AdapterValue); - if (SUCCEEDED(hr)) { - st::GetHardwareAdapter(factory, AdapterValue, &hardwareAdapter); - } else { - WEX::Logging::Log::Comment( - L"Using default hardware adapter with D3D12 support."); - } - - VERIFY_SUCCEEDED(D3D12CreateDevice( - hardwareAdapter, 
D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&pDevice))); - } - // retrieve adapter information - LUID adapterID = pDevice->GetAdapterLuid(); - CComPtr adapter; - factory->EnumAdapterByLuid(adapterID, IID_PPV_ARGS(&adapter)); - DXGI_ADAPTER_DESC AdapterDesc; - VERIFY_SUCCEEDED(adapter->GetDesc(&AdapterDesc)); - LogCommentFmt(L"Using Adapter:%s", AdapterDesc.Description); - - if (pDevice == nullptr) - return false; - - if (!UseDxbc()) { - // Check for DXIL support. - typedef struct D3D12_FEATURE_DATA_SHADER_MODEL { - D3D_SHADER_MODEL HighestShaderModel; - } D3D12_FEATURE_DATA_SHADER_MODEL; - const UINT D3D12_FEATURE_SHADER_MODEL = 7; - D3D12_FEATURE_DATA_SHADER_MODEL SMData; - SMData.HighestShaderModel = testModel; - if (FAILED(pDevice->CheckFeatureSupport( - (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData, - sizeof(SMData))) || - SMData.HighestShaderModel < testModel) { - UINT minor = (UINT)testModel & 0x0f; - LogCommentFmt(L"The selected device does not support " - L"shader model 6.%1u", - minor); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - } - - if (UseDebugIfaces()) { - CComPtr pInfoQueue; - if (SUCCEEDED(pDevice->QueryInterface(&pInfoQueue))) { - pInfoQueue->SetMuteDebugOutput(FALSE); - } - } - - *ppDevice = pDevice.Detach(); - return true; - } - void CreateGraphicsCommandQueue(ID3D12Device *pDevice, ID3D12CommandQueue **ppCommandQueue) { D3D12_COMMAND_QUEUE_DESC queueDesc = {}; @@ -919,7 +761,7 @@ class ExecutionTest { CComPtr vertexShader; CComPtr pixelShader; - if (UseDxbc()) { + if (useDxbc()) { #ifndef _HLK_CONF DXBCFromText(pShaders, L"VSMain", L"vs_6_0", &vertexShader); DXBCFromText(pShaders, L"PSMain", L"ps_6_0", &pixelShader); @@ -1642,7 +1484,7 @@ class ExecutionTest { // The debug layer does net yet validate DXIL programs that require // rewriting, but basic logging should work properly. 
HRESULT hr = S_FALSE; - if (UseDebugIfaces()) { + if (useDebugIfaces()) { CComPtr debugController; hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)); if (SUCCEEDED(hr)) { @@ -1830,20 +1672,6 @@ class ExecutionTest { } } - void ReadHlslDataIntoNewStream(LPCWSTR relativePath, IStream **ppStream) { - VERIFY_SUCCEEDED(m_support.Initialize()); - CComPtr pLibrary; - CComPtr pBlob; - CComPtr pStream; - std::wstring path = GetPathToHlslDataFile(relativePath, HLSLDATAFILEPARAM, - DEFAULT_EXEC_TEST_DIR); - VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary)); - VERIFY_SUCCEEDED( - pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob)); - VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream)); - *ppStream = pStream.Detach(); - } - void RecordRenderAndReadback(ID3D12GraphicsCommandList *pList, ID3D12DescriptorHeap *pRtvHeap, UINT rtvDescriptorSize, UINT instanceCount, @@ -2348,15 +2176,15 @@ TEST_F(ExecutionTest, LifetimeIntrinsicTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - bool bSM_6_6_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6, false); + bool bSM_6_6_Supported = createDevice(&pDevice, D3D_SHADER_MODEL_6_6, false); bool bSM_6_3_Supported = bSM_6_6_Supported; if (!bSM_6_6_Supported) { // Try 6.3 for downlevel DXR case - bSM_6_3_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_3, false); + bSM_6_3_Supported = createDevice(&pDevice, D3D_SHADER_MODEL_6_3, false); } if (!bSM_6_3_Supported) { // Otherwise, 6.0 better be supported for compute case - VERIFY_IS_TRUE(CreateDevice(&pDevice, D3D_SHADER_MODEL_6_0, false)); + VERIFY_IS_TRUE(createDevice(&pDevice, D3D_SHADER_MODEL_6_0, false)); } bool bDXRSupported = bSM_6_3_Supported && DoesDeviceSupportRayTracing(pDevice); @@ -2465,7 +2293,7 @@ TEST_F(ExecutionTest, BasicComputeTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::vector values; @@ -2524,7 
+2352,7 @@ TEST_F(ExecutionTest, BasicTriangleTest) { " return 1; //input.color;\r\n" "};\r\n"; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; struct BasicTestChecker { @@ -2668,7 +2496,7 @@ TEST_F(ExecutionTest, Int64Test) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -2693,7 +2521,7 @@ TEST_F(ExecutionTest, SignTest) { "}"; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; const uint32_t neg1 = (uint32_t)-1; @@ -2714,7 +2542,7 @@ TEST_F(ExecutionTest, SignTest) { TEST_F(ExecutionTest, WaveIntrinsicsDDITest) { #ifndef _HLK_CONF CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; if (FAILED(pDevice->CheckFeatureSupport( @@ -2814,7 +2642,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -2841,7 +2669,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) { CComPtr pUavHeap; CComPtr pCommandAllocator; FenceObj FO; - bool dxbc = UseDxbc(); + bool dxbc = useDxbc(); const size_t valueSizeInBytes = values.size() * sizeof(PerThreadData); CreateComputeCommandQueue(pDevice, L"WaveIntrinsicsTest Command Queue", @@ -3172,7 +3000,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { CComPtr pVertexBuffer; D3D12_VERTEX_BUFFER_VIEW vertexBufferView; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { // Optional feature, so it's correct to not support it if declared as such. @@ -3229,7 +3057,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { CreateVertexBuffer(pDevice, vertices, &pVertexBuffer, &vertexBufferView); - bool dxbc = UseDxbc(); + bool dxbc = useDxbc(); // Set up UAV resource. 
std::vector values; @@ -3491,12 +3319,6 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { } } -struct ShaderOpTestResult { - st::ShaderOp *ShaderOp; - std::shared_ptr ShaderOpSet; - std::shared_ptr Test; -}; - struct SPrimitives { float f_float; float f_float2; @@ -3504,87 +3326,19 @@ struct SPrimitives { float f_float2_o; }; -std::shared_ptr -RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback, - st::ShaderOpTest::TShaderCallbackFn pShaderCallback, - std::shared_ptr ShaderOpSet) { - st::ShaderOp *pShaderOp; - if (pName == nullptr) { - if (ShaderOpSet->ShaderOps.size() != 1) { - VERIFY_FAIL(L"Expected a single shader operation."); - } - pShaderOp = ShaderOpSet->ShaderOps[0].get(); - } else { - pShaderOp = ShaderOpSet->GetShaderOp(pName); - } - if (pShaderOp == nullptr) { - std::string msg = "Unable to find shader op "; - msg += pName; - msg += "; available ops"; - const char sep = ':'; - for (auto &pAvailOp : ShaderOpSet->ShaderOps) { - msg += sep; - msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; - } - CA2W msgWide(msg.c_str()); - VERIFY_FAIL(msgWide.m_psz); - } - - // This won't actually be used since we're supplying the device, - // but let's make it consistent. 
- pShaderOp->UseWarpDevice = GetTestParamUseWARP(true); - - std::shared_ptr test = std::make_shared(); - test->SetDxcSupport(&support); - test->SetInitCallback(pInitCallback); - test->SetShaderCallback(pShaderCallback); - test->SetDevice(pDevice); - test->RunShaderOp(pShaderOp); - - std::shared_ptr result = - std::make_shared(); - result->ShaderOpSet = ShaderOpSet; - result->Test = test; - result->ShaderOp = pShaderOp; - return result; -} - -std::shared_ptr -RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback, - std::shared_ptr ShaderOpSet) { - return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, - nullptr, ShaderOpSet); -} - -std::shared_ptr -RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - IStream *pStream, LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback) { - DXASSERT_NOMSG(pStream != nullptr); - std::shared_ptr ShaderOpSet = - std::make_shared(); - st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); - return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, - ShaderOpSet); -} - TEST_F(ExecutionTest, OutOfBoundsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr); MappedData data; // Read back to CPU and examine contents - should get pure red. 
{ @@ -3601,15 +3355,15 @@ TEST_F(ExecutionTest, SaturateTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr); MappedData data; test->Test->GetReadBackData("U0", &data); const float *pValues = (float *)data.data(); @@ -3636,11 +3390,11 @@ void ExecutionTest::BasicTriangleTestSetup(LPCSTR ShaderOpName, WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. 
CComPtr pDevice; - if (!CreateDevice(&pDevice, testModel)) + if (!createDevice(&pDevice, testModel)) return; // As this is used, 6.2 requirement always comes with requiring native 16-bit @@ -3653,8 +3407,8 @@ void ExecutionTest::BasicTriangleTestSetup(LPCSTR ShaderOpName, return; } - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, ShaderOpName, nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, ShaderOpName, nullptr); MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -3786,14 +3540,14 @@ TEST_F(ExecutionTest, PartialDerivTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "DerivFine", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "DerivFine", nullptr); MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -3894,10 +3648,10 @@ TEST_F(ExecutionTest, DerivativesTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; std::shared_ptr ShaderOpSet = @@ -3977,10 +3731,10 @@ TEST_F(ExecutionTest, QuadReadTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); 
CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -4033,8 +3787,9 @@ TEST_F(ExecutionTest, QuadReadTest) { // Test Compute Shader pShaderOp->CS = CS; - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "QuadRead", nullptr, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, + ShaderOpSet); MappedData data; test->Test->GetReadBackData("U0", &data); @@ -4055,8 +3810,8 @@ TEST_F(ExecutionTest, QuadReadTest) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", + nullptr, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); // Test first, second and center quads @@ -4124,10 +3879,10 @@ TEST_F(ExecutionTest, ComputeSampleTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; std::shared_ptr ShaderOpSet = @@ -4175,7 +3930,7 @@ TEST_F(ExecutionTest, ComputeSampleTest) { } // Test 1D compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); MappedData data; @@ -4190,8 +3945,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { pShaderOp->CS = CS2; test.reset(); - test = 
RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U0", &data); pPixels = (UINT *)data.data(); @@ -4203,8 +3958,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); @@ -4221,8 +3976,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { pShaderOp->AS = AS2; pShaderOp->MS = MS2; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); @@ -4251,7 +4006,7 @@ TEST_F(ExecutionTest, ATOWriteMSAATest) { #else D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; #endif - if (!CreateDevice(&pDevice, sm)) + if (!createDevice(&pDevice, sm)) return; #ifndef WRITEMSAA_FALLBACK @@ -4517,7 +4272,7 @@ TEST_F(ExecutionTest, ATOProgOffset) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -4550,7 +4305,7 @@ TEST_F(ExecutionTest, ATOProgOffset) { D3D_SHADER_MODEL sm = TestShaderModels[i]; CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, /*skipUnsupported*/ false)) { + if (!createDevice(&pDevice, sm, /*skipUnsupported*/ false)) { LogCommentFmt(L"Device does not 
support shader model 6.%1u", ((UINT)sm & 0x0f)); break; @@ -4603,8 +4358,9 @@ TEST_F(ExecutionTest, ATOProgOffset) { } // Test compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); MappedData data; test->Test->GetReadBackData("U0", &data); @@ -4614,8 +4370,8 @@ TEST_F(ExecutionTest, ATOProgOffset) { pShaderOp->CS = nullptr; if (DoesDeviceSupportMeshShaders(pDevice)) { - test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); // PS test->Test->GetReadBackData("U0", &data); @@ -4632,8 +4388,8 @@ TEST_F(ExecutionTest, ATOProgOffset) { // Disable MS so PS goes forward pShaderOp->MS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U0", &data); VerifyProgOffsetResults((UINT *)data.data(), true); @@ -4653,10 +4409,10 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_7)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_7)) return; if (!DoesDeviceSupportAdvancedTexOps(pDevice)) { @@ -4701,7 +4457,7 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { }; // Test compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "SampleCmpLevel", 
SampleInitFn, ShaderOpSet); MappedData data; @@ -4718,8 +4474,8 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U0", &data); pPixels = (UINT *)data.data(); @@ -5298,7 +5054,7 @@ TEST_F(ExecutionTest, ATORawGather) { D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; #endif CComPtr pDevice; - if (!CreateDevice(&pDevice, sm)) + if (!createDevice(&pDevice, sm)) return; #ifndef RAWGATHER_FALLBACK @@ -5528,7 +5284,7 @@ void ExecutionTest::RunBasicShaderModelTest(D3D_SHADER_MODEL shaderModel) { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, shaderModel)) { + if (!createDevice(&pDevice, shaderModel)) { return; } @@ -5628,9 +5384,9 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, }; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test is creating the resource to run // the test @@ -5958,178 +5714,6 @@ struct SPackUnpackOpOutUnpacked { std::array outputClampedUint16; std::array outputClampedInt16; }; - -// Parameter representation for taef data-driven tests -struct TableParameter { - LPCWSTR m_name; - enum TableParameterType { - INT8, - INT16, - INT32, - UINT, - FLOAT, - HALF, - DOUBLE, - STRING, - BOOL, - INT8_TABLE, - INT16_TABLE, - INT32_TABLE, - FLOAT_TABLE, - HALF_TABLE, - DOUBLE_TABLE, - STRING_TABLE, - UINT8_TABLE, - UINT16_TABLE, - UINT32_TABLE, - BOOL_TABLE - }; - TableParameter(LPCWSTR name, 
TableParameterType type, bool required) - : m_name(name), m_type(type), m_required(required) {} - TableParameterType m_type; - bool m_required; // required parameter - int8_t m_int8; - int16_t m_int16; - int m_int32; - unsigned int m_uint; - float m_float; - uint16_t m_half; // no such thing as half type in c++. Use int16 instead - double m_double; - bool m_bool; - WEX::Common::String m_str; - std::vector m_int8Table; - std::vector m_int16Table; - std::vector m_int32Table; - std::vector m_uint8Table; - std::vector m_uint16Table; - std::vector m_uint32Table; - std::vector m_floatTable; - std::vector m_halfTable; // no such thing as half type in c++ - std::vector m_doubleTable; - std::vector m_boolTable; - std::vector m_StringTable; -}; - -class TableParameterHandler { -private: - HRESULT ParseTableRow(); - -public: - TableParameter *m_table; - size_t m_tableSize; - TableParameterHandler(TableParameter *pTable, size_t size) - : m_table(pTable), m_tableSize(size) { - clearTableParameter(); - VERIFY_SUCCEEDED(ParseTableRow()); - } - - TableParameter *GetTableParamByName(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &m_table[i]; - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - void clearTableParameter() { - for (size_t i = 0; i < m_tableSize; ++i) { - m_table[i].m_int32 = 0; - m_table[i].m_uint = 0; - m_table[i].m_double = 0; - m_table[i].m_bool = false; - m_table[i].m_str = WEX::Common::String(); - } - } - - template std::vector *GetDataArray(LPCWSTR name) { - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int32Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < 
m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int8Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int16Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_uint32Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_floatTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - // TODO: uin16_t may be used to represent two different types when we - // introduce uint16 - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_halfTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_doubleTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_boolTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } -}; - 
static TableParameter UnaryFPOpParameters[] = { {L"ShaderOp.Target", TableParameter::STRING, true}, {L"ShaderOp.Text", TableParameter::STRING, true}, @@ -6460,381 +6044,6 @@ static TableParameter PackUnpackOpParameters[] = { {L"Validation.Input", TableParameter::UINT32_TABLE, true}, }; -static bool IsHexString(PCWSTR str, uint16_t *value) { - std::wstring wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - LPCWSTR wstr = wString.c_str(); - if (wcsncmp(wstr, L"0x", 2) == 0 || wcsncmp(wstr, L"0b", 2) == 0) { - *value = (uint16_t)wcstol(wstr, NULL, 0); - return true; - } - return false; -} - -static HRESULT ParseDataToFloat(PCWSTR str, float &value) { - std::wstring wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - wString.erase(std::remove(wString.begin(), wString.end(), L'\n'), - wString.end()); - PCWSTR wstr = wString.data(); - if (_wcsicmp(wstr, L"NaN") == 0) { - value = NAN; - } else if (_wcsicmp(wstr, L"-inf") == 0) { - value = -(INFINITY); - } else if (_wcsicmp(wstr, L"inf") == 0) { - value = INFINITY; - } else if (_wcsicmp(wstr, L"-denorm") == 0) { - value = -(FLT_MIN / 2); - } else if (_wcsicmp(wstr, L"denorm") == 0) { - value = FLT_MIN / 2; - } else if (_wcsicmp(wstr, L"-0.0f") == 0 || _wcsicmp(wstr, L"-0.0") == 0 || - _wcsicmp(wstr, L"-0") == 0) { - value = -0.0f; - } else if (_wcsicmp(wstr, L"0.0f") == 0 || _wcsicmp(wstr, L"0.0") == 0 || - _wcsicmp(wstr, L"0") == 0) { - value = 0.0f; - } else if (_wcsnicmp(wstr, L"0x", 2) == - 0) { // For hex values, take values literally - unsigned temp_i = std::stoul(wstr, nullptr, 16); - value = (float &)temp_i; - } else { - // evaluate the expression of wstring - double val = _wtof(wstr); - if (val == 0) { - LogErrorFmt(L"Failed to parse parameter %s to float", wstr); - return E_FAIL; - } - value = (float)val; - } - return S_OK; -} - -static HRESULT ParseDataToUint(PCWSTR str, unsigned int &value) { - std::wstring 
wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - PCWSTR wstr = wString.data(); - // evaluate the expression of string - if (_wcsicmp(wstr, L"0") == 0 || _wcsicmp(wstr, L"0x00000000") == 0) { - value = 0; - return S_OK; - } - wchar_t *end; - unsigned int val = std::wcstoul(wstr, &end, 0); - if (val == 0) { - LogErrorFmt(L"Failed to parse parameter %s to int", wstr); - return E_FAIL; - } - value = val; - return S_OK; -} - -static HRESULT ParseDataToVectorFloat(PCWSTR str, float *ptr, size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - if (FAILED(ParseDataToFloat( - wstr.substr(curPosition, nextPosition - curPosition).data(), - *(ptr + i)))) { - return E_FAIL; - } - curPosition = nextPosition + 1; - } - return S_OK; -} - -static HRESULT ParseDataToVectorHalf(PCWSTR str, uint16_t *ptr, size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - float floatValue; - if (FAILED(ParseDataToFloat( - wstr.substr(curPosition, nextPosition - curPosition).data(), - floatValue))) { - return E_FAIL; - } - *(ptr + i) = ConvertFloat32ToFloat16(floatValue); - curPosition = nextPosition + 1; - } - return S_OK; -} - -static HRESULT ParseDataToVectorUint(PCWSTR str, unsigned int *ptr, - size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - if (FAILED(ParseDataToUint( - wstr.substr(curPosition, nextPosition - curPosition).data(), - *(ptr + i)))) { - return E_FAIL; - } - curPosition = nextPosition + 1; - } - return S_OK; -} - -HRESULT 
TableParameterHandler::ParseTableRow() { - TableParameter *table = m_table; - for (unsigned int i = 0; i < m_tableSize; ++i) { - switch (table[i].m_type) { - case TableParameter::INT8: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int16 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int8 = (int8_t)(table[i].m_int32); - break; - case TableParameter::INT16: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int16 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int16 = (short)(table[i].m_int32); - break; - case TableParameter::INT32: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::UINT: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_uint)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::DOUBLE: - if (FAILED(WEX::TestExecution::TestData::TryGetValue( - table[i].m_name, table[i].m_double)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::STRING: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_str)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::BOOL: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_str)) && - table[i].m_bool) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case 
TableParameter::INT8_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int8Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int8Table[j] = (int8_t)tempTable[j]; - } - break; - } - case TableParameter::INT16_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int16Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int16Table[j] = (int16_t)tempTable[j]; - } - break; - } - case TableParameter::INT32_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int32Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int32Table[j] = tempTable[j]; - } - break; - } - case TableParameter::UINT8_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int8Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int8Table[j] = 
(uint8_t)tempTable[j]; - } - break; - } - case TableParameter::UINT16_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_uint16Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_uint16Table[j] = (uint16_t)tempTable[j]; - } - break; - } - case TableParameter::UINT32_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_uint32Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_uint32Table[j] = tempTable[j]; - } - break; - } - case TableParameter::FLOAT_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_floatTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - ParseDataToFloat(tempTable[j], table[i].m_floatTable[j]); - } - break; - } - case TableParameter::HALF_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_halfTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = 
tempTable.GetSize(); j != end; ++j) { - uint16_t value = 0; - if (IsHexString(tempTable[j], &value)) { - table[i].m_halfTable[j] = value; - } else { - float val; - ParseDataToFloat(tempTable[j], val); - if (isdenorm(val)) - table[i].m_halfTable[j] = - signbit(val) ? Float16NegDenorm : Float16PosDenorm; - else - table[i].m_halfTable[j] = ConvertFloat32ToFloat16(val); - } - } - break; - } - case TableParameter::DOUBLE_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_doubleTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_doubleTable[j] = tempTable[j]; - } - break; - } - case TableParameter::BOOL_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_boolTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_boolTable[j] = tempTable[j]; - } - break; - } - case TableParameter::STRING_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_StringTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_StringTable[j] = tempTable[j]; - } - break; - } - default: - DXASSERT_NOMSG("Invalid Parameter Type"); - } - if (errno == ERANGE) { - 
LogErrorFmt(L"got out of range value for table %s", table[i].m_name); - return E_FAIL; - } - } - return S_OK; -} - static bool CompareOutputWithExpectedValueInt(int output, int ref, int tolerance) { return ((output - ref) <= tolerance) && ((ref - output) <= tolerance); @@ -6972,10 +6181,10 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -6997,7 +6206,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7035,10 +6244,10 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7065,7 +6274,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7125,10 +6334,10 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( 
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7155,7 +6364,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7198,10 +6407,10 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7232,7 +6441,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7273,10 +6482,10 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, 
D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7312,7 +6521,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7384,10 +6593,10 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7422,7 +6631,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7470,10 +6679,10 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7492,7 +6701,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + 
std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7530,10 +6739,10 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7552,7 +6761,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7590,10 +6799,10 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7617,7 +6826,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { size_t numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7680,10 +6889,10 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7705,7 +6914,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7750,10 +6959,10 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7775,7 +6984,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7842,10 +7051,10 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7867,7 +7076,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7916,10 +7125,10 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7946,7 +7155,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7984,10 +7193,10 @@ TEST_F(ExecutionTest, 
UnaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8014,7 +7223,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8053,10 +7262,10 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8089,7 +7298,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { size_t numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -8151,10 +7360,10 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8185,7 +7394,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -8228,10 +7437,10 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8262,7 +7471,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8326,10 +7535,10 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8361,7 +7570,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8916,10 +8125,10 @@ TEST_F(ExecutionTest, DotTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } @@ -8946,7 +8155,7 @@ TEST_F(ExecutionTest, DotTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "DotOp", // this callback is called when the test // is creating the resource to run the test @@ -9000,10 +8209,10 @@ TEST_F(ExecutionTest, Dot2AddHalfTest) { 
WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9036,7 +8245,7 @@ TEST_F(ExecutionTest, Dot2AddHalfTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Dot2AddHalfOp", // this callback is called when the test // is creating the resource to run the test @@ -9088,10 +8297,10 @@ TEST_F(ExecutionTest, Dot4AddI8PackedTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9112,7 +8321,7 @@ TEST_F(ExecutionTest, Dot4AddI8PackedTest) { size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Dot4AddI8PackedOp", // this callback is called when the test // is creating the resource to run the test @@ -9151,10 +8360,10 @@ TEST_F(ExecutionTest, Dot4AddU8PackedTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", 
&pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9175,7 +8384,7 @@ TEST_F(ExecutionTest, Dot4AddU8PackedTest) { size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Dot4AddU8PackedOp", // this callback is called when the test // is creating the resource to run the test @@ -9214,10 +8423,10 @@ TEST_F(ExecutionTest, Msad4Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } size_t tableSize = sizeof(Msad4OpParameters) / sizeof(TableParameter); @@ -9238,7 +8447,7 @@ TEST_F(ExecutionTest, Msad4Test) { size_t count = Validation_Expected->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Msad4", // this callback is called when the test // is creating the resource to run the test @@ -9296,10 +8505,10 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -9340,7 +8549,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { "must have same number of expected values"); } - std::shared_ptr test = 
RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -9407,10 +8616,10 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -9453,7 +8662,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { "must have same number of expected values"); } - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -9846,10 +9055,10 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( static const unsigned int DispatchGroupCount = 1; static const unsigned int ThreadCount = ThreadsPerGroup * DispatchGroupCount; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -9881,31 +9090,33 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( for (size_t maskIndex = 0; maskIndex < sizeof(MaskFunctionTable) / sizeof(MaskFunction); ++maskIndex) { - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveIntrinsicsOp", - // this callback is called when the test - // is creating the resource to run the test - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE(0 == _stricmp(Name, 
"SWaveIntrinsicsOp")); - size_t size = sizeof(PerThreadData) * ThreadCount; - Data.resize(size); - PerThreadData *pPrimitives = (PerThreadData *)Data.data(); - // 4 different inputs for each operation test - size_t index = 0; - std::vector *IntList = InputDataList[setIndex]; - while (index < ThreadCount) { - PerThreadData *p = &pPrimitives[index]; - p->firstLaneId = 0xFFFFBFFF; - p->laneIndex = 0xFFFFBFFF; - p->mask = MaskFunctionTable[maskIndex]((int)index); - p->input = (*IntList)[index % IntList->size()]; - p->output = 0xFFFFBFFF; - index++; - } - // use shader from data table - pShaderOp->Shaders.at(0).Text = Text.m_psz; - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveIntrinsicsOp", + // this callback is called when the test + // is creating the resource to run the test + [&](LPCSTR Name, std::vector &Data, + st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); + size_t size = sizeof(PerThreadData) * ThreadCount; + Data.resize(size); + PerThreadData *pPrimitives = (PerThreadData *)Data.data(); + // 4 different inputs for each operation test + size_t index = 0; + std::vector *IntList = InputDataList[setIndex]; + while (index < ThreadCount) { + PerThreadData *p = &pPrimitives[index]; + p->firstLaneId = 0xFFFFBFFF; + p->laneIndex = 0xFFFFBFFF; + p->mask = MaskFunctionTable[maskIndex]((int)index); + p->input = (*IntList)[index % IntList->size()]; + p->output = 0xFFFFBFFF; + index++; + } + // use shader from data table + pShaderOp->Shaders.at(0).Text = Text.m_psz; + }, + ShaderOpSet); // Check the value MappedData data; @@ -10106,11 +9317,11 @@ void ExecutionTest::WaveIntrinsicsMultiPrefixOpTest( constexpr size_t ThreadCount = ThreadsPerGroup * DispatchGroupSize; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, 
D3D_SHADER_MODEL_6_5)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_5)) { return; } @@ -10134,30 +9345,31 @@ void ExecutionTest::WaveIntrinsicsMultiPrefixOpTest( for (size_t maskIndex = 0; maskIndex < _countof(MaskFunctionTable); ++maskIndex) { - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveIntrinsicsOp", - [&](LPCSTR name, std::vector &data, st::ShaderOp *pShaderOp) { - UNREFERENCED_PARAMETER(name); - - const size_t dataSize = sizeof(PerThreadData) * ThreadCount; - - data.resize(dataSize); - PerThreadData *pThreadData = - reinterpret_cast(data.data()); - - for (size_t i = 0; i != ThreadCount; ++i) { - pThreadData[i].key = keys->at(i % keys->size()); - pThreadData[i].value = values->at(i % values->size()); - pThreadData[i].firstLaneId = 0xdeadbeef; - pThreadData[i].laneId = 0xdeadbeef; - pThreadData[i].mask = MaskFunctionTable[maskIndex]((int)i); - pThreadData[i].result = 0xdeadbeef; - } + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveIntrinsicsOp", + [&](LPCSTR name, std::vector &data, st::ShaderOp *pShaderOp) { + UNREFERENCED_PARAMETER(name); + + const size_t dataSize = sizeof(PerThreadData) * ThreadCount; + + data.resize(dataSize); + PerThreadData *pThreadData = + reinterpret_cast(data.data()); + + for (size_t i = 0; i != ThreadCount; ++i) { + pThreadData[i].key = keys->at(i % keys->size()); + pThreadData[i].value = values->at(i % values->size()); + pThreadData[i].firstLaneId = 0xdeadbeef; + pThreadData[i].laneId = 0xdeadbeef; + pThreadData[i].mask = MaskFunctionTable[maskIndex]((int)i); + pThreadData[i].result = 0xdeadbeef; + } - pShaderOp->Shaders.at(0).Text = shaderSource; - pShaderOp->Shaders.at(0).Target = shaderProfile; - }, - ShaderOpSet); + pShaderOp->Shaders.at(0).Text = shaderSource; + pShaderOp->Shaders.at(0).Target = shaderProfile; + }, + ShaderOpSet); MappedData mappedData; test->Test->GetReadBackData("SWaveIntrinsicsOp", &mappedData); @@ -10234,11 +9446,11 @@ 
TEST_F(ExecutionTest, CBufferTestHalf) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_2)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_2)) return; if (!DoesDeviceSupportNative16bitOps(pDevice)) { @@ -10250,7 +9462,7 @@ TEST_F(ExecutionTest, CBufferTestHalf) { uint16_t InputData[] = {0x3F80, 0x3F00, 0x3D80, 0x7BFF}; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "CBufferTestHalf", [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(pShaderOp); @@ -10280,7 +9492,7 @@ TEST_F(ExecutionTest, CBufferTestHalf) { } void TestBarycentricVariant(bool checkOrdering, - std::shared_ptr test) { + std::shared_ptr test) { MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -10364,10 +9576,10 @@ TEST_F(ExecutionTest, BarycentricsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_1)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_1)) return; if (!DoesDeviceSupportBarycentrics(pDevice)) { @@ -10386,9 +9598,9 @@ TEST_F(ExecutionTest, BarycentricsTest) { auto ResourceCallbackFnNoShift = MakeBarycentricsResourceInitCallbackFn(test_iteration); - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", - ResourceCallbackFnNoShift, ShaderOpSet); + std::shared_ptr test = + 
st::RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", + ResourceCallbackFnNoShift, ShaderOpSet); TestBarycentricVariant(false, test); // Now test that barycentric ordering is consistent @@ -10400,8 +9612,9 @@ TEST_F(ExecutionTest, BarycentricsTest) { auto ResourceCallbackFn = MakeBarycentricsResourceInitCallbackFn(test_iteration); - std::shared_ptr test2 = RunShaderOpTestAfterParse( - pDevice, m_support, "Barycentrics", ResourceCallbackFn, ShaderOpSet); + std::shared_ptr test2 = + st::RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", + ResourceCallbackFn, ShaderOpSet); TestBarycentricVariant(true, test2); } } @@ -10647,7 +9860,7 @@ bool ExecutionTest::SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, CComPtr &pStream, const char *&sTy, const char *&additionalOptions) { - if (!CreateDevice(&pDevice, shaderModel)) { + if (!createDevice(&pDevice, shaderModel)) { return false; } @@ -10692,7 +9905,7 @@ bool ExecutionTest::SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, } // read shader config - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); return true; } @@ -10784,7 +9997,7 @@ void ExecutionTest::RunComputeRawBufferLdStTest( (int)sizeof(Ty), additionalOptions) != -1); // run the shader - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, shaderOpName, [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || @@ -10839,7 +10052,7 @@ void ExecutionTest::RunGraphicsRawBufferLdStTest( (int)sizeof(Ty), additionalOptions) != -1); // run the shader - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, shaderOpName, [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || @@ -10921,7 +10134,7 @@ TEST_F(ExecutionTest, 
PackUnpackTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; @@ -10929,14 +10142,14 @@ TEST_F(ExecutionTest, PackUnpackTest) { string args = "-enable-16bit-types -DPACKUNPACK_PLACEHOLDER"; string target = "cs_6_2"; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } #else string args = "-enable-16bit-types"; string target = "cs_6_6"; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { return; } #endif @@ -10962,7 +10175,7 @@ TEST_F(ExecutionTest, PackUnpackTest) { std::vector expectedPacked(count / 4); std::vector expectedUnpacked(count / 4); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "PackUnpackOp", // this callback is called when the test // is creating the resource to run the test @@ -11316,7 +10529,7 @@ TEST_F(ExecutionTest, SignatureResourcesTest) { "}\n"; CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; RunResourceTest(pDevice, pShader.c_str(), L"cs_6_6", /*isDynamic*/ false); @@ -11355,7 +10568,7 @@ TEST_F(ExecutionTest, DynamicResourcesTest) { "}\n"; CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; // ResourceDescriptorHeap/SamplerDescriptorHeap requires Resource Binding Tier @@ -11398,7 +10611,7 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, 
m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -11436,7 +10649,7 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { ((UINT)sm & 0x0f)); CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } D3D12_FEATURE_DATA_D3D12_OPTIONS devOptions; @@ -11495,9 +10708,10 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { // Test Compute shader { pShaderOp->CS = pShaderOp->GetString("CS66"); - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "DynamicResourcesDynamicIndexing", + nullptr, ShaderOpSet); MappedData resultData; test->Test->GetReadBackData("g_result", &resultData); @@ -11512,9 +10726,10 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { pShaderOp->CS = nullptr; pShaderOp->VS = pShaderOp->GetString("VS66"); pShaderOp->PS = pShaderOp->GetString("PS66"); - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "DynamicResourcesDynamicIndexing", + nullptr, ShaderOpSet); MappedData resultVSData; MappedData resultPSData; @@ -11577,19 +10792,20 @@ void RunWaveSizeTest(UINT minWaveSize, UINT maxWaveSize, waveSize) != -1); // run the shader - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveSizeTest", - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); - pShaderOp->Shaders.at(0).Arguments = compilerOptions; - pShaderOp->Shaders.at(0).Text = waveSizeTestShader; - - VERIFY_IS_TRUE(sizeof(WaveSizeTestData) * MAX_WAVESIZE <= - Data.size()); - WaveSizeTestData *pInData = (WaveSizeTestData *)Data.data(); - 
memset(pInData, 0, sizeof(WaveSizeTestData) * MAX_WAVESIZE); - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveSizeTest", + [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); + pShaderOp->Shaders.at(0).Arguments = compilerOptions; + pShaderOp->Shaders.at(0).Text = waveSizeTestShader; + + VERIFY_IS_TRUE(sizeof(WaveSizeTestData) * MAX_WAVESIZE <= + Data.size()); + WaveSizeTestData *pInData = (WaveSizeTestData *)Data.data(); + memset(pInData, 0, sizeof(WaveSizeTestData) * MAX_WAVESIZE); + }, + ShaderOpSet); // verify expected values MappedData dataUav; @@ -11665,7 +10881,7 @@ void ExecuteWaveSizeRangeInstance(UINT minWaveSize, UINT maxWaveSize, }; // run the shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "WaveSizeTest", [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); @@ -11737,7 +10953,7 @@ void ExecutionTest::WaveSizeTest() { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6, + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6, /*skipUnsupported*/ false)) { return; } @@ -11765,7 +10981,7 @@ void ExecutionTest::WaveSizeTest() { CComPtr pStream; std::shared_ptr ShaderOpSet = std::make_shared(); - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); LogCommentFmt(L"Testing WaveSize attribute for shader model 6.6."); @@ -11777,7 +10993,7 @@ void ExecutionTest::WaveSizeRangeTest() { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_8, + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_8, /*skipUnsupported*/ 
false)) { return; } @@ -11805,7 +11021,7 @@ void ExecutionTest::WaveSizeRangeTest() { CComPtr pStream; std::shared_ptr ShaderOpSet = std::make_shared(); - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); LogCommentFmt(L"Testing WaveSize Range attribute for shader model 6.8."); @@ -12034,7 +11250,7 @@ void VerifyAtomicResults(const BYTE *uResults, const BYTE *sResults, } } -void VerifyAtomicsRawTest(std::shared_ptr test, +void VerifyAtomicsRawTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12083,7 +11299,7 @@ void VerifyAtomicsRawTest(std::shared_ptr test, bitSize); } -void VerifyAtomicsTypedTest(std::shared_ptr test, +void VerifyAtomicsTypedTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12135,7 +11351,7 @@ void VerifyAtomicsTypedTest(std::shared_ptr test, VerifyAtomicResults(pUint, pSint + stride, pXchg, stride, maxIdx, bitSize); } -void VerifyAtomicsSharedTest(std::shared_ptr test, +void VerifyAtomicsSharedTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12156,7 +11372,7 @@ void VerifyAtomicsSharedTest(std::shared_ptr test, bitSize); } -void VerifyAtomicsTest(std::shared_ptr test, +void VerifyAtomicsTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { VerifyAtomicsRawTest(test, maxIdx, bitSize); VerifyAtomicsTypedTest(test, maxIdx, bitSize); @@ -12166,10 +11382,10 @@ TEST_F(ExecutionTest, AtomicsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::shared_ptr ShaderOpSet = @@ -12181,7 +11397,7 @@ 
TEST_F(ExecutionTest, AtomicsTest) { // Test compute shader LogCommentFmt( L"Verifying 32-bit integer atomic operations in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsTest(test, 32 * 32, 32); @@ -12192,8 +11408,8 @@ TEST_F(ExecutionTest, AtomicsTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 32-bit integer atomic operations in " L"amp/mesh/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 32); VerifyAtomicsSharedTest(test, 8 * 8 * 2 + 8 * 8 * 2, 32); } @@ -12202,8 +11418,8 @@ TEST_F(ExecutionTest, AtomicsTest) { pShaderOp->MS = nullptr; LogCommentFmt( L"Verifying 32-bit integer atomic operations in vert/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTest(test, 64 * 64 + 6, 32); } @@ -12211,10 +11427,10 @@ TEST_F(ExecutionTest, Atomics64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12240,7 +11456,7 @@ TEST_F(ExecutionTest, Atomics64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in " L"compute shader"); - std::shared_ptr test = 
RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 32 * 32, 64); @@ -12249,8 +11465,8 @@ TEST_F(ExecutionTest, Atomics64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers " L"in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12258,8 +11474,8 @@ TEST_F(ExecutionTest, Atomics64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in " L"vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 64 * 64 + 6, 64); } @@ -12267,10 +11483,10 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12303,7 +11519,7 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 32 * 32, 64); @@ 
-12312,8 +11528,8 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12321,8 +11537,8 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 64 * 64 + 6, 64); } @@ -12330,10 +11546,10 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12366,7 +11582,7 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed " L"resources in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 32 * 32, 64); @@ -12375,8 +11591,8 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic 
operations on typed " L"resources in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12384,8 +11600,8 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed " L"resources in vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 64 * 64 + 6, 64); } @@ -12393,10 +11609,10 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12426,7 +11642,7 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared " L"variables in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); VerifyAtomicsSharedTest(test, 32 * 32, 64); @@ -12435,8 +11651,8 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared " L"variables in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = 
st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsSharedTest(test, 8 * 8 * 2 + 8 * 8 * 2, 64); } } @@ -12464,7 +11680,8 @@ void VerifyAtomicFloatResults(const float *results) { } } -void VerifyAtomicsFloatSharedTest(std::shared_ptr test) { +void VerifyAtomicsFloatSharedTest( + std::shared_ptr test) { MappedData Data; const float *pData = nullptr; @@ -12476,7 +11693,7 @@ void VerifyAtomicsFloatSharedTest(std::shared_ptr test) { VerifyAtomicFloatResults(pData); } -void VerifyAtomicsFloatTest(std::shared_ptr test) { +void VerifyAtomicsFloatTest(std::shared_ptr test) { // struct mirroring that in the shader struct AtomicStuff { @@ -12524,10 +11741,10 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::shared_ptr ShaderOpSet = @@ -12539,7 +11756,7 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { // Test compute shader LogCommentFmt( L"Verifying float cmp/xchg atomic operations in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "FloatAtomics", nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); VerifyAtomicsFloatSharedTest(test); @@ -12549,8 +11766,8 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying float cmp/xchg atomic operations in " L"amp/mesh/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", - nullptr, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", + nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); VerifyAtomicsFloatSharedTest(test); 
} @@ -12559,8 +11776,8 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { pShaderOp->MS = nullptr; LogCommentFmt( L"Verifying float cmp/xchg atomic operations in vert/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", + nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); } @@ -12589,7 +11806,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -12604,19 +11821,20 @@ TEST_F(ExecutionTest, HelperLaneTest) { ((UINT)sm & 0x0f)); CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) continue; - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "HelperLaneTestNoWave", - // this callback is called when the test is creating the resource to - // run the test - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); - std::fill(Data.begin(), Data.end(), (BYTE)0xCC); - UNREFERENCED_PARAMETER(pShaderOp); - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "HelperLaneTestNoWave", + // this callback is called when the test is creating the resource to + // run the test + [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); + std::fill(Data.begin(), Data.end(), (BYTE)0xCC); + UNREFERENCED_PARAMETER(pShaderOp); + }, + ShaderOpSet); struct HelperLaneTestResult { int32_t is_helper_00; @@ -12989,7 +12207,7 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { 
WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13010,7 +12228,7 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { bool smPassed = true; CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } @@ -13045,9 +12263,10 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { // Test Compute shader { - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", - CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "HelperLaneTestWave", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13069,9 +12288,10 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { // Test Vertex + Pixel shader { pShaderOp->CS = nullptr; - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", - CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "HelperLaneTestWave", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13130,7 +12350,7 @@ TEST_F(ExecutionTest, QuadAnyAll) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13163,7 +12383,7 @@ TEST_F(ExecutionTest, QuadAnyAll) { } CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if 
(!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } @@ -13176,8 +12396,9 @@ TEST_F(ExecutionTest, QuadAnyAll) { Skipped = false; // test compute - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "QuadAnyAll", CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13189,8 +12410,8 @@ TEST_F(ExecutionTest, QuadAnyAll) { pShaderOp->CS = nullptr; // test AS/MS - test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", - CleanUAVBuffer0Buffer, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", + CleanUAVBuffer0Buffer, ShaderOpSet); test->Test->GetReadBackData("UAVBuffer0", &uavData); Result = VerifyQuadAnyAllResults((int2 *)uavData.data()); @@ -13337,7 +12558,7 @@ TEST_F(ExecutionTest, IsNormalTest) { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - VERIFY_IS_TRUE(CreateDevice(&pDevice, D3D_SHADER_MODEL_6_0, + VERIFY_IS_TRUE(createDevice(&pDevice, D3D_SHADER_MODEL_6_0, false /* skipUnsupported */)); // The input is -Zero, Zero, -Denormal, Denormal, -Infinity, Infinity, -NaN, @@ -13354,7 +12575,7 @@ TEST_F(ExecutionTest, IsNormalTest) { std::vector *Validation_Expected = &Validation_Expected_Vec; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13395,9 +12616,10 @@ TEST_F(ExecutionTest, IsNormalTest) { // Test Compute shader { pShaderOp->CS = pShaderOp->GetString("CS60"); - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "IsNormal", - ResourceInitFn, ShaderInitFn, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "IsNormal", + ResourceInitFn, 
ShaderInitFn,
+                                          ShaderOpSet);
 
       MappedData data;
       test->Test->GetReadBackData("g_TestData", &data);
diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h
new file mode 100644
index 0000000000..3822ef02ad
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h
@@ -0,0 +1,405 @@
+#ifndef HLSLEXECTESTUTILS_H
+#define HLSLEXECTESTUTILS_H
+
+#include "dxc/Support/dxcapi.use.h"
+#include "dxc/Test/HlslTestUtils.h"
+// NOTE(review): the three includes below have no header name in this copy
+// (angle-bracket text appears to have been stripped) -- restore them from the
+// upstream file before use.
+#include
+#include
+#include
+
+namespace ExecTestUtils {
+// This is defined in d3d.h for Windows 10 Anniversary Edition SDK, but we
+// only require the Windows 10 SDK.
+// Each value encodes the shader model as 0x<major><minor>.
+typedef enum D3D_SHADER_MODEL {
+  D3D_SHADER_MODEL_5_1 = 0x51,
+  D3D_SHADER_MODEL_6_0 = 0x60,
+  D3D_SHADER_MODEL_6_1 = 0x61,
+  D3D_SHADER_MODEL_6_2 = 0x62,
+  D3D_SHADER_MODEL_6_3 = 0x63,
+  D3D_SHADER_MODEL_6_4 = 0x64,
+  D3D_SHADER_MODEL_6_5 = 0x65,
+  D3D_SHADER_MODEL_6_6 = 0x66,
+  D3D_SHADER_MODEL_6_7 = 0x67,
+  D3D_SHADER_MODEL_6_8 = 0x68,
+  D3D_SHADER_MODEL_6_9 = 0x69,
+  D3D_HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_9
+} D3D_SHADER_MODEL;
+} // namespace ExecTestUtils
+
+// Whether the D3D12 debug interfaces are used; currently always true.
+static bool useDebugIfaces() { return true; }
+
+// Returns false when built with _HLK_CONF; otherwise returns the value of the
+// "DXBC" test parameter.
+static bool useDxbc() {
+#ifdef _HLK_CONF
+  return false;
+#else
+  return hlsl_test::GetTestParamBool(L"DXBC");
+#endif
+}
+
+// Default passed to GetTestParamUseWARP: false when built with _HLK_CONF,
+// true otherwise.
+// NOTE(review): the name misspells "Default"; it is kept because callers
+// reference this spelling.
+static bool useWarpByDefualt() {
+#ifdef _HLK_CONF
+  return false;
+#else
+  return true;
+#endif
+}
+
+// A more recent Windows SDK than currently required is needed for these.
+typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)(
+    UINT NumFeatures, __in_ecount(NumFeatures) const IID *IIDs,
+    __in_ecount_opt(NumFeatures) void *ConfigurationStructs,
+    __in_ecount_opt(NumFeatures) UINT *ConfigurationStructSizes);
+
+static const GUID D3D12ExperimentalShaderModelsID =
+    {/* 76f5573e-f13a-40f5-b297-81ce9e18933f */
+     0x76f5573e,
+     0xf13a,
+     0x40f5,
+     {0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f}};
+
+// Used to create D3D12SDKConfiguration to enable AgilitySDK programmatically.
+typedef HRESULT(WINAPI *D3D12GetInterfaceFn)(REFCLSID Rclsid, REFIID Riid,
+                                             void **Debug);
+
+#ifndef __ID3D12SDKConfiguration_INTERFACE_DEFINED__
+
+// Copied from AgilitySDK D3D12.h to programmatically enable when in developer
+// mode.
+#define __ID3D12SDKConfiguration_INTERFACE_DEFINED__
+
+EXTERN_C const GUID DECLSPEC_SELECTANY IID_ID3D12SDKConfiguration = {
+    0xe9eb5314,
+    0x33aa,
+    0x42b2,
+    {0xa7, 0x18, 0xd7, 0x7f, 0x58, 0xb1, 0xf1, 0xc7}};
+EXTERN_C const GUID DECLSPEC_SELECTANY CLSID_D3D12SDKConfiguration = {
+    0x7cda6aca,
+    0xa03e,
+    0x49c8,
+    {0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce}};
+
+MIDL_INTERFACE("e9eb5314-33aa-42b2-a718-d77f58b1f1c7")
+ID3D12SDKConfiguration : public IUnknown {
+public:
+  virtual HRESULT STDMETHODCALLTYPE SetSDKVersion(UINT SDKVersion,
+                                                  LPCSTR SDKPath) = 0;
+};
+#endif /* __ID3D12SDKConfiguration_INTERFACE_DEFINED__ */
+
+// Returns the path reported by GetModuleFileNameW for a NULL module handle,
+// or an empty string when the call fails or the result fills the whole
+// MAX_PATH window (treated as truncation).
+static std::wstring getModuleName() {
+  wchar_t ModuleName[MAX_PATH + 1] = {0};
+  const DWORD Length = GetModuleFileNameW(NULL, ModuleName, MAX_PATH);
+
+  if (Length == 0 || Length == MAX_PATH)
+    return std::wstring(); // Error condition
+
+  return std::wstring(ModuleName, Length);
+}
+
+// Resolves an SDKPath that starts with ".\" against the directory part of
+// getModuleName(). SDKPath is returned unchanged when it does not start with
+// ".\" or when the module path contains no backslash.
+static std::wstring computeSDKFullPath(std::wstring SDKPath) {
+  std::wstring ModulePath = getModuleName();
+  const size_t Pos = ModulePath.rfind('\\');
+
+  if (Pos == std::wstring::npos)
+    return SDKPath;
+
+  if (SDKPath.substr(0, 2) != L".\\")
+    return SDKPath;
+
+  // Drop the leading '.' so the remaining "\..." is appended to the directory.
+  return ModulePath.substr(0, Pos) +
SDKPath.substr(1);
+}
+
+// Loads D3D12Core.dll from the resolved SDKPath and returns the value of its
+// exported "D3D12SDKVersion" variable, or 0 when the DLL or the export is not
+// found.
+static UINT getD3D12SDKVersion(std::wstring SDKPath) {
+  // Try to automatically get the D3D12SDKVersion from the DLL
+  UINT SDKVersion = 0;
+  std::wstring D3DCorePath = computeSDKFullPath(SDKPath);
+  D3DCorePath.append(L"D3D12Core.dll");
+  HMODULE D3DCore = LoadLibraryW(D3DCorePath.c_str());
+  if (D3DCore) {
+    if (UINT *SDKVersionOut =
+            (UINT *)GetProcAddress(D3DCore, "D3D12SDKVersion"))
+      SDKVersion = *SDKVersionOut;
+    FreeModule(D3DCore);
+  }
+  return SDKVersion;
+}
+
+// Creates a D3D12 device (WARP or hardware, per the test parameters) that
+// supports TestModel.
+//
+// D3DDevice       receives the device on success; set to nullptr otherwise.
+// TestModel       minimum shader model the device must support.
+// SkipUnsupported when true, an unsupported configuration marks the test as
+//                 Skipped before returning false.
+//
+// Returns true when a suitable device was created.
+static bool createDevice(ID3D12Device **D3DDevice,
+                         ExecTestUtils::D3D_SHADER_MODEL TestModel =
+                             ExecTestUtils::D3D_SHADER_MODEL_6_0,
+                         bool SkipUnsupported = true) {
+  // Give the out-parameter a defined value on every exit path, including the
+  // early return just below.
+  *D3DDevice = nullptr;
+
+  if (TestModel > ExecTestUtils::D3D_HIGHEST_SHADER_MODEL) {
+    const UINT Minor = (UINT)TestModel & 0x0f;
+    hlsl_test::LogCommentFmt(L"Installed SDK does not support "
+                             L"shader model 6.%1u",
+                             Minor);
+
+    if (SkipUnsupported)
+      WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
+
+    return false;
+  }
+  CComPtr DXGIFactory;
+  CComPtr D3DDeviceCom;
+
+  VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&DXGIFactory)));
+  if (hlsl_test::GetTestParamUseWARP(useWarpByDefualt())) {
+    CComPtr WarpAdapter;
+    VERIFY_SUCCEEDED(DXGIFactory->EnumWarpAdapter(IID_PPV_ARGS(&WarpAdapter)));
+    HRESULT CreateHR = D3D12CreateDevice(WarpAdapter, D3D_FEATURE_LEVEL_11_0,
+                                         IID_PPV_ARGS(&D3DDeviceCom));
+    if (FAILED(CreateHR)) {
+      hlsl_test::LogCommentFmt(
+          L"The available version of WARP does not support d3d12.");
+
+      if (SkipUnsupported)
+        WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
+
+      return false;
+    }
+
+    if (HMODULE WarpModule = GetModuleHandleW(L"d3d10warp.dll")) {
+      WCHAR FullModuleFilePath[MAX_PATH] = L"";
+      // The size argument is a character count, not a byte count; passing
+      // sizeof() here would let the call write past the end of the buffer.
+      GetModuleFileNameW(WarpModule, FullModuleFilePath, MAX_PATH);
+      WEX::Logging::Log::Comment(WEX::Common::String().Format(
+          L"WARP driver loaded from: %ls", FullModuleFilePath));
+    }
+
+  } else {
+    CComPtr HardwareAdapter;
+    WEX::Common::String AdapterValue;
+    HRESULT HR = WEX::TestExecution::RuntimeParameters::TryGetValue(
+        L"Adapter", AdapterValue);
+    if (SUCCEEDED(HR))
+      st::GetHardwareAdapter(DXGIFactory, AdapterValue, &HardwareAdapter);
+    else
+      WEX::Logging::Log::Comment(
+          L"Using default hardware adapter with D3D12 support.");
+
+    VERIFY_SUCCEEDED(D3D12CreateDevice(HardwareAdapter, D3D_FEATURE_LEVEL_11_0,
+                                       IID_PPV_ARGS(&D3DDeviceCom)));
+  }
+
+  // Check the device before its first use below.
+  if (D3DDeviceCom == nullptr)
+    return false;
+
+  // retrieve adapter information
+  const LUID AdapterID = D3DDeviceCom->GetAdapterLuid();
+  CComPtr DXGIAdapter;
+  VERIFY_SUCCEEDED(
+      DXGIFactory->EnumAdapterByLuid(AdapterID, IID_PPV_ARGS(&DXGIAdapter)));
+  if (DXGIAdapter == nullptr)
+    return false;
+  DXGI_ADAPTER_DESC AdapterDesc;
+  VERIFY_SUCCEEDED(DXGIAdapter->GetDesc(&AdapterDesc));
+  hlsl_test::LogCommentFmt(L"Using Adapter:%s", AdapterDesc.Description);
+
+  if (!useDxbc()) {
+    // Check for DXIL support.
+    typedef struct D3D12_FEATURE_DATA_SHADER_MODEL {
+      ExecTestUtils::D3D_SHADER_MODEL HighestShaderModel;
+    } D3D12_FEATURE_DATA_SHADER_MODEL;
+    const UINT D3D12_FEATURE_SHADER_MODEL = 7;
+    D3D12_FEATURE_DATA_SHADER_MODEL SMData;
+    SMData.HighestShaderModel = TestModel;
+    if (FAILED(D3DDeviceCom->CheckFeatureSupport(
+            (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData,
+            sizeof(SMData))) ||
+        SMData.HighestShaderModel < TestModel) {
+      const UINT Minor = (UINT)TestModel & 0x0f;
+      hlsl_test::LogCommentFmt(L"The selected device does not support "
+                               L"shader model 6.%1u",
+                               Minor);
+
+      if (SkipUnsupported)
+        WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
+
+      return false;
+    }
+  }
+
+  if (useDebugIfaces()) {
+    CComPtr InfoQueue;
+    if (SUCCEEDED(D3DDeviceCom->QueryInterface(&InfoQueue)))
+      InfoQueue->SetMuteDebugOutput(FALSE);
+  }
+
+  *D3DDevice = D3DDeviceCom.Detach();
+  return true;
+}
+
+// Reads the HLSL data file at RelativePath into a new read-only stream and
+// returns it through Stream (the caller owns the reference).
+inline void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream,
+                                      dxc::DxcDllSupport &Support) {
+  VERIFY_SUCCEEDED(Support.Initialize());
+  CComPtr Library;
+  CComPtr Blob;
+  CComPtr
StreamCom;
+  std::wstring Path = hlsl_test::GetPathToHlslDataFile(
+      RelativePath, HLSLDATAFILEPARAM, DEFAULT_EXEC_TEST_DIR);
+  VERIFY_SUCCEEDED(Support.CreateInstance(CLSID_DxcLibrary, &Library));
+  VERIFY_SUCCEEDED(Library->CreateBlobFromFile(Path.c_str(), nullptr, &Blob));
+  VERIFY_SUCCEEDED(Library->CreateStreamFromBlobReadOnly(Blob, &StreamCom));
+  *Stream = StreamCom.Detach();
+}
+
+// Points the D3D12 runtime in Runtime at the Agility SDK with the given
+// version and path, then confirms D3D12Core can actually be loaded.
+// Returns a failure HRESULT when a required export is missing or any step
+// fails.
+static HRESULT enableAgilitySDK(HMODULE Runtime, UINT SDKVersion,
+                                LPCWSTR SDKPath) {
+  D3D12GetInterfaceFn GetInterfaceFunc =
+      (D3D12GetInterfaceFn)GetProcAddress(Runtime, "D3D12GetInterface");
+  if (GetInterfaceFunc == nullptr)
+    // The runtime does not export D3D12GetInterface; report the lookup
+    // failure instead of calling through a null pointer.
+    return HRESULT_FROM_WIN32(GetLastError());
+
+  CComPtr D3D12SDKConfiguration;
+  IFR(GetInterfaceFunc(CLSID_D3D12SDKConfiguration,
+                       IID_PPV_ARGS(&D3D12SDKConfiguration)));
+  IFR(D3D12SDKConfiguration->SetSDKVersion(SDKVersion, CW2A(SDKPath)));
+
+  // Currently, it appears that the SetSDKVersion will succeed even when
+  // D3D12Core is not found, or its version doesn't match.  When that's the
+  // case, will cause a failure in the very next thing that actually requires
+  // D3D12Core.dll to be loaded instead.  So, we attempt to clear experimental
+  // features next, which is a valid use case and a no-op at this point.  This
+  // requires D3D12Core to be loaded.  If this fails, we know the AgilitySDK
+  // setting actually failed.
+  D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc =
+      (D3D12EnableExperimentalFeaturesFn)GetProcAddress(
+          Runtime, "D3D12EnableExperimentalFeatures");
+  if (ExperimentalFeaturesFunc == nullptr)
+    // If this failed, D3D12 must be too old for AgilitySDK.  But if that's
+    // the case, creating D3D12SDKConfiguration should have failed.  So while
+    // this case shouldn't be hit, fail if it is.
+    return HRESULT_FROM_WIN32(GetLastError());
+
+  return ExperimentalFeaturesFunc(0, nullptr, nullptr, nullptr);
+}
+
+// Enables the experimental shader models feature, plus any AdditionalFeatures,
+// through the D3D12EnableExperimentalFeatures export of hRuntime.
+static HRESULT
+enableExperimentalShaderModels(HMODULE hRuntime,
+                               UUID AdditionalFeatures[] = nullptr,
+                               size_t NumAdditionalFeatures = 0) {
+  D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc =
+      (D3D12EnableExperimentalFeaturesFn)GetProcAddress(
+          hRuntime, "D3D12EnableExperimentalFeatures");
+  if (ExperimentalFeaturesFunc == nullptr)
+    return HRESULT_FROM_WIN32(GetLastError());
+
+  std::vector Features;
+
+  Features.push_back(D3D12ExperimentalShaderModels);
+
+  if (AdditionalFeatures != nullptr && NumAdditionalFeatures > 0)
+    Features.insert(Features.end(), AdditionalFeatures,
+                    AdditionalFeatures + NumAdditionalFeatures);
+
+  return ExperimentalFeaturesFunc((UINT)Features.size(), Features.data(),
+                                  nullptr, nullptr);
+}
+
+// Convenience overload: loads d3d12.dll itself and forwards to the overload
+// above.  Returns E_FAIL when d3d12.dll cannot be loaded.
+static HRESULT
+enableExperimentalShaderModels(UUID AdditionalFeatures[] = nullptr,
+                               size_t NumAdditionalFeatures = 0) {
+  HMODULE Runtime = LoadLibraryW(L"d3d12.dll");
+  if (Runtime == NULL)
+    return E_FAIL;
+  return enableExperimentalShaderModels(Runtime, AdditionalFeatures,
+                                        NumAdditionalFeatures);
+}
+
+// Calls D3D12EnableExperimentalFeatures with an empty feature list.
+static HRESULT disableExperimentalShaderModels() {
+  HMODULE Runtime = LoadLibraryW(L"d3d12.dll");
+  if (Runtime == NULL)
+    return E_FAIL;
+
+  D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc =
+      (D3D12EnableExperimentalFeaturesFn)GetProcAddress(
+          Runtime, "D3D12EnableExperimentalFeatures");
+  if (ExperimentalFeaturesFunc == nullptr)
+    return HRESULT_FROM_WIN32(GetLastError());
+
+  return ExperimentalFeaturesFunc(0, nullptr, nullptr, nullptr);
+}
+
+static HRESULT enableAgilitySDK(HMODULE Runtime) {
+  // D3D12SDKVersion > 1 will use provided version, otherwise, auto-detect.
+  // D3D12SDKVersion == 1 means fail if we can't auto-detect.
+ UINT SDKVersion = 0; + WEX::TestExecution::RuntimeParameters::TryGetValue(L"D3D12SDKVersion", + SDKVersion); + + // SDKPath must be relative path from .exe, which means relative to + // TE.exe location, and must start with ".\\", such as with the + // default: ".\\D3D12\\" + WEX::Common::String SDKPath; + if (SUCCEEDED(WEX::TestExecution::RuntimeParameters::TryGetValue( + L"D3D12SDKPath", SDKPath))) { + // Make sure path ends in backslash + if (!SDKPath.IsEmpty() && SDKPath.Right(1) != "\\") + SDKPath.Append("\\"); + } + + if (SDKPath.IsEmpty()) + SDKPath = L".\\D3D12\\"; + + const bool MustFind = SDKVersion > 0; + if (SDKVersion <= 1) { + // lookup version from D3D12Core.dll + SDKVersion = getD3D12SDKVersion((LPCWSTR)SDKPath); + if (MustFind && SDKVersion == 0) { + hlsl_test::LogErrorFmt(L"Agility SDK not found in relative path: %s", + (LPCWSTR)SDKPath); + return E_FAIL; + } + } + + // Not found, not asked for. + if (SDKVersion == 0) + return S_FALSE; + + HRESULT HR = enableAgilitySDK(Runtime, SDKVersion, (LPCWSTR)SDKPath); + if (FAILED(HR)) { + // If SDKVersion provided, fail if not successful. + // 1 means we should find it, and fill in the version automatically. 
+ if (MustFind) { + hlsl_test::LogErrorFmt( + L"Failed to set Agility SDK version %d at path: %s", SDKVersion, + (LPCWSTR)SDKPath); + return HR; + } + return S_FALSE; + } + if (HR == S_OK) + hlsl_test::LogCommentFmt(L"Agility SDK version set to: %d", SDKVersion); + + return HR; +} + +static HRESULT enableExperimentalMode(HMODULE Runtime) { +#ifdef _FORCE_EXPERIMENTAL_SHADERS + bool ExperimentalShaderModels = true; +#else + bool ExperimentalShaderModels = + hlsl_test::GetTestParamBool(L"ExperimentalShaders"); +#endif // _FORCE_EXPERIMENTAL_SHADERS + + HRESULT HR = S_FALSE; + if (ExperimentalShaderModels) { + HR = enableExperimentalShaderModels(Runtime); + if (SUCCEEDED(HR)) + WEX::Logging::Log::Comment(L"Experimental shader models enabled."); + } + + return HR; +} + +static HRESULT enableDebugLayer() { + // The debug layer does net yet validate DXIL programs that require + // rewriting, but basic logging should work properly. + HRESULT HR = S_FALSE; + if (useDebugIfaces()) { + CComPtr DebugController; + HR = D3D12GetDebugInterface(IID_PPV_ARGS(&DebugController)); + if (SUCCEEDED(HR)) { + DebugController->EnableDebugLayer(); + HR = S_OK; + } + } + return HR; +} + +#endif // HLSLEXECTESTUTILS_H diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp index 8dde3faa0b..9e18351a6d 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp @@ -866,6 +866,11 @@ void ShaderOpTest::CreateShaders() { CHECK_HR(pLibrary->CreateBlobWithEncodingFromPinned( pText, (UINT32)strlen(pText), CP_UTF8, &pTextBlob)); CHECK_HR(m_pDxcSupport->CreateInstance(CLSID_DxcCompiler, &pCompiler)); + WEX::Logging::Log::Comment(L"Compiling shader:"); + ShaderOpLogFmt(L"\tTarget profile: %S", S.Target); + if (argumentsWList.size() > 0) { + ShaderOpLogFmt(L"\tArguments: %S", pArguments); + } CHECK_HR(pCompiler->Compile(pTextBlob, nameW, entryPointW, targetW, (LPCWSTR *)argumentsWList.data(), 
(UINT32)argumentsWList.size(), nullptr, 0, @@ -2752,6 +2757,74 @@ bool ShaderOpParser::ReadAtElementName(IXmlReader *pReader, LPCWSTR pName) { } } +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + st::ShaderOpTest::TShaderCallbackFn pShaderCallback, + std::shared_ptr ShaderOpSet) { + st::ShaderOp *pShaderOp; + if (pName == nullptr) { + if (ShaderOpSet->ShaderOps.size() != 1) { + VERIFY_FAIL(L"Expected a single shader operation."); + } + pShaderOp = ShaderOpSet->ShaderOps[0].get(); + } else { + pShaderOp = ShaderOpSet->GetShaderOp(pName); + } + if (pShaderOp == nullptr) { + std::string msg = "Unable to find shader op "; + msg += pName; + msg += "; available ops"; + const char sep = ':'; + for (auto &pAvailOp : ShaderOpSet->ShaderOps) { + msg += sep; + msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; + } + CA2W msgWide(msg.c_str()); + VERIFY_FAIL(msgWide.m_psz); + } + + // This won't actually be used since we're supplying the device, + // but let's make it consistent. 
+ pShaderOp->UseWarpDevice = hlsl_test::GetTestParamUseWARP(true); + + std::shared_ptr test = std::make_shared(); + test->SetDxcSupport(&support); + test->SetInitCallback(pInitCallback); + test->SetShaderCallback(pShaderCallback); + test->SetDevice(pDevice); + test->RunShaderOp(pShaderOp); + + std::shared_ptr result = + std::make_shared(); + result->ShaderOpSet = ShaderOpSet; + result->Test = test; + result->ShaderOp = pShaderOp; + return result; +} + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + std::shared_ptr ShaderOpSet) { + return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, + nullptr, ShaderOpSet); +} + +std::shared_ptr +RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + IStream *pStream, LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback) { + DXASSERT_NOMSG(pStream != nullptr); + std::shared_ptr ShaderOpSet = + std::make_shared(); + st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); + return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, + ShaderOpSet); +} + #pragma endregion Parsing support } // namespace st diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.h b/tools/clang/unittests/HLSLExec/ShaderOpTest.h index b71ee08765..52b5f37730 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.h +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.h @@ -12,12 +12,12 @@ // results. // // // /////////////////////////////////////////////////////////////////////////////// - -#pragma once - #ifndef __SHADEROPTEST_H__ #define __SHADEROPTEST_H__ +#include +#include +#include #include #include #include @@ -344,6 +344,32 @@ void ParseShaderOpSetFromStream(IStream *pStream, ShaderOpSet *pShaderOpSet); // Deserialize a ShaderOpSet from an IXmlReader instance. 
void ParseShaderOpSetFromXml(IXmlReader *pReader, ShaderOpSet *pShaderOpSet); +/////////////////////////////////////////////////////////////////////////////// +// RunShaderOpTest* helper functions. +struct ShaderOpTestResult { + st::ShaderOp *ShaderOp; + std::shared_ptr ShaderOpSet; + std::shared_ptr Test; +}; + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + st::ShaderOpTest::TShaderCallbackFn pShaderCallback, + std::shared_ptr ShaderOpSet); + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + std::shared_ptr ShaderOpSet); + +std::shared_ptr +RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + IStream *pStream, LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback); + } // namespace st #endif // __SHADEROPTEST_H__ diff --git a/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp b/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp new file mode 100644 index 0000000000..16badb074d --- /dev/null +++ b/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp @@ -0,0 +1,376 @@ +#include "TableParameterHandler.h" +#include "dxc/Test/HlslTestUtils.h" + +TableParameterHandler::TableParameterHandler(TableParameter *pTable, + size_t size) + : m_table(pTable), m_tableSize(size) { + clearTableParameter(); + VERIFY_SUCCEEDED(ParseTableRow()); +} + +TableParameter *TableParameterHandler::GetTableParamByName(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &m_table[i]; + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +void TableParameterHandler::clearTableParameter() { + for (size_t i = 0; i < m_tableSize; ++i) { + m_table[i].m_int32 = 0; + m_table[i].m_uint = 0; + m_table[i].m_double = 0; + 
m_table[i].m_bool = false; + m_table[i].m_str = WEX::Common::String(); + } +} + +template +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int32Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int8Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int16Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_uint32Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_floatTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_halfTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return 
nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_doubleTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_boolTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +HRESULT TableParameterHandler::ParseTableRow() { + TableParameter *table = m_table; + for (unsigned int i = 0; i < m_tableSize; ++i) { + switch (table[i].m_type) { + case TableParameter::INT8: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int16 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int8 = (int8_t)(table[i].m_int32); + break; + case TableParameter::INT16: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int16 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int16 = (short)(table[i].m_int32); + break; + case TableParameter::INT32: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::UINT: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_uint)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case 
TableParameter::DOUBLE: + if (FAILED(WEX::TestExecution::TestData::TryGetValue( + table[i].m_name, table[i].m_double)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::STRING: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_str)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::BOOL: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_str)) && + table[i].m_bool) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::INT8_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int8Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int8Table[j] = (int8_t)tempTable[j]; + } + break; + } + case TableParameter::INT16_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int16Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int16Table[j] = (int16_t)tempTable[j]; + } + break; + } + case TableParameter::INT32_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not 
suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int32Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int32Table[j] = tempTable[j]; + } + break; + } + case TableParameter::UINT8_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int8Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int8Table[j] = (uint8_t)tempTable[j]; + } + break; + } + case TableParameter::UINT16_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_uint16Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_uint16Table[j] = (uint16_t)tempTable[j]; + } + break; + } + case TableParameter::UINT32_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_uint32Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_uint32Table[j] = tempTable[j]; + } + break; + } + case TableParameter::FLOAT_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if 
(FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_floatTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + ParseDataToFloat(tempTable[j], table[i].m_floatTable[j]); + } + break; + } + case TableParameter::HALF_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_halfTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + uint16_t value = 0; + if (IsHexString(tempTable[j], &value)) { + table[i].m_halfTable[j] = value; + } else { + float val; + ParseDataToFloat(tempTable[j], val); + if (isdenorm(val)) + table[i].m_halfTable[j] = + signbit(val) ? 
Float16NegDenorm : Float16PosDenorm; + else + table[i].m_halfTable[j] = ConvertFloat32ToFloat16(val); + } + } + break; + } + case TableParameter::DOUBLE_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_doubleTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_doubleTable[j] = tempTable[j]; + } + break; + } + case TableParameter::BOOL_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_boolTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_boolTable[j] = tempTable[j]; + } + break; + } + case TableParameter::STRING_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_StringTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_StringTable[j] = tempTable[j]; + } + break; + } + default: + DXASSERT_NOMSG("Invalid Parameter Type"); + } + if (errno == ERANGE) { + hlsl_test::LogErrorFmt(L"got out of range value for table %s", + table[i].m_name); + return E_FAIL; + } + } + return S_OK; +} diff --git a/tools/clang/unittests/HLSLExec/TableParameterHandler.h 
b/tools/clang/unittests/HLSLExec/TableParameterHandler.h new file mode 100644 index 0000000000..eac851a263 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/TableParameterHandler.h @@ -0,0 +1,205 @@ +#ifndef TABLE_PARAMETER_HANDLER_H +#define TABLE_PARAMETER_HANDLER_H + +#include +#include +#include +#include +#include +#include +#include +#include // For LPCWSTR + +#include "dxc/Support/Global.h" // For DXASSERT_ARGS +#include "dxc/Test/HlslTestUtils.h" + +// Parameter representation for taef data-driven tests +struct TableParameter { + LPCWSTR m_name; + enum TableParameterType { + INT8, + INT16, + INT32, + UINT, + FLOAT, + HALF, + DOUBLE, + STRING, + BOOL, + INT8_TABLE, + INT16_TABLE, + INT32_TABLE, + FLOAT_TABLE, + HALF_TABLE, + DOUBLE_TABLE, + STRING_TABLE, + UINT8_TABLE, + UINT16_TABLE, + UINT32_TABLE, + BOOL_TABLE + }; + TableParameter(LPCWSTR name, TableParameterType type, bool required) + : m_name(name), m_type(type), m_required(required) {} + TableParameterType m_type; + bool m_required; // required parameter + int8_t m_int8; + int16_t m_int16; + int m_int32; + unsigned int m_uint; + float m_float; + uint16_t m_half; // no such thing as half type in c++. 
Use int16 instead + double m_double; + bool m_bool; + WEX::Common::String m_str; + std::vector m_int8Table; + std::vector m_int16Table; + std::vector m_int32Table; + std::vector m_uint8Table; + std::vector m_uint16Table; + std::vector m_uint32Table; + std::vector m_floatTable; + std::vector m_halfTable; // no such thing as half type in c++ + std::vector m_doubleTable; + std::vector m_boolTable; + std::vector m_StringTable; +}; + +class TableParameterHandler { +private: + HRESULT ParseTableRow(); + +public: + TableParameter *m_table; + size_t m_tableSize; + TableParameterHandler(TableParameter *pTable, size_t size); + + TableParameter *GetTableParamByName(LPCWSTR name); + void clearTableParameter(); + + template std::vector *GetDataArray(LPCWSTR name); +}; + +// Static helpers +static bool IsHexString(PCWSTR str, uint16_t *value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + LPCWSTR wstr = wString.c_str(); + if (wcsncmp(wstr, L"0x", 2) == 0 || wcsncmp(wstr, L"0b", 2) == 0) { + *value = (uint16_t)wcstol(wstr, NULL, 0); + return true; + } + return false; +} + +static HRESULT ParseDataToFloat(PCWSTR str, float &value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + wString.erase(std::remove(wString.begin(), wString.end(), L'\n'), + wString.end()); + PCWSTR wstr = wString.data(); + if (_wcsicmp(wstr, L"NaN") == 0) { + value = NAN; + } else if (_wcsicmp(wstr, L"-inf") == 0) { + value = -(INFINITY); + } else if (_wcsicmp(wstr, L"inf") == 0) { + value = INFINITY; + } else if (_wcsicmp(wstr, L"-denorm") == 0) { + value = -(FLT_MIN / 2); + } else if (_wcsicmp(wstr, L"denorm") == 0) { + value = FLT_MIN / 2; + } else if (_wcsicmp(wstr, L"-0.0f") == 0 || _wcsicmp(wstr, L"-0.0") == 0 || + _wcsicmp(wstr, L"-0") == 0) { + value = -0.0f; + } else if (_wcsicmp(wstr, L"0.0f") == 0 || _wcsicmp(wstr, L"0.0") == 0 || + _wcsicmp(wstr, L"0") == 0) { 
+ value = 0.0f; + } else if (_wcsnicmp(wstr, L"0x", 2) == + 0) { // For hex values, take values literally + unsigned temp_i = std::stoul(wstr, nullptr, 16); + value = (float &)temp_i; + } else { + // evaluate the expression of wstring + double val = _wtof(wstr); + if (val == 0) { + hlsl_test::LogErrorFmt(L"Failed to parse parameter %s to float", wstr); + return E_FAIL; + } + value = (float)val; + } + return S_OK; +} + +static HRESULT ParseDataToUint(PCWSTR str, unsigned int &value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + PCWSTR wstr = wString.data(); + // evaluate the expression of string + if (_wcsicmp(wstr, L"0") == 0 || _wcsicmp(wstr, L"0x00000000") == 0) { + value = 0; + return S_OK; + } + wchar_t *end; + unsigned int val = std::wcstoul(wstr, &end, 0); + if (val == 0) { + hlsl_test::LogErrorFmt(L"Failed to parse parameter %s to int", wstr); + return E_FAIL; + } + value = val; + return S_OK; +} + +static HRESULT ParseDataToVectorFloat(PCWSTR str, float *ptr, size_t count) { + std::wstring wstr(str); + size_t curPosition = 0; + // parse a string of dot product separated by commas + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + if (FAILED(ParseDataToFloat( + wstr.substr(curPosition, nextPosition - curPosition).data(), + *(ptr + i)))) { + return E_FAIL; + } + curPosition = nextPosition + 1; + } + return S_OK; +} + +static HRESULT ParseDataToVectorHalf(PCWSTR str, uint16_t *ptr, size_t count) { + std::wstring wstr(str); + size_t curPosition = 0; + // parse a string of dot product separated by commas + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + float floatValue; + if (FAILED(ParseDataToFloat( + wstr.substr(curPosition, nextPosition - curPosition).data(), + floatValue))) { + return E_FAIL; + } + *(ptr + i) = ConvertFloat32ToFloat16(floatValue); + curPosition = nextPosition + 1; + } + return S_OK; 
+} + +static HRESULT ParseDataToVectorUint(PCWSTR str, unsigned int *ptr, + size_t count) { + std::wstring wstr(str); + size_t curPosition = 0; + // parse a string of dot product separated by commas + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + if (FAILED(ParseDataToUint( + wstr.substr(curPosition, nextPosition - curPosition).data(), + *(ptr + i)))) { + return E_FAIL; + } + curPosition = nextPosition + 1; + } + return S_OK; +} + +#endif // TABLE_PARAMETER_HANDLER_H From 8a77b0c714d0e0db4b1c0202f2697f91ce3928a8 Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 17 Jun 2025 17:06:51 -0700 Subject: [PATCH 64/93] PIX shader debugger: Support dynamic indices for local arrays (#7536) The root of the problem being addressed here is this line from the previous version of DxilAnnotateWithVirtualRegister.cpp at (old) line 251 in function GetStructOffset: ``` auto *pArrayIndex = llvm::dyn_cast<llvm::ConstantInt>(pGEP->getOperand(GEPOperandIndex++)); ``` When an array is dynamically indexed, this dyn_cast of course returns nullptr, and this function returns a zero, which eventually caused the values of all dynamically-indexed array elements in PIX's shader debugger to be reported as the value of the zeroth element in the array. The next issue was that stores to an alloca-backed dynamic array weren't being properly recognized as significant events from the PIX debugger's point of view. PIX adds its own "fake" alloca stores to help tie its debug output with the debug info that ends up in the PDB, so it's easy enough to co-opt that machinery to cover stores to "real" allocas, i.e. function-local array storage. To do so, the "AnnotateStore" function needs some of the metadata (i.e. PIX instruction number) that is added during runOnModule here. This necessitated rearranging runOnModule and putting stores into a vector that we then iterate over at the end of runOnModule.
Now that indices aren't collapsed into just the zeroth element, PIX needs to know how much storage to allocate for the full array, which is the motivation for the change in DxilDebugInstrumentation.cpp to return some metadata that PIX can parse. DxilDbgValueToDbgDeclare.cpp's changes are just a variable rename to aid readability. The rearrangement of runOnModule can induce some allocas to be visited more than once, so there are changes in DxilPIXVirtualRegisters.cpp to make sure we don't overwrite an existing alloca ordinal with a new one (which would confuse previously-established references to that alloca). File-check tests have been added to validate that: - the stores to local arrays are being noticed properly; - the debug pass correctly outputs the metadata that informs PIX about alloca sizes. The majority of these changes really need end-to-end testing in PIX, where I can gather real debug output as generated by the GPU in response to the instrumentation, then match those results up with PDB data and finally show HLSL variable contents in the shader debugger, so there are some tests waiting on the PIX side for when this change makes its way there.
--- .../DxilAnnotateWithVirtualRegister.cpp | 140 +++++++++++++----- .../DxilDbgValueToDbgDeclare.cpp | 17 ++- .../DxilDebugInstrumentation.cpp | 17 ++- lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp | 12 +- ...ValueToDbgDeclare_dynamic_array_index.hlsl | 27 ++++ .../pix/Debug_dynamic_array_index.hlsl | 19 +++ tools/clang/unittests/HLSL/PixTest.cpp | 1 - 7 files changed, 178 insertions(+), 55 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl create mode 100644 tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl diff --git a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp index babf5b7953..88f696b7fa 100644 --- a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp +++ b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp @@ -76,19 +76,29 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass { private: void AnnotateValues(llvm::Instruction *pI); - void AnnotateStore(llvm::Instruction *pI); - void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI); + void AnnotateStore(hlsl::OP *HlslOP, llvm::Instruction *pI); + void SplitVectorStores(llvm::Instruction *pI); bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx); void AnnotateAlloca(llvm::AllocaInst *pAlloca); void AnnotateGeneric(llvm::Instruction *pI); void AssignNewDxilRegister(llvm::Instruction *pI); void AssignNewAllocaRegister(llvm::AllocaInst *pAlloca, std::uint32_t C); - + llvm::Value *AddConstIntValues(llvm::Value *l, llvm::Value *r); + llvm::Value *MultiplyConstIntValue(llvm::Value *l, uint32_t r); + llvm::Value *GetStructOffset(llvm::GetElementPtrInst *pGEP, + uint32_t &GEPOperandIndex, + llvm::Type *pElementType); hlsl::DxilModule *m_DM; std::uint32_t m_uVReg; std::unique_ptr m_MST; int m_StartInstruction = 0; + struct RememberedAllocaStores { + llvm::StoreInst *StoreInst; + llvm::Value *Index; + llvm::MDNode 
*AllocaReg; + }; + std::vector m_RememberedAllocaStores; void Init(llvm::Module &M) { m_DM = &M.GetOrCreateDxilModule(); @@ -129,8 +139,6 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { m_DM->SetValidatorVersion(1, 4); } - std::uint32_t InstNum = m_StartInstruction; - auto instrumentableFunctions = PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM); @@ -138,7 +146,7 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { for (auto &block : F->getBasicBlockList()) { for (auto it = block.begin(); it != block.end();) { llvm::Instruction *I = &*(it++); - SplitVectorStores(m_DM->GetOP(), I); + SplitVectorStores(I); } } } @@ -151,17 +159,32 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { } } + // Process all allocas referenced by dbg.declare intrinsics for (auto *F : instrumentableFunctions) { for (auto &block : F->getBasicBlockList()) { - for (llvm::Instruction &I : block.getInstList()) { - AnnotateStore(&I); + for (auto &I : block) { + if (auto *DbgDeclare = llvm::dyn_cast(&I)) { + // The first operand of DbgDeclare is the address (typically an + // AllocaInst) + if (auto *AddrVal = + llvm::dyn_cast(DbgDeclare->getAddress())) { + AnnotateValues(AddrVal); + } + } } } } + for (auto *F : instrumentableFunctions) + for (auto &block : F->getBasicBlockList()) { + for (llvm::Instruction &I : block.getInstList()) { + AnnotateStore(m_DM->GetOP(), &I); + } + } + for (auto *F : instrumentableFunctions) { - int InstructionRangeStart = InstNum; - int InstructionRangeEnd = InstNum; + int InstructionRangeStart = m_StartInstruction; + int InstructionRangeEnd = m_StartInstruction; for (auto &block : F->getBasicBlockList()) { for (llvm::Instruction &I : block.getInstList()) { // If the instruction is part of the debug value instrumentation added @@ -171,8 +194,9 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { if (PixAllocaReg::FromInst(Alloca, &unused1, &unused2)) continue; if (!llvm::isa(&I)) { - 
pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, InstNum++); - InstructionRangeEnd = InstNum; + pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, + m_StartInstruction++); + InstructionRangeEnd = m_StartInstruction; } } } @@ -188,12 +212,17 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { } } + for (auto const &as : m_RememberedAllocaStores) { + PixAllocaRegWrite::AddMD(m_DM->GetCtx(), as.StoreInst, as.AllocaReg, + as.Index); + } + if (OSOverride != nullptr) { // Print a set of strings of the exemplary form "InstructionCount: // " if (m_DM->GetShaderModel()->GetKind() == hlsl::ShaderModel::Kind::Library) *OSOverride << "\nIsLibrary\n"; - *OSOverride << "\nInstructionCount:" << InstNum << "\n"; + *OSOverride << "\nInstructionCount:" << m_StartInstruction << "\n"; } m_DM = nullptr; @@ -210,7 +239,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateValues(llvm::Instruction *pI) { } } -void DxilAnnotateWithVirtualRegister::AnnotateStore(llvm::Instruction *pI) { +void DxilAnnotateWithVirtualRegister::AnnotateStore(hlsl::OP *HlslOP, + llvm::Instruction *pI) { auto *pSt = llvm::dyn_cast(pI); if (pSt == nullptr) { return; @@ -226,15 +256,47 @@ void DxilAnnotateWithVirtualRegister::AnnotateStore(llvm::Instruction *pI) { if (AllocaReg == nullptr) { return; } + m_RememberedAllocaStores.push_back({pSt, Index, AllocaReg}); +} + +llvm::Value * +DxilAnnotateWithVirtualRegister::MultiplyConstIntValue(llvm::Value *l, + uint32_t r) { + if (r == 1) + return l; + if (auto *lci = llvm::dyn_cast(l)) + return m_DM->GetOP()->GetU32Const(lci->getLimitedValue() * r); + // Should never get here, but if we do, return the left as a reasonable + // default: + return l; +} - PixAllocaRegWrite::AddMD(m_DM->GetCtx(), pSt, AllocaReg, Index); +llvm::Value * +DxilAnnotateWithVirtualRegister::AddConstIntValues(llvm::Value *l, + llvm::Value *r) { + auto *rci = llvm::dyn_cast(r); + if (rci && rci->getLimitedValue() == 0) + return l; + auto *lci = llvm::dyn_cast(l); + if (lci && 
lci->getLimitedValue() == 0) + return r; + // Both an assert and a check, in case of unexpected circumstances. + DXASSERT(lci != nullptr && rci != nullptr, + "Both sides of add should be constant ints"); + if (lci != nullptr && rci != nullptr) + return m_DM->GetOP()->GetU32Const(lci->getLimitedValue() + + rci->getLimitedValue()); + // In an emergency, return the left argument. It'll be closest to + // the desired value. + return l; } -static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, - uint32_t &GEPOperandIndex, - llvm::Type *pElementType) { +llvm::Value * +DxilAnnotateWithVirtualRegister::GetStructOffset(llvm::GetElementPtrInst *pGEP, + uint32_t &GEPOperandIndex, + llvm::Type *pElementType) { if (IsInstrumentableFundamentalType(pElementType)) { - return 0; + return m_DM->GetOP()->GetU32Const(0); } else if (auto *pArray = llvm::dyn_cast(pElementType)) { // 1D-array example: // @@ -248,18 +310,13 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, // -The zeroth element in the struct (which is the array) // -The zeroth element in that array - auto *pArrayIndex = - llvm::dyn_cast(pGEP->getOperand(GEPOperandIndex++)); - - if (pArrayIndex == nullptr) { - return 0; - } + auto *pArrayIndex = pGEP->getOperand(GEPOperandIndex++); - uint32_t ArrayIndex = pArrayIndex->getLimitedValue(); auto pArrayElementType = pArray->getArrayElementType(); - uint32_t MemberIndex = ArrayIndex * CountStructMembers(pArrayElementType); - return MemberIndex + - GetStructOffset(pGEP, GEPOperandIndex, pArrayElementType); + auto *MemberIndex = MultiplyConstIntValue( + pArrayIndex, CountStructMembers(pArrayElementType)); + return AddConstIntValues( + MemberIndex, GetStructOffset(pGEP, GEPOperandIndex, pArrayElementType)); } else if (auto *pStruct = llvm::dyn_cast(pElementType)) { DXASSERT(GEPOperandIndex < pGEP->getNumOperands(), "Unexpectedly read too many GetElementPtrInst operands"); @@ -268,7 +325,7 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, 
llvm::dyn_cast(pGEP->getOperand(GEPOperandIndex++)); if (pMemberIndex == nullptr) { - return 0; + return m_DM->GetOP()->GetU32Const(0); } uint32_t MemberIndex = pMemberIndex->getLimitedValue(); @@ -278,16 +335,17 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, MemberOffset += CountStructMembers(pStruct->getElementType(i)); } - return MemberOffset + GetStructOffset(pGEP, GEPOperandIndex, - pStruct->getElementType(MemberIndex)); + return AddConstIntValues( + m_DM->GetOP()->GetU32Const(MemberOffset), + GetStructOffset(pGEP, GEPOperandIndex, + pStruct->getElementType(MemberIndex))); } else { - return 0; + return m_DM->GetOP()->GetU32Const(0); } } bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx) { - llvm::IRBuilder<> B(m_DM->GetCtx()); *pAI = nullptr; *pIdx = nullptr; @@ -366,7 +424,8 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( auto offset = GetStructOffset(pGEP, GEPOperandIndex, pStructType); - llvm::Value *IndexValue = B.getInt32(offset + precedingMemberCount); + llvm::Value *IndexValue = AddConstIntValues( + offset, m_DM->GetOP()->GetU32Const(precedingMemberCount)); if (IndexValue != nullptr) { *pAI = Alloca; @@ -383,7 +442,7 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( } *pAI = pAlloca; - *pIdx = B.getInt32(0); + *pIdx = m_DM->GetOP()->GetU32Const(0); return true; } @@ -463,12 +522,13 @@ void DxilAnnotateWithVirtualRegister::AssignNewDxilRegister( void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister( llvm::AllocaInst *pAlloca, std::uint32_t C) { - PixAllocaReg::AddMD(m_DM->GetCtx(), pAlloca, m_uVReg, C); - m_uVReg += C; + if (!PixAllocaReg::FromInst(pAlloca, nullptr, nullptr)) { + PixAllocaReg::AddMD(m_DM->GetCtx(), pAlloca, m_uVReg, C); + m_uVReg += C; + } } -void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP, - llvm::Instruction *pI) { +void 
DxilAnnotateWithVirtualRegister::SplitVectorStores(llvm::Instruction *pI) { auto *pSt = llvm::dyn_cast(pI); if (pSt == nullptr) { return; diff --git a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp index bf25d9f85f..9ddbe876b5 100644 --- a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp +++ b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp @@ -36,7 +36,7 @@ using namespace PIXPassHelpers; using namespace llvm; -//#define VALUE_TO_DECLARE_LOGGING +// #define VALUE_TO_DECLARE_LOGGING #ifdef VALUE_TO_DECLARE_LOGGING #ifndef PIX_DEBUG_DUMP_HELPER @@ -859,8 +859,8 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, VALUE_TO_DECLARE_LOG("... variable was null too"); } - llvm::Value *V = DbgValue->getValue(); - if (V == nullptr) { + llvm::Value *ValueFromDbgInst = DbgValue->getValue(); + if (ValueFromDbgInst == nullptr) { // The metadata contained a null Value, so we ignore it. This // seems to be a dxcompiler bug. VALUE_TO_DECLARE_LOG("...Null value!"); @@ -873,20 +873,20 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, return; } - if (llvm::isa(V->getType())) { + if (llvm::isa(ValueFromDbgInst->getType())) { // Safeguard: If the type is not a pointer type, then this is // dbg.value directly pointing to a memory location instead of // a value. if (!IsDITypePointer(Ty, EmptyMap)) { // We only know how to handle AllocaInsts for now - if (!isa(V)) { + if (!isa(ValueFromDbgInst)) { VALUE_TO_DECLARE_LOG( "... 
variable had pointer type, but is not an alloca."); return; } IRBuilder<> B(DbgValue->getNextNode()); - V = B.CreateLoad(V); + ValueFromDbgInst = B.CreateLoad(ValueFromDbgInst); } } @@ -931,7 +931,7 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, } const OffsetInBits InitialOffset = PackedOffsetFromVar; - auto *insertPt = llvm::dyn_cast(V); + auto *insertPt = llvm::dyn_cast(ValueFromDbgInst); if (insertPt != nullptr && !llvm::isa(insertPt)) { insertPt = insertPt->getNextNode(); // Drivers may crash if phi nodes aren't always at the top of a block, @@ -950,7 +950,8 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, // Offset}. InitialOffset is the offset from DbgValue's expression // (i.e., the offset from the Variable's start), and Offset is the // Scalar Value's packed offset from DbgValue's value. - for (const ValueAndOffset &VO : SplitValue(V, InitialOffset, B)) { + for (const ValueAndOffset &VO : + SplitValue(ValueFromDbgInst, InitialOffset, B)) { OffsetInBits AlignedOffset; if (!Offsets.GetAlignedOffsetFromPackedOffset(VO.m_PackedOffset, diff --git a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp index a7d7e72cb4..4dd43b07cc 100644 --- a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp +++ b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp @@ -1356,7 +1356,19 @@ DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock( IndexingToken = "s"; // static indexing, no debug output required } else { IndexingToken = "d"; // dynamic indexing - RegisterOrStaticIndex = std::to_string(IandT->AllocaBase); + int MaxArraySize = 1; + if (auto *Store = dyn_cast(&Inst)) { + if (auto *GEP = + dyn_cast(Store->getPointerOperand())) { + if (auto *Alloca = + dyn_cast(GEP->getPointerOperand())) { + MaxArraySize = + Alloca->getAllocatedType()->getArrayNumElements(); + } + } + } + RegisterOrStaticIndex = std::to_string(IandT->AllocaBase) + "-" + + std::to_string(MaxArraySize); 
DebugOutputForThisInstruction.ValueToWriteToDebugMemory = IandT->AllocaWriteIndex; } @@ -1374,7 +1386,8 @@ DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock( *OSOverride << "," << *RegisterOrStaticIndex; } if (IandT->ConstantAllocaStoreValue) { - *OSOverride << "," << std::to_string(*IandT->ConstantAllocaStoreValue); + uint64_t value = IandT->ConstantAllocaStoreValue.value(); + *OSOverride << "," << std::to_string(value); } *OSOverride << ";"; if (DebugOutputForThisInstruction.ValueToWriteToDebugMemory) diff --git a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp index f68e2082bc..a60f6a77a7 100644 --- a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp +++ b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp @@ -124,8 +124,10 @@ static bool ParsePixAllocaReg(llvm::MDNode *MD, std::uint32_t *RegNum, return false; } - *RegNum = mdRegNum->getLimitedValue(); - *Count = mdCount->getLimitedValue(); + if (RegNum != nullptr) + *RegNum = mdRegNum->getLimitedValue(); + if (Count != nullptr) + *Count = mdCount->getLimitedValue(); return true; } @@ -144,8 +146,10 @@ void pix_dxil::PixAllocaReg::AddMD(llvm::LLVMContext &Ctx, bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst const *pAlloca, std::uint32_t *pRegBase, std::uint32_t *pRegSize) { - *pRegBase = 0; - *pRegSize = 0; + if (pRegBase != nullptr) + *pRegBase = 0; + if (pRegSize != nullptr) + *pRegSize = 0; auto *mdNodes = pAlloca->getMetadata(MDName); if (mdNodes == nullptr) { diff --git a/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl b/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl new file mode 100644 index 0000000000..cba891424a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tcs_6_0 /Od %s | %opt -S -dxil-annotate-with-virtual-regs | %FileCheck %s + +// Check that there is an alloca backing the local array +// 
CHECK: [[ARRAYNAME:%.*]] = alloca [4 x float] + +// Grab the GEP for the above array's element that we're expecting to store to: +// CHECK: [[ARRAYELEMENTPTR:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[ARRAYNAME]] + +// Check that the store to the alloca is annotated with pix-alloca-reg-write metadata +// (meaning that the pass accurately noted that the 8.0 is stored to a dynamic array index) +// CHECK: store float 8.000000e+00, float* [[ARRAYELEMENTPTR]] +// CHECK-SAME: !pix-alloca-reg-write + + +RWByteAddressBuffer RawUAV: register(u1); + +[numthreads(1, 1, 1)] +void main() +{ + float local_array[4]; + local_array[RawUAV.Load(0)] = 8; + local_array[RawUAV.Load(1)] = 128; + + RawUAV.Store(64+0,local_array[0]); + RawUAV.Store(64+4,local_array[1]); +} + diff --git a/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl b/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl new file mode 100644 index 0000000000..9ab5bce95a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -Tcs_6_0 /Od %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,UAVSize=128,upstreamSVPositionRow=2 -hlsl-dxilemit | %FileCheck %s + +// Check that there is a block precis that correctly returns that the array is a 4-value float array +// CHECK: Block#0 +// CHECK-SAME: d,0-4 + +RWByteAddressBuffer RawUAV: register(u1); + +[numthreads(1, 1, 1)] +void main() +{ + float local_array[4]; + local_array[RawUAV.Load(0)] = 8; + local_array[RawUAV.Load(1)] = 128; + + RawUAV.Store(64+0,local_array[0]); + RawUAV.Store(64+4,local_array[1]); +} + diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index af7801c7bf..c032e9e872 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -1220,7 +1220,6 @@ PixTest::TestableResults PixTest::TestStructAnnotationCase( #if 0 // handy for debugging auto 
disTextW = Disassemble(pAnnotatedContainer); - WEX::Logging::Log::Comment(disTextW.c_str()); #endif ModuleAndHangersOn moduleEtc(pAnnotatedContainer); From 978a6d3f13eef89a3cf513da55e7d1b16fb8aef4 Mon Sep 17 00:00:00 2001 From: Steve Urquhart <53908460+SteveUrquhart@users.noreply.github.com> Date: Wed, 18 Jun 2025 06:36:46 -0400 Subject: [PATCH 65/93] [SPIRV] Emit DebugScope in wrapper (#7341) (#7529) Legalization and optimization will produce inaccurate NS100 debug info if there is no DebugScope emitted in the wrapper function. This PR corrects this oversight and renames the wrapper to "__dxc_setup". This may cause a stack frame named __dxc_setup to appear in an HLSL debugger, however, users should be familiar with this type of thing. A C debugger might show crt0, or a debugger can filter this frame out of the user's view. This PR addresses [7341](https://github.com/microsoft/DirectXShaderCompiler/issues/7341) --- tools/clang/lib/SPIRV/EmitVisitor.cpp | 6 ------ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 18 ++++++++++++++---- tools/clang/lib/SPIRV/SpirvEmitter.h | 1 + .../rich.debug.function.param.hlsl | 2 +- .../CodeGenSPIRV/shader.debug.function.hlsl | 2 +- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index 8de0262ae6..eb94ce0797 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -1635,12 +1635,6 @@ bool EmitVisitor::visit(SpirvDebugLexicalBlock *inst) { } bool EmitVisitor::visit(SpirvDebugScope *inst) { - // Technically entry function wrappers do not exist in HLSL. They - // are just created by DXC. We do not want to emit DebugScope for - // it. 
- if (inEntryFunctionWrapper) - return true; - initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index cc7016b594..850a8dd736 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -14050,8 +14050,8 @@ bool SpirvEmitter::processTessellationShaderAttributes( } bool SpirvEmitter::emitEntryFunctionWrapperForRayTracing( - const FunctionDecl *decl, SpirvDebugFunction *debugFunction, - SpirvFunction *entryFuncInstr) { + const FunctionDecl *decl, RichDebugInfo **info, + SpirvDebugFunction *debugFunction, SpirvFunction *entryFuncInstr) { // The entry basic block. auto *entryLabel = spvBuilder.createBasicBlock(); spvBuilder.setInsertPoint(entryLabel); @@ -14160,6 +14160,10 @@ bool SpirvEmitter::emitEntryFunctionWrapperForRayTracing( spvBuilder.createReturn(decl->getBody()->getLocEnd()); spvBuilder.endFunction(); + if (spirvOptions.debugInfoRich && decl->hasBody()) { + spvContext.popDebugLexicalScope(*info); + } + return true; } @@ -14374,7 +14378,9 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( astContext.VoidTy, decl->getLocStart(), decl->getName()); if (spirvOptions.debugInfoRich && decl->hasBody()) { - *debugFunction = emitDebugFunction(decl, entryFunction, info, "wrapper"); + *debugFunction = + emitDebugFunction(decl, entryFunction, info, "__dxc_setup"); + spvContext.pushDebugLexicalScope(*info, *debugFunction); } // Specify that entryFunction is an entry function wrapper. @@ -14391,7 +14397,7 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( entryInfo->entryFunction = entryFunction; if (spvContext.isRay()) { - return emitEntryFunctionWrapperForRayTracing(decl, *debugFunction, + return emitEntryFunctionWrapperForRayTracing(decl, info, *debugFunction, entryFuncInstr) ? 
entryFunction : nullptr; @@ -14632,6 +14638,10 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( if (spvContext.isHS()) doDecl(patchConstFunc); + if (spirvOptions.debugInfoRich && decl->hasBody()) { + spvContext.popDebugLexicalScope(*info); + } + return entryFunction; } diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 14401c6418..ada8db3068 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -958,6 +958,7 @@ class SpirvEmitter : public ASTConsumer { /// The wrapper function is also responsible for initializing global static /// variables for some cases. bool emitEntryFunctionWrapperForRayTracing(const FunctionDecl *entryFunction, + RichDebugInfo **info, SpirvDebugFunction *debugFunction, SpirvFunction *entryFuncId); diff --git a/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl b/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl index 9576837884..a3701a4ed4 100644 --- a/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl +++ b/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl @@ -9,7 +9,7 @@ // CHECK: [[x:%[0-9]+]] = OpString "x" // CHECK: [[srcMainName:%[0-9]+]] = OpString "main" // CHECK: [[color:%[0-9]+]] = OpString "color" -// CHECK: [[mainName:%[0-9]+]] = OpString "wrapper" +// CHECK: [[mainName:%[0-9]+]] = OpString "__dxc_setup" // CHECK: [[int:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 Signed // CHECK: [[float:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 Float diff --git a/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl b/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl index b263fd88ad..23bb479a46 100644 --- a/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl +++ b/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl @@ -6,7 +6,7 @@ // CHECK: [[fooName:%[0-9]+]] = OpString "foo" // CHECK: [[emptyStr:%[0-9]+]] = OpString "" // CHECK: 
[[srcMainName:%[0-9]+]] = OpString "main" -// CHECK: [[mainName:%[0-9]+]] = OpString "wrapper" +// CHECK: [[mainName:%[0-9]+]] = OpString "__dxc_setup" // CHECK: [[clOpts:%[0-9]+]] = OpString " -E main -T ps_6_0 -spirv -fcgl -fspv-debug=vulkan // CHECK: [[int:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 %uint_4 %uint_0 From d43d909801c185e5bad11a683a970cd23957c3c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 18 Jun 2025 13:23:05 -0700 Subject: [PATCH 66/93] Bump requests from 2.32.0 to 2.32.4 in /utils/git (#7524) Bumps [requests](https://github.com/psf/requests) from 2.32.0 to 2.32.4.
Release notes

Sourced from requests's releases.

v2.32.4

2.32.4 (2025-06-10)

Security

  • CVE-2024-47081 Fixed an issue where a maliciously crafted URL and trusted environment will retrieve credentials for the wrong hostname/machine from a netrc file. (#6965)

Improvements

  • Numerous documentation improvements

Deprecations

  • Added support for pypy 3.11 for Linux and macOS. (#6926)
  • Dropped support for pypy 3.9 following its end of support. (#6926)

v2.32.3

2.32.3 (2024-05-29)

Bugfixes

  • Fixed bug breaking the ability to specify custom SSLContexts in sub-classes of HTTPAdapter. (#6716)
  • Fixed issue where Requests started failing to run on Python versions compiled without the ssl module. (#6724)

v2.32.2

2.32.2 (2024-05-21)

Deprecations

  • To provide a more stable migration for custom HTTPAdapters impacted by the CVE changes in 2.32.0, we've renamed _get_connection to a new public API, get_connection_with_tls_context. Existing custom HTTPAdapters will need to migrate their code to use this new API. get_connection is considered deprecated in all versions of Requests>=2.32.0.

    A minimal (2-line) example has been provided in the linked PR to ease migration, but we strongly urge users to evaluate if their custom adapter is subject to the same issue described in CVE-2024-35195. (#6710)

v2.32.1

2.32.1 (2024-05-20)

Bugfixes

  • Add missing test certs to the sdist distributed on PyPI.
Changelog

Sourced from requests's changelog.

2.32.4 (2025-06-10)

Security

  • CVE-2024-47081 Fixed an issue where a maliciously crafted URL and trusted environment will retrieve credentials for the wrong hostname/machine from a netrc file.

Improvements

  • Numerous documentation improvements

Deprecations

  • Added support for pypy 3.11 for Linux and macOS.
  • Dropped support for pypy 3.9 following its end of support.

2.32.3 (2024-05-29)

Bugfixes

  • Fixed bug breaking the ability to specify custom SSLContexts in sub-classes of HTTPAdapter. (#6716)
  • Fixed issue where Requests started failing to run on Python versions compiled without the ssl module. (#6724)

2.32.2 (2024-05-21)

Deprecations

  • To provide a more stable migration for custom HTTPAdapters impacted by the CVE changes in 2.32.0, we've renamed _get_connection to a new public API, get_connection_with_tls_context. Existing custom HTTPAdapters will need to migrate their code to use this new API. get_connection is considered deprecated in all versions of Requests>=2.32.0.

    A minimal (2-line) example has been provided in the linked PR to ease migration, but we strongly urge users to evaluate if their custom adapter is subject to the same issue described in CVE-2024-35195. (#6710)

2.32.1 (2024-05-20)

Bugfixes

  • Add missing test certs to the sdist distributed on PyPI.
Commits
  • 021dc72 Polish up release tooling for last manual release
  • 821770e Bump version and add release notes for v2.32.4
  • 59f8aa2 Add netrc file search information to authentication documentation (#6876)
  • 5b4b64c Add more tests to prevent regression of CVE 2024 47081
  • 7bc4587 Add new test to check netrc auth leak (#6962)
  • 96ba401 Only use hostname to do netrc lookup instead of netloc
  • 7341690 Merge pull request #6951 from tswast/patch-1
  • 6716d7c remove links
  • a7e1c74 Update docs/conf.py
  • c799b81 docs: fix dead links to kenreitz.org
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=requests&package-manager=pip&previous-version=2.32.0&new-version=2.32.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/microsoft/DirectXShaderCompiler/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- utils/git/requirements_formatting.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 6f3e07dcf2..14123e4ac0 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -42,7 +42,7 @@ pyjwt[crypto]==2.8.0 # via pygithub pynacl==1.5.0 # via pygithub -requests==2.32.0 +requests==2.32.4 # via pygithub toml==0.10.2 # via darker From 5aec1ec4e4d0e31a263f24458c598a3b151c0d4f Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Wed, 18 Jun 2025 17:19:48 -0700 Subject: [PATCH 67/93] [NFC] Address compiler warnings: C4146 - Cases where we can swap to using ~ operator (#7551) Addresses #7550. --- include/llvm/ADT/StringExtras.h | 4 ++-- include/llvm/CodeGen/SelectionDAGNodes.h | 6 +++--- .../llvm/DebugInfo/DWARF/DWARFDebugRangeList.h | 4 ++-- include/llvm/Support/LEB128.h | 2 +- lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 4 ++-- lib/DXIL/DxilUtil.cpp | 6 +++--- lib/Support/APFloat.cpp | 4 ++-- lib/Support/DataExtractor.cpp | 2 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 4 ++-- .../InstCombine/InstCombineSimplifyDemanded.cpp | 4 ++-- tools/clang/lib/Lex/LiteralSupport.cpp | 15 ++++++++++----- tools/clang/lib/Sema/SemaDecl.cpp | 4 ++-- 13 files changed, 33 insertions(+), 28 deletions(-) diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 270989b349..684ee0f9dc 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -36,12 +36,12 @@ static inline StringRef toStringRef(bool B) { /// Interpret the given character \p C as a hexadecimal digit and return its /// value. /// -/// If \p C is not a valid hex digit, -1U is returned. 
+/// If \p C is not a valid hex digit, ~0U is returned. static inline unsigned hexDigitValue(char C) { if (C >= '0' && C <= '9') return C-'0'; if (C >= 'a' && C <= 'f') return C-'a'+10U; if (C >= 'A' && C <= 'F') return C-'A'+10U; - return -1U; + return ~0U; } /// utohex_buffer - Emit the specified number into the buffer specified by diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index d4a6371216..ba63d80e94 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -191,12 +191,12 @@ class SDValue { template<> struct DenseMapInfo { static inline SDValue getEmptyKey() { SDValue V; - V.ResNo = -1U; + V.ResNo = ~0U; return V; } static inline SDValue getTombstoneKey() { SDValue V; - V.ResNo = -2U; + V.ResNo = ~1U; return V; } static unsigned getHashValue(const SDValue &Val) { @@ -879,7 +879,7 @@ inline SDValue::SDValue(SDNode *node, unsigned resno) : Node(node), ResNo(resno) { assert((!Node || ResNo < Node->getNumValues()) && "Invalid result number for the given node!"); - assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps."); + assert(ResNo < ~1U && "Cannot use result numbers reserved for DenseMaps."); } inline unsigned SDValue::getOpcode() const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index c930bd603d..8eea252b60 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -49,9 +49,9 @@ class DWARFDebugRangeList { bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) - return StartAddress == -1U; + return StartAddress == ~0U; else - return StartAddress == -1ULL; + return StartAddress == ~0ULL; } }; diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h index 1324cb82ca..f8a2843412 100644 --- 
a/include/llvm/Support/LEB128.h +++ b/include/llvm/Support/LEB128.h @@ -103,7 +103,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr) { } while (Byte >= 128); // Sign extend negative numbers. if (Byte & 0x40) - Value |= (-1ULL) << Shift; + Value |= (~0ULL) << Shift; if (n) *n = (unsigned)(p - orig_p); return Value; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 185c291d66..a87128ca26 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2401,7 +2401,7 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) - return -(V >> 1); + return ~(V >> 1) + 1; // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0718c81451..f02344ae64 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1360,7 +1360,7 @@ static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); else - Vals.push_back((-V << 1) | 1); + Vals.push_back(((~V + 1) << 1) | 1); } static void WriteConstants(unsigned FirstVal, unsigned LastVal, @@ -1437,7 +1437,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, continue; } const Constant *C = cast(V); - unsigned Code = -1U; + unsigned Code = ~0U; unsigned AbbrevToUse = 0; if (C->isNullValue()) { Code = bitc::CST_CODE_NULL; diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 966c2e189c..cc0b509772 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -181,11 +181,11 @@ void PrintUnescapedString(StringRef Name, raw_ostream &Out) { if (C == '\\') { C = Name[++i]; unsigned value = hexDigitValue(C); - if (value != -1U) { + if (value != ~0U) { C = (unsigned char)value; unsigned value2 = hexDigitValue(Name[i + 1]); - assert(value2 != -1U && 
"otherwise, not a two digit hex escape"); - if (value2 != -1U) { + assert(value2 != ~0U && "otherwise, not a two digit hex escape"); + if (value2 != ~0U) { C = (C << 4) + (unsigned char)value2; ++i; } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 3c76c72271..f8f1fb03cd 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -331,7 +331,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if (hexDigit == -1U) + if (hexDigit == ~0U) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -2368,7 +2368,7 @@ APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) } hex_value = hexDigitValue(*p); - if (hex_value == -1U) + if (hex_value == ~0U) break; p++; diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 5d6d60a87f..625fb3595a 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -168,7 +168,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { // Sign bit of byte is 2nd high order bit (0x40) if (shift < 64 && (byte & 0x40)) - result |= -(1ULL << shift); + result |= (~(1ULL << shift) + 1); *offset_ptr = offset; return result; diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d044764025..0cf9f7797a 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -146,7 +146,7 @@ namespace { private: Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses, - unsigned RetValNum = -1U); + unsigned RetValNum = ~0U); Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); void SurveyFunction(const Function &F); @@ -442,7 +442,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U, // that U 
is really a use of an insertvalue instruction that uses the // original Use. const Function *F = RI->getParent()->getParent(); - if (RetValNum != -1U) { + if (RetValNum != ~0U) { RetOrArg Use = CreateRet(F, RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 2d28b14213..66e01198bd 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -998,7 +998,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned i = 0; i < VWidth; i++) { if (DemandedElts[i]) { unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal != -1u) { + if (MaskVal != ~0u) { assert(MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!"); if (MaskVal < LHSVWidth) @@ -1022,7 +1022,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, bool NewUndefElts = false; for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal == -1u) { + if (MaskVal == ~0u) { UndefElts.setBit(i); } else if (!DemandedElts[i]) { NewUndefElts = true; diff --git a/tools/clang/lib/Lex/LiteralSupport.cpp b/tools/clang/lib/Lex/LiteralSupport.cpp index 606c821bb2..62f241812b 100644 --- a/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/tools/clang/lib/Lex/LiteralSupport.cpp @@ -141,8 +141,12 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, // Hex escapes are a maximal series of hex digits. 
bool Overflow = false; for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { - int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); - if (CharVal == -1) break; + // originally returned -1 for invalid hex digits, now returns ~0u + // signature: static inline unsigned int llvm::hexDigitValue(char C) + unsigned int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); + if (CharVal == ~0U) + break; + // About to shift out a digit? if (ResultChar & 0xF0000000) Overflow = true; @@ -245,7 +249,7 @@ void clang::expandUCNs(SmallVectorImpl &Buf, StringRef Input) { uint32_t CodePoint = 0; for (++I; NumHexDigits != 0; ++I, --NumHexDigits) { unsigned Value = llvm::hexDigitValue(*I); - assert(Value != -1U); + assert(Value != ~0U); CodePoint <<= 4; CodePoint += Value; @@ -278,8 +282,9 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); unsigned short UcnLenSave = UcnLen; for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) { - int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); - if (CharVal == -1) break; + unsigned int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); + if (CharVal == ~0U) + break; UcnVal <<= 4; UcnVal |= CharVal; } diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index e09bf4623c..a772054960 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -5331,7 +5331,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { Qualifiers::ObjCLifetime lifetime = type.getObjCLifetime(); if (lifetime == Qualifiers::OCL_Autoreleasing) { // Various kinds of declaration aren't allowed to be __autoreleasing. 
- unsigned kind = -1U; + unsigned kind = ~0U; if (VarDecl *var = dyn_cast(decl)) { if (var->hasAttr()) kind = 0; // __block @@ -5343,7 +5343,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { kind = 2; // field } - if (kind != -1U) { + if (kind != ~0U) { Diag(decl->getLocation(), diag::err_arc_autoreleasing_var) << kind; } From b78ac50593248bbbf04ba51cb48c3e1f94b416c6 Mon Sep 17 00:00:00 2001 From: Tim Corringham Date: Fri, 20 Jun 2025 12:05:05 +0100 Subject: [PATCH 68/93] Add missing diagnostic argument (#7426) Two instances of the err_integer_literal_too_large diagnostic in HLSL specific code within Sema::ActOnNumericConstant() had a missing argument. When these diagnostics were raised this caused an assert in an assert enabled DXC, and random corruption of the diagnostic text in a non-assert enabled DXC. The trivial fix is to supply the required argument. Fixes #7425 Co-authored-by: Tim Corringham --- tools/clang/lib/Sema/SemaExpr.cpp | 6 ++++-- .../errors/integer_literal_too_large.hlsl | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index 389fcfc3ff..b8272ba4a0 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -3504,12 +3504,14 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { Ty = Context.LitIntTy; if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into 64-bit literal int, report error. 
- Diag(Tok.getLocation(), diag::err_integer_literal_too_large); + Diag(Tok.getLocation(), diag::err_integer_literal_too_large) + << /* Unsigned */ 1; } } else { if (Literal.GetIntegerValue(ResultVal)) { - Diag(Tok.getLocation(), diag::err_integer_literal_too_large); + Diag(Tok.getLocation(), diag::err_integer_literal_too_large) + << /* Unsigned */ 1; } if (Literal.isLongLong) { if (Literal.isUnsigned) diff --git a/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl new file mode 100644 index 0000000000..98db6a6f56 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T lib_6_6 %s | FileCheck %s + +// A diagnostic is generated for an integer literal that is too large to be +// represented by any integer type - an argument indicates whether the text +// contains "signed". That argument was missing in HLSL specific code within +// Sema::ActOnNumericConstant() which resulted in an assert being raised if +// the diagnostic was generated in an assert enabled DXC and a random string +// being inserted in a non-assert enabled DXC. + +// CHECK: integer literal is too large to be represented in any integer type +int a = 98765432109876543210; + +// CHECK: integer literal is too large to be represented in any integer type +uint b = 98765432109876543210U; From b4baabb7da9e483b624e12a86ae29df7d162d4f2 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Fri, 20 Jun 2025 12:21:27 -0700 Subject: [PATCH 69/93] [NFC] Address compiler warnings: Fix C4146 compiler warnings in APInt.cpp (#7556) Addresses #7555 All but one are simple updates to use std::numeric_limits. One case converts to use ~ operator and includes a comment with additional context. 
--- lib/Support/APInt.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 23f89bb66f..d01238a552 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -70,7 +70,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { if (r < radix) return r; - return -1U; + return std::numeric_limits::max(); } @@ -79,7 +79,7 @@ void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = -1ULL; + pVal[i] = std::numeric_limits::max(); } void APInt::initSlowCase(const APInt& that) { @@ -735,7 +735,7 @@ unsigned APInt::countLeadingOnes() const { unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == -1ULL) + if (pVal[i] == std::numeric_limits::max()) Count += APINT_BITS_PER_WORD; else { Count += llvm::countLeadingOnes(pVal[i]); @@ -761,7 +761,8 @@ unsigned APInt::countTrailingZeros() const { unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + for (; i < getNumWords() && pVal[i] == std::numeric_limits::max(); + ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) Count += llvm::countTrailingOnes(pVal[i]); @@ -1070,7 +1071,7 @@ APInt APInt::ashr(unsigned shiftAmt) const { // issues in the algorithm below. if (shiftAmt == BitWidth) { if (isNegative()) - return APInt(BitWidth, -1ULL, true); + return APInt(BitWidth, std::numeric_limits::max(), true); else return APInt(BitWidth, 0); } @@ -1123,7 +1124,8 @@ APInt APInt::ashr(unsigned shiftAmt) const { } // Remaining words are 0 or -1, just assign them. - uint64_t fillValue = (isNegative() ? -1ULL : 0); + uint64_t fillValue = + (isNegative() ? 
std::numeric_limits::max() : 0); for (unsigned i = breakWord+1; i < getNumWords(); ++i) val[i] = fillValue; APInt Result(val, BitWidth); @@ -2192,7 +2194,18 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, N = I; } else { Str.push_back('-'); - N = -(uint64_t)I; + // In this else block, all values of I must be less than 0. + // + // Because values are stored in 2's complement and I is a signed + // integer, the expression -I is equivalent to (~I + 1) for all values + // of I, except INT64_MIN, where -I is undefined behavior in C++ due to + // overflow. + // + // However, (~I + 1) in int64_t also overflows when I == INT64_MIN, + // so convert I to uint64_t first: unsigned arithmetic wraps modulo + // 2^64, making ~(uint64_t)I + 1 well-defined for every I and equal to + // the magnitude of I (2^63 for INT64_MIN), without tripping C4146. + N = ~static_cast<uint64_t>(I) + 1; } } @@ -2408,7 +2421,7 @@ APInt::tcLSB(const integerPart *parts, unsigned int n) } } - return -1U; + return std::numeric_limits::max(); } /* Returns the bit number of the most significant set bit of a number. @@ -2428,7 +2441,7 @@ APInt::tcMSB(const integerPart *parts, unsigned int n) } } while (n); - return -1U; + return std::numeric_limits::max(); } /* Copy the bit vector of width srcBITS from SRC, starting at bit From d1d0a31a7a6a039a35d3b8bc9586b23c57bea2a5 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Fri, 20 Jun 2025 17:52:32 -0700 Subject: [PATCH 70/93] [NFC] Address compiler warnings: C4146 - Trivial std::numeric_limits cases (#7559) Addresses #7558. There is also one trivial change to use the ~ operator included in LEB128.h. My notes on the files were wrong and suggested that it should use std::numeric_limits but looking at it again using ~0ULL made more sense.
--- .../llvm/DebugInfo/DWARF/DWARFDebugAranges.h | 11 +++--- include/llvm/Support/BlockFrequency.h | 5 ++- lib/Analysis/LoopAccessAnalysis.cpp | 2 +- .../InstCombine/InstructionCombining.cpp | 3 +- lib/Transforms/Scalar/LoadCombine.cpp | 4 +-- tools/clang/include/clang/AST/Expr.h | 4 ++- tools/clang/lib/AST/Expr.cpp | 34 ++++++++----------- tools/clang/lib/CodeGen/CGExprScalar.cpp | 3 +- tools/clang/lib/Lex/Lexer.cpp | 2 +- tools/clang/lib/Sema/SemaExpr.cpp | 5 +-- tools/clang/lib/Sema/SemaType.cpp | 4 +-- utils/TableGen/FixedLenDecoderEmitter.cpp | 9 ++--- 12 files changed, 46 insertions(+), 40 deletions(-) diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index 791f010a88..c34cfab284 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -32,12 +32,13 @@ class DWARFDebugAranges { void construct(); struct Range { - explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, - uint32_t CUOffset = -1U) - : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {} + explicit Range(uint64_t LowPC = std::numeric_limits::max(), + uint64_t HighPC = std::numeric_limits::max(), + uint32_t CUOffset = std::numeric_limits::max()) + : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {} void setHighPC(uint64_t HighPC) { - if (HighPC == -1ULL || HighPC <= LowPC) + if (HighPC == std::numeric_limits::max() || HighPC <= LowPC) Length = 0; else Length = HighPC - LowPC; @@ -45,7 +46,7 @@ class DWARFDebugAranges { uint64_t HighPC() const { if (Length) return LowPC + Length; - return -1ULL; + return std::numeric_limits::max(); } bool containsAddress(uint64_t Address) const { diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h index 4304a253b2..d7d6d741f4 100644 --- a/include/llvm/Support/BlockFrequency.h +++ b/include/llvm/Support/BlockFrequency.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_BLOCKFREQUENCY_H 
#include "llvm/Support/DataTypes.h" +#include namespace llvm { @@ -29,7 +30,9 @@ class BlockFrequency { BlockFrequency(uint64_t Freq = 0) : Frequency(Freq) { } /// \brief Returns the maximum possible frequency, the saturation value. - static uint64_t getMaxFrequency() { return -1ULL; } + static uint64_t getMaxFrequency() { + return std::numeric_limits::max(); + } /// \brief Returns the frequency as a fixpoint number scaled by the entry /// frequency. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index d6316dc75b..7e5e3e5ebd 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1179,7 +1179,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides) { - MaxSafeDepDistBytes = -1U; + MaxSafeDepDistBytes = std::numeric_limits::max(); while (!CheckDeps.empty()) { MemAccessInfo CurAccess = *CheckDeps.begin(); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 6bc322fa92..c93232b67f 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1937,7 +1937,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { } else if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { ConstantInt *CI = cast(II->getArgOperand(1)); - uint64_t DontKnow = CI->isZero() ? -1ULL : 0; + uint64_t DontKnow = + CI->isZero() ? 
std::numeric_limits::max() : 0; ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); } } diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 6d358744ef..8f22bb337d 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -131,10 +131,10 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl &Loads) { LoadInst *BaseLoad = nullptr; SmallVector AggregateLoads; bool Combined = false; - uint64_t PrevOffset = -1ull; + uint64_t PrevOffset = std::numeric_limits::max(); uint64_t PrevSize = 0; for (auto &L : Loads) { - if (PrevOffset == -1ull) { + if (PrevOffset == std::numeric_limits::max()) { BaseLoad = L.Load; PrevOffset = L.POP.Offset; PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( diff --git a/tools/clang/include/clang/AST/Expr.h b/tools/clang/include/clang/AST/Expr.h index 26eff309f7..55fd184a79 100644 --- a/tools/clang/include/clang/AST/Expr.h +++ b/tools/clang/include/clang/AST/Expr.h @@ -4510,7 +4510,9 @@ class GenericSelectionExpr : public Expr { Expr *getControllingExpr() { return cast(SubExprs[CONTROLLING]); } /// Whether this generic selection is result-dependent. - bool isResultDependent() const { return ResultIndex == -1U; } + bool isResultDependent() const { + return ResultIndex == std::numeric_limits::max(); + } /// The zero-based index of the result expression's generic association in /// the generic selection's association list. 
Defined only if the diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index 8ed14508af..2d039a7e98 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -3883,25 +3883,21 @@ GenericSelectionExpr::GenericSelectionExpr(const ASTContext &Context, std::copy(AssocExprs.begin(), AssocExprs.end(), SubExprs+END_EXPR); } -GenericSelectionExpr::GenericSelectionExpr(const ASTContext &Context, - SourceLocation GenericLoc, Expr *ControllingExpr, - ArrayRef AssocTypes, - ArrayRef AssocExprs, - SourceLocation DefaultLoc, - SourceLocation RParenLoc, - bool ContainsUnexpandedParameterPack) - : Expr(GenericSelectionExprClass, - Context.DependentTy, - VK_RValue, - OK_Ordinary, - /*isTypeDependent=*/true, - /*isValueDependent=*/true, - /*isInstantiationDependent=*/true, - ContainsUnexpandedParameterPack), - AssocTypes(new (Context) TypeSourceInfo*[AssocTypes.size()]), - SubExprs(new (Context) Stmt*[END_EXPR+AssocExprs.size()]), - NumAssocs(AssocExprs.size()), ResultIndex(-1U), GenericLoc(GenericLoc), - DefaultLoc(DefaultLoc), RParenLoc(RParenLoc) { +GenericSelectionExpr::GenericSelectionExpr( + const ASTContext &Context, SourceLocation GenericLoc, Expr *ControllingExpr, + ArrayRef AssocTypes, ArrayRef AssocExprs, + SourceLocation DefaultLoc, SourceLocation RParenLoc, + bool ContainsUnexpandedParameterPack) + : Expr(GenericSelectionExprClass, Context.DependentTy, VK_RValue, + OK_Ordinary, + /*isTypeDependent=*/true, + /*isValueDependent=*/true, + /*isInstantiationDependent=*/true, ContainsUnexpandedParameterPack), + AssocTypes(new(Context) TypeSourceInfo *[AssocTypes.size()]), + SubExprs(new(Context) Stmt *[END_EXPR + AssocExprs.size()]), + NumAssocs(AssocExprs.size()), + ResultIndex(std::numeric_limits::max()), GenericLoc(GenericLoc), + DefaultLoc(DefaultLoc), RParenLoc(RParenLoc) { SubExprs[CONTROLLING] = ControllingExpr; assert(AssocTypes.size() == AssocExprs.size()); std::copy(AssocTypes.begin(), AssocTypes.end(), this->AssocTypes); 
diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 530c791fcc..50aae94505 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -2559,7 +2559,8 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( llvm::Value *IntMin = Builder.getInt(llvm::APInt::getSignedMinValue(Ty->getBitWidth())); - llvm::Value *NegOne = llvm::ConstantInt::get(Ty, -1ULL); + llvm::Value *NegOne = + llvm::ConstantInt::get(Ty, std::numeric_limits::max()); llvm::Value *LHSCmp = Builder.CreateICmpNE(Ops.LHS, IntMin); llvm::Value *RHSCmp = Builder.CreateICmpNE(Ops.RHS, NegOne); diff --git a/tools/clang/lib/Lex/Lexer.cpp b/tools/clang/lib/Lex/Lexer.cpp index 089e76b78b..e39573ca34 100644 --- a/tools/clang/lib/Lex/Lexer.cpp +++ b/tools/clang/lib/Lex/Lexer.cpp @@ -2737,7 +2737,7 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, char C = getCharAndSize(CurPtr, CharSize); unsigned Value = llvm::hexDigitValue(C); - if (Value == -1U) { + if (Value == std::numeric_limits::max()) { if (Result && !isLexingRawMode()) { if (i == 0) { Diag(BufferPtr, diag::warn_ucn_escape_no_digits) diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index b8272ba4a0..cbc4ac37ab 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -1466,7 +1466,7 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; - unsigned DefaultIndex = -1U; + unsigned DefaultIndex = std::numeric_limits::max(); for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; @@ -1498,7 +1498,8 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." 
- if (DefaultIndex == -1U && CompatIndices.size() == 0) { + if (DefaultIndex == std::numeric_limits::max() && + CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index ff3b0dbac7..f08ae486b5 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -462,7 +462,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, // objc_gc goes on the innermost pointer to something that's not a // pointer. - unsigned innermost = -1U; + unsigned innermost = std::numeric_limits::max(); bool considerDeclSpec = true; for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) { DeclaratorChunk &chunk = declarator.getTypeObject(i); @@ -501,7 +501,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, // Otherwise, if we found an appropriate chunk, splice the attribute // into it. - if (innermost != -1U) { + if (innermost != std::numeric_limits::max()) { moveAttrFromListToList(attr, declarator.getAttrListRef(), declarator.getTypeObject(innermost).getAttrListRef()); return; diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index c5ef9d0e99..d356971f24 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -547,10 +547,11 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. 
- FilterChooserMap.insert( - std::make_pair(-1U, llvm::make_unique( - Owner->AllInstructions, VariableInstructions, - Owner->Operands, BitValueArray, *Owner))); + FilterChooserMap.insert(std::make_pair( + std::numeric_limits::max(), + llvm::make_unique(Owner->AllInstructions, + VariableInstructions, Owner->Operands, + BitValueArray, *Owner))); } // No need to recurse for a singleton filtered instruction. From dd725c203c3acffcf0c43f496f3c0676bdae1f80 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Tue, 24 Jun 2025 06:42:33 -0400 Subject: [PATCH 71/93] Revert "[SPIRV] Use unknown image format in vk1.3 and later" (#7572) I just learned more about the VK feature. In VK1.3, the validation rule was moved from the existence of the capability to being specific to the format. It is possible that people will see regressions if their code runs on a driver that does not support VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT or VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT for the format used by the developer. Reverts microsoft/DirectXShaderCompiler#7528 --- .../clang/include/clang/SPIRV/SpirvBuilder.h | 2 - tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 7 - .../CodeGenSPIRV/node.empty-node-input.hlsl | 2 +- .../clang/test/CodeGenSPIRV/type.buffer.hlsl | 176 +++++++----------- .../type.rasterizer-ordered-buffer.hlsl | 92 ++++----- .../type.rasterizer-ordered-texture.hlsl | 36 ++-- .../test/CodeGenSPIRV/type.rwtexture.hlsl | 56 ++---- 7 files changed, 137 insertions(+), 234 deletions(-) diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 4fe31c6d62..465f7313f1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -812,8 +812,6 @@ class SpirvBuilder { /// the given target at the given source location. 
inline void requireExtension(llvm::StringRef extension, SourceLocation); - FeatureManager &getFeatureManager() { return featureManager; } - private: /// \brief If not added already, adds an OpExtInstImport (import of extended /// instruction set) for the given instruction set. Returns the imported diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 0309d56840..1869983ae3 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -1156,13 +1156,6 @@ LowerTypeVisitor::lowerStructFields(const RecordDecl *decl, spv::ImageFormat LowerTypeVisitor::translateSampledTypeToImageFormat(QualType sampledType, SourceLocation srcLoc) { - - // In Vulkan 1.3, all image types can be Unknown. - FeatureManager &featureManager = spvBuilder.getFeatureManager(); - if (!featureManager.isTargetEnvVulkan() || - featureManager.isTargetEnvVulkan1p3OrAbove()) - return spv::ImageFormat::Unknown; - uint32_t elemCount = 1; QualType ty = {}; if (!isScalarType(sampledType, &ty) && diff --git a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl index da6a1d32df..fa16429a1b 100644 --- a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl +++ b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl @@ -19,7 +19,7 @@ void emptynodeinput(EmptyNodeInput input) // CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 // CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 -// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 Unknown +// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 R32ui // CHECK-DAG: [[IMGPTR:%[^ ]*]] = OpTypePointer UniformConstant [[IMG]] // CHECK-DAG: [[BUF:%[^ ]*]] = OpVariable [[IMGPTR]] UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl index 3e7bb73bcb..35d1b868a8 100644 --- 
a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl @@ -1,149 +1,109 @@ -// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER -// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN -// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN - -// Before vulkan1.3, we should be trying to infer the image type for because -// we cannot necessarily use Unknown. However in VK1.3 and later, we can use -// Unknown. +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability SampledBuffer -// INFER: OpCapability StorageImageExtendedFormats +// CHECK: OpCapability StorageImageExtendedFormats -// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i -// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 Unknown +// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image Buffer intbuf; -// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui -// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 Unknown +// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 Buffer uintbuf; -// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f -// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 Unknown +// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 Buffer floatbuf; -// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i -// UNKNOWN: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Unknown 
+// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i // CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RWBuffer intrwbuf; -// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui -// UNKNOWN: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui // CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RWBuffer uintrwbuf; -// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f -// UNKNOWN: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f // CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RWBuffer floatrwbuf; -// If the `Unkonwn image format is used, then the images below will reuse the types above. -// UNKNOWN-NOT: OpTypeImage - -// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i -// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 Buffer int2buf; -// INFER: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// CHECK: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 Buffer uint2buf; -// INFER: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f -// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// CHECK: %type_buffer_image_7 = OpTypeImage %float 
Buffer 2 0 0 1 Rg32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 Buffer float2buf; -// INFER: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// CHECK: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RWBuffer int2rwbuf; -// INFER: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// CHECK: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 RWBuffer uint2rwbuf; -// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RWBuffer float2rwbuf; -// INFER: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 -// INFER: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i -// INFER: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 +// CHECK: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 +// CHECK: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant 
%type_buffer_image_12 Buffer int3buf; Buffer int4buf; -// INFER: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 -// INFER: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 +// CHECK: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 +// CHECK: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 Buffer uint3buf; Buffer uint4buf; -// INFER: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 -// INFER: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f -// INFER: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 +// CHECK: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 +// CHECK: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 Buffer float3buf; Buffer float4buf; -// INFER: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 -// INFER: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// INFER: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 +// CHECK: 
%type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 +// CHECK: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 RWBuffer int3rwbuf; RWBuffer int4rwbuf; -// INFER: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 -// INFER: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 +// CHECK: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 +// CHECK: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 RWBuffer uint3rwbuf; RWBuffer uint4rwbuf; -// INFER: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 -// INFER: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 +// CHECK: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 +// CHECK: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 RWBuffer float3rwbuf; RWBuffer float4rwbuf; -// INFER: %intbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image UniformConstant -// INFER: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// INFER: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// INFER: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// INFER: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// INFER: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// INFER: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// INFER: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// INFER: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// INFER: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// INFER: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// INFER: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant -// INFER: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant -// INFER: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant -// INFER: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant -// INFER: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant -// INFER: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant -// INFER: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant -// INFER: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant -// INFER: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant -// INFER: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant -// INFER: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 
UniformConstant -// INFER: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant -// INFER: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant - -// UNKNOWN: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// UNKNOWN: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// UNKNOWN: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// UNKNOWN: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// UNKNOWN: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// UNKNOWN: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// UNKNOWN: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// 
UNKNOWN: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// UNKNOWN: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// UNKNOWN: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// UNKNOWN: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// UNKNOWN: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// CHECK: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// CHECK: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// CHECK: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// CHECK: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// CHECK: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// CHECK: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// CHECK: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// CHECK: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// CHECK: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// CHECK: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// CHECK: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// CHECK: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// CHECK: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant +// CHECK: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant +// CHECK: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant +// CHECK: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant +// CHECK: %float3buf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_15 UniformConstant +// CHECK: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant +// CHECK: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant +// CHECK: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant +// CHECK: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant +// CHECK: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant +// CHECK: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant +// CHECK: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl index 0b576fc5e9..c616f65bb9 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl @@ -1,81 +1,59 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER -// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN -// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN - -// Before vulkan1.3, we should be trying to infer the image type for because -// we cannot necessarily use Unknown. However in VK1.3 and later, we can use -// Unknown. 
+// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability SampledBuffer -// INFER: OpCapability StorageImageExtendedFormats +// CHECK: OpCapability StorageImageExtendedFormats -// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i -// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 Unknown +// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image RasterizerOrderedBuffer introvbuf; -// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui -// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 RasterizerOrderedBuffer uintrovbuf; -// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f -// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 RasterizerOrderedBuffer floatrovbuf; -// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// INFER: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 +// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RasterizerOrderedBuffer int2rovbuf; -// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 +// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer 
UniformConstant %type_buffer_image_3 RasterizerOrderedBuffer uint2rovbuf; -// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// INFER: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 +// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RasterizerOrderedBuffer float2rovbuf; -// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 -// INFER: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// CHECK: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 RasterizerOrderedBuffer int3rovbuf; RasterizerOrderedBuffer int4rovbuf; -// INFER: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 -// INFER: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// CHECK: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// CHECK: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 
RasterizerOrderedBuffer uint3rovbuf; RasterizerOrderedBuffer uint4rovbuf; -// INFER: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 -// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// CHECK: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RasterizerOrderedBuffer float3rovbuf; RasterizerOrderedBuffer float4rovbuf; -// INFER: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// INFER: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// INFER: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// INFER: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// INFER: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// INFER: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// INFER: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// INFER: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// INFER: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// INFER: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// INFER: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// INFER: %float4rovbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_10 UniformConstant - -// UNKNOWN: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// UNKNOWN: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// UNKNOWN: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// UNKNOWN: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// CHECK: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// CHECK: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// CHECK: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// CHECK: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// CHECK: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// CHECK: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// CHECK: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// CHECK: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// CHECK: %uint3rovbuf = 
OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// CHECK: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// CHECK: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// CHECK: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl index 21bff421a0..32dd76e6f1 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl @@ -1,28 +1,23 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER -// RUN: %dxc -fspv-target-env=vulkan1.3 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN -// RUN: %dxc -fspv-target-env=universal1.5 -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability Image1D -// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i -// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown +// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i // CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui -// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown +// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui // CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i -// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown +// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i // CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant 
%type_3d_image -// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant @@ -38,8 +33,7 @@ RasterizerOrderedTexture3D t3 ; [[vk::image_format("rgba32f")]] RasterizerOrderedTexture3D t4 ; -// INFER: %t5 = 
OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant -// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant +// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant RasterizerOrderedTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl index 884957210a..f901d44cfa 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl @@ -1,43 +1,24 @@ -// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER -// RUN: %dxc -fspv-target-env=vulkan1.3 -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN +// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability Image1D -// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i -// INFER: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui -// INFER: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i -// INFER: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// 
INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 -// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// UNKNOWN: %type_3d_image = 
OpTypeImage %int 3D 2 0 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// UNKNOWN: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// UNKNOWN: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// UNKNOWN: %type_3d_image_1 = OpTypeImage %float 3D 2 0 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_3d_image_1 = OpTypePointer UniformConstant %type_3d_image_1 -// UNKNOWN: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// UNKNOWN: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// UNKNOWN: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// UNKNOWN: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Unknown -// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant RWTexture1D t1 ; @@ -52,8 +33,7 @@ RWTexture3D t3 ; [[vk::image_format("rgba32f")]] RWTexture3D t4 ; -// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant -// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant +// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant RWTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant From 8f5595872e158796195d1b5526761b5a4216bf40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Tue, 24 Jun 2025 18:40:03 +0200 Subject: [PATCH 72/93] [SPIR-V] Prepase SDK v2025.3 release (#7579) Updating SPIRV-{Headers,Tools} for the 
release. --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index c9aad99f92..2a611a970f 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit c9aad99f9276817f18f72a4696239237c83cb775 +Subproject commit 2a611a970fdbc41ac2e3e328802aed9985352dca diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index da48bb20bd..33e0256818 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit da48bb20bdfc8a214d5bffdacca2d1d2ae849009 +Subproject commit 33e02568181e3312f49a3cf33df470bf96ef293a From 3e01e8b70ee18bd33e706a3c6779ec397d7e9a1f Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:41:37 -0700 Subject: [PATCH 73/93] [NFC] Address compiler warnings: C4146 - Another round of use two's complement instead of negation (#7567) Addresses https://github.com/microsoft/DirectXShaderCompiler/issues/7565 A few more instances where we can take advantage of -N being equivalent to (~N + 1) --- lib/Analysis/BasicAliasAnalysis.cpp | 6 +++++- lib/Analysis/ConstantFolding.cpp | 4 ++-- lib/Analysis/InstructionSimplify.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index aa0f9ed873..956c334374 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1117,7 +1117,11 @@ AliasResult BasicAliasAnalysis::aliasGEP( // stripped a gep with negative index ('gep , -1, ...). if (V1Size != MemoryLocation::UnknownSize && V2Size != MemoryLocation::UnknownSize) { - if (-(uint64_t)GEP1BaseOffset < V1Size) + // GEP1BaseOffset is negative in this else block and because we're + // assigning to an unsigned variable, we can make use of + // -I == (~I + 1) to compute the absolute value of GEP1BaseOffset. 
+ const uint64_t GEP1BaseOffsetAbs = (~GEP1BaseOffset + 1ULL); + if (GEP1BaseOffsetAbs < V1Size) return PartialAlias; return NoAlias; } diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 69c9b10b60..0167bdf0a1 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -187,7 +187,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + ShiftAmt += isLittleEndian ? SrcBitSize : (~SrcBitSize + 1U); // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); @@ -213,7 +213,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // endianness. Constant *Elt = ConstantExpr::getLShr(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + ShiftAmt += isLittleEndian ? DstBitSize : (~DstBitSize + 1U); // Truncate the element to an integer with the same pointer size and // convert the element back to a pointer using a inttoptr. diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 89c7cc7a3e..96c0b3302d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -4109,7 +4109,7 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + ShiftAmt += isLittleEndian ? SrcBitSize : (~SrcBitSize + 1U); // Mix it in. 
Elt = ConstantExpr::getOr(Elt, Src); @@ -4144,9 +4144,9 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { for (unsigned j = 0; j != Ratio; ++j) { // Shift the piece of the value into the right place, depending on // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + Constant *Elt = ConstantExpr::getLShr( + Src, ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : (~DstBitSize + 1U); // Truncate the element to an integer with the same pointer size and // convert the element back to a pointer using a inttoptr. From 23118b9eaab90d7cb6b95a95cc8ea3f313b4b05a Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 24 Jun 2025 17:51:39 -0700 Subject: [PATCH 74/93] [NFC] Address compiler warnings: C4146 - A 'grab bag' of remaining instances (#7574) ## Fix C4146 warnings: unary minus on unsigned types Fixes several remaining MSVC C4146 warnings where unary minus was applied to unsigned integers. This should be the last PR containing MSVC C4146 warning fixes. I will remove the disablement of the warning as an error in a subsequent PR once the other pending PRs are completed. **Changes:** - Replace `-(unsigned_value)` with `~unsigned_value + 1` for offset calculations - Use `-1LL` instead of `-1ULL` where signed values are intended - Fix alignment padding calculation to avoid unsigned negation **Files changed:** - CoverageMappingGen.cpp, Lexer.cpp, Rewriter.cpp: Use two's complement for safe unsigned negation in offset calculations - ItaniumCXXABI.cpp: Use signed literal for ABI-compliant null member pointer (-1) - ExprConstant.cpp: Replace `-1ULL` with `~0ULL` for bitmasks - CodeGenMapTable.cpp: Fix sentinel value generation All changes are mathematically equivalent and preserve existing behavior while eliminating compiler warnings. 
Addresses #7573 --- lib/Support/APFloat.cpp | 2 +- lib/Support/StringRef.cpp | 14 ++++++++----- lib/Support/TimeValue.cpp | 3 +-- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 21 +++++++++++-------- tools/clang/lib/AST/ExprConstant.cpp | 4 ++-- tools/clang/lib/AST/MicrosoftMangle.cpp | 6 +++--- .../clang/lib/CodeGen/CoverageMappingGen.cpp | 2 +- tools/clang/lib/CodeGen/ItaniumCXXABI.cpp | 14 ++++++------- tools/clang/lib/CodeGen/TargetInfo.cpp | 4 ++-- tools/clang/lib/Format/Format.cpp | 2 +- tools/clang/lib/Lex/Lexer.cpp | 2 +- tools/clang/lib/Rewrite/Rewriter.cpp | 4 ++-- 12 files changed, 42 insertions(+), 36 deletions(-) diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f8f1fb03cd..40c22459e2 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -446,7 +446,7 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) if (~parts[count]) return ~(integerPart) 0; /* A lot. */ - return -parts[0]; + return (~parts[0] + 1); } return ~(integerPart) 0; /* A lot. */ diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ddece087a9..52b949d826 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/edit_distance.h" #include +#include using namespace llvm; @@ -393,13 +394,16 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, // Get the positive part of the value. if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || - // Reject values so large they'd overflow as negative signed, but allow - // "-0". This negates the unsigned so that the negative isn't undefined - // on signed overflow. - (long long)-ULLVal > 0) + // Reject values larger than what can be represented as negative signed. + // The most negative long long is LLONG_MIN, which has magnitude + // (LLONG_MAX + 1). Values larger than this magnitude cannot be negated + // without overflow. 
+ ULLVal > static_cast( + std::numeric_limits::max()) + + 1) return true; - Result = -ULLVal; + Result = (~ULLVal + 1); return false; } diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp index 136b93ecee..06de27bbda 100644 --- a/lib/Support/TimeValue.cpp +++ b/lib/Support/TimeValue.cpp @@ -19,8 +19,7 @@ using namespace sys; const TimeValue::SecondsType TimeValue::PosixZeroTimeSeconds = -946684800; -const TimeValue::SecondsType - TimeValue::Win32ZeroTimeSeconds = -12591158400ULL; +const TimeValue::SecondsType TimeValue::Win32ZeroTimeSeconds = -12591158400LL; void TimeValue::normalize( void ) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 3ab9367a6b..60962ec69a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1395,8 +1395,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. if (Scale == 0) - // The cast does the right thing with INT64_MIN. - BaseOffset = -(uint64_t)BaseOffset; + // Negate BaseOffset using two's complement (~x + 1) to avoid undefined + // behavior. Simple negation (-BaseOffset) would be undefined for + // INT64_MIN since -INT64_MIN cannot fit in int64_t. Two's complement + // gives the expected wraparound behavior: -INT64_MIN becomes INT64_MIN. + BaseOffset = ~BaseOffset + 1ULL; return TTI.isLegalICmpImmediate(BaseOffset); } @@ -3000,7 +3003,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // of -1) are now also interesting. 
for (size_t i = 0, e = Factors.size(); i != e; ++i) if (Factors[i] != -1) - Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(~Factors[i] + 1ULL); Factors.insert(-1); } @@ -3739,7 +3742,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { const SCEV *OrigReg = WI.OrigReg; Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); - const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); + const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, ~Imm + 1ULL)); unsigned BitWidth = SE.getTypeSizeInBits(IntTy); // TODO: Use a more targeted data structure. @@ -3754,8 +3757,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; // Don't create 50 + reg(-50). - if (F.referencesReg(SE.getSCEV( - ConstantInt::get(IntTy, -(uint64_t)Offset)))) + if (F.referencesReg( + SE.getSCEV(ConstantInt::get(IntTy, ~Offset + 1ULL)))) continue; Formula NewF = F; NewF.BaseOffset = Offset; @@ -4556,7 +4559,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, // The other interesting way of "folding" with an ICmpZero is to use a // negated immediate. 
if (!ICmpScaledV) - ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); + ICmpScaledV = ConstantInt::get(IntTy, ~Offset + 1ULL); else { Ops.push_back(SE.getUnknown(ICmpScaledV)); ICmpScaledV = ConstantInt::get(IntTy, Offset); @@ -4608,8 +4611,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); - Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), - -(uint64_t)Offset); + Constant *C = + ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), ~Offset + 1ULL); if (C->getType() != OpTy) C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, OpTy, false), diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index baa0349cfe..c24e44022f 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -6555,7 +6555,7 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { // handle all cases where the expression has side-effects. 
if (E->getArg(0)->HasSideEffects(Info.Ctx)) { if (E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue() <= 1) - return Success(-1ULL, E); + return Success(~0ULL, E); return Success(0, E); } @@ -6570,7 +6570,7 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: case EvalInfo::EM_PotentialConstantExpressionUnevaluated: - return Success(-1ULL, E); + return Success(~0ULL, E); } llvm_unreachable("Invalid EvalMode!"); } diff --git a/tools/clang/lib/AST/MicrosoftMangle.cpp b/tools/clang/lib/AST/MicrosoftMangle.cpp index 40dca1bb1b..ae9f1cd7f8 100644 --- a/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -633,7 +633,7 @@ void MicrosoftCXXNameMangler::mangleNumber(int64_t Number) { uint64_t Value = static_cast(Number); if (Number < 0) { - Value = -Value; + Value = ~Value + 1ULL; Out << '?'; } @@ -2308,7 +2308,7 @@ static void mangleThunkThisAdjustment(const CXXMethodDecl *MD, Out << AccessSpec; Mangler.mangleNumber( static_cast(Adjustment.Virtual.Microsoft.VtordispOffset)); - Mangler.mangleNumber(-static_cast(Adjustment.NonVirtual)); + Mangler.mangleNumber(~static_cast(Adjustment.NonVirtual) + 1); } } else if (Adjustment.NonVirtual != 0) { switch (MD->getAccess()) { @@ -2323,7 +2323,7 @@ static void mangleThunkThisAdjustment(const CXXMethodDecl *MD, case AS_public: Out << 'W'; } - Mangler.mangleNumber(-static_cast(Adjustment.NonVirtual)); + Mangler.mangleNumber(~static_cast(Adjustment.NonVirtual) + 1); } else { switch (MD->getAccess()) { case AS_none: diff --git a/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index eca91590e6..e16e015a74 100644 --- a/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -116,7 +116,7 @@ class CoverageMappingBuilder { /// \brief Return the start location of an included file or expanded macro. 
SourceLocation getStartOfFileOrMacro(SourceLocation Loc) { if (Loc.isMacroID()) - return Loc.getLocWithOffset(-SM.getFileOffset(Loc)); + return Loc.getLocWithOffset(~SM.getFileOffset(Loc) + 1); return SM.getLocForStartOfFile(SM.getFileID(Loc)); } diff --git a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 97fe28be7f..698d34c774 100644 --- a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -639,8 +639,8 @@ llvm::Constant * ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { // Itanium C++ ABI 2.3: // A NULL pointer is represented as -1. - if (MPT->isMemberDataPointer()) - return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true); + if (MPT->isMemberDataPointer()) + return llvm::ConstantInt::get(CGM.PtrDiffTy, -1LL, /*isSigned=*/true); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0); llvm::Constant *Values[2] = { Zero, Zero }; @@ -1023,7 +1023,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, // If Dst is not derived from Src we can skip the whole computation below and // return that Src is not a public base of Dst. Record all inheritance paths. if (!Dst->isDerivedFrom(Src, Paths)) - return CharUnits::fromQuantity(-2ULL); + return CharUnits::fromQuantity(-2LL); unsigned NumPublicPaths = 0; CharUnits Offset; @@ -1040,7 +1040,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, // If the path contains a virtual base class we can't give any hint. // -1: no hint. if (J->Base->isVirtual()) - return CharUnits::fromQuantity(-1ULL); + return CharUnits::fromQuantity(-1LL); if (NumPublicPaths > 1) // Won't use offsets, skip computation. continue; @@ -1053,11 +1053,11 @@ static CharUnits computeOffsetHint(ASTContext &Context, // -2: Src is not a public base of Dst. 
if (NumPublicPaths == 0) - return CharUnits::fromQuantity(-2ULL); + return CharUnits::fromQuantity(-2LL); // -3: Src is a multiple public base type but never a virtual base type. if (NumPublicPaths > 1) - return CharUnits::fromQuantity(-3ULL); + return CharUnits::fromQuantity(-3LL); // Otherwise, the Src type is a unique public nonvirtual base type of Dst. // Return the offset of Src from the origin of Dst. @@ -1154,7 +1154,7 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, // Get the offset-to-top from the vtable. llvm::Value *OffsetToTop = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2LL); OffsetToTop = CGF.Builder.CreateLoad(OffsetToTop, "offset.to.top"); // Finally, add the offset to the pointer. diff --git a/tools/clang/lib/CodeGen/TargetInfo.cpp b/tools/clang/lib/CodeGen/TargetInfo.cpp index aba43964d9..aaf63355af 100644 --- a/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -1283,7 +1283,7 @@ llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, Addr = CGF.Builder.CreateGEP(Addr, Offset); llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(Addr, CGF.Int32Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -Align); + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, ~Align + 1); Addr = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), Addr->getType(), "ap.cur.aligned"); @@ -2849,7 +2849,7 @@ static llvm::Value *EmitVAArgFromMemory(llvm::Value *VAListAddr, overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset); llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(overflow_arg_area, CGF.Int64Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, -(uint64_t)Align); + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, ~Align + 1); overflow_arg_area = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), overflow_arg_area->getType(), diff --git 
a/tools/clang/lib/Format/Format.cpp b/tools/clang/lib/Format/Format.cpp index 7d556c9f0f..b6ca328972 100644 --- a/tools/clang/lib/Format/Format.cpp +++ b/tools/clang/lib/Format/Format.cpp @@ -1049,7 +1049,7 @@ class FormatTokenLexer { FormatTok = new (Allocator.Allocate()) FormatToken; readRawToken(*FormatTok); SourceLocation WhitespaceStart = - FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); + FormatTok->Tok.getLocation().getLocWithOffset(~TrailingWhitespace + 1); FormatTok->IsFirst = IsFirstToken; IsFirstToken = false; diff --git a/tools/clang/lib/Lex/Lexer.cpp b/tools/clang/lib/Lex/Lexer.cpp index e39573ca34..ce9dd8a3c0 100644 --- a/tools/clang/lib/Lex/Lexer.cpp +++ b/tools/clang/lib/Lex/Lexer.cpp @@ -480,7 +480,7 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, } // Create a lexer starting at the beginning of this token. - SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); + SourceLocation LexerStartLoc = Loc.getLocWithOffset(~LocInfo.second + 1); Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); diff --git a/tools/clang/lib/Rewrite/Rewriter.cpp b/tools/clang/lib/Rewrite/Rewriter.cpp index be09a363a6..fa081d65ac 100644 --- a/tools/clang/lib/Rewrite/Rewriter.cpp +++ b/tools/clang/lib/Rewrite/Rewriter.cpp @@ -60,7 +60,7 @@ void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, Buffer.erase(RealOffset, Size); // Add a delta so that future changes are offset correctly. 
- AddReplaceDelta(OrigOffset, -Size); + AddReplaceDelta(OrigOffset, ~Size + 1); if (removeLineIfEmpty) { // Find the line that the remove occurred and if it is completely empty @@ -86,7 +86,7 @@ void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, } if (posI != end() && *posI == '\n') { Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/); - AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/)); + AddReplaceDelta(curLineStartOffs, ~(lineSize + 1 /* + '\n'*/) + 1); } } } From b390fb19adc5d7c23180eb470470411fce986910 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:42:26 -0700 Subject: [PATCH 75/93] [NFC] Address compiler warnings: C4146 - Use two's complement instead of negation (#7562) Replaces uses of the unary - operator on signed integers with the equivalent (sort of, see the details below) expression '~N + 1', assigning the result to an unsigned type. This avoids undefined behavior in edge cases and ensures correctness when certain conditions are met. Details: This transformation is valid when: The signed value N is guaranteed to be negative. The result is stored in an unsigned type that can represent the full range of the signed type (e.g., uint64_t for int64_t). The system uses two's complement representation (as is standard on modern platforms). While -N is undefined for the minimum representable value (e.g., INT64_MIN), the expression ~N + 1 remains well-defined and yields the correct bit pattern. Assigning this result to an appropriately sized unsigned type preserves the intended two's complement interpretation without triggering undefined behavior. Addresses #7561. 
--- lib/Support/raw_ostream.cpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index b11ffb15d5..595468a6dc 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -134,13 +134,18 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) { } raw_ostream &raw_ostream::operator<<(long N) { + // A positive signed long has the same value when casted to its unsigned + // counterpart. If its negative, then we'll handle it in the below if block. + unsigned long UN = static_cast(N); + if (N < 0 && writeBase == 10) { *this << '-'; - // Avoid undefined behavior on LONG_MIN with a cast. - N = -(unsigned long)N; + // Since N is negative and we're storing the result in an unsigned Long, + // we can use the equivalence of -N == ~N + 1 to get the positive value. + UN = ~N + 1UL; } - return this->operator<<(static_cast(N)); + return this->operator<<(UN); } raw_ostream &raw_ostream::operator<<(unsigned long long N) { @@ -169,13 +174,18 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) { } raw_ostream &raw_ostream::operator<<(long long N) { + // A positive signed long has the same value when casted to its unsigned + // counterpart. If its negative, then we'll handle it in the below if block. + unsigned long long UN = static_cast(N); + if (N < 0 && writeBase == 10) { *this << '-'; - // Avoid undefined behavior on INT64_MIN with a cast. - N = -(unsigned long long)N; + // Since N is negative and we're storing the result in an unsigned Long, + // we can use the equivalence of -N == ~N + 1 to get the positive value. + UN = ~N + 1ULL; } - return this->operator<<(static_cast(N)); + return this->operator<<(UN); } // HLSL Change Starts - Generalize non-base10 printing. 
@@ -470,7 +480,10 @@ raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) { char *EndPtr = NumberBuffer+sizeof(NumberBuffer); char *CurPtr = EndPtr; bool Neg = (FN.DecValue < 0); - uint64_t N = Neg ? -static_cast(FN.DecValue) : FN.DecValue; + // If the value is negative, and because we are storing the result of the ~ + // operation in an unsigned value, we can use the equivalence of + // -N == ~N + 1 to get the positive value of the negative number + uint64_t N = Neg ? (~FN.DecValue + 1UL) : FN.DecValue; while (N) { *--CurPtr = '0' + char(N % 10); N /= 10; From e07be1c3541013f9604186c741969ccb51aa314d Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Wed, 25 Jun 2025 14:32:58 -0600 Subject: [PATCH 76/93] Fix errors in retrieving and assigning load status parameter (#7513) There were two problems with processing the status parameter with the reword of the buffer load code. The first was that the status was not being passed down to the load instruction generation for aggregate types in any shader model version. The second was that the status retrieval from the resret returned by the raw buffer loads was using the wrong index for native vectors supported by shader model 6.9. The status Value was not getting passed all the way down to the load instruction generation for aggregate types because the refactored helper constructor would always set it to null. It needs to be explicitly stated since by that point, the original call instruction it came from has been lost amidst subsequent GEPs, bitcasts, and/or loads that aggregate types (arrays and structs) will use on the results of the original call instruction to get the exact element required. This changes the constructor to take an optional status parameter allowing the locations where it might be set to pass it along. In other cases, it will be null and be appropriately ignored. Modified aggregate tests to verify this behavior. 
This required keeping track of the return of the last load operation involved in a raw buffer load, which made arrays more complicated. Rather than give them their own CHECK prefix, I lumped them in with large matrices requiring three loads. This did require making all the array lengths 3 to match. The loss in test variability is worth the convenience as there is no known distinction when it comes to array sizes over 1. The status retrieval from the ResRet returned by the raw buffer loads was using the wrong index for native vectors supported by shader model 6.9. Adjusting the index according to the opcode ensures that the index will be correct. This also required a change to validation that allows checkAccessFullyMapped to operate on the second element extracted from a ResRet where applicable and some corresponding null tolerance in related code. Adds status-retrieving overloads to the relevant load/store tests for sm6.9, aggregates, and other loads, though the last category exhibited no issues. At least I got some statuses right!
Fixes #7508 --- include/dxc/DXIL/DxilConstants.h | 1 + lib/DXIL/DxilOperations.cpp | 2 +- lib/DxilValidation/DxilValidation.cpp | 10 +- lib/HLSL/HLOperationLower.cpp | 27 +-- .../intrinsics/buffer-agg-load-stores.hlsl | 165 ++++++++++++------ .../intrinsics/buffer-load-stores-sm69.hlsl | 45 ++++- .../hlsl/intrinsics/buffer-load-stores.hlsl | 104 ++++++++++- 7 files changed, 280 insertions(+), 74 deletions(-) diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 0f28edbc39..84588a2ff7 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -154,6 +154,7 @@ const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +const unsigned kVecResRetStatusIndex = 1; /* hctdb_instrhelp.get_max_oload_dims()*/ // OLOAD_DIMS-TEXT:BEGIN diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index a66dfc68d4..253121346a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -6438,7 +6438,7 @@ Type *OP::GetFourI32Type() const { return m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { - if (!Ty->isStructTy()) + if (!Ty || !Ty->isStructTy()) return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index db596a3821..9587897e22 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -1573,9 +1573,15 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); + StructType *StrTy = dyn_cast(V->getType()); + unsigned ExtractIndex = EVI->getIndices()[0]; + // Ensure parameter is a single value that is extracted from the correct + // ResRet struct location. 
bool IsLegal = EVI->getNumIndices() == 1 && - EVI->getIndices()[0] == DXIL::kResRetStatusIndex && - ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); + (ExtractIndex == DXIL::kResRetStatusIndex || + ExtractIndex == DXIL::kVecResRetStatusIndex) && + ValCtx.DxilMod.GetOP()->IsResRetType(StrTy) && + ExtractIndex == StrTy->getNumElements() - 1; if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 7d5eb0edce..2033533327 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -3063,10 +3063,10 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, } void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, - hlsl::OP *hlslOp) { + hlsl::OP *hlslOp, + unsigned StatusIndex = DXIL::kResRetStatusIndex) { if (status && !isa(status)) { - Value *statusVal = - Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex); + Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); Value *checkAccessOp = hlslOp->GetI32Const( static_cast(DXIL::OpCode::CheckAccessFullyMapped)); Function *checkAccessFn = hlslOp->GetOpFunc( @@ -4028,9 +4028,9 @@ struct ResLoadHelper { // Used for some subscript operators that feed the generic HL call inst // into a load op and by the matrixload call instruction. 
ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, - Value *Offset, Value *mip = nullptr) + Value *Offset, Value *status = nullptr, Value *mip = nullptr) : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), - addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + addr(idx), offset(Offset), status(status), mipLevel(mip) { opcode = LoadOpFromResKind(RK); Type *Ty = Inst->getType(); if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && @@ -4304,18 +4304,22 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Function *F = OP->GetOpFunc(opcode, EltTy); Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + unsigned StatusIndex; // Extract elements from returned ResRet. // Native vector loads just have one vector element in the ResRet. // Others have up to four scalars that need to be individually extracted. - if (opcode == OP::OpCode::RawBufferVectorLoad) + if (opcode == OP::OpCode::RawBufferVectorLoad) { Elts[i++] = Builder.CreateExtractValue(Ld, 0); - else + StatusIndex = DXIL::kVecResRetStatusIndex; + } else { for (unsigned j = 0; j < chunkSize; j++, i++) Elts[i] = Builder.CreateExtractValue(Ld, j); + StatusIndex = DXIL::kResRetStatusIndex; + } // Update status. 
- UpdateStatus(Ld, helper.status, Builder, OP); + UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); if (!FirstLd) FirstLd = Ld; @@ -8537,7 +8541,7 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { - ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); #ifndef NDEBUG Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); Type *matType = ptr->getType()->getPointerElementType(); @@ -8864,7 +8868,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } } else if (LoadInst *LdInst = dyn_cast(user)) { // Load of scalar/vector within a struct or structured raw load. - ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status); TranslateBufLoad(helper, ResKind, Builder, OP, DL); LdInst->eraseFromParent(); @@ -9239,7 +9243,8 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { Value *Offset = UndefValue::get(Builder.getInt32Ty()); - ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, + /*status*/ nullptr, mipLevel); TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 9f7a487a05..572734d679 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,35 +1,35 @@ -// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 
-DETY=uint64_t -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s 
// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 
%s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s @@ -105,27 +105,27 @@ RWStructuredBuffer< TYPE SS > RwStBuf : register(u2); ConsumeStructuredBuffer< TYPE SS > CnStBuf : register(u4); AppendStructuredBuffer< TYPE SS > ApStBuf : register(u5); -TYPE Add(TYPE f1[COLS], TYPE f2[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; return ret; } template -T Add(T v1, T v2) { return v1 + v2; } +T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } -TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS], TYPE f5[COLS], TYPE f6[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i] + f5[i] + f6[i]; return ret; } template -T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } +T Add(T v1, T v2, T v3, T v4, T v5, T v6) { return v1 + v2 + v3 + v4 + v5 + v6; } -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -150,26 +150,55 @@ void main(uint ix[2] : IX) { // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // OFF: [[RIX0:%.*]] = add i32 [[IX0]], [[BOFF:[0-9]+]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: [[IX0p4:%.*]] = add i32 [[RIX0]], 
[[p4:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // MULTI: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 TYPE babElt1 SS = RwByBuf.Load< TYPE SS >(ix[0]); + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // CHECK-DAG: [[RIX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // OFF: [[RIX1:%.*]] = add i32 [[IX1]], [[BOFF]] + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX1]] + // MULTI: [[IX1p4:%.*]] = add i32 [[RIX1]], [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p4]] + // MULTI: [[IX1p8:%.*]] = add i32 [[RIX1]], [[p8]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + uint status1; + TYPE babElt3 SS = RwByBuf.Load< TYPE SS >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] 
// CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX0]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 SS = RoByBuf.Load< TYPE SS >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX1]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2; + TYPE babElt4 SS = RoByBuf.Load< TYPE SS >(ix[1], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -177,48 +206,76 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, double 0.0 // CHECK: call void 
@dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] - RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2)); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 SS = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt2 SS = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 2 + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 SS = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 
139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 SS = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 SS = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 
@dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 SS = RoStBuf.Load(ix[2], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -226,9 +283,13 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 0, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 16, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] - RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -236,8 +297,8 @@ void main(uint ix[2] : IX) { // OFF: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 // OFF: call %dx.types.ResRet.f64 
@dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 16 // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -253,7 +314,7 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 16 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] ApStBuf.Append(cnElt); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index 5305ee495b..f71b29e83e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -26,7 +26,7 @@ AppendStructuredBuffer > ApStBuf : register(u5); // CHECK-LABEL: define void @main [shader("vertex")] -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -45,36 +45,73 @@ void main(uint ix[2] : IX) { // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt1 = RwByBuf.Load< vector >(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status1; + vector babElt3 = RwByBuf.Load< vector >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt2 = RoByBuf.Load< vector >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status2; + vector babElt4 = RoByBuf.Load< vector >(ix[1], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void 
@dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt2 = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt5 = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: 
call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt4 = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt6 = RoStBuf.Load(ix[2], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index 8dcf5ead1c..896f442c2c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -87,12 +87,36 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, 
%dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status1 = 0; + TYPE babElt3 = RwByBuf.Load< TYPE >(ix1, status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2 = 0; + TYPE babElt4 = RoByBuf.Load< TYPE >(ix1, status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -102,6 +126,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 = RwStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle 
[[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -116,6 +141,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 = RoStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -123,12 +149,34 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 = RoStBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 = RwStBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 = RoStBuf.Load(ix2[0], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 
216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -167,6 +215,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt1 = RwTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -183,6 +232,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt2 = RwTyBuf[ix1]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -200,6 +250,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt3 = RoTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -217,6 +268,44 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt4 = RoTyBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 
[[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt5 = RwTyBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt6 = RoTyBuf.Load(ix2[0], status2); + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // I64: trunc i64 %{{.*}} to i32 @@ -229,8 +318,12 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 - // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + 
typElt4; + // CHECK: call void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 300 + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6; + RwByBuf.Store< uint > (300, status1 && status2); // Texture Tests // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] @@ -250,6 +343,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -285,6 +379,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -320,6 +415,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] // CHECK: call %dx.types.ResRet.[[TY32]] 
@dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] // F64: call double @dx.op.makeDouble.f64(i32 101 From 93c7c2c8e62358ec3600350ce5763dfadbb6d3a0 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Wed, 25 Jun 2025 18:43:47 -0700 Subject: [PATCH 77/93] Update Release Notes (#7563) Update release notes in preparation for release --- docs/ReleaseNotes.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 274164158e..6850902a81 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -19,7 +19,9 @@ The included licenses apply to the following files: ### Upcoming Release -Place release notes for the upcoming release below this line and remove this line upon naming this release. +- Fix regression: [#7510](https://github.com/microsoft/DirectXShaderCompiler/issues/7510) crash when calling `sizeof` on templated type. +- Fix regression: [#7508](https://github.com/microsoft/DirectXShaderCompiler/issues/7508) crash when calling `Load` with `status`. +- Header file `dxcpix.h` was added to the release package. ### Version 1.8.2505 From 8a9f8820723dd1677f698fe98b396666f34a2694 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Jun 2025 10:15:30 -0700 Subject: [PATCH 78/93] Bump urllib3 from 2.2.2 to 2.5.0 in /utils/git (#7554) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.2.2 to 2.5.0.
Release notes

Sourced from urllib3's releases.

2.5.0

🚀 urllib3 is fundraising for HTTP/2 support

urllib3 is raising ~$40,000 USD to release HTTP/2 support and ensure long-term sustainable maintenance of the project after a sharp decline in financial support. If your company or organization uses Python and would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and thousands of other projects please consider contributing financially to ensure HTTP/2 support is developed sustainably and maintained for the long-haul.

Thank you for your support.

Security issues

urllib3 2.5.0 fixes two moderate security issues:

  • Pool managers now properly control redirects when retries is passed — CVE-2025-50181 reported by @​sandumjacob (5.3 Medium, GHSA-pq67-6m6q-mj2v)
  • Redirects are now controlled by urllib3 in the Node.js runtime — CVE-2025-50182 (5.3 Medium, GHSA-48p4-8xcf-vxj5)

Features

  • Added support for the compression.zstd module that is new in Python 3.14. See PEP 784 for more information. (#3610)
  • Added support for version 0.5 of hatch-vcs (#3612)

Bugfixes

  • Raised exception for HTTPResponse.shutdown on a connection already released to the pool. (#3581)
  • Fixed incorrect CONNECT statement when using an IPv6 proxy with connection_from_host. Previously would not be wrapped in []. (#3615)

2.4.0

🚀 urllib3 is fundraising for HTTP/2 support

urllib3 is raising ~$40,000 USD to release HTTP/2 support and ensure long-term sustainable maintenance of the project after a sharp decline in financial support. If your company or organization uses Python and would benefit from HTTP/2 support in Requests, pip, cloud SDKs, and thousands of other projects please consider contributing financially to ensure HTTP/2 support is developed sustainably and maintained for the long-haul.

Thank you for your support.

Features

  • Applied PEP 639 by specifying the license fields in pyproject.toml. (#3522)
  • Updated exceptions to save and restore more properties during the pickle/serialization process. (#3567)
  • Added verify_flags option to create_urllib3_context with a default of VERIFY_X509_PARTIAL_CHAIN and VERIFY_X509_STRICT for Python 3.13+. (#3571)

Bugfixes

  • Fixed a bug with partial reads of streaming data in Emscripten. (#3555)

Misc

  • Switched to uv for installing development dependencies. (#3550)
  • Removed the multiple.intoto.jsonl asset from GitHub releases. Attestation of release files since v2.3.0 can be found on PyPI. (#3566)

2.3.0

... (truncated)

Changelog

Sourced from urllib3's changelog.

2.5.0 (2025-06-18)

Features

  • Added support for the compression.zstd module that is new in Python 3.14. See PEP 784 <https://peps.python.org/pep-0784/>_ for more information. ([#3610](https://github.com/urllib3/urllib3/issues/3610) <https://github.com/urllib3/urllib3/issues/3610>__)
  • Added support for version 0.5 of hatch-vcs ([#3612](https://github.com/urllib3/urllib3/issues/3612) <https://github.com/urllib3/urllib3/issues/3612>__)

Bugfixes

  • Fixed a security issue where restricting the maximum number of followed redirects at the urllib3.PoolManager level via the retries parameter did not work.
  • Made the Node.js runtime respect redirect parameters such as retries and redirects.
  • Raised exception for HTTPResponse.shutdown on a connection already released to the pool. ([#3581](https://github.com/urllib3/urllib3/issues/3581) <https://github.com/urllib3/urllib3/issues/3581>__)
  • Fixed incorrect CONNECT statement when using an IPv6 proxy with connection_from_host. Previously would not be wrapped in []. ([#3615](https://github.com/urllib3/urllib3/issues/3615) <https://github.com/urllib3/urllib3/issues/3615>__)

2.4.0 (2025-04-10)

Features

  • Applied PEP 639 by specifying the license fields in pyproject.toml. ([#3522](https://github.com/urllib3/urllib3/issues/3522) <https://github.com/urllib3/urllib3/issues/3522>__)
  • Updated exceptions to save and restore more properties during the pickle/serialization process. ([#3567](https://github.com/urllib3/urllib3/issues/3567) <https://github.com/urllib3/urllib3/issues/3567>__)
  • Added verify_flags option to create_urllib3_context with a default of VERIFY_X509_PARTIAL_CHAIN and VERIFY_X509_STRICT for Python 3.13+. ([#3571](https://github.com/urllib3/urllib3/issues/3571) <https://github.com/urllib3/urllib3/issues/3571>__)

Bugfixes

  • Fixed a bug with partial reads of streaming data in Emscripten. ([#3555](https://github.com/urllib3/urllib3/issues/3555) <https://github.com/urllib3/urllib3/issues/3555>__)

Misc

  • Switched to uv for installing development dependencies. ([#3550](https://github.com/urllib3/urllib3/issues/3550) <https://github.com/urllib3/urllib3/issues/3550>__)
  • Removed the multiple.intoto.jsonl asset from GitHub releases. Attestation of release files since v2.3.0 can be found on PyPI. ([#3566](https://github.com/urllib3/urllib3/issues/3566) <https://github.com/urllib3/urllib3/issues/3566>__)

2.3.0 (2024-12-22)

... (truncated)

Commits
  • aaab4ec Release 2.5.0
  • 7eb4a2a Merge commit from fork
  • f05b132 Merge commit from fork
  • d03fe32 Fix HTTP tunneling with IPv6 in older Python versions
  • 11661e9 Bump github/codeql-action from 3.28.0 to 3.29.0 (#3624)
  • 6a0ecc6 Update v2 migration guide to 2.4.0 (#3621)
  • 8e32e60 Raise exception for shutdown on a connection already released to the pool (#3...
  • 9996e0f Fix emscripten CI for Chrome 137+ (#3599)
  • 4fd1a99 Bump RECENT_DATE (#3617)
  • c4b5917 Add support for the new compression.zstd module in Python 3.14 (#3611)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=urllib3&package-manager=pip&previous-version=2.2.2&new-version=2.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/microsoft/DirectXShaderCompiler/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- utils/git/requirements_formatting.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 14123e4ac0..2afb003c4f 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -46,7 +46,7 @@ requests==2.32.4 # via pygithub toml==0.10.2 # via darker -urllib3==2.2.2 +urllib3==2.5.0 # via requests wrapt==1.15.0 # via deprecated From 94abfe972ad839185965f670329bcf33cd7bccbd Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Thu, 26 Jun 2025 13:04:21 -0700 Subject: [PATCH 79/93] Address compiler warnings: Enable warning C4146 as a break. (#7587) Addresses #7584 by removing the warning disable for 4146. Also includes a few trivial fixes for C4146 across several files that were missed in previous PRs. --- cmake/modules/HandleLLVMOptions.cmake | 1 - include/llvm/ADT/IntervalMap.h | 6 +++++- lib/Analysis/LoopAccessAnalysis.cpp | 4 ++-- lib/Transforms/Scalar/LoadCombine.cpp | 2 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +++--- tools/clang/lib/AST/SelectorLocationsKind.cpp | 4 ++-- tools/clang/lib/CodeGen/ItaniumCXXABI.cpp | 2 +- tools/clang/unittests/HLSLExec/ExecutionTest.cpp | 2 +- tools/clang/unittests/HLSLExec/ShaderOpTest.cpp | 2 +- tools/clang/unittests/HLSLExec/ShaderOpTest.h | 2 +- unittests/ADT/APIntTest.cpp | 3 ++- unittests/ADT/BitVectorTest.cpp | 7 +++++-- unittests/Support/DataExtractorTest.cpp | 6 ++++-- 13 files changed, 28 insertions(+), 19 deletions(-) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index acf76c2907..00bdaed363 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -301,7 +301,6 @@ if( MSVC ) set(msvc_warning_flags # Disabled warnings. 
- -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 2a00667227..5bb948727e 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -320,7 +320,11 @@ class NodeBase { return Count; } else { // We want to shrink, copy to sib. - unsigned Count = std::min(std::min(unsigned(-Add), Size), N - SSize); + // Count <= INT_MAX: Since Add is an int, unsigned(-Add) <= 2^31, so + // std::min result <= INT_MAX. Meaning its safe to store the result in an + // int to avoid the compiler warning for '-Count' if we were to use an + // unsigned value instead. + int Count = std::min(std::min(unsigned(-Add), Size), N - SSize); transferToLeftSib(Size, Sib, SSize, Count); return -Count; } diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 7e5e3e5ebd..d855df32dc 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1677,8 +1677,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const ValueToValueMap &Strides) : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false), - StoreToLoopInvariantAddress(false) { + MaxSafeDepDistBytes(std::numeric_limits::max()), + CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 8f22bb337d..fb48513c18 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ 
b/lib/Transforms/Scalar/LoadCombine.cpp @@ -186,7 +186,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { // Find first load. This is where we put the new load. LoadPOPPair FirstLP; - FirstLP.InsertOrder = -1u; + FirstLP.InsertOrder = std::numeric_limits::max(); for (const auto &L : Loads) if (L.InsertOrder < FirstLP.InsertOrder) FirstLP = L; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 69ca2688c8..d8e8fa11bd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4472,8 +4472,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { unsigned WidestType = getWidestType(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); - unsigned MaxSafeDepDist = -1U; - if (Legal->getMaxSafeDepDistBytes() != -1U) + unsigned MaxSafeDepDist = std::numeric_limits::max(); + if (Legal->getMaxSafeDepDistBytes() != std::numeric_limits::max()) MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; WidestRegister = ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); @@ -4638,7 +4638,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // We used the distance for the interleave count. - if (Legal->getMaxSafeDepDistBytes() != -1U) + if (Legal->getMaxSafeDepDistBytes() != std::numeric_limits::max()) return 1; // Do not interleave loops with a relatively small trip count. diff --git a/tools/clang/lib/AST/SelectorLocationsKind.cpp b/tools/clang/lib/AST/SelectorLocationsKind.cpp index 671207a7f2..36fd8cea6e 100644 --- a/tools/clang/lib/AST/SelectorLocationsKind.cpp +++ b/tools/clang/lib/AST/SelectorLocationsKind.cpp @@ -28,7 +28,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, if (EndLoc.isInvalid()) return SourceLocation(); IdentifierInfo *II = Sel.getIdentifierInfoForSlot(0); - unsigned Len = II ? II->getLength() : 0; + int Len = II ? 
II->getLength() : 0; return EndLoc.getLocWithOffset(-Len); } @@ -36,7 +36,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, if (ArgLoc.isInvalid()) return SourceLocation(); IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Index); - unsigned Len = /* selector id */ (II ? II->getLength() : 0) + /* ':' */ 1; + int Len = /* selector id */ (II ? II->getLength() : 0) + /* ':' */ 1; if (WithArgSpace) ++Len; return ArgLoc.getLocWithOffset(-Len); diff --git a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 698d34c774..f39ec6d497 100644 --- a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1090,7 +1090,7 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo()); // Load the type info. - Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); + Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1LL); return CGF.Builder.CreateLoad(Value); } diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index c26b9a1b5b..586c55328d 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -11,7 +11,7 @@ /////////////////////////////////////////////////////////////////////////////// // We need to keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389 4018) +#pragma warning(error : 4100 4242 4244 4267 4701 4389 4018) // *** THIS FILE CANNOT TAKE ANY LLVM DEPENDENCIES *** // diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp index 9e18351a6d..60ce3a9241 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp @@ -10,7 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// // We need to 
keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389) +#pragma warning(error : 4100 4242 4244 4267 4701 4389) #include "d3dx12.h" #include diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.h b/tools/clang/unittests/HLSLExec/ShaderOpTest.h index 52b5f37730..e8298fc8d9 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.h +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.h @@ -26,7 +26,7 @@ #include // We need to keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389) +#pragma warning(error : 4100 4242 4244 4267 4701 4389) /////////////////////////////////////////////////////////////////////////////// // Forward declarations. diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp index ffba7b1633..a15307023e 100644 --- a/unittests/ADT/APIntTest.cpp +++ b/unittests/ADT/APIntTest.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "gtest/gtest.h" #include +#include #include using namespace llvm; @@ -753,7 +754,7 @@ TEST(APIntTest, StringDeath) { #endif TEST(APIntTest, mul_clear) { - APInt ValA(65, -1ULL); + APInt ValA(65, std::numeric_limits::max()); APInt ValB(65, 4); APInt ValC(65, 0); ValC = ValA * ValB; diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp index 26f103b3c1..c7de9194c4 100644 --- a/unittests/ADT/BitVectorTest.cpp +++ b/unittests/ADT/BitVectorTest.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallBitVector.h" #include "gtest/gtest.h" +#include using namespace llvm; @@ -73,7 +74,8 @@ TYPED_TEST(BitVectorTest, TrivialOperation) { Vec.resize(33, true); Vec.resize(57, false); unsigned Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { + for (unsigned i = Vec.find_first(); i != std::numeric_limits::max(); + i = Vec.find_next(i)) { ++Count; EXPECT_TRUE(Vec[i]); EXPECT_TRUE(Vec.test(i)); @@ -103,7 +105,8 @@ 
TYPED_TEST(BitVectorTest, TrivialOperation) { Vec.resize(91, true); Vec.resize(130, false); Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { + for (unsigned i = Vec.find_first(); i != std::numeric_limits::max(); + i = Vec.find_next(i)) { ++Count; EXPECT_TRUE(Vec[i]); EXPECT_TRUE(Vec.test(i)); diff --git a/unittests/Support/DataExtractorTest.cpp b/unittests/Support/DataExtractorTest.cpp index 81de983d22..250b89d696 100644 --- a/unittests/Support/DataExtractorTest.cpp +++ b/unittests/Support/DataExtractorTest.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" #include "llvm/Support/DataExtractor.h" +#include "gtest/gtest.h" +#include using namespace llvm; namespace { @@ -20,7 +21,8 @@ const char bigleb128data[] = "\xAA\xA9\xFF\xAA\xFF\xAA\xFF\x4A"; TEST(DataExtractorTest, OffsetOverflow) { DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); - EXPECT_FALSE(DE.isValidOffsetForDataOfSize(-2U, 5)); + EXPECT_FALSE(DE.isValidOffsetForDataOfSize( + std::numeric_limits::max() - 1, 5)); } TEST(DataExtractorTest, UnsignedNumbers) { From a9d33d3500d37bd24c10288c76aca8e1c948d4a2 Mon Sep 17 00:00:00 2001 From: Ashley Coleman Date: Mon, 30 Jun 2025 17:03:14 -0700 Subject: [PATCH 80/93] [NFC] Clear C33010 Warning (#7603) Resolves `C33010` which is currently blocking the release pipeline. 
--------- Co-authored-by: github-actions[bot] --- lib/HLSL/DxilCondenseResources.cpp | 2 +- tools/clang/include/clang/Sema/Overload.h | 15 ++++++++------- tools/clang/lib/Format/FormatToken.h | 6 +++--- tools/clang/lib/Sema/SemaOverload.cpp | 4 ++-- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 529c203bdc..09dd9cea64 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -655,7 +655,7 @@ class ResourceUseErrors { public: ResourceUseErrors() : m_bErrorsReported(false) {} - enum ErrorCode { + enum ErrorCode : unsigned int { // Collision between use of one resource GV and another. // All uses must be guaranteed to resolve to only one GV. // Additionally, when writing resource to alloca, all uses diff --git a/tools/clang/include/clang/Sema/Overload.h b/tools/clang/include/clang/Sema/Overload.h index 89de4ce984..473af49cab 100644 --- a/tools/clang/include/clang/Sema/Overload.h +++ b/tools/clang/include/clang/Sema/Overload.h @@ -57,7 +57,7 @@ namespace clang { /// convert an argument to a parameter's type. The enumerator values /// match with Table 9 of (C++ 13.3.3.1.1) and are listed such that /// better conversion kinds have smaller values. 
- enum ImplicitConversionKind { + enum ImplicitConversionKind : unsigned int { ICK_Identity = 0, ///< Identity conversion (no conversion) ICK_Lvalue_To_Rvalue, ///< Lvalue-to-rvalue conversion (C++ 4.1) ICK_Array_To_Pointer, ///< Array-to-pointer conversion (C++ 4.2) @@ -79,27 +79,28 @@ namespace clang { ICK_Vector_Conversion, ///< Vector conversions ICK_Vector_Splat, ///< A vector splat from an arithmetic type ICK_Complex_Real, ///< Complex-real conversions (C99 6.3.1.7) - ICK_Block_Pointer_Conversion, ///< Block Pointer conversions + ICK_Block_Pointer_Conversion, ///< Block Pointer conversions ICK_TransparentUnionConversion, ///< Transparent Union Conversions - ICK_Writeback_Conversion, ///< Objective-C ARC writeback conversion + ICK_Writeback_Conversion, ///< Objective-C ARC writeback conversion ICK_Zero_Event_Conversion, ///< Zero constant to event (OpenCL1.2 6.12.10) // HLSL Change Starts - // The following conversion types also imply a potential followup + // The following conversion types also imply a potential followup // ComponentConversion. // List is roughly ordered to preserve the property: // "better conversion kinds have smaller values" - // Unfortunately, this property isn't really possible to preserve due + // Unfortunately, this property isn't really possible to preserve due // to potential additional component conversion. 
ICK_HLSLVector_Scalar, ///< HLSLVector/Matrix to scalar ICK_HLSLVector_Conversion, ///< HLSLVector/Matrix conversion - ICK_Flat_Conversion, ///< Flat assignment conversion for HLSL (inline conversion, straddled) + ICK_Flat_Conversion, ///< Flat assignment conversion for HLSL (inline + ///< conversion, straddled) ICK_HLSLVector_Splat, ///< HLSLVector/Matrix splat ICK_HLSLVector_Truncation, ///< HLSLVector/Matrix truncation ICK_HLSL_Derived_To_Base, ///< HLSL Derived-to-base // HLSL Change Ends - ICK_Num_Conversion_Kinds ///< The number of conversion kinds + ICK_Num_Conversion_Kinds ///< The number of conversion kinds }; /// ImplicitConversionRank - The rank of an implicit conversion diff --git a/tools/clang/lib/Format/FormatToken.h b/tools/clang/lib/Format/FormatToken.h index f335eda086..249d526871 100644 --- a/tools/clang/lib/Format/FormatToken.h +++ b/tools/clang/lib/Format/FormatToken.h @@ -86,11 +86,11 @@ namespace format { TYPE(UnaryOperator) \ TYPE(Unknown) -enum TokenType { +enum TokenType : unsigned int { #define TYPE(X) TT_##X, -LIST_TOKEN_TYPES + LIST_TOKEN_TYPES #undef TYPE - NUM_TOKEN_TYPES + NUM_TOKEN_TYPES }; /// \brief Determines the name of a token type. 
diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 636eaf0213..1bcbc7442f 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -146,8 +146,8 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) { }; static_assert(_countof(Rank) == ICK_Num_Conversion_Kinds, "Otherwise, GetConversionRank is out of sync with ImplicitConversionKind"); // HLSL Change - assert((int)Kind < (int)ICK_Num_Conversion_Kinds); // HLSL Change - return Rank[(int)Kind]; + assert(Kind < _countof(Rank)); // HLSL Change + return Rank[Kind]; // HLSL Change } /// GetImplicitConversionName - Return the name of this kind of From 7e0d771197110c10a39d279dc6a3c3c213c056d3 Mon Sep 17 00:00:00 2001 From: Jeff Noyle Date: Tue, 1 Jul 2025 13:17:08 -0700 Subject: [PATCH 81/93] PIX: Report correct bitfield values in PIX shader debugger (#7557) The key change here is the & in DxcDxilPixStorage.cpp. The generated DXIL packs the bitfields into their 32- or 64-bit-typed Values as expected, but this code, when trying to figure out which Value a bitfield lives in, was looking up the unpacked bit offset, so only fields within the zeroth underlying Value were being reported correctly. With this change, PIX reports correct bitfield values wherever they live, including within deeply nested structs. Unfortunately, the tests had to be in C++ because file-check obv. doesn't run the APIs that PIX uses to read debug data. 
--------- Co-authored-by: github-actions[bot] --- lib/DxilDia/DxcPixDxilStorage.cpp | 6 +- tools/clang/unittests/HLSL/PixDiaTest.cpp | 167 +++++++++++++++++----- 2 files changed, 138 insertions(+), 35 deletions(-) diff --git a/lib/DxilDia/DxcPixDxilStorage.cpp b/lib/DxilDia/DxcPixDxilStorage.cpp index 79d21303dc..4b06f472e8 100644 --- a/lib/DxilDia/DxcPixDxilStorage.cpp +++ b/lib/DxilDia/DxcPixDxilStorage.cpp @@ -185,7 +185,11 @@ dxil_debug_info::DxcPixDxilScalarStorage::Index(DWORD Index, STDMETHODIMP dxil_debug_info::DxcPixDxilScalarStorage::GetRegisterNumber( DWORD *pRegisterNumber) { const auto &ValueLocationMap = m_pVarInfo->m_ValueLocationMap; - auto RegIt = ValueLocationMap.find(m_OffsetFromStorageStartInBits); + // Bitfields will have been packed into their containing integer type: + DWORD size; + m_pOriginalType->GetSizeInBits(&size); + auto RegIt = + ValueLocationMap.find(m_OffsetFromStorageStartInBits & ~(size - 1)); if (RegIt == ValueLocationMap.end()) { return E_FAIL; diff --git a/tools/clang/unittests/HLSL/PixDiaTest.cpp b/tools/clang/unittests/HLSL/PixDiaTest.cpp index a4439b998d..d36e762762 100644 --- a/tools/clang/unittests/HLSL/PixDiaTest.cpp +++ b/tools/clang/unittests/HLSL/PixDiaTest.cpp @@ -13,6 +13,7 @@ #ifdef _WIN32 #include +#include #include "dxc/DxilContainer/DxilContainer.h" #include "dxc/Support/WinIncludes.h" @@ -186,6 +187,7 @@ class PixDiaTest { TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Derived) TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Bool) TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Overlap) + TEST_METHOD(DxcPixDxilDebugInfo_BitFields_uint64) TEST_METHOD(DxcPixDxilDebugInfo_Min16SizesAndOffsets_Enabled) TEST_METHOD(DxcPixDxilDebugInfo_Min16SizesAndOffsets_Disabled) TEST_METHOD(DxcPixDxilDebugInfo_Min16VectorOffsets_Enabled) @@ -658,11 +660,11 @@ class PixDiaTest { const char *hlsl, const wchar_t *profile, const char *lineAtWhichToExamineVariables, std::vector const &ExpectedVariables); - void RunSizeAndOffsetTestCase(const char 
*hlsl, - std::array const &memberOffsets, - std::array const &memberSizes, - std::vector extraArgs = { - L"-Od"}); + CComPtr + RunSizeAndOffsetTestCase(const char *hlsl, + std::array const &memberOffsets, + std::array const &memberSizes, + std::vector extraArgs = {L"-Od"}); void RunVectorSizeAndOffsetTestCase(const char *hlsl, std::array const &memberOffsets, std::vector extraArgs = { @@ -2948,12 +2950,11 @@ void main() VERIFY_ARE_EQUAL(32u, secondFieldOffset); } -void PixDiaTest::RunSizeAndOffsetTestCase( - const char *hlsl, std::array const &memberOffsets, - std::array const &memberSizes, - std::vector extraArgs) { - if (m_ver.SkipDxilVersion(1, 5)) - return; +CComPtr +PixDiaTest::RunSizeAndOffsetTestCase(const char *hlsl, + std::array const &memberOffsets, + std::array const &memberSizes, + std::vector extraArgs) { auto debugInfo = CompileAndCreateDxcDebug(hlsl, L"cs_6_5", nullptr, extraArgs).debugInfo; auto live = GetLiveVariablesAt(hlsl, "STOP_HERE", debugInfo); @@ -2974,9 +2975,46 @@ void PixDiaTest::RunSizeAndOffsetTestCase( VERIFY_SUCCEEDED(field->GetFieldSizeInBits(&sizeInBits)); VERIFY_ARE_EQUAL(memberSizes[i], sizeInBits); } + // Check that first and second and third are reported as residing in the same + // register (cuz they do!), and that the third does not + + CComPtr bfStorage; + VERIFY_SUCCEEDED(bf->GetStorage(&bfStorage)); + return bfStorage; +} + +void RunBitfieldAdjacencyTest( + IDxcPixDxilStorage *bfStorage, + std::vector> const &adjacentRuns) { + std::vector> registersByRun; + registersByRun.resize(adjacentRuns.size()); + for (size_t run = 0; run < adjacentRuns.size(); ++run) { + for (auto const &field : adjacentRuns[run]) { + CComPtr fieldStorage; + VERIFY_SUCCEEDED(bfStorage->AccessField(field, &fieldStorage)); + DWORD reg; + VERIFY_SUCCEEDED(fieldStorage->GetRegisterNumber(®)); + registersByRun[run].insert(reg); + } + } + for (size_t run = 0; run < registersByRun.size(); ++run) { + { + // Every field in this run should have the same 
register number, so this + // set should be of size 1: + VERIFY_ARE_EQUAL(1, registersByRun[run].size()); + // Every adjacent run should have different register numbers: + if (run != 0) { + VERIFY_ARE_NOT_EQUAL(*registersByRun[run - 1].begin(), + *registersByRun[run].begin()); + } + } + } } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Simple) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { @@ -3000,10 +3038,16 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second"}, {L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Derived) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { @@ -3027,10 +3071,16 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second"}, {L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Bool) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { @@ -3054,17 +3104,58 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 1, 2, 32}, {1, 1, 3, 32}); + auto bfStorage = RunSizeAndOffsetTestCase(hlsl, {0, 1, 2, 32}, {1, 1, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second", L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Overlap) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + + const char *hlsl = R"( +struct Bitfields +{ + uint32_t first : 20; + uint32_t second : 20; // should end up in second DWORD + uint32_t third : 3; // should shader second DWORD + uint32_t fourth; // should be in third DWORD +}; + +RWStructuredBuffer UAV: register(u0); + +[numthreads(1, 1, 1)] 
+void main() +{ + Bitfields bf; + bf.first = UAV[0]; + bf.second = UAV[1]; + bf.third = UAV[2]; + bf.fourth = UAV[3]; + UAV[16] = bf.first + bf.second + bf.third + bf.fourth; //STOP_HERE +} + +)"; + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 32, 52, 64}, {20, 20, 3, 32}); + // (PIX #58022343): fields that overlap their storage type are not yet + // reflected properly in terms of their packed offsets as maintained via + // these PixDxc interfaces based on the dbg.declare data + // RunBitfieldAdjacencyTest(bfStorage, + // {{L"first"}, {L"second", L"third"}, {L"fourth"}}); +} + +TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_uint64) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { - unsigned int first : 20; - unsigned int second : 20; // should end up in second DWORD - unsigned int third : 3; // should shader second DWORD - unsigned int fourth; // should be in third DWORD + uint64_t first : 20; + uint64_t second : 20; // should end up in first uint64 also + uint64_t third : 24; // in first + uint64_t fourth; // should be in second }; RWStructuredBuffer UAV: register(u0); @@ -3081,7 +3172,10 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 32, 52, 64}, {20, 20, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 20, 40, 64}, {20, 20, 24, 64}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second", L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_Alignment_ConstInt) { @@ -3502,9 +3596,10 @@ void ClosestHitShader3(inout RayPayload payload, in BuiltInTriangleIntersectionA // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. 
+ // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"ClosestHitShader3"); instructionOffset = AdvanceUntilFunctionEntered( @@ -3550,9 +3645,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_ForScopes) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3597,9 +3693,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_ScopeBraces) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. 
DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3644,9 +3741,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_Function) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3692,9 +3790,10 @@ void CSMain() // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); From 2da0a54f150f51bd6a2b85fd4cc76bdfd614219e Mon Sep 17 00:00:00 2001 From: Alex Sepkowski <5620315+alsepkow@users.noreply.github.com> Date: Tue, 1 Jul 2025 17:12:36 -0700 Subject: [PATCH 82/93] Long Vector Execution Tests: Merge unary and binary op tests to main (#7549) **Summary** Adds infrastructure for long vector execution tests. This code and additional test cases were already added to the staging-sm6.9 branch. 
This is the second of several PRs to bring these changes into main. That being said, reviews of this code should treat it as brand new. Resolves #7545 **Includes:** - A new test class `LongVector::OpTest` in `LongVectors.h/cpp`, still part of the `ExecHLSLTests.dll` binary. - HLSL source added to `ShaderOpArith.xml` to leverage the existing exec test framework for shader compilation and execution. - A new TAEF metadata file `LongVectorOpTable.xml` defining long vector test cases. - `LongVectorTestData.h` for statically defined input values, including `HLSLHalf_t` and `HLSLBool_t`. This avoids duplicating values across test cases. **Template Handling** To support template instantiation across translation units, `LongVectors.tpp` contains full template definitions included by `LongVectors.h`. These were originally required when tests lived in `ExecutionTests.cpp`. Now that the tests are isolated, the plan is to move the definitions back into `LongVectors.cpp` after merging the long vector tests from `staging-sm6.9` to simplify the manual merge. **Utilities** `HlslTestUtils.h` includes minor updates to support the new test scenarios. 
--- include/dxc/Test/HlslTestUtils.h | 95 +++- tools/clang/unittests/HLSLExec/CMakeLists.txt | 1 + .../clang/unittests/HLSLExec/ExecHLSLTests.rc | 3 +- .../unittests/HLSLExec/LongVectorOpTable.xml | 515 ++++++++++++++++++ .../unittests/HLSLExec/LongVectorTestData.h | 74 +++ .../clang/unittests/HLSLExec/LongVectors.cpp | 316 +++++++++++ tools/clang/unittests/HLSLExec/LongVectors.h | 282 ++++++++++ .../clang/unittests/HLSLExec/LongVectors.tpp | 476 ++++++++++++++++ .../unittests/HLSLExec/ShaderOpArith.xml | 67 +++ 9 files changed, 1819 insertions(+), 10 deletions(-) create mode 100644 tools/clang/unittests/HLSLExec/LongVectorOpTable.xml create mode 100644 tools/clang/unittests/HLSLExec/LongVectorTestData.h create mode 100644 tools/clang/unittests/HLSLExec/LongVectors.cpp create mode 100644 tools/clang/unittests/HLSLExec/LongVectors.h create mode 100644 tools/clang/unittests/HLSLExec/LongVectors.tpp diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h index 44f3f6148a..dd89fda676 100644 --- a/include/dxc/Test/HlslTestUtils.h +++ b/include/dxc/Test/HlslTestUtils.h @@ -260,6 +260,29 @@ inline void LogErrorFmt(const wchar_t *fmt, ...) { WEX::Logging::Log::Error(buf.data()); } +inline void LogErrorFmtThrow(const char *fileName, int line, const wchar_t *fmt, + ...) { + va_list args; + va_start(args, fmt); + std::wstring buf(vFormatToWString(fmt, args)); + va_end(args); + + std::wstringstream wss; + wss << L"Error in file: " << fileName << L" at line: " << line << L"\n" + << buf.data() << L"\n" + << buf; + + WEX::Logging::Log::Error(wss.str().c_str()); + + // Throws an exception to abort the test. + VERIFY_FAIL(L"Test error"); +} + +// Macro to pass the file name and line number. Otherwise TAEF prints this file +// and line number. +#define LOG_ERROR_FMT_THROW(fmt, ...) 
\ + hlsl_test::LogErrorFmtThrow(__FILE__, __LINE__, fmt, __VA_ARGS__) + inline std::wstring GetPathToHlslDataFile(const wchar_t *relative, LPCWSTR paramName = HLSLDATAFILEPARAM, @@ -461,15 +484,17 @@ inline bool GetTestParamUseWARP(bool defaultVal) { #ifdef FP_SUBNORMAL -inline bool isdenorm(float f) { return FP_SUBNORMAL == std::fpclassify(f); } +template inline bool isdenorm(T f) { + return FP_SUBNORMAL == std::fpclassify(f); +} #else -inline bool isdenorm(float f) { - return (std::numeric_limits::denorm_min() <= f && - f < std::numeric_limits::min()) || - (-std::numeric_limits::min() < f && - f <= -std::numeric_limits::denorm_min()); +template inline bool isdenorm(T f) { + return (std::numeric_limits::denorm_min() <= f && + f < std::numeric_limits::min()) || + (-std::numeric_limits::min() < f && + f <= -std::numeric_limits::denorm_min()); } #endif // FP_SUBNORMAL @@ -517,6 +542,44 @@ inline bool isnanFloat16(uint16_t val) { uint16_t ConvertFloat32ToFloat16(float val) throw(); float ConvertFloat16ToFloat32(uint16_t val) throw(); +inline bool CompareDoubleULP( + const double &Src, const double &Ref, int64_t ULPTolerance, + hlsl::DXIL::Float32DenormMode Mode = hlsl::DXIL::Float32DenormMode::Any) { + if (Src == Ref) { + return true; + } + if (std::isnan(Src)) { + return std::isnan(Ref); + } + + if (Mode == hlsl::DXIL::Float32DenormMode::Any) { + // If denorm expected, output can be sign preserved zero. Otherwise output + // should pass the regular ulp testing. + if (isdenorm(Ref) && Src == 0 && std::signbit(Src) == std::signbit(Ref)) + return true; + } + + // For FTZ or Preserve mode, we should get the expected number within + // ULPTolerance for any operations. + int64_t Diff = *((const uint64_t *)&Src) - *((const uint64_t *)&Ref); + + uint64_t AbsoluteDiff = Diff < 0 ? 
-Diff : Diff; + return AbsoluteDiff <= (uint64_t)ULPTolerance; +} + +inline bool CompareDoubleEpsilon(const double &Src, const double &Ref, + float Epsilon) { + if (Src == Ref) { + return true; + } + if (std::isnan(Src)) { + return std::isnan(Ref); + } + // For FTZ or Preserve mode, we should get the expected number within + // epsilon for any operations. + return fabs(Src - Ref) < Epsilon; +} + inline bool CompareFloatULP( const float &fsrc, const float &fref, int ULPTolerance, hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { @@ -568,12 +631,26 @@ inline bool CompareFloatRelativeEpsilon( inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref, float ULPTolerance) { + // Treat +0 and -0 as equal + if ((fsrc & ~FLOAT16_BIT_SIGN) == 0 && (fref & ~FLOAT16_BIT_SIGN) == 0) + return true; if (fsrc == fref) return true; - if (isnanFloat16(fsrc)) - return isnanFloat16(fref); + + const bool nanRef = isnanFloat16(fref); + const bool nanSrc = isnanFloat16(fsrc); + if (nanRef || nanSrc) + return nanRef && nanSrc; + + // Map to monotonic ordering for correct ULP diff + auto toOrdered = [](uint16_t h) -> int { + return (h & FLOAT16_BIT_SIGN) ? (~h & 0xFFFF) : (h | 0x8000); + }; + // 16-bit floating point numbers must preserve denorms - int diff = fsrc - fref; + int i_fsrc = toOrdered(fsrc); + int i_fref = toOrdered(fref); + int diff = i_fsrc - i_fref; unsigned int uDiff = diff < 0 ? 
-diff : diff; return uDiff <= (unsigned int)ULPTolerance; } diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index df61aad854..b490ac94e9 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_library(ExecHLSLTests SHARED ExecutionTest.cpp ShaderOpTest.cpp TableParameterHandler.cpp + LongVectors.cpp ExecHLSLTests.rc ) diff --git a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc index 6f4659910c..29459ee825 100644 --- a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc +++ b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc @@ -1,3 +1,4 @@ #include -ShaderOpArithTable.xml DATASOURCE_XML "ShaderOpArithTable.xml" \ No newline at end of file +ShaderOpArithTable.xml DATASOURCE_XML "ShaderOpArithTable.xml" +LongVectorOpTable.xml DATASOURCE_XML "LongVectorOpTable.xml" diff --git a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml new file mode 100644 index 0000000000..39a2fa481e --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml @@ -0,0 +1,515 @@ + + + + + + String + + String + String + String + + + + BinaryOpType_ScalarAdd + int16 + + + BinaryOpType_Add + int16 + + + BinaryOpType_ScalarSubtract + int16 + + + BinaryOpType_Subtract + int16 + + + BinaryOpType_ScalarMultiply + int16 + + + BinaryOpType_Multiply + int16 + + + BinaryOpType_ScalarDivide + int16 + + + BinaryOpType_Divide + int16 + + + BinaryOpType_ScalarModulus + int16 + + + BinaryOpType_Modulus + int16 + + + BinaryOpType_ScalarMin + int16 + + + BinaryOpType_Min + int16 + + + BinaryOpType_ScalarMax + int16 + + + BinaryOpType_Max + int16 + + + + BinaryOpType_ScalarAdd + int32 + + + BinaryOpType_Add + int32 + + + BinaryOpType_ScalarSubtract + int32 + + + BinaryOpType_Subtract + int32 + + + BinaryOpType_ScalarMultiply + int32 + + + BinaryOpType_Multiply + 
int32 + + + BinaryOpType_ScalarDivide + int32 + + + BinaryOpType_Divide + int32 + + + BinaryOpType_ScalarModulus + int32 + + + BinaryOpType_Modulus + int32 + + + BinaryOpType_ScalarMin + int32 + + + BinaryOpType_Min + int32 + + + BinaryOpType_ScalarMax + int32 + + + BinaryOpType_Max + int32 + + + + BinaryOpType_ScalarAdd + int64 + + + BinaryOpType_Add + int64 + + + BinaryOpType_ScalarSubtract + int64 + + + BinaryOpType_Subtract + int64 + + + BinaryOpType_ScalarMultiply + int64 + + + BinaryOpType_Multiply + int64 + + + BinaryOpType_ScalarDivide + int64 + + + BinaryOpType_Divide + int64 + + + BinaryOpType_ScalarModulus + int64 + + + BinaryOpType_Modulus + int64 + + + BinaryOpType_ScalarMin + int64 + + + BinaryOpType_Min + int64 + + + BinaryOpType_ScalarMax + int64 + + + BinaryOpType_Max + int64 + + + + BinaryOpType_ScalarAdd + uint16 + + + BinaryOpType_Add + uint16 + + + BinaryOpType_ScalarSubtract + uint16 + + + BinaryOpType_Subtract + uint16 + + + BinaryOpType_ScalarMultiply + uint16 + + + BinaryOpType_Multiply + uint16 + + + BinaryOpType_ScalarDivide + uint16 + + + BinaryOpType_Divide + uint16 + + + BinaryOpType_ScalarModulus + uint16 + + + BinaryOpType_Modulus + uint16 + + + BinaryOpType_ScalarMin + uint16 + + + BinaryOpType_Min + uint16 + + + BinaryOpType_ScalarMax + uint16 + + + BinaryOpType_Max + uint16 + + + + BinaryOpType_ScalarAdd + uint32 + + + BinaryOpType_Add + uint32 + + + BinaryOpType_ScalarSubtract + uint32 + + + BinaryOpType_Subtract + uint32 + + + BinaryOpType_ScalarMultiply + uint32 + + + BinaryOpType_Multiply + uint32 + + + BinaryOpType_ScalarDivide + uint32 + + + BinaryOpType_Divide + uint32 + + + BinaryOpType_ScalarModulus + uint32 + + + BinaryOpType_Modulus + uint32 + + + BinaryOpType_ScalarMin + uint32 + + + BinaryOpType_Min + uint32 + + + BinaryOpType_ScalarMax + uint32 + + + BinaryOpType_Max + uint32 + + + + BinaryOpType_ScalarAdd + uint64 + + + BinaryOpType_Add + uint64 + + + BinaryOpType_ScalarSubtract + uint64 + + + BinaryOpType_Subtract 
+ uint64 + + + BinaryOpType_ScalarMultiply + uint64 + + + BinaryOpType_Multiply + uint64 + + + BinaryOpType_ScalarDivide + uint64 + + + BinaryOpType_Divide + uint64 + + + BinaryOpType_ScalarModulus + uint64 + + + BinaryOpType_Modulus + uint64 + + + BinaryOpType_ScalarMin + uint64 + + + BinaryOpType_Min + uint64 + + + BinaryOpType_ScalarMax + uint64 + + + BinaryOpType_Max + uint64 + + + + BinaryOpType_ScalarAdd + float32 + + + BinaryOpType_Add + float32 + + + BinaryOpType_ScalarSubtract + float32 + + + BinaryOpType_Subtract + float32 + + + BinaryOpType_ScalarMultiply + float32 + + + BinaryOpType_Multiply + float32 + + + BinaryOpType_ScalarDivide + float32 + + + BinaryOpType_Divide + float32 + + + BinaryOpType_ScalarModulus + float32 + + + BinaryOpType_Modulus + float32 + + + BinaryOpType_ScalarMin + float32 + + + BinaryOpType_Min + float32 + + + BinaryOpType_ScalarMax + float32 + + + BinaryOpType_Max + float32 + + + + BinaryOpType_ScalarAdd + float64 + + + BinaryOpType_Add + float64 + + + BinaryOpType_ScalarSubtract + float64 + + + BinaryOpType_Subtract + float64 + + + BinaryOpType_ScalarMultiply + float64 + + + BinaryOpType_Multiply + float64 + + + BinaryOpType_ScalarDivide + float64 + + + BinaryOpType_Divide + float64 + + + BinaryOpType_ScalarMin + float64 + + + BinaryOpType_Min + float64 + + + BinaryOpType_ScalarMax + float64 + + + BinaryOpType_Max + float64 + +
+ + + + String + String + String + + + + UnaryOpType_Initialize + int16 + + + + UnaryOpType_Initialize + int32 + + + + UnaryOpType_Initialize + int64 + + + + UnaryOpType_Initialize + uint16 + + + + UnaryOpType_Initialize + uint32 + + + + UnaryOpType_Initialize + uint64 + + + + UnaryOpType_Initialize + float32 + + + + UnaryOpType_Initialize + float64 + +
+
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h new file mode 100644 index 0000000000..002c765609 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -0,0 +1,74 @@ +#ifndef LONGVECTORTESTDATA_H +#define LONGVECTORTESTDATA_H + +#include +#include +#include +#include +#include + +template struct LongVectorTestData { + static const std::map> Data; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 1, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -6, -3, -2, 9, 3, 1, -3, -7, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 1, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -6, -3, -2, 9, 3, 1, -3, -7, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 11, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -1337, -3, -2, 9, 3, 1, -3, 501, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 699, 3, 1023, 5, 6, 0, 8, 9, 10}}, + {L"DefaultInputValueSet2", {2, 111, 3, 4, 5, 9, 21, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 2, 3, 4, 5, 0, 7, 8, 9, 10}}, + {L"DefaultInputValueSet2", {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 2, 3, 4, 5, 0, 7, 1000, 9, 10}}, + {L"DefaultInputValueSet2", {1, 2, 1337, 4, 5, 6, 7, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultInputValueSet2", + 
{1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultInputValueSet2", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + }; +}; + +#endif // LONGVECTORTESTDATA_H diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp new file mode 100644 index 0000000000..54e5224798 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -0,0 +1,316 @@ +#include "LongVectors.h" +#include "HlslExecTestUtils.h" +#include + +LongVector::BinaryOpType +LongVector::getBinaryOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + binaryOpTypeStringToEnumMap, OpTypeString, + std::size(binaryOpTypeStringToEnumMap)); +} + +LongVector::UnaryOpType +LongVector::getUnaryOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + unaryOpTypeStringToEnumMap, OpTypeString, + std::size(unaryOpTypeStringToEnumMap)); +} + +// These are helper arrays to be used with the TableParameterHandler that parses +// the LongVectorOpTable.xml file for us. +static TableParameter BinaryOpParameters[] = { + {L"DataType", TableParameter::STRING, true}, + {L"OpTypeEnum", TableParameter::STRING, true}, + {L"InputValueSetName1", TableParameter::STRING, false}, + {L"InputValueSetName2", TableParameter::STRING, false}, +}; + +static TableParameter UnaryOpParameters[] = { + {L"DataType", TableParameter::STRING, true}, + {L"OpTypeEnum", TableParameter::STRING, true}, + {L"InputValueSetName1", TableParameter::STRING, false}, +}; + +bool LongVector::OpTest::classSetup() { + // Run this only once. 
+ if (!Initialized) { + Initialized = true; + + HMODULE Runtime = LoadLibraryW(L"d3d12.dll"); + if (Runtime == NULL) + return false; + // Do not: FreeLibrary(hRuntime); + // If we actually free the library, it defeats the purpose of + // enableAgilitySDK and enableExperimentalMode. + + HRESULT HR; + HR = enableAgilitySDK(Runtime); + + if (FAILED(HR)) + hlsl_test::LogCommentFmt(L"Unable to enable Agility SDK - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Agility SDK not enabled."); + else + hlsl_test::LogCommentFmt(L"Agility SDK enabled."); + + HR = enableExperimentalMode(Runtime); + if (FAILED(HR)) + hlsl_test::LogCommentFmt( + L"Unable to enable shader experimental mode - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Experimental mode not enabled."); + else + hlsl_test::LogCommentFmt(L"Experimental mode enabled."); + + HR = enableDebugLayer(); + if (FAILED(HR)) + hlsl_test::LogCommentFmt(L"Unable to enable debug layer - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Debug layer not enabled."); + else + hlsl_test::LogCommentFmt(L"Debug layer enabled."); + } + + return true; +} + +TEST_F(LongVector::OpTest, binaryOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + using namespace WEX::Common; + + const int TableSize = sizeof(BinaryOpParameters) / sizeof(TableParameter); + TableParameterHandler Handler(BinaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getBinaryOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + +TEST_F(LongVector::OpTest, unaryOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + const int TableSize = sizeof(UnaryOpParameters) / 
sizeof(TableParameter); + TableParameterHandler Handler(UnaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getUnaryOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + +template +void LongVector::OpTest::dispatchTestByDataType( + LongVectorOpTypeT OpType, std::wstring DataType, + TableParameterHandler &Handler) { + using namespace WEX::Common; + + if (DataType == L"int16") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int32") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int64") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint16") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint32") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint64") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"float32") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"float64") + dispatchTestByVectorSize(OpType, Handler); + else + VERIFY_FAIL( + String().Format(L"DataType: %s is not recognized.", DataType.c_str())); +} + +template +void LongVector::OpTest::dispatchTestByVectorSize( + LongVectorOpTypeT opType, TableParameterHandler &Handler) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + LongVector::TestConfig TestConfig(opType); + + // InputValueSetName1 is optional. So the string may be empty. An empty + // string will result in the default value set for this DataType being used. + std::wstring InputValueSet1( + Handler.GetTableParamByName(L"InputValueSetName1")->m_str); + if (!InputValueSet1.empty()) + TestConfig.setInputValueSet1(InputValueSet1); + + // InputValueSetName2 is optional. So the string may be empty. 
An empty + // string will result in the default value set for this DataType being used. + if (TestConfig.isBinaryOp()) { + std::wstring InputValueSet2( + Handler.GetTableParamByName(L"InputValueSetName2")->m_str); + if (!InputValueSet2.empty()) + TestConfig.setInputValueSet2(InputValueSet2); + } + + std::vector InputVectorSizes = {3, 4, 5, 16, 17, 35, 100, 256, 1024}; + for (auto SizeToTest : InputVectorSizes) { + testBaseMethod(TestConfig, SizeToTest); + } +} + +template +void LongVector::OpTest::testBaseMethod( + LongVector::TestConfig &TestConfig, + size_t VectorSizeToTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + hlsl_test::LogCommentFmt(L"Running LongVectorOpTestBase<%S, %zu>", + typeid(DataTypeT).name(), VectorSizeToTest); + + bool LogInputs = false; + WEX::TestExecution::RuntimeParameters::TryGetValue(L"LongVectorLogInputs", + LogInputs); + + CComPtr D3DDevice; + if (!createDevice(&D3DDevice, ExecTestUtils::D3D_SHADER_MODEL_6_9, false)) { +#ifdef _HLK_CONF + LOG_ERROR_FMT_THROW( + L"Device does not support SM 6.9. Can't run these tests."); +#else + WEX::Logging::Log::Comment( + "Device does not support SM 6.9. Can't run these tests."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; +#endif + } + + std::vector InputVector1; + InputVector1.reserve(VectorSizeToTest); + std::vector InputVector2; // May be unused, but must be defined. + InputVector2.reserve(VectorSizeToTest); + std::vector ScalarInput; // May be unused, but must be defined. + const bool IsVectorBinaryOp = + TestConfig.isBinaryOp() && !TestConfig.isScalarOp(); + + std::vector InputVector1ValueSet = TestConfig.getInputValueSet1(); + std::vector InputVector2ValueSet = + TestConfig.isBinaryOp() ? TestConfig.getInputValueSet2() + : std::vector(); + + if (TestConfig.isScalarOp()) + // Scalar ops are always binary ops. So InputVector2ValueSet is initialized + // with values above. 
+ ScalarInput.push_back(InputVector2ValueSet[0]); + + // Fill the input vectors with values from the value set. Repeat the values + // when we reach the end of the value set. + for (size_t Index = 0; Index < VectorSizeToTest; Index++) { + InputVector1.push_back( + InputVector1ValueSet[Index % InputVector1ValueSet.size()]); + + if (IsVectorBinaryOp) + InputVector2.push_back( + InputVector2ValueSet[Index % InputVector2ValueSet.size()]); + } + + std::vector ExpectedVector; + ExpectedVector.reserve(VectorSizeToTest); + if (IsVectorBinaryOp) + ExpectedVector = + computeExpectedValues(InputVector1, InputVector2, TestConfig); + else if (TestConfig.isScalarOp()) + ExpectedVector = + computeExpectedValues(InputVector1, ScalarInput[0], TestConfig); + else // Must be a unary op + ExpectedVector = computeExpectedValues(InputVector1, TestConfig); + + if (LogInputs) { + logLongVector(InputVector1, L"InputVector1"); + + if (IsVectorBinaryOp) + logLongVector(InputVector2, L"InputVector2"); + else if (TestConfig.isScalarOp()) + logLongVector(ScalarInput, L"ScalarInput"); + } + + // We have to construct the string outside of the lambda. Otherwise it's + // cleaned up when the lambda finishes executing but before the shader runs. + std::string CompilerOptionsString = + TestConfig.getCompilerOptionsString(VectorSizeToTest); + + // The name of the shader we want to use in ShaderOpArith.xml. Could also add + // logic to set this name in ShaderOpArithTable.xml so we can use different + // shaders for different tests. + LPCSTR ShaderName = "LongVectorOp"; + // ShaderOpArith.xml defines the input/output resources and the shader source. + CComPtr TestXML; + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxcDllSupport); + + // RunShaderOpTest is a helper function that handles resource creation + // and setup. It also handles the shader compilation and execution. It takes a + // callback that is called when the shader is compiled, but before it is + // executed. 
+ std::shared_ptr TestResult = st::RunShaderOpTest( + D3DDevice, DxcDllSupport, TestXML, ShaderName, + [&](LPCSTR Name, std::vector &ShaderData, st::ShaderOp *ShaderOp) { + hlsl_test::LogCommentFmt(L"RunShaderOpTest CallBack. Resource Name: %S", + Name); + + // This callback is called once for each resource defined for + // "LongVectorOp" in ShaderOpArith.xml. All callbacks are fired for each + // resource. We determine whether they are applicable to the test case + // when they run. + + // Process the callback for the OutputVector resource. + if (0 == _stricmp(Name, "OutputVector")) { + // We only need to set the compiler options string once. So this is a + // convenient place to do it. + ShaderOp->Shaders.at(0).Arguments = CompilerOptionsString.c_str(); + + return; + } + + // Process the callback for the InputFuncArgs resource. + if (0 == _stricmp(Name, "InputFuncArgs")) { + if (TestConfig.isScalarOp()) + fillShaderBufferFromLongVectorData(ShaderData, + ScalarInput); + return; + } + + // Process the callback for the InputVector1 resource. + if (0 == _stricmp(Name, "InputVector1")) { + fillShaderBufferFromLongVectorData(ShaderData, + InputVector1); + return; + } + + // Process the callback for the InputVector2 resource. + if (0 == _stricmp(Name, "InputVector2")) { + if (IsVectorBinaryOp) + fillShaderBufferFromLongVectorData(ShaderData, + InputVector2); + + return; + } + + LOG_ERROR_FMT_THROW( + L"RunShaderOpTest CallBack. Unexpected Resource Name: %S", Name); + }); + + // Map the data from GPU to CPU memory so we can verify our expectations. 
+ MappedData ShaderOutData; + TestResult->Test->GetReadBackData("OutputVector", &ShaderOutData); + + std::vector OutputVector; + fillLongVectorDataFromShaderBuffer(ShaderOutData, OutputVector, + VectorSizeToTest); + + VERIFY_SUCCEEDED(doVectorsMatch(OutputVector, ExpectedVector, + TestConfig.getTolerance(), + TestConfig.getValidationType())); +} diff --git a/tools/clang/unittests/HLSLExec/LongVectors.h b/tools/clang/unittests/HLSLExec/LongVectors.h new file mode 100644 index 0000000000..392d059bcd --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.h @@ -0,0 +1,282 @@ +#ifndef LONGVECTORS_H +#define LONGVECTORS_H + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "LongVectorTestData.h" +#include "ShaderOpTest.h" +#include "TableParameterHandler.h" +#include "dxc/Support/WinIncludes.h" +#include "dxc/Support/dxcapi.use.h" +#include "dxc/Test/HlslTestUtils.h" + +namespace LongVector { +template +class TestConfig; // Forward declaration + +class OpTest { +public: + BEGIN_TEST_CLASS(OpTest) + END_TEST_CLASS() + + TEST_CLASS_SETUP(classSetup); + + BEGIN_TEST_METHOD(binaryOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#BinaryOpTable") + END_TEST_METHOD() + + BEGIN_TEST_METHOD(unaryOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#UnaryOpTable") + END_TEST_METHOD() + + template + void dispatchTestByDataType(LongVectorOpTypeT OpType, std::wstring DataType, + TableParameterHandler &Handler); + + template + void dispatchTestByVectorSize(LongVectorOpTypeT OpType, + TableParameterHandler &Handler); + + template + void testBaseMethod( + LongVector::TestConfig &TestConfig, + size_t VectorSizeToTest); + +private: + dxc::DxcDllSupport DxcDllSupport; + bool Initialized = false; +}; + +template +void fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, + std::vector &TestData); + +template +void fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, + 
std::vector &TestData, + size_t NumElements); + +template constexpr bool isFloatingPointType() { + return std::is_same_v || std::is_same_v; +} + +struct LongVectorOpTypeStringToEnumValue { + std::wstring OpTypeString; + uint32_t OpTypeValue; +}; + +template +DataTypeT getLongVectorOpType(const LongVectorOpTypeStringToEnumValue *Values, + const std::wstring &OpTypeString, + std::size_t Length); + +enum ValidationType { + ValidationType_Epsilon, + ValidationType_Ulp, +}; + +enum BasicOpType { + BasicOpType_Binary, + BasicOpType_Unary, + BasicOpType_ScalarBinary, + BasicOpType_EnumValueCount +}; + +enum BinaryOpType { + BinaryOpType_ScalarAdd, + BinaryOpType_ScalarMultiply, + BinaryOpType_ScalarSubtract, + BinaryOpType_ScalarDivide, + BinaryOpType_ScalarModulus, + BinaryOpType_Multiply, + BinaryOpType_Add, + BinaryOpType_Subtract, + BinaryOpType_Divide, + BinaryOpType_Modulus, + BinaryOpType_Min, + BinaryOpType_Max, + BinaryOpType_ScalarMin, + BinaryOpType_ScalarMax, + BinaryOpType_EnumValueCount +}; + +static const LongVectorOpTypeStringToEnumValue binaryOpTypeStringToEnumMap[] = { + {L"BinaryOpType_ScalarAdd", BinaryOpType_ScalarAdd}, + {L"BinaryOpType_ScalarMultiply", BinaryOpType_ScalarMultiply}, + {L"BinaryOpType_ScalarSubtract", BinaryOpType_ScalarSubtract}, + {L"BinaryOpType_ScalarDivide", BinaryOpType_ScalarDivide}, + {L"BinaryOpType_ScalarModulus", BinaryOpType_ScalarModulus}, + {L"BinaryOpType_Add", BinaryOpType_Add}, + {L"BinaryOpType_Multiply", BinaryOpType_Multiply}, + {L"BinaryOpType_Subtract", BinaryOpType_Subtract}, + {L"BinaryOpType_Divide", BinaryOpType_Divide}, + {L"BinaryOpType_Modulus", BinaryOpType_Modulus}, + {L"BinaryOpType_Min", BinaryOpType_Min}, + {L"BinaryOpType_Max", BinaryOpType_Max}, + {L"BinaryOpType_ScalarMin", BinaryOpType_ScalarMin}, + {L"BinaryOpType_ScalarMax", BinaryOpType_ScalarMax}, +}; + +static_assert(_countof(binaryOpTypeStringToEnumMap) == + BinaryOpType_EnumValueCount, + "binaryOpTypeStringToEnumMap size mismatch. 
Did you " + "add a new enum value?"); + +BinaryOpType getBinaryOpType(const std::wstring &OpTypeString); + +enum UnaryOpType { UnaryOpType_Initialize, UnaryOpType_EnumValueCount }; + +static const LongVectorOpTypeStringToEnumValue unaryOpTypeStringToEnumMap[] = { + {L"UnaryOpType_Initialize", UnaryOpType_Initialize}, +}; + +static_assert(_countof(unaryOpTypeStringToEnumMap) == + UnaryOpType_EnumValueCount, + "unaryOpTypeStringToEnumMap size mismatch. Did you add " + "a new enum value?"); + +UnaryOpType getUnaryOpType(const std::wstring &OpTypeString); + +template +std::vector getInputValueSetByKey(const std::wstring &Key, + bool LogKey = true) { + if (LogKey) + WEX::Logging::Log::Comment( + WEX::Common::String().Format(L"Using Value Set Key: %s", Key.c_str())); + return std::vector(LongVectorTestData::Data.at(Key)); +} + +template +DataTypeT mod(const DataTypeT &A, const DataTypeT &B); + +template struct TestConfigTraits { + TestConfigTraits(LongVectorOpTypeT OpType) : OpType(OpType) {} + // LongVectorOpTypeT* Enum values. We don't use a UINT because + // we want the type data. 
+ LongVectorOpTypeT OpType; +}; + +template +bool doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, ValidationType); +bool doValuesMatch(float A, float B, float Tolerance, + ValidationType ValidationType); +bool doValuesMatch(double A, double B, float Tolerance, + ValidationType ValidationType); + +template +bool doVectorsMatch(const std::vector &ActualValues, + const std::vector &ExpectedValues, + float Tolerance, ValidationType ValidationType); +// Binary ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const std::vector &InputVector2, + const TestConfig &Config); + +// Binary scalar ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const DataTypeT &ScalarInput, + const TestConfig &Config); + +// Unary ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const TestConfig &Config); + +template +void logLongVector(const std::vector &Values, + const std::wstring &Name); + +// Used to pass into LongVectorOpTestBase +template class TestConfig { +public: + TestConfig() = default; + + TestConfig(UnaryOpType OpType); + TestConfig(BinaryOpType OpType); + + bool isBinaryOp() const { + return BasicOpType == LongVector::BasicOpType_Binary || + BasicOpType == LongVector::BasicOpType_ScalarBinary; + } + + bool isUnaryOp() const { + return BasicOpType == LongVector::BasicOpType_Unary; + } + + bool isScalarOp() const { + return BasicOpType == LongVector::BasicOpType_ScalarBinary; + } + + bool hasFunctionDefinition() const; + std::string getOPERAND2String() const; + + // A helper to get the hlsl type as a string for a given C++ type. + // Used in the long vector tests. 
+ std::string getHLSLTypeString() const; + + DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B, + BinaryOpType OpType) const; + DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B) const; + DataTypeT computeExpectedValue(const DataTypeT &A, UnaryOpType OpType) const; + DataTypeT computeExpectedValue(const DataTypeT &A) const; + + void setInputValueSet1(const std::wstring &InputValueSetName) { + this->InputValueSetName1 = InputValueSetName; + } + + void setInputValueSet2(const std::wstring &InputValueSetName) { + this->InputValueSetName2 = InputValueSetName; + } + + std::vector getInputValueSet1() const { + return getInputValueSet(1); + } + + std::vector getInputValueSet2() const { + return getInputValueSet(2); + } + + float getTolerance() const { return Tolerance; } + LongVector::ValidationType getValidationType() const { + return ValidationType; + } + + std::string getCompilerOptionsString(size_t VectorSize) const; + +private: + std::vector getInputValueSet(size_t ValueSetIndex) const; + + // To be used for the value of -DOPERATOR + std::string OperatorString; + // To be used for the value of -DFUNC + std::string IntrinsicString; + LongVector::BasicOpType BasicOpType = LongVector::BasicOpType_EnumValueCount; + float Tolerance = 0.0; + LongVector::ValidationType ValidationType = + LongVector::ValidationType::ValidationType_Epsilon; + LongVector::TestConfigTraits OpTypeTraits; + std::wstring InputValueSetName1 = L"DefaultInputValueSet1"; + std::wstring InputValueSetName2 = L"DefaultInputValueSet2"; +}; // class LongVector::TestConfig + +}; // namespace LongVector + +#include "LongVectors.tpp" + +#endif // LONGVECTORS_H diff --git a/tools/clang/unittests/HLSLExec/LongVectors.tpp b/tools/clang/unittests/HLSLExec/LongVectors.tpp new file mode 100644 index 0000000000..de333cf863 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.tpp @@ -0,0 +1,476 @@ +template +DataTypeT LongVector::getLongVectorOpType(const 
LongVectorOpTypeStringToEnumValue *Values, + const std::wstring &OpTypeString, + std::size_t Length) { + for (size_t i = 0; i < Length; i++) { + if (Values[i].OpTypeString == OpTypeString) + return static_cast(Values[i].OpTypeValue); + } + + LOG_ERROR_FMT_THROW(L"Invalid LongVectorOpType string: %s", + OpTypeString.c_str()); + + return static_cast(UINT_MAX); +} + +// Helper to fill the shader buffer based on type. Convenient to be used when +// copying HLSL*_t types so we can copy the underlying type directly instead of +// the struct. +template +void LongVector::fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, std::vector &TestData) { + + const size_t NumElements = TestData.size(); + const size_t DataSize = sizeof(DataTypeT) * NumElements; + ShaderBuffer.resize(DataSize); + + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i]; +} + +// Helpers so we do the right thing for float types. +template +DataTypeT LongVector::mod(const DataTypeT &A, const DataTypeT &B) { + return A % B; +} + +template <> float LongVector::mod(const float &A, const float &B) { + return std::fmod(A, B); +} + +template <> double LongVector::mod(const double &A, const double &B) { + return std::fmod(A, B); +} + +// Helper to fill the test data from the shader buffer based on type. Convenient +// to be used when copying HLSL*_t types so we can use the underlying type. +template +void LongVector::fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, + std::vector &TestData, + size_t NumElements) { + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + TestData.push_back(ShaderBufferPtr[i]); +} + +template +bool LongVector::doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, + LongVector::ValidationType) { + if (Tolerance == 0.0f) + return A == B; + + DataTypeT Diff = A > B ? 
A - B : B - A; + return Diff <= Tolerance; +} + +bool LongVector::doValuesMatch(float A, float B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareFloatEpsilon(A, B, Tolerance); + case LongVector::ValidationType_Ulp: { + // Tolerance is in ULPs. Convert to int for the comparison. + const int IntTolerance = static_cast(Tolerance); + return CompareFloatULP(A, B, IntTolerance); + }; + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool LongVector::doValuesMatch(double A, double B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareDoubleEpsilon(A, B, Tolerance); + case LongVector::ValidationType_Ulp: { + // Tolerance is in ULPs. Convert to int64_t for the comparison. + const int64_t IntTolerance = static_cast(Tolerance); + return CompareDoubleULP(A, B, IntTolerance); + }; + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. 
Expecting Epsilon or ULP."); + return false; + } +} + + +template +bool LongVector::doVectorsMatch(const std::vector &ActualValues, + const std::vector &ExpectedValues, + float Tolerance, + LongVector::ValidationType ValidationType) { + // Stash mismatched indexes for easy failure logging later + std::vector MismatchedIndexes; + VERIFY_IS_TRUE(ActualValues.size() == ExpectedValues.size(), + L"doVectorsMatch() called with mismatched vector sizes."); + for (size_t i = 0; i < ActualValues.size(); ++i) { + if (!doValuesMatch(ActualValues[i], ExpectedValues[i], Tolerance, + ValidationType)) + MismatchedIndexes.push_back(i); + } + + if (MismatchedIndexes.empty()) + return true; + + if (!MismatchedIndexes.empty()) { + for (size_t Index : MismatchedIndexes) { + std::wstringstream Wss(L""); + Wss << std::setprecision(15); // Set precision for floating point types + Wss << L"Mismatch at Index: " << Index; + Wss << L" Actual Value:" << ActualValues[Index] << ","; + Wss << L" Expected Value:" << ExpectedValues[Index]; + WEX::Logging::Log::Error(Wss.str().c_str()); + } + } + + return false; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, + const std::vector &InputVector2, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE( + Config.isBinaryOp(), + L"computeExpectedValues() called with a non-binary op config."); + + std::vector ExpectedValues = {}; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back( + Config.computeExpectedValue(InputVector1[i], InputVector2[i])); + + return ExpectedValues; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, const DataTypeT &ScalarInput, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE(Config.isScalarOp(), L"computeExpectedValues() called with a " + L"non-binary non-scalar op config."); + + std::vector ExpectedValues; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back( + 
Config.computeExpectedValue(InputVector1[i], ScalarInput)); + + return ExpectedValues; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE(Config.isUnaryOp(), + L"computeExpectedValues() called with a non-unary op config."); + + std::vector ExpectedValues; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back(Config.computeExpectedValue(InputVector1[i])); + + return ExpectedValues; +} + +template +void LongVector::logLongVector(const std::vector &Values, + const std::wstring &Name) { + WEX::Logging::Log::Comment( + WEX::Common::String().Format(L"LongVector Name: %s", Name.c_str())); + + const size_t LoggingWidth = 40; + + std::wstringstream Wss(L""); + Wss << L"LongVector Values: "; + Wss << L"["; + const size_t NumElements = Values.size(); + for (size_t i = 0; i < NumElements; i++) { + if (i % LoggingWidth == 0 && i != 0) + Wss << L"\n "; + Wss << Values[i]; + if (i != NumElements - 1) + Wss << L", "; + } + Wss << L" ]"; + + WEX::Logging::Log::Comment(Wss.str().c_str()); +} + +template +LongVector::TestConfig::TestConfig(LongVector::UnaryOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Unary; + + if (isFloatingPointType()) + Tolerance = 1; + + switch (OpType) { + case LongVector::UnaryOpType_Initialize: + IntrinsicString = "TestInitialize"; + break; + default: + VERIFY_FAIL("Invalid UnaryOpType"); + } +} + +template +LongVector::TestConfig::TestConfig(LongVector::BinaryOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Binary; + + if (isFloatingPointType()) + Tolerance = 1; + ValidationType = LongVector::ValidationType_Ulp; + + switch (OpType) { + case LongVector::BinaryOpType_ScalarAdd: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "+"; + break; + case LongVector::BinaryOpType_ScalarMultiply: + 
BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "*"; + break; + case LongVector::BinaryOpType_ScalarSubtract: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "-"; + break; + case LongVector::BinaryOpType_ScalarDivide: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "/"; + break; + case LongVector::BinaryOpType_ScalarModulus: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "%"; + break; + case LongVector::BinaryOpType_Multiply: + OperatorString = "*"; + break; + case LongVector::BinaryOpType_Add: + OperatorString = "+"; + break; + case LongVector::BinaryOpType_Subtract: + OperatorString = "-"; + break; + case LongVector::BinaryOpType_Divide: + OperatorString = "/"; + break; + case LongVector::BinaryOpType_Modulus: + OperatorString = "%"; + break; + case LongVector::BinaryOpType_Min: + OperatorString = ","; + IntrinsicString = "min"; + break; + case LongVector::BinaryOpType_Max: + OperatorString = ","; + IntrinsicString = "max"; + break; + case LongVector::BinaryOpType_ScalarMin: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = ","; + IntrinsicString = "min"; + break; + case LongVector::BinaryOpType_ScalarMax: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = ","; + IntrinsicString = "max"; + break; + default: + VERIFY_FAIL("Invalid BinaryOpType"); + } +} + +template +bool LongVector::TestConfig::hasFunctionDefinition() const { + if constexpr (std::is_same_v) { + if (OpTypeTraits.OpType == LongVector::UnaryOpType_Initialize) + return true; + else + return false; + } + + return false; +} + +template +std::string LongVector::TestConfig::getOPERAND2String() const { + if (hasFunctionDefinition()) { + switch (static_cast(OpTypeTraits.OpType)) { + case LongVector::UnaryOpType_Initialize: + return std::string(" -DFUNC_INITIALIZE=1"); + default: + VERIFY_FAIL("Invalid UnaryOpType"); + } + } + return std::string(""); +} + +template 
+std::string LongVector::TestConfig::getHLSLTypeString() const { + if (std::is_same_v) + return "float"; + if (std::is_same_v) + return "double"; + if (std::is_same_v) + return "int16_t"; + if (std::is_same_v) + return "int"; + if (std::is_same_v) + return "int64_t"; + if (std::is_same_v) + return "uint16_t"; + if (std::is_same_v) + return "uint32_t"; + if (std::is_same_v) + return "uint64_t"; + + std::string ErrStr("getHLSLTypeString() Unsupported type: "); + ErrStr.append(typeid(DataTypeT).name()); + VERIFY_IS_TRUE(false, ErrStr.c_str()); + return "UnknownType"; +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, const DataTypeT &B, + LongVector::BinaryOpType OpType) const { + switch (OpType) { + case LongVector::BinaryOpType_ScalarAdd: + return A + B; + case LongVector::BinaryOpType_ScalarMultiply: + return A * B; + case LongVector::BinaryOpType_ScalarSubtract: + return A - B; + case LongVector::BinaryOpType_ScalarDivide: + return A / B; + case LongVector::BinaryOpType_ScalarModulus: + return mod(A, B); + case LongVector::BinaryOpType_Multiply: + return A * B; + case LongVector::BinaryOpType_Add: + return A + B; + case LongVector::BinaryOpType_Subtract: + return A - B; + case LongVector::BinaryOpType_Divide: + return A / B; + case LongVector::BinaryOpType_Modulus: + return mod(A, B); + case LongVector::BinaryOpType_Min: + // std::max and std::min are wrapped in () to avoid collisions with the // + // macro defintions for min and max in windows.h + return (std::min)(A, B); + case LongVector::BinaryOpType_Max: + return (std::max)(A, B); + case LongVector::BinaryOpType_ScalarMin: + return (std::min)(A, B); + case LongVector::BinaryOpType_ScalarMax: + return (std::max)(A, B); + default: + LOG_ERROR_FMT_THROW(L"Unknown BinaryOpType: %d", OpTypeTraits.OpType); + return DataTypeT(); + } +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, const DataTypeT &B) const { + if(!isBinaryOp()) + 
LOG_ERROR_FMT_THROW( + L"computeExpectedValue(const DataTypeT &A, const DataTypeT &B) called " + L"on a unary op: %d", + OpTypeTraits.OpType); + + return computeExpectedValue(A, B, static_cast(OpTypeTraits.OpType)); +} + + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, + LongVector::UnaryOpType OpType) const { + switch (OpType) { + case LongVector::UnaryOpType_Initialize: + return A; + default: + LOG_ERROR_FMT_THROW(L"Unknown UnaryOpType :%d", OpTypeTraits.OpType); + return DataTypeT(); + } +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A) const { + + if constexpr (std::is_same_v) { + const auto OpType = static_cast(OpTypeTraits.OpType); + return computeExpectedValue(A, OpType); + } + + LOG_ERROR_FMT_THROW( + L"computeExpectedValue(const DataType&A) called on an unrecognized binary op: %d", + OpTypeTraits.OpType); + + return DataTypeT(); +} + +template +std::string LongVector::TestConfig::getCompilerOptionsString(size_t VectorSize) const { + std::stringstream CompilerOptions(""); + std::string HLSLType = getHLSLTypeString(); + CompilerOptions << "-DTYPE="; + CompilerOptions << HLSLType; + CompilerOptions << " -DNUM="; + CompilerOptions << VectorSize; + const bool Is16BitType = + (HLSLType == "int16_t" || HLSLType == "uint16_t" || HLSLType == "half"); + CompilerOptions << (Is16BitType ? " -enable-16bit-types" : ""); + CompilerOptions << " -DOPERATOR="; + CompilerOptions << OperatorString; + + if (isBinaryOp()) { + CompilerOptions << " -DOPERAND2="; + CompilerOptions << (isScalarOp() ? 
"InputScalar" : "InputVector2"); + + if (isScalarOp()) + CompilerOptions << " -DIS_SCALAR_OP=1"; + else + CompilerOptions << " -DIS_BINARY_VECTOR_OP=1"; + + CompilerOptions << " -DFUNC="; + CompilerOptions << IntrinsicString; + } else { // Unary Op + CompilerOptions << " -DFUNC="; + CompilerOptions << IntrinsicString; + CompilerOptions << " -DOPERAND2="; + CompilerOptions << getOPERAND2String(); + } + + return CompilerOptions.str(); +} + +template +std::vector LongVector::TestConfig::getInputValueSet(size_t ValueSetIndex) const { + if (ValueSetIndex == 2 && !isBinaryOp()) + VERIFY_FAIL("ValueSetindex==2 is only valid for binary ops."); + + std::wstring InputValueSetName = L""; + if (ValueSetIndex == 1) + InputValueSetName = InputValueSetName1; + else if (ValueSetIndex == 2) + InputValueSetName = InputValueSetName2; + else + VERIFY_FAIL("Invalid ValueSetIndex"); + + return getInputValueSetByKey(InputValueSetName); +} diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index e768f205f1..a782bd97ae 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3750,4 +3750,71 @@ void MSMain(uint GID : SV_GroupIndex, + + RootFlags(0), UAV(u0), UAV(u1), UAV(u2), + UAV(u3) + + + + + + + + + + + + + + + TestInitialize(vector Vector) + { + vector VectorCopy = Vector; + return VectorCopy; + } + #endif + + RWByteAddressBuffer g_InputFuncArgs : register(u0); + RWByteAddressBuffer g_InputVector1 : register(u1); + RWByteAddressBuffer g_InputVector2 : register(u2); + RWByteAddressBuffer g_OutputVector : register(u3); + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + + vector InputVector1 = g_InputVector1.Load< vector >(0); + + #ifdef IS_BINARY_VECTOR_OP + vector InputVector2 = g_InputVector2.Load< vector >(0); + #endif + + #ifdef IS_SCALAR_OP + TYPE InputScalar = g_InputFuncArgs.Load(0); + #endif + + #ifdef FUNC_CLAMP + TYPE Clamp_ArgMin = 
g_InputFuncArgs.Load(0); + TYPE Clamp_ArgMax = g_InputFuncArgs.Load(sizeof(TYPE)); + vector ClampArgMinMax = {Clamp_ArgMin, Clamp_ArgMax}; + #endif + + vector OutputVector = FUNC(InputVector1 OPERATOR OPERAND2); + + g_OutputVector.Store< vector >(0, OutputVector); + }; + ]]> + + From b331216bace89303857ef66617b94f03a6c715fd Mon Sep 17 00:00:00 2001 From: Shawn Hatori <5499686+shawnhatori@users.noreply.github.com> Date: Thu, 3 Jul 2025 13:45:28 -0400 Subject: [PATCH 83/93] [SPIR-V] Explicitly state which layout rules require scalar block layout (#7539) I was trying to debug a Vulkan Storage Buffer-related memory alignment issue in my application where I was using SPIR-V generated via `dxc` with `-fvk-use-dx-layout`. In `SPIR-V.rst`, I happened to miss the paragraph that follows the list of layout rules (removed in this proposal). That paragraph starts with "To use scalar layout", which given my use of DirectX layout, I did not think was relevant to me. However, the next sentence of that paragraph sneakily and indirectly mentions that `VK_EXT_scalar_block_layout` is required for the DirectX memory layout as well. I have proposed explicitly stating the extension requirement when the relevant layout rules are listed. --- docs/SPIR-V.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 771cf0e5a2..2bcdb99bfe 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -1012,17 +1012,18 @@ right now: 2. DirectX memory layout rules for uniform buffers and storage buffers: they allow packing data on the application side that can be shared with DirectX. They can be enabled by ``-fvk-use-dx-layout``. + + NOTE: This requires ``VK_EXT_scalar_block_layout`` to be enabled on the + application side. 3. Strict OpenGL ``std140`` for uniform buffers and strict OpenGL ``std430`` for storage buffers: they allow packing data on the application side that can be shared with OpenGL. 
They can be enabled by ``-fvk-use-gl-layout``. 4. Scalar layout rules introduced via `VK_EXT_scalar_block_layout`, which basically aligns all aggregrate types according to their elements' natural alignment. They can be enabled by ``-fvk-use-scalar-layout``. - -To use scalar layout, the application side need to request -``VK_EXT_scalar_block_layout``. This is also true for using DirectX memory -layout since there is no dedicated DirectX layout extension for Vulkan -(at least for now). So we must request something more permissive. + + NOTE: This requires ``VK_EXT_scalar_block_layout`` to be enabled on the + application side. In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL ``std140``/``std430`` rules with the following modification for vector type @@ -1032,7 +1033,7 @@ alignment: 2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. -As an exmaple, for the following HLSL definition: +As an example, for the following HLSL definition: .. code:: hlsl From 4fcf67f78f7d6ffd286316112694a3ae000860e2 Mon Sep 17 00:00:00 2001 From: Dan Brown <61992655+danbrown-amd@users.noreply.github.com> Date: Thu, 3 Jul 2025 12:34:35 -0600 Subject: [PATCH 84/93] [spirv] Fixes #7535 (vk::BufferPointer alignment issue). 
(#7571) --- tools/clang/lib/SPIRV/SpirvBuilder.cpp | 15 ++++++-- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 37 +++++-------------- .../vk.buffer-pointer.alias.cs.hlsl | 2 +- .../CodeGenSPIRV/vk.buffer-pointer.alias.hlsl | 4 +- .../vk.buffer-pointer.atomic.hlsl | 2 +- .../vk.buffer-pointer.from-uint.hlsl | 4 +- .../vk.buffer-pointer.linked-list.hlsl | 6 +-- 7 files changed, 30 insertions(+), 40 deletions(-) diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index e085603b21..22523eed0e 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -205,10 +205,17 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setRValue(true); if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { - AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); - uint32_t align, size, stride; - std::tie(align, size) = alignmentCalc.getAlignmentAndSize( - resultType, pointer->getLayoutRule(), llvm::None, &stride); + QualType pointerType = pointer->getAstResultType(); + uint32_t align = 0; + if (!pointerType.isNull() && hlsl::IsVKBufferPointerType(pointerType)) { + align = hlsl::GetVKBufferPointerAlignment(pointerType); + } + if (!align) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + } instruction->setAlignment(align); } diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 850a8dd736..c2ee495d28 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -11179,36 +11179,19 @@ SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( const CXXMemberCallExpr *callExpr) { - LowerTypeVisitor lowerTypeVisitor(astContext, 
spvContext, spirvOptions, - spvBuilder); - Expr *obj = callExpr->getImplicitObjectArgument(); - SpirvInstruction *bufferPointer = doExpr(obj); + SpirvInstruction *bufferPointer = + doExpr(callExpr->getImplicitObjectArgument()); if (!bufferPointer) return nullptr; - if (bufferPointer->isRValue()) { - bufferPointer->setRValue(false); - bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); - bufferPointer->setLayoutRule(spirvOptions.sBufferLayoutRule); - return bufferPointer; - } - - unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); - lowerTypeVisitor.visitInstruction(bufferPointer); - - const SpirvPointerType *bufferPointerType = - dyn_cast(bufferPointer->getResultType()); - SpirvLoad *retVal = - spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, - callExpr->getLocStart()); - if (!align) { - QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); - AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); - uint32_t stride; - std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( - bufferType, retVal->getLayoutRule(), llvm::None, &stride); - } - retVal->setAlignment(align); + + SpirvInstruction *retVal = + bufferPointer->isRValue() + ? 
bufferPointer + : spvBuilder.createLoad(bufferPointer->getAstResultType(), + bufferPointer, callExpr->getLocStart()); retVal->setRValue(false); + retVal->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + retVal->setLayoutRule(spirvOptions.sBufferLayoutRule); return retVal; } diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl index f0f5c54a16..e063a4bc23 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -20,7 +20,7 @@ void main() { foo(rwbuf[0].Get()); } -// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} // CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 // CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 // CHECK: OpStore [[L2]] %int_1 Aligned 4 diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl index fc5b9edad0..e159f6997c 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -62,10 +62,10 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] // CHECK: OpStore [[BP1]] [[X4]] // CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] -// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] // CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] // CHECK: OpStore [[X7]] [[X5]] Aligned 16 -// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] // CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] // CHECK: 
[[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 // CHECK: OpReturnValue [[X10]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl index 992d8b39fd..485da6fd93 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -29,7 +29,7 @@ void main() uint u0, u1; // CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] -// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] // CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] // CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] // CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl index b44e1eca09..e7908e0ce7 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -37,8 +37,8 @@ void main() { // CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function // CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] // CHECK: OpStore [[TEST]] [[X1]] -// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 -// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 32 // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] // CHECK: OpStore [[X4]] [[X3]] // CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl index 71fee1a795..75380d3f4e 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -76,9 +76,9 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] // CHECK: OpStore [[GP]] [[X2]] -// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] -// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 32 // CHECK: OpStore [[GP]] [[X5]] // CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] @@ -94,7 +94,7 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[IF_TRUE]] = OpLabel // CHECK: OpReturnValue [[CV4FLOAT]] // CHECK: [[IF_MERGE]] = OpLabel -// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] // CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 // CHECK: OpReturnValue [[X15]] From d751c827ed3b61e87fdf57d0f424cb2d7af30cd0 Mon Sep 17 00:00:00 2001 From: Russell Liu Date: Mon, 7 Jul 2025 21:34:41 +0800 Subject: [PATCH 85/93] [SPIRV] Allow spirv type as template parameter (#7626) SPIR-V intrinsics allow us to create SPIR-V basic types and opaque types in HLSL, but these types are objects and are not allowed as template parameters. ```fundamental error: object 'Int8Type' is not allowed in builtin template parameters /* OpTypeCooperativeMatrixKHR */ 4456, Int8Type, ^ ``` This doesn't make sense to me, and is not convenient to use. This change allows those types to be used as template parameters.
--- tools/clang/lib/Sema/SemaHLSL.cpp | 9 ++++++ .../CodeGenSPIRV/spv.intrinsicInTemplate.hlsl | 29 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 3d9de1804d..8e800e8f68 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -5402,6 +5402,15 @@ class HLSLExternalSource : public ExternalSemaSource { objectKind = ClassifyRecordType(recordType); switch (objectKind) { case AR_TOBJ_OBJECT: +#ifdef ENABLE_SPIRV_CODEGEN + if (const auto *namespaceDecl = dyn_cast( + recordType->getDecl()->getDeclContext()); + namespaceDecl && namespaceDecl->getName().equals("vk") && + (recordType->getDecl()->getName().equals("SpirvType") || + recordType->getDecl()->getName().equals("SpirvOpaqueType"))) { + return true; + } +#endif m_sema->Diag(argLoc, diag::err_hlsl_unsupported_object_context) << type << static_cast(TypeDiagContext::TypeParameter); return false; diff --git a/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl b/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl new file mode 100644 index 0000000000..0ecda64dbb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_8 -HV 2021 -O0 -spirv -fspv-target-env=universal1.5 %s | FileCheck %s + +// CHECK: [[Int8Type:%.*]] = OpTypeInt 8 0 +using Int8Type = vk::SpirvType >, + vk::Literal > >; + +// CHECK: [[MatrixType:%.*]] = OpTypeCooperativeMatrixKHR [[Int8Type]] %uint_3 %uint_16 %uint_16 %uint_0 +using I8MatA = vk::SpirvOpaqueType< + /* OpTypeCooperativeMatrixKHR */ 4456, Int8Type, + vk::integral_constant, + vk::integral_constant, vk::integral_constant, + vk::integral_constant >; + +template +[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType +__builtin_spv_CooperativeMatrixLoadKHR([[vk::ext_reference]] PointerType pointer, + 
uint32_t memory_layout, uint32_t stride, [[vk::ext_literal]] uint32_t memory_operand); + +StructuredBuffer buffer : register(t0, space0); + +[numthreads(32, 1, 1)] void main() { + [[vk::ext_extension("SPV_KHR_cooperative_matrix")]] + [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]] + [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]] + [[vk::ext_capability(/* Int8 */ 39)]] + // CHECK: OpCooperativeMatrixLoadKHR [[MatrixType]] %{{.*}} %uint_0 %uint_32 None + I8MatA matA = __builtin_spv_CooperativeMatrixLoadKHR(buffer[0], /* rowMajor */ 0, 32, 0); +} From a11702ef0a393a9e0b78f982f9f0fa66d919c867 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Tue, 8 Jul 2025 10:46:31 -0400 Subject: [PATCH 86/93] [SPIRV] Add the derivative group execution mode only on shader types that allow it. (#7628) DXC allows users to use derivative instructions in shader models that do not allow them, but they must be dead code that will be removed. However, when we see a derivative instruction in the SPIR-V backend that is not in a pixel shader we assume it needs the DerivativeGroup execution mode, and we fail when we try to add it to a vertex shader. To allow our implementation to match DXIL, we will not assume we can add the execution mode. We will only add it for shaders that we know can use it, and skip the others. If the derivative instruction is not removed during optimizations, there will be a validation error. While fixing this, we observed another bug that is fixed at the same time since they are closely related. The TaskNV and TaskEXT shader types do not have the same id, and the SPV_KHR_compute_shader_derivatives does not work with the NV mesh shader extension. That was fixed up.
Fixes #7478 --- tools/clang/lib/SPIRV/SpirvEmitter.cpp | 49 ++++++++++++------- .../amplification_shader_derivative.hlsl | 28 +++++++++++ .../CodeGenSPIRV/mesh_shader_derivative.hlsl | 34 +++++++++++++ .../vertex_shader_derivative_in_branch.hlsl | 23 +++++++++ 4 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl create mode 100644 tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index c2ee495d28..734340e9ae 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -4399,9 +4399,7 @@ SpirvEmitter::processTextureLevelOfDetail(const CXXMemberCallExpr *expr, spvBuilder.createImageQuery(spv::Op::OpImageQueryLod, queryResultType, expr->getExprLoc(), sampledImage, coordinate); - if (spvContext.isCS() || spvContext.isNode()) { - addDerivativeGroupExecutionMode(); - } + addDerivativeGroupExecutionMode(); // The first component of the float2 contains the mipmap array layer. // The second component of the float2 represents the unclamped lod. 
return spvBuilder.createCompositeExtract(astContext.FloatTy, query, @@ -5780,9 +5778,7 @@ SpirvEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr, const auto retType = expr->getDirectCallee()->getReturnType(); if (isSample) { - if (spvContext.isCS() || spvContext.isNode()) { - addDerivativeGroupExecutionMode(); - } + addDerivativeGroupExecutionMode(); return createImageSample(retType, imageType, image, sampler, coordinate, /*compareVal*/ nullptr, /*bias*/ nullptr, /*lod*/ nullptr, std::make_pair(nullptr, nullptr), @@ -5870,9 +5866,9 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr, const auto retType = expr->getDirectCallee()->getReturnType(); - if (!lod && (spvContext.isCS() || spvContext.isNode())) { + if (!lod) addDerivativeGroupExecutionMode(); - } + return createImageSample( retType, imageType, image, sampler, coordinate, /*compareVal*/ nullptr, bias, lod, std::make_pair(nullptr, nullptr), @@ -5992,9 +5988,7 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) { const auto retType = expr->getDirectCallee()->getReturnType(); const auto imageType = imageExpr->getType(); - if (spvContext.isCS()) { - addDerivativeGroupExecutionMode(); - } + addDerivativeGroupExecutionMode(); return createImageSample( retType, imageType, image, sampler, coordinate, compareVal, @@ -6047,9 +6041,7 @@ SpirvEmitter::processTextureSampleCmpBias(const CXXMemberCallExpr *expr) { const auto retType = expr->getDirectCallee()->getReturnType(); const auto imageType = imageExpr->getType(); - if (spvContext.isCS()) { - addDerivativeGroupExecutionMode(); - } + addDerivativeGroupExecutionMode(); return createImageSample( retType, imageType, image, sampler, coordinate, compareVal, bias, @@ -9782,8 +9774,7 @@ SpirvInstruction *SpirvEmitter::processDerivativeIntrinsic( QualType returnType = arg->getAstResultType(); assert(isFloatOrVecOfFloatType(returnType)); - if (!spvContext.isPS()) - addDerivativeGroupExecutionMode(); + 
addDerivativeGroupExecutionMode(); needsLegalization = true; QualType B32Type = astContext.FloatTy; @@ -12512,8 +12503,7 @@ SpirvInstruction *SpirvEmitter::processIntrinsicUsingSpirvInst( case spv::Op::OpFwidth: case spv::Op::OpFwidthFine: case spv::Op::OpFwidthCoarse: - if (spvContext.isCS() || spvContext.isNode()) - addDerivativeGroupExecutionMode(); + addDerivativeGroupExecutionMode(); needsLegalization = true; break; default: @@ -15771,8 +15761,29 @@ bool SpirvEmitter::spirvToolsValidate(std::vector *mod, return tools.Validate(mod->data(), mod->size(), options); } +static bool canUseDerivativeGroupExecutionMode(SpirvContext::ShaderModelKind sm, + bool usingEXTMeshShader) { + switch (sm) { + case SpirvContext::ShaderModelKind::Compute: + case SpirvContext::ShaderModelKind::Node: + return true; + + // The KHR extension that allows derivative instruction in mesh and task + // (amplification) shader does not work with SPV_NV_mesh_shader extesion. + case SpirvContext::ShaderModelKind::Mesh: + case SpirvContext::ShaderModelKind::Amplification: + return usingEXTMeshShader; + default: + return false; + } +} + void SpirvEmitter::addDerivativeGroupExecutionMode() { - assert(spvContext.isCS()); + bool usingEXTMeshShader = + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader); + SpirvContext::ShaderModelKind sm = spvContext.getCurrentShaderModelKind(); + if (!canUseDerivativeGroupExecutionMode(sm, usingEXTMeshShader)) + return; SpirvExecutionMode *numThreadsEm = cast(spvBuilder.getModule()->findExecutionMode( diff --git a/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl b/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl new file mode 100644 index 0000000000..9982cf1cda --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s --check-prefix=VK13 +// RUN: %dxc -T as_6_5 -E main 
-fspv-target-env=vulkan1.1 -Vd %s -spirv | FileCheck %s --check-prefix=VK11 + +// VK13-DAG: OpCapability ComputeDerivativeGroupLinearKHR +// VK13-DAG: OpCapability DerivativeControl +// VK13-DAG: OpCapability MeshShadingEXT +// VK13-DAG: OpExtension "SPV_EXT_mesh_shader" +// VK13-DAG: OpExtension "SPV_KHR_compute_shader_derivatives" +// VK13: OpEntryPoint TaskEXT %main "main" +// VK13: OpExecutionMode %main DerivativeGroupLinearKHR + +// VK11-DAG: OpExtension "SPV_NV_mesh_shader" +// VK11: OpEntryPoint TaskNV %main "main" +// VK11-NOT: OpExecutionMode %main DerivativeGroup + +struct AmplificationPayload +{ + float4 value; +}; + +groupshared AmplificationPayload payload; + +[numthreads(4, 1, 1)] +void main(in uint tid : SV_GroupThreadID, in uint gtid : SV_GroupID) +{ + payload.value = ddx_coarse(float4(tid, 0, 0, 0)); + DispatchMesh(1,1,1, payload); +} diff --git a/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl b/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl new file mode 100644 index 0000000000..3f26921e28 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T ms_6_5 -E main -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s --check-prefix=VK13 +// RUN: %dxc -T ms_6_5 -E main -fspv-target-env=vulkan1.1 -Vd %s -spirv | FileCheck %s --check-prefix=VK11 + +// VK13-DAG: OpCapability ComputeDerivativeGroupLinearKHR +// VK13-DAG: OpCapability DerivativeControl +// vk13-DAG: OpCapability MeshShadingEXT +// VK13-DAG: OpExtension "SPV_EXT_mesh_shader" +// VK13-DAG: OpExtension "SPV_KHR_compute_shader_derivatives" +// VK13: OpEntryPoint MeshEXT %main "main" +// VK13: OpExecutionMode %main DerivativeGroupLinearKHR + +// VK11-DAG: OpExtension "SPV_NV_mesh_shader" +// VK11: OpEntryPoint MeshNV %main "main" +// VK11-NOT: OpExecutionMode %main DerivativeGroup + +struct VSOut +{ + float4 pos : SV_Position; +}; + +[numthreads(4, 1, 1)] +[outputtopology("triangle")] +void main(in uint tid : 
SV_GroupThreadID, out vertices VSOut verts[3], out indices uint3 tris[1]) +{ + SetMeshOutputCounts(3, 1); + + float4 val = ddx_coarse(float4(tid, 0, 0, 0)); + + verts[0].pos = val; + verts[1].pos = val + float4(0,1,0,0); + verts[2].pos = val + float4(1,0,0,0); + + tris[0] = uint3(0,1,2); +} diff --git a/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl b/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl new file mode 100644 index 0000000000..9719dc1dc5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T vs_6_0 -E main -DCOND=false -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s +// CHECK-NOT: OpCapability DerivativeControl +// CHECK-NOT: OpExtension "SPV_KHR_compute_shader_derivatives" + +// RUN: not %dxc -T vs_6_0 -E main -DCOND=true -fspv-target-env=vulkan1.3 %s -spirv 2>&1 | FileCheck %s -check-prefix=ERROR +// ERROR: generated SPIR-V is invalid: +// ERROR-NEXT: Derivative instructions require Fragment, GLCompute, MeshEXT or TaskEXT execution model: DPdx + +struct VSOut +{ + float4 pos : SV_Position; +}; + +VSOut main(float4 pos : POSITION) +{ + VSOut output; + output.pos = pos; + if (COND) + { + output.pos += ddx(pos); + } + return output; +} From 4efa3dc842ac99a38d940aa64cb80819a7ebd49c Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Thu, 10 Jul 2025 15:30:42 -0700 Subject: [PATCH 87/93] Merge HLSLHalf_t and HLSLBool_t (#7630) This PR merges some more long vector exec test code from staging-sm6.9 into main. Specifically, we bring over the helper classes that define data types for half and bool. A native half-precision type is only available in newer C++ versions so a simple class was needed to implement the proper logic using existing DX helpers that were added for this same reason. The bool class is used as the size of a bool in C++ differs from that in HLSL. Also brings in some test cases using these data types. 
Test cases were verified locally by running against WARP. Addresses #7546 --- .../unittests/HLSLExec/LongVectorOpTable.xml | 84 +++++++ .../unittests/HLSLExec/LongVectorTestData.h | 226 +++++++++++++++++- .../clang/unittests/HLSLExec/LongVectors.cpp | 6 +- tools/clang/unittests/HLSLExec/LongVectors.h | 7 +- .../clang/unittests/HLSLExec/LongVectors.tpp | 73 +++++- 5 files changed, 383 insertions(+), 13 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml index 39a2fa481e..df8fe250c9 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml +++ b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml @@ -12,6 +12,23 @@ String String + + + BinaryOpType_ScalarAdd + bool + + + BinaryOpType_Add + bool + + + BinaryOpType_ScalarSubtract + bool + + + BinaryOpType_Subtract + bool + BinaryOpType_ScalarAdd @@ -354,6 +371,63 @@ BinaryOpType_Max uint64 + + + BinaryOpType_ScalarAdd + float16 + + + BinaryOpType_Add + float16 + + + BinaryOpType_ScalarSubtract + float16 + + + BinaryOpType_Subtract + float16 + + + BinaryOpType_ScalarMultiply + float16 + + + BinaryOpType_Multiply + float16 + + + BinaryOpType_ScalarDivide + float16 + + + BinaryOpType_Divide + float16 + + + BinaryOpType_ScalarModulus + float16 + + + BinaryOpType_Modulus + float16 + + + BinaryOpType_ScalarMin + float16 + + + BinaryOpType_Min + float16 + + + BinaryOpType_ScalarMax + float16 + + + BinaryOpType_Max + float16 + BinaryOpType_ScalarAdd @@ -471,6 +545,11 @@ String String + + + UnaryOpType_Initialize + bool + UnaryOpType_Initialize @@ -501,6 +580,11 @@ UnaryOpType_Initialize uint64 + + + UnaryOpType_Initialize + float16 + UnaryOpType_Initialize diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h index 002c765609..bc6ea8c7c2 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -7,10 +7,204 
@@ #include #include +// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes. +// Take int32_t as a constuctor argument and convert it to bool when needed. +// Comparisons cast to a bool because we only care if the bool representation is +// true or false. +struct HLSLBool_t { + HLSLBool_t() : Val(0) {} + HLSLBool_t(int32_t Val) : Val(Val) {} + HLSLBool_t(bool Val) : Val(Val) {} + HLSLBool_t(const HLSLBool_t &Other) : Val(Other.Val) {} + + bool operator==(const HLSLBool_t &Other) const { + return static_cast(Val) == static_cast(Other.Val); + } + + bool operator!=(const HLSLBool_t &Other) const { + return static_cast(Val) != static_cast(Other.Val); + } + + bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; } + + bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; } + + bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; } + + bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; } + + HLSLBool_t operator*(const HLSLBool_t &Other) const { + return HLSLBool_t(Val * Other.Val); + } + + HLSLBool_t operator+(const HLSLBool_t &Other) const { + return HLSLBool_t(Val + Other.Val); + } + + HLSLBool_t operator-(const HLSLBool_t &Other) const { + return HLSLBool_t(Val - Other.Val); + } + + HLSLBool_t operator/(const HLSLBool_t &Other) const { + return HLSLBool_t(Val / Other.Val); + } + + HLSLBool_t operator%(const HLSLBool_t &Other) const { + return HLSLBool_t(Val % Other.Val); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + // So we can construct std::strings using std::ostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + int32_t Val = 0; +}; + +// No native float16 type in C++ until C++23 . So we use uint16_t to represent +// it. 
Simple little wrapping struct to help handle the right behavior. +struct HLSLHalf_t { + HLSLHalf_t() : Val(0) {} + HLSLHalf_t(DirectX::PackedVector::HALF Val) : Val(Val) {} + HLSLHalf_t(const HLSLHalf_t &Other) : Val(Other.Val) {} + HLSLHalf_t(const float F) { + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const double D) { + float F = 0.0f; + // We wrap '::max' in () to prevent it from being expanded as a + // macro by the Windows SDK. + if (D >= (std::numeric_limits::max)()) + F = (std::numeric_limits::max)(); + else if (D <= std::numeric_limits::lowest()) + F = std::numeric_limits::lowest(); + else + F = static_cast(D); + + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const int I) { + VERIFY_IS_TRUE(I == 0, L"HLSLHalf_t constructor with int override only " + L"meant for cases when initializing to 0."); + const float F = static_cast(I); + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + + // Implicit conversion to float for use with things like std::acos, std::tan, + // etc + operator float() const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val); + } + + bool operator==(const HLSLHalf_t &Other) const { + // Convert to floats to properly handle the '0 == -0' case which must + // compare to true but have different uint16_t values. + // That is, 0 == -0 is true. We store Val as a uint16_t. + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return A == B; + } + + bool operator<(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) < + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) > + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + // Used by tolerance checks in the tests. 
+ bool operator>(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A > F; + } + + bool operator<(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A < F; + } + + bool operator<=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) <= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) >= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; } + + HLSLHalf_t operator*(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A * B)); + } + + HLSLHalf_t operator+(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A + B)); + } + + HLSLHalf_t operator-(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A - B)); + } + + HLSLHalf_t operator/(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A / B)); + } + + HLSLHalf_t operator%(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + const float 
C = std::fmod(A, B); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(C)); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // So we can construct std::wstrings using std::wostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // HALF is an alias to uint16_t + DirectX::PackedVector::HALF Val = 0; +}; + template struct LongVectorTestData { static const std::map> Data; }; +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {false, true, false, false, false, false, true, true, true, true}}, + {L"DefaultInputValueSet2", + {true, false, false, false, false, true, true, true, false, false}}, + }; +}; + template <> struct LongVectorTestData { inline static const std::map> Data = { {L"DefaultInputValueSet1", {-6, 1, 7, 3, 8, 4, -3, 8, 8, -2}}, @@ -53,12 +247,36 @@ template <> struct LongVectorTestData { }; }; +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {-1.0, -1.0, 1.0, -0.01, 1.0, -0.01, 1.0, -0.01, 1.0, -0.01}}, + {L"DefaultInputValueSet2", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultClampArgs", {-1.0, 1.0}}, // Min, Max values for clamp + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {-1.073, 0.044, -1.047, 0.313, 1.447, -0.865, 1.364, -0.715, -0.800, + 0.541}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.331, 0.727, -0.957, 0.677, -0.025, 0.495, 0.855, -0.673, -0.678, + -0.905}}, + }; +}; + template <> struct LongVectorTestData { inline static const std::map> Data = { {L"DefaultInputValueSet1", {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, {L"DefaultInputValueSet2", {1.0, 
-1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {0.315f, -0.316f, 1.409f, -0.09f, -1.569f, 1.302f, -0.326f, 0.781f, + -1.235f, 0.623f}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.727f, 0.331f, -0.957f, 0.677f, -0.025f, 0.495f, 0.855f, -0.673f, + -0.678f, -0.905f}}, }; }; @@ -68,7 +286,13 @@ template <> struct LongVectorTestData { {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, {L"DefaultInputValueSet2", {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, - }; + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {0.807, 0.605, 1.317, 0.188, 1.566, -1.507, 0.67, -1.553, 0.194, + -0.883}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.331, 0.277, -0.957, 0.677, -0.025, 0.495, 0.855, -0.673, -0.678, + -0.905}}}; }; #endif // LONGVECTORTESTDATA_H diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 54e5224798..9c2d3d229c 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -110,7 +110,9 @@ void LongVector::OpTest::dispatchTestByDataType( TableParameterHandler &Handler) { using namespace WEX::Common; - if (DataType == L"int16") + if (DataType == L"bool") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int16") dispatchTestByVectorSize(OpType, Handler); else if (DataType == L"int32") dispatchTestByVectorSize(OpType, Handler); @@ -122,6 +124,8 @@ void LongVector::OpTest::dispatchTestByDataType( dispatchTestByVectorSize(OpType, Handler); else if (DataType == L"uint64") dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"float16") + dispatchTestByVectorSize(OpType, Handler); else if (DataType == L"float32") dispatchTestByVectorSize(OpType, Handler); else if (DataType == L"float64") diff --git a/tools/clang/unittests/HLSLExec/LongVectors.h b/tools/clang/unittests/HLSLExec/LongVectors.h index 
392d059bcd..9157da679d 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.h +++ b/tools/clang/unittests/HLSLExec/LongVectors.h @@ -68,7 +68,9 @@ void fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, size_t NumElements); template constexpr bool isFloatingPointType() { - return std::is_same_v || std::is_same_v; + return std::is_same_v || + std::is_same_v || + std::is_same_v; } struct LongVectorOpTypeStringToEnumValue { @@ -169,6 +171,9 @@ template struct TestConfigTraits { template bool doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, ValidationType); +bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, float, ValidationType); +bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance, + ValidationType ValidationType); bool doValuesMatch(float A, float B, float Tolerance, ValidationType ValidationType); bool doValuesMatch(double A, double B, float Tolerance, diff --git a/tools/clang/unittests/HLSLExec/LongVectors.tpp b/tools/clang/unittests/HLSLExec/LongVectors.tpp index de333cf863..331d4452eb 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.tpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.tpp @@ -19,17 +19,32 @@ DataTypeT LongVector::getLongVectorOpType(const LongVectorOpTypeStringToEnumValu template void LongVector::fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, std::vector &TestData) { + // Note: DataSize for HLSLHalf_t and HLSLBool_t may be larger than the + // underlying type in some cases. Thats fine. Resize just makes sure we have + // enough space. 
const size_t NumElements = TestData.size(); const size_t DataSize = sizeof(DataTypeT) * NumElements; ShaderBuffer.resize(DataSize); - DataTypeT *ShaderBufferPtr = - reinterpret_cast(ShaderBuffer.data()); - for (size_t i = 0; i < NumElements; ++i) - ShaderBufferPtr[i] = TestData[i]; + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i].Val; + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i].Val; + } else { + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i]; + } } -// Helpers so we do the right thing for float types. +// Helpers so we do the right thing for float types. HLSLHalf_t is handled in an +// operator overload. template DataTypeT LongVector::mod(const DataTypeT &A, const DataTypeT &B) { return A % B; @@ -49,10 +64,23 @@ template void LongVector::fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, std::vector &TestData, size_t NumElements) { - DataTypeT *ShaderBufferPtr = - reinterpret_cast(ShaderBuffer.data()); - for (size_t i = 0; i < NumElements; ++i) - TestData.push_back(ShaderBufferPtr[i]); + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + // HLSLHalf_t has a DirectX::PackedVector::HALF based constructor. + TestData.push_back(ShaderBufferPtr[i]); + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + // HLSLBool_t has a int32_t based constructor. 
+ TestData.push_back(ShaderBufferPtr[i]); + } else { + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + TestData.push_back(ShaderBufferPtr[i]); + } } template @@ -65,6 +93,25 @@ bool LongVector::doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, return Diff <= Tolerance; } +bool LongVector::doValuesMatch(HLSLBool_t A, HLSLBool_t B, float, + LongVector::ValidationType) { + return A == B; +} + +bool LongVector::doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareHalfEpsilon(A.Val, B.Val, Tolerance); + case LongVector::ValidationType_Ulp: + return CompareHalfULP(A.Val, B.Val, Tolerance); + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + bool LongVector::doValuesMatch(float A, float B, float Tolerance, LongVector::ValidationType ValidationType) { switch (ValidationType) { @@ -322,6 +369,10 @@ std::string LongVector::TestConfig::getOPERAND2Str template std::string LongVector::TestConfig::getHLSLTypeString() const { + if (std::is_same_v) + return "bool"; + if (std::is_same_v) + return "half"; if (std::is_same_v) return "float"; if (std::is_same_v) @@ -414,6 +465,8 @@ DataTypeT LongVector::TestConfig::computeExpectedV if constexpr (std::is_same_v) { const auto OpType = static_cast(OpTypeTraits.OpType); + // HLSLHalf_t is a struct. We need to call the constructor to get the + // expected value. return computeExpectedValue(A, OpType); } @@ -433,7 +486,7 @@ std::string LongVector::TestConfig::getCompilerOpt CompilerOptions << " -DNUM="; CompilerOptions << VectorSize; const bool Is16BitType = - (HLSLType == "int16_t" || HLSLType == "uint16_t" || HLSLType == "half"); + (HLSLType == "int16_t" || HLSLType == "uint16_t" || HLSLType == "half"); CompilerOptions << (Is16BitType ? 
" -enable-16bit-types" : ""); CompilerOptions << " -DOPERATOR="; CompilerOptions << OperatorString; From 68dedee546982f23a47766c20d37d587befb5ed0 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Fri, 11 Jul 2025 10:25:05 -0400 Subject: [PATCH 88/93] [SPIRV] Add option to use the Unknown image format (#7632) Many Vulkan drivers allow the SPIR-V to use the `Unknown` image format for storage images and texel buffers. This makes the SPIR-V more flexible, and is useful for developers. However, it can be cumbersome to have to add the `vk::image_format` attribute to all resources. This option allows users to make `Unknown` the default image format for all resource types. Fixes #7484 --- docs/SPIR-V.rst | 7 + include/dxc/Support/HLSLOptions.td | 6 + include/dxc/Support/SPIRVOptions.h | 1 + lib/DxcSupport/HLSLOptions.cpp | 2 + tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 4 + .../clang/test/CodeGenSPIRV/type.buffer.hlsl | 171 +++++++++++------- .../type.rasterizer-ordered-buffer.hlsl | 91 ++++++---- .../type.rasterizer-ordered-texture.hlsl | 35 ++-- .../test/CodeGenSPIRV/type.rwtexture.hlsl | 56 ++++-- 9 files changed, 237 insertions(+), 136 deletions(-) diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index 2bcdb99bfe..a695e5854d 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -896,6 +896,13 @@ are translated into SPIR-V ``OpTypeImage``, with parameters: The meanings of the headers in the above table is explained in ``OpTypeImage`` of the SPIR-V spec. +For storage images (e.g. ``RWTexture2D``) and texel buffers (e.g. ``RWBuffer``), +the image format is typically inferred from the data type ``T``. However, the +``-fspv-use-unknown-image-format`` command-line option can be used to change +this behavior. When this option is active, the default format for these +resources becomes ``Unknown`` if not otherwise specified by the +``[[vk::image_format]]`` attribute. 
+ Vulkan specific Image Formats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 58f6bdfbf3..4a38e275c3 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -403,6 +403,12 @@ def fspv_enable_maximal_reconvergence: Flag<["-"], "fspv-enable-maximal-reconver HelpText<"Enables the MaximallyReconvergesKHR execution mode for this module.">; def fspv_use_vulkan_memory_model: Flag<["-"], "fspv-use-vulkan-memory-model">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generates SPIR-V modules that use the Vulkan memory model instead of GLSL450.">; +def fspv_use_unknown_image_format + : Flag<["-"], "fspv-use-unknown-image-format">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"For storage images and texel buffers, sets the default format to 'Unknown' when not specified via the `vk::image_format` attribute. If this option is not used, the format is inferred from the resource's data type.">; + def fvk_auto_shift_bindings: Flag<["-"], "fvk-auto-shift-bindings">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Apply fvk-*-shift to resources without an explicit register assignment.">; def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group, Flags<[CoreOption, DriverOption, HelpHidden]>, diff --git a/include/dxc/Support/SPIRVOptions.h b/include/dxc/Support/SPIRVOptions.h index 1b88ef4def..352cf6c2ec 100644 --- a/include/dxc/Support/SPIRVOptions.h +++ b/include/dxc/Support/SPIRVOptions.h @@ -71,6 +71,7 @@ struct SpirvCodeGenOptions { bool fixFuncCallArguments; bool enableMaximalReconvergence; bool useVulkanMemoryModel; + bool useUnknownImageFormat; bool IEEEStrict; /// Maximum length in words for the OpString literal containing the shader /// source for DebugSource and DebugSourceContinued. 
If the source code length diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index eb071eb0a6..b3eb422eb9 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1120,6 +1120,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, Args.hasFlag(OPT_fspv_enable_maximal_reconvergence, OPT_INVALID, false); opts.SpirvOptions.useVulkanMemoryModel = Args.hasFlag(OPT_fspv_use_vulkan_memory_model, OPT_INVALID, false); + opts.SpirvOptions.useUnknownImageFormat = + Args.hasFlag(OPT_fspv_use_unknown_image_format, OPT_INVALID, false); if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) || diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 1869983ae3..9d1f1fff60 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -1156,6 +1156,10 @@ LowerTypeVisitor::lowerStructFields(const RecordDecl *decl, spv::ImageFormat LowerTypeVisitor::translateSampledTypeToImageFormat(QualType sampledType, SourceLocation srcLoc) { + + if (spvOptions.useUnknownImageFormat) + return spv::ImageFormat::Unknown; + uint32_t elemCount = 1; QualType ty = {}; if (!isScalarType(sampledType, &ty) && diff --git a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl index 35d1b868a8..769fe808b2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl @@ -1,109 +1,144 @@ -// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: 
%type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image Buffer intbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 Buffer uintbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 Buffer floatbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RWBuffer intrwbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RWBuffer uintrwbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// UNKNOWN: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer 
UniformConstant %type_buffer_image_4 RWBuffer floatrwbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// If the `Unkonwn image format is used, then the images below will reuse the types above. +// UNKNOWN-NOT: OpTypeImage + +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 Buffer int2buf; -// CHECK: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 Buffer uint2buf; -// CHECK: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 Buffer float2buf; -// CHECK: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RWBuffer int2rwbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// INFER: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = 
OpTypePointer UniformConstant %type_buffer_image_9 RWBuffer uint2rwbuf; -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RWBuffer float2rwbuf; -// CHECK: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 -// CHECK: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 +// INFER: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 +// INFER: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 Buffer int3buf; Buffer int4buf; -// CHECK: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 -// CHECK: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 +// INFER: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 +// INFER: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 Buffer uint3buf; Buffer uint4buf; -// 
CHECK: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 -// CHECK: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 +// INFER: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 +// INFER: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 Buffer float3buf; Buffer float4buf; -// CHECK: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 -// CHECK: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 +// INFER: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 +// INFER: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 RWBuffer int3rwbuf; RWBuffer int4rwbuf; -// CHECK: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 -// CHECK: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 +// INFER: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: 
%_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 +// INFER: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 RWBuffer uint3rwbuf; RWBuffer uint4rwbuf; -// CHECK: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 -// CHECK: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 +// INFER: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 +// INFER: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 RWBuffer float3rwbuf; RWBuffer float4rwbuf; -// CHECK: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %int2rwbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant -// CHECK: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant -// CHECK: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant -// CHECK: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant -// CHECK: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant -// CHECK: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant -// CHECK: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant -// CHECK: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant -// CHECK: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant -// CHECK: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant -// CHECK: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant -// CHECK: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant -// CHECK: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant +// INFER: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int2buf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant +// INFER: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant +// INFER: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant +// INFER: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant +// INFER: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant +// INFER: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant +// INFER: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant +// INFER: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant +// INFER: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant +// INFER: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant +// INFER: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant +// INFER: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant + +// UNKNOWN: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %intrwbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant void main() {} diff --git 
a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl index c616f65bb9..cf84562e52 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl @@ -1,59 +1,80 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN + +// Before Vulkan 1.3, we should be trying to infer the image format because +// we cannot necessarily use Unknown. However, in Vulkan 1.3 and later, we can use +// Unknown. // CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image RasterizerOrderedBuffer introvbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 RasterizerOrderedBuffer uintrovbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 RasterizerOrderedBuffer 
floatrovbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RasterizerOrderedBuffer int2rovbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RasterizerOrderedBuffer uint2rovbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RasterizerOrderedBuffer float2rovbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 -// CHECK: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// INFER: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 RasterizerOrderedBuffer int3rovbuf; RasterizerOrderedBuffer int4rovbuf; -// 
CHECK: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 -// CHECK: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RasterizerOrderedBuffer uint3rovbuf; RasterizerOrderedBuffer uint4rovbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RasterizerOrderedBuffer float3rovbuf; RasterizerOrderedBuffer float4rovbuf; -// CHECK: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: 
%uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant + +// UNKNOWN: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: 
%uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl index 32dd76e6f1..651840b0e6 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl @@ -1,23 +1,27 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant 
%type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: 
%type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant @@ -33,7 +37,8 @@ RasterizerOrderedTexture3D t3 ; [[vk::image_format("rgba32f")]] RasterizerOrderedTexture3D t4 ; -// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RasterizerOrderedTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl index f901d44cfa..44e7592869 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl @@ -1,24 +1,43 @@ -// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: 
%type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer 
UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// UNKNOWN: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// UNKNOWN: %type_3d_image_1 = OpTypeImage %float 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_1 = OpTypePointer UniformConstant %type_3d_image_1 +// UNKNOWN: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// UNKNOWN: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// UNKNOWN: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// UNKNOWN: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant RWTexture1D t1 ; @@ -33,7 +52,8 @@ RWTexture3D t3 ; [[vk::image_format("rgba32f")]] RWTexture3D t4 ; -// CHECK: %t5 = OpVariable 
%_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RWTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant From 162bf4ec397e9074031052e4d00d5e7973deec42 Mon Sep 17 00:00:00 2001 From: Chris B Date: Thu, 17 Jul 2025 17:45:56 -0500 Subject: [PATCH 89/93] Fix intrinsic lookup with namespaces (#7599) This change fixes issues with intrinsic lookup caused by not correctly respecting the using declaration(s) that impact unqualified lookups. This probably isn't a perfect solution because I'm sure there's some nuance of unqualified lookups in C++ that I'm not handling, but this does respect scoped using directives and allows us to get things working. Additionally this change disables emitting some "declared here" notes when the source location referred to is invalid. Fixes #7495 --- .../include/clang/Sema/ExternalSemaSource.h | 7 +- tools/clang/include/clang/Sema/Sema.h | 5 + tools/clang/lib/Sema/SemaCodeComplete.cpp | 2 +- tools/clang/lib/Sema/SemaHLSL.cpp | 170 ++++++++++-------- tools/clang/lib/Sema/SemaLookup.cpp | 36 +++- tools/clang/lib/Sema/SemaOverload.cpp | 7 +- tools/clang/test/SemaHLSL/effects-syntax.hlsl | 2 - tools/clang/test/SemaHLSL/raytracings.hlsl | 4 +- .../SemaHLSL/using-namespace-dx-errors.hlsl | 42 +++++ .../test/SemaHLSL/using-namespace-dx.hlsl | 56 ++++++ 10 files changed, 247 insertions(+), 84 deletions(-) create mode 100644 tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl create mode 100644 tools/clang/test/SemaHLSL/using-namespace-dx.hlsl diff --git a/tools/clang/include/clang/Sema/ExternalSemaSource.h b/tools/clang/include/clang/Sema/ExternalSemaSource.h index 91578e2440..b10d649cc6 100644 --- a/tools/clang/include/clang/Sema/ExternalSemaSource.h +++ b/tools/clang/include/clang/Sema/ExternalSemaSource.h @@ -211,10 +211,9 @@ 
class ExternalSemaSource : public ExternalASTSource { // add call candidates to the given expression. It returns 'true' // if standard overload search should be suppressed; false otherwise. virtual bool AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, - ArrayRef Args, - OverloadCandidateSet &CandidateSet, - bool PartialOverloading) - { + ArrayRef Args, + OverloadCandidateSet &CandidateSet, + Scope *S, bool PartialOverloading) { return false; } diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 755c7e0755..5e20f6f0f8 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -2495,9 +2495,14 @@ class Sema { DeclAccessPair FoundDecl, FunctionDecl *Fn); + // HLSL Change Begin + void CollectNamespaceContexts(Scope *, + SmallVectorImpl &); + // HLSL Change End void AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, OverloadCandidateSet &CandidateSet, + Scope *S, // HLSL Change bool PartialOverloading = false); // An enum used to represent the different possible results of building a diff --git a/tools/clang/lib/Sema/SemaCodeComplete.cpp b/tools/clang/lib/Sema/SemaCodeComplete.cpp index b1b4668ba3..84d0990346 100644 --- a/tools/clang/lib/Sema/SemaCodeComplete.cpp +++ b/tools/clang/lib/Sema/SemaCodeComplete.cpp @@ -4020,7 +4020,7 @@ void Sema::CodeCompleteCall(Scope *S, Expr *Fn, ArrayRef Args) { Expr *NakedFn = Fn->IgnoreParenCasts(); if (auto ULE = dyn_cast(NakedFn)) - AddOverloadedCallCandidates(ULE, Args, CandidateSet, + AddOverloadedCallCandidates(ULE, Args, CandidateSet, S, // HLSL Change /*PartialOverloading=*/true); else if (auto UME = dyn_cast(NakedFn)) { TemplateArgumentListInfo TemplateArgsBuffer, *TemplateArgs = nullptr; diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 8e800e8f68..dcb6142858 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -4152,6 +4152,7 @@ class 
HLSLExternalSource : public ExternalSemaSource { SourceLocation(), &context.Idents.get("dx"), /*PrevDecl*/ nullptr); m_dxNSDecl->setImplicit(); + m_dxNSDecl->setHasExternalLexicalStorage(true); context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); #ifdef ENABLE_SPIRV_CODEGEN @@ -5169,7 +5170,7 @@ class HLSLExternalSource : public ExternalSemaSource { bool AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, - OverloadCandidateSet &CandidateSet, + OverloadCandidateSet &CandidateSet, Scope *S, bool PartialOverloading) override { DXASSERT_NOMSG(ULE != nullptr); @@ -5194,6 +5195,8 @@ class HLSLExternalSource : public ExternalSemaSource { // Exceptions: // - Vulkan-specific intrinsics live in the 'vk::' namespace. // - DirectX-specific intrinsics live in the 'dx::' namespace. + // - Global namespaces could just mean we have a `using` declaration... so + // it can be anywhere! if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; @@ -5204,81 +5207,106 @@ class HLSLExternalSource : public ExternalSemaSource { } StringRef nameIdentifier = idInfo->getName(); - const HLSL_INTRINSIC *table = g_Intrinsics; - auto tableCount = _countof(g_Intrinsics); - if (isDxNamespace) { - table = g_DxIntrinsics; - tableCount = _countof(g_DxIntrinsics); + using IntrinsicArray = llvm::ArrayRef; + struct IntrinsicTableEntry { + IntrinsicArray Table; + NamespaceDecl *NS; + }; + + llvm::SmallVector SearchTables; + + bool SearchDX = isDxNamespace; + bool SearchVK = isVkNamespace; + if (isGlobalNamespace || !isQualified) + SearchTables.push_back( + IntrinsicTableEntry{IntrinsicArray(g_Intrinsics), m_hlslNSDecl}); + + if (S && !isQualified) { + SmallVector NSContexts; + m_sema->CollectNamespaceContexts(S, NSContexts); + for (const auto &UD : NSContexts) { + if (static_cast(m_dxNSDecl) == UD) + SearchDX = true; + else if (static_cast(m_vkNSDecl) == UD) + SearchVK = true; + } } + + if (SearchDX) + SearchTables.push_back( + 
IntrinsicTableEntry{IntrinsicArray(g_DxIntrinsics), m_dxNSDecl}); #ifdef ENABLE_SPIRV_CODEGEN - if (isVkNamespace) { - table = g_VkIntrinsics; - tableCount = _countof(g_VkIntrinsics); - } -#endif // ENABLE_SPIRV_CODEGEN + if (SearchVK) + SearchTables.push_back( + IntrinsicTableEntry{IntrinsicArray(g_VkIntrinsics), m_vkNSDecl}); +#endif - IntrinsicDefIter cursor = FindIntrinsicByNameAndArgCount( - table, tableCount, StringRef(), nameIdentifier, Args.size()); - IntrinsicDefIter end = IntrinsicDefIter::CreateEnd( - table, tableCount, IntrinsicTableDefIter::CreateEnd(m_intrinsicTables)); - - for (; cursor != end; ++cursor) { - // If this is the intrinsic we're interested in, build up a representation - // of the types we need. - const HLSL_INTRINSIC *pIntrinsic = *cursor; - LPCSTR tableName = cursor.GetTableName(); - LPCSTR lowering = cursor.GetLoweringStrategy(); - DXASSERT(pIntrinsic->uNumArgs <= g_MaxIntrinsicParamCount + 1, - "otherwise g_MaxIntrinsicParamCount needs to be updated for " - "wider signatures"); - - std::vector functionArgTypes; - size_t badArgIdx; - bool argsMatch = - MatchArguments(cursor, QualType(), QualType(), QualType(), Args, - &functionArgTypes, badArgIdx); - if (!functionArgTypes.size()) - return false; + assert(!SearchTables.empty() && "Must have at least one search table!"); + + for (const auto &T : SearchTables) { + + IntrinsicDefIter cursor = FindIntrinsicByNameAndArgCount( + T.Table.data(), T.Table.size(), StringRef(), nameIdentifier, + Args.size()); + IntrinsicDefIter end = IntrinsicDefIter::CreateEnd( + T.Table.data(), T.Table.size(), + IntrinsicTableDefIter::CreateEnd(m_intrinsicTables)); + + for (; cursor != end; ++cursor) { + // If this is the intrinsic we're interested in, build up a + // representation of the types we need. 
+ const HLSL_INTRINSIC *pIntrinsic = *cursor; + LPCSTR tableName = cursor.GetTableName(); + LPCSTR lowering = cursor.GetLoweringStrategy(); + DXASSERT(pIntrinsic->uNumArgs <= g_MaxIntrinsicParamCount + 1, + "otherwise g_MaxIntrinsicParamCount needs to be updated for " + "wider signatures"); + + std::vector functionArgTypes; + size_t badArgIdx; + bool argsMatch = + MatchArguments(cursor, QualType(), QualType(), QualType(), Args, + &functionArgTypes, badArgIdx); + if (!functionArgTypes.size()) + return false; - // Get or create the overload we're interested in. - FunctionDecl *intrinsicFuncDecl = nullptr; - std::pair insertResult = - m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); - bool insertedNewValue = insertResult.second; - if (insertedNewValue) { - NamespaceDecl *nsDecl = m_hlslNSDecl; - if (isVkNamespace) - nsDecl = m_vkNSDecl; - else if (isDxNamespace) - nsDecl = m_dxNSDecl; - DXASSERT(tableName, - "otherwise IDxcIntrinsicTable::GetTableName() failed"); - intrinsicFuncDecl = - AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, - pIntrinsic, &functionArgTypes); - insertResult.first->setFunctionDecl(intrinsicFuncDecl); - } else { - intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); - } + // Get or create the overload we're interested in. 
+ FunctionDecl *intrinsicFuncDecl = nullptr; + std::pair insertResult = + m_usedIntrinsics.insert( + UsedIntrinsic(pIntrinsic, functionArgTypes)); + bool insertedNewValue = insertResult.second; + if (insertedNewValue) { + DXASSERT(tableName, + "otherwise IDxcIntrinsicTable::GetTableName() failed"); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, T.NS, tableName, lowering, + pIntrinsic, &functionArgTypes); + insertResult.first->setFunctionDecl(intrinsicFuncDecl); + } else { + intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); + } - OverloadCandidate &candidate = CandidateSet.addCandidate(Args.size()); - candidate.Function = intrinsicFuncDecl; - candidate.FoundDecl.setDecl(intrinsicFuncDecl); - candidate.Viable = argsMatch; - CandidateSet.isNewCandidate(intrinsicFuncDecl); // used to insert into set - if (argsMatch) - return true; - if (badArgIdx) { - candidate.FailureKind = ovl_fail_bad_conversion; - QualType ParamType = - intrinsicFuncDecl->getParamDecl(badArgIdx - 1)->getType(); - candidate.Conversions[badArgIdx - 1].setBad( - BadConversionSequence::no_conversion, Args[badArgIdx - 1], - ParamType); - } else { - // A less informative error. Needed when the failure relates to the - // return type - candidate.FailureKind = ovl_fail_bad_final_conversion; + OverloadCandidate &candidate = CandidateSet.addCandidate(Args.size()); + candidate.Function = intrinsicFuncDecl; + candidate.FoundDecl.setDecl(intrinsicFuncDecl); + candidate.Viable = argsMatch; + CandidateSet.isNewCandidate( + intrinsicFuncDecl); // used to insert into set + if (argsMatch) + return true; + if (badArgIdx) { + candidate.FailureKind = ovl_fail_bad_conversion; + QualType ParamType = + intrinsicFuncDecl->getParamDecl(badArgIdx - 1)->getType(); + candidate.Conversions[badArgIdx - 1].setBad( + BadConversionSequence::no_conversion, Args[badArgIdx - 1], + ParamType); + } else { + // A less informative error. 
Needed when the failure relates to the + // return type + candidate.FailureKind = ovl_fail_bad_final_conversion; + } } } diff --git a/tools/clang/lib/Sema/SemaLookup.cpp b/tools/clang/lib/Sema/SemaLookup.cpp index 98832a8f57..eec8a7fa64 100644 --- a/tools/clang/lib/Sema/SemaLookup.cpp +++ b/tools/clang/lib/Sema/SemaLookup.cpp @@ -55,6 +55,7 @@ using namespace clang; using namespace sema; +// HLSL Note: This set of utilities copied to SemaHLSL.cpp. namespace { class UnqualUsingEntry { const DeclContext *Nominated; @@ -4809,9 +4810,12 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction, NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getCorrectionDecl(); - if (PrevNote.getDiagID() && ChosenDecl) + // HLSL Change begin: don't put notes on invalid source locations. + if (PrevNote.getDiagID() && ChosenDecl && + !ChosenDecl->getLocation().isInvalid()) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo); + // HLSL Change end } TypoExpr *Sema::createDelayedTypo(std::unique_ptr TCC, @@ -4836,3 +4840,33 @@ const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const { void Sema::clearDelayedTypo(TypoExpr *TE) { DelayedTypos.erase(TE); } + +// HLSL Change Begin +void Sema::CollectNamespaceContexts(Scope *S, + SmallVectorImpl &NSs) { + UnqualUsingDirectiveSet UDirs; + + // Add using directives from this context up to the top level. This + // handles cases where the current declaration is in a context that has + // a using directive but might be in a scope chain that doesn't reach + // the using directive (i.e. a using inside a namespace or class + // declaration but the function definition is outside). + DeclContext *Ctx = S->getEntity(); + for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) { + if (UCtx->isTransparentContext()) + continue; + + UDirs.visit(UCtx, UCtx); + } + // Find the first namespace or translation-unit scope. 
+ Scope *Innermost = S; + while (Innermost && !isNamespaceOrTranslationUnitScope(Innermost)) + Innermost = Innermost->getParent(); + + UDirs.visitScopeChain(S, Innermost); + UDirs.done(); + + for (auto &UD : UDirs) + NSs.push_back(UD.getNominatedNamespace()); +} +// HLSL Change End diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 1bcbc7442f..274b66646b 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -10627,6 +10627,7 @@ static void AddOverloadedCallCandidate(Sema &S, void Sema::AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, OverloadCandidateSet &CandidateSet, + Scope *S, // HLSL Change bool PartialOverloading) { #ifndef NDEBUG @@ -10659,8 +10660,8 @@ void Sema::AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, #endif // HLSL Change - allow ExternalSource the ability to add the overloads for a call. - if (ExternalSource && - ExternalSource->AddOverloadedCallCandidates(ULE, Args, CandidateSet, PartialOverloading)) { + if (ExternalSource && ExternalSource->AddOverloadedCallCandidates( + ULE, Args, CandidateSet, S, PartialOverloading)) { return; } @@ -10970,7 +10971,7 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, // Add the functions denoted by the callee to the set of candidate // functions, including those from argument-dependent lookup. - AddOverloadedCallCandidates(ULE, Args, *CandidateSet); + AddOverloadedCallCandidates(ULE, Args, *CandidateSet, S); // HLSL Change if (getLangOpts().MSVCCompat && CurContext->isDependentContext() && !isSFINAEContext() && diff --git a/tools/clang/test/SemaHLSL/effects-syntax.hlsl b/tools/clang/test/SemaHLSL/effects-syntax.hlsl index 5a7492a9da..e5468cbd41 100644 --- a/tools/clang/test/SemaHLSL/effects-syntax.hlsl +++ b/tools/clang/test/SemaHLSL/effects-syntax.hlsl @@ -108,12 +108,10 @@ static const PixelShader ps1 { state=foo; }; /* expected-warning /*verify-ast No matching AST found for line! 
*/ -// expected-note@? {{'PixelShader' declared here}} PixelShadeR ps < int foo=1;> = ps1; // Case insensitive! /* expected-error {{unknown type name 'PixelShadeR'; did you mean 'PixelShader'?}} expected-warning {{effect object ignored - effect syntax is deprecated}} expected-warning {{possible effect annotation ignored - effect syntax is deprecated}} fxc-pass {{}} */ /*verify-ast No matching AST found for line! */ -// expected-note@? {{'VertexShader' declared here}} VertexShadeR vs; // Case insensitive! /* expected-error {{unknown type name 'VertexShadeR'; did you mean 'VertexShader'?}} expected-warning {{effect object ignored - effect syntax is deprecated}} fxc-pass {{}} */ // Case sensitive diff --git a/tools/clang/test/SemaHLSL/raytracings.hlsl b/tools/clang/test/SemaHLSL/raytracings.hlsl index d3bc01fcd6..429037f22b 100644 --- a/tools/clang/test/SemaHLSL/raytracings.hlsl +++ b/tools/clang/test/SemaHLSL/raytracings.hlsl @@ -12,14 +12,14 @@ void run() { RAY_FLAG_CULL_OPAQUE + RAY_FLAG_CULL_NON_OPAQUE; - rayFlags += RAY_FLAG_INVALID; /* expected-note@? {{'RAY_FLAG_NONE' declared here}} expected-error {{use of undeclared identifier 'RAY_FLAG_INVALID'; did you mean 'RAY_FLAG_NONE'?}} */ + rayFlags += RAY_FLAG_INVALID; /* expected-error {{use of undeclared identifier 'RAY_FLAG_INVALID'; did you mean 'RAY_FLAG_NONE'?}} */ int intFlag = RAY_FLAG_CULL_OPAQUE; int hitKindFlag = HIT_KIND_TRIANGLE_FRONT_FACE + HIT_KIND_TRIANGLE_BACK_FACE; - hitKindFlag += HIT_KIND_INVALID; /* expected-note@? 
{{'HIT_KIND_NONE' declared here}} expected-error {{use of undeclared identifier 'HIT_KIND_INVALID'; did you mean 'HIT_KIND_NONE'?}} */ + hitKindFlag += HIT_KIND_INVALID; /* expected-error {{use of undeclared identifier 'HIT_KIND_INVALID'; did you mean 'HIT_KIND_NONE'?}} */ BuiltInTriangleIntersectionAttributes attr; diff --git a/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl b/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl new file mode 100644 index 0000000000..233ce103ce --- /dev/null +++ b/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl @@ -0,0 +1,42 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +RaytracingAccelerationStructure Scene : register(t0, space0); + +struct[raypayload] RayPayload { + float4 color : write(caller) : read(closesthit); +}; + +[shader("raygeneration")] void MyRaygenShader() { + // Set the ray's extents. + RayDesc ray; + ray.Origin = float3(0, 0, 1); + ray.Direction = float3(1, 0, 0); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = {float4(0, 0, 0, 0)}; + + { + using namespace dx; + HitObject hit = + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); + } + + { + int sortKey = 1; + MaybeReorderThread(sortKey, 1); // expected-error{{use of undeclared identifier 'MaybeReorderThread'; did you mean 'MaybeReorderThread'?}} + } + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); // expected-error{{use of undeclared identifier 'MaybeReorderThread'; did you mean 'MaybeReorderThread'?}} + + HitObject hit = // expected-error{{unknown type name 'HitObject'}} + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + HitObject::Invoke(hit, payload); // expected-error{{use of undeclared identifier 'HitObject'}} +} diff --git a/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl b/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl new file mode 100644 index 0000000000..093e86b2fa --- /dev/null +++ 
b/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl @@ -0,0 +1,56 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck %s + +RaytracingAccelerationStructure Scene : register(t0, space0); + +struct[raypayload] RayPayload { + float4 color : write(caller) : read(closesthit); +}; + +namespace MyStuff { + using namespace dx; + void MaybeReorderThread(int2 V); +} + +void MyStuff::MaybeReorderThread(int2 V) { + MaybeReorderThread(V.x, V.y); +} + +[shader("raygeneration")] void MyRaygenShader() { + // Set the ray's extents. + RayDesc ray; + ray.Origin = float3(0, 0, 1); + ray.Direction = float3(1, 0, 0); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = {float4(0, 0, 0, 0)}; + + using namespace dx; + HitObject hit = + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); + + HitObject::Invoke(hit, payload); + + MyStuff::MaybeReorderThread(int2(sortKey, 1)); +} + +// Find the DeclRefExpr for the call to MaybeReorderThread: + +// CHECK: FunctionDecl [[MyDeclAddr:0x[0-9a-fA-F]+]] parent {{.*}} used MaybeReorderThread 'void (int2)' +// CHECK: DeclRefExpr {{.*}} 'void (unsigned int, unsigned int)' lvalue Function [[DeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 'void (unsigned int, unsigned int)' + +// CHECK: FunctionDecl [[DeclAddr]] <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} CoherenceHint 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 359 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.9 0 0 "" + +// CHECK-LABEL: MyRaygenShader + +// CHECK: DeclRefExpr {{.*}} 'void (unsigned int, unsigned int)' lvalue Function [[DeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 'void (unsigned int, unsigned int)' +// CHECK: DeclRefExpr {{.*}} 'void (int2)' lvalue Function [[MyDeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 
'void (int2)' + From 020fbdf5056a798387d85ddb95fe630389125256 Mon Sep 17 00:00:00 2001 From: iOrange Date: Mon, 21 Jul 2025 13:26:35 -0400 Subject: [PATCH 90/93] [SPIR-V] Fixed a crash if encounter constant buffer fields with overlapping register assignments (#7636) The issue: a simple vertex shader like this ``` uniform float4x4 gMVP : register(c0); uniform float4 gFoo : register(c5); uniform float4 gBar : register(c5); float4 main(float4 pos : POSITION) : SV_Position { return mul(gMVP, pos * gFoo + gBar); } ``` results in an internal crash ``` dxc.exe -spirv -T vs_6_2 -E main test.hlsl -Fo test.spirv Internal compiler error: access violation. Attempted to read from address 0x0000000000000000 ``` This happens because `LowerTypeVisitor` tries to assign offsets to fields without explicit locations. It sorts the fields first, which puts the fields that have a register assignment into a map. Since that map is a `std::map`, if there are fields with the same `register` number only the first one is inserted and the others are left out, resulting in nullptrs in the output vector. The contents of that vector are read later on, which causes the crash. My change fixes the crash and tries to output somewhat useful information about the compilation failure. I hope this helps you in fixing it properly, or you can take it as it is.
--- tools/clang/lib/SPIRV/LowerTypeVisitor.cpp | 66 +++++++++++-------- tools/clang/lib/SPIRV/LowerTypeVisitor.h | 6 ++ .../test/CodeGenSPIRV/cbuffer.overlap.hlsl | 11 ++++ 3 files changed, 55 insertions(+), 28 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/cbuffer.overlap.hlsl diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index 9d1f1fff60..45d04e8160 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -37,33 +37,6 @@ inline uint32_t roundToPow2(uint32_t val, uint32_t pow2) { } // end anonymous namespace -// This method sorts a field list in the following order: -// - fields with register annotation first, sorted by register index. -// - then fields without annotation, in order of declaration. -static std::vector -sortFields(llvm::ArrayRef fields) { - std::vector output; - output.resize(fields.size()); - - auto back_inserter = output.rbegin(); - std::map fixed_fields; - for (auto it = fields.rbegin(); it < fields.rend(); it++) { - if (it->registerC) { - fixed_fields.insert({it->registerC->RegisterNumber, &*it}); - } else { - *back_inserter = &*it; - back_inserter++; - } - } - - auto front_inserter = output.begin(); - for (const auto &item : fixed_fields) { - *front_inserter = item.second; - front_inserter++; - } - return output; -} - static void setDefaultFieldSize(const AlignmentSizeCalculator &alignmentCalc, const SpirvLayoutRule rule, const HybridStructType::FieldInfo *currentField, @@ -292,6 +265,37 @@ bool LowerTypeVisitor::visitInstruction(SpirvInstruction *instr) { return true; } +std::vector LowerTypeVisitor::sortFields( + llvm::ArrayRef fields) { + std::vector output; + output.resize(fields.size()); + + auto back_inserter = output.rbegin(); + std::map fixed_fields; + for (auto it = fields.rbegin(); it < fields.rend(); it++) { + if (it->registerC) { + auto insertionResult = + fixed_fields.insert({it->registerC->RegisterNumber, 
&*it}); + if (!insertionResult.second) { + emitError( + "field \"%0\" at register(c%1) overlaps with previous members", + it->registerC->Loc) + << it->name << it->registerC->RegisterNumber; + } + } else { + *back_inserter = &*it; + back_inserter++; + } + } + + auto front_inserter = output.begin(); + for (const auto &item : fixed_fields) { + *front_inserter = item.second; + front_inserter++; + } + return output; +} + const SpirvType *LowerTypeVisitor::lowerType(const SpirvType *type, SpirvLayoutRule rule, SourceLocation loc) { @@ -1378,12 +1382,19 @@ LowerTypeVisitor::populateLayoutInformation( llvm::SmallVector loweredFields; llvm::DenseMap fieldToIndexMap; + llvm::SmallVector result; + // This stores the index of the field in the actual SPIR-V construct. // When bitfields are merged, this index will be the same for merged fields. uint32_t fieldIndexInConstruct = 0; for (size_t i = 0, iPrevious = -1; i < sortedFields.size(); iPrevious = i++) { const size_t fieldIndexForMap = loweredFields.size(); + // Can happen if sortFields runs over fields with the same register(c#) + if (!sortedFields[i]) { + return result; + } + loweredFields.emplace_back(fieldVisitor( (iPrevious < loweredFields.size() ? &loweredFields[iPrevious] : nullptr), @@ -1397,7 +1408,6 @@ LowerTypeVisitor::populateLayoutInformation( } // Re-order the sorted fields back to their original order. - llvm::SmallVector result; for (const auto &field : fields) result.push_back(loweredFields[fieldToIndexMap[&field]]); return result; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 26b6e44f6d..276e6c9232 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -62,6 +62,12 @@ class LowerTypeVisitor : public Visitor { return astContext.getDiagnostics().Report(srcLoc, diagId); } + // This method sorts a field list in the following order: + // - fields with register annotation first, sorted by register index. 
+ // - then fields without annotation, in order of declaration. + std::vector + sortFields(llvm::ArrayRef fields); + /// Lowers the given Hybrid type into a SPIR-V type. /// /// Uses the above lowerType method to lower the QualType components of hybrid diff --git a/tools/clang/test/CodeGenSPIRV/cbuffer.overlap.hlsl b/tools/clang/test/CodeGenSPIRV/cbuffer.overlap.hlsl new file mode 100644 index 0000000000..4a2e72c7b5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/cbuffer.overlap.hlsl @@ -0,0 +1,11 @@ +// RUN: not %dxc -T vs_6_2 -E main -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: error: field "gFoo" at register(c5) overlaps with previous members + +uniform float4x4 gMVP : register(c0); +uniform float4 gFoo : register(c5); +uniform float4 gBar : register(c5); + +float4 main(float4 pos : POSITION) : SV_Position { + return mul(gMVP, pos * gFoo + gBar); +} From 5ceaf84f8e033cce3e195237bbc772c80b9b2540 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 22 Jul 2025 10:38:54 -0700 Subject: [PATCH 91/93] [ExecutionTests] Fix shader source to prevent OOB access for ExecutionTests::AtomicsShared64Test (#7641) Fixes #5198. ExecutionTests::AtomicsShared64Test could sporadically fail. g_sint64Share is indexed with ix%3 + 1, which could lead to an out of bounds access. Increase the size of g_sint64Share to 4 and ensure all values are initialized. 
--- tools/clang/unittests/HLSLExec/ShaderOpArith.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index a782bd97ae..dbea8e2aaf 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -1976,7 +1976,7 @@ RWStructuredBuffer g_shareXchg64Buf : register(u5); groupshared uint64_t g_uint64Share[6]; - groupshared int64_t g_sint64Share[3]; + groupshared int64_t g_sint64Share[4]; groupshared uint64_t g_xchg64Share[64]; #define VEC_CALL(op, uav, ix, val) op(uav[ix*stride], val); @@ -2046,7 +2046,7 @@ // Zero-init shared memory, with special cases if (ix < 6) g_uint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : ix == 3 ? ~0ULL : 0; - if (ix < 3) + if (ix < 4) g_sint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : 0; if (ix < 64) g_xchg64Share[ix] = 0; @@ -2552,11 +2552,11 @@ void InitSharedMem(uint ix) { // Zero-init shared memory, with special cases - if (ix < 6) + if (ix < 7) g_uintShare[ix] = ix == 1 ? 99999999 : ix == 3 ? -1 : 0; - if (ix < 3) + if (ix < 4) g_sintShare[ix] = ix == 1 ? 99999999 : 0; - if (ix < 64) + if (ix < 65) g_xchgShare[ix] = 0; GroupMemoryBarrierWithGroupSync(); From c78ed99263d4fba32764b54b49beba075973c851 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Tue, 22 Jul 2025 19:58:51 +0200 Subject: [PATCH 92/93] [SER] GetAttributes(out udt) instead of templated return (#7606) ``` Old: T HitObject::GetAttributes() New: void HitObject::GetAttributes(out udt) ``` - remove HitObject::GetAttributes template code path from DeduceTemplateArgumentsForHLSL - cleanup intersection attribute diagnostic code path - adjust GetAttributes calls and expected AST, HLOps in tests (DXIL unaffected) Closes #7534 This is a breaking change. 
Merge and release must be coordinated with: - hlsl-spec change (https://github.com/microsoft/hlsl-specs/issues/495) - HLK releases (SM6.9 preview tests use old signature) --------- Co-authored-by: Tex Riddell --- include/dxc/HLSL/HLOperations.h | 3 + lib/HLSL/HLOperationLower.cpp | 17 +- .../Scalar/ScalarReplAggregatesHLSL.cpp | 4 + tools/clang/include/clang/Sema/Sema.h | 5 +- tools/clang/lib/Sema/SemaChecking.cpp | 2 +- tools/clang/lib/Sema/SemaExpr.cpp | 2 - tools/clang/lib/Sema/SemaHLSL.cpp | 236 ++++++++---------- .../HitObject/hitobject_attributes.hlsl | 3 +- .../hitobject_attributes_builtin.hlsl | 5 +- .../DxilGen/hitobject_attributes_dxilgen.ll | 103 +++++--- .../HitObject/hitobject_attributes.hlsl | 14 +- .../hitobject_attributes_invalid_longvec.hlsl | 5 +- .../hitobject_attributes_invalid_udt.hlsl | 6 +- .../types/invalid-hitobject-decls-struct.hlsl | 2 +- .../invalid-hitobject-decls-templated.hlsl | 2 +- .../hlsl/types/invalid-longvec-decls.hlsl | 2 +- utils/hct/gen_intrin_main.txt | 2 +- 17 files changed, 209 insertions(+), 204 deletions(-) diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index 0da9804ecb..79cbadc42c 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -462,6 +462,9 @@ const unsigned kHitObjectInvoke_PayloadOpIdx = 2; const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; const unsigned kHitObjectFromRayQuery_WithAttrs_NumOp = 5; +// HitObject::GetAttributes +const unsigned kHitObjectGetAttributes_AttributeOpIdx = 2; + // Linear Algebra Operations // MatVecMul diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 2033533327..3c062475af 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -6378,18 +6378,11 @@ Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, Value *HitObjectPtr = CI->getArgOperand(1); Value *HitObject = Builder.CreateLoad(HitObjectPtr); - - Type *AttrTy = 
cast(CI->getType())->getPointerElementType(); - - IRBuilder<> EntryBuilder( - dxilutil::FindAllocaInsertionPt(CI->getParent()->getParent())); - unsigned AttrAlign = Helper.dataLayout.getABITypeAlignment(AttrTy); - AllocaInst *AttrMem = EntryBuilder.CreateAlloca(AttrTy); - AttrMem->setAlignment(AttrAlign); - Constant *opArg = OP->GetU32Const((unsigned)OpCode); - TrivialDxilOperation(OpCode, {opArg, HitObject, AttrMem}, CI->getType(), - Helper.voidTy, OP, Builder); - return AttrMem; + Value *AttrOutPtr = + CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx); + TrivialDxilOperation(OpCode, {nullptr, HitObject, AttrOutPtr}, + AttrOutPtr->getType(), CI, OP); + return nullptr; } Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index d8746862bc..54250ad36d 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -1518,6 +1518,10 @@ static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { if (OpIdx == HLOperandIndex::kHitObjectInvoke_PayloadOpIdx) return true; break; + case IntrinsicOp::MOP_DxHitObject_GetAttributes: + if (OpIdx == HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx) + return true; + break; default: break; } diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 5e20f6f0f8..6eb0aba801 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -3811,8 +3811,7 @@ class Sema { void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, SourceLocation Loc); - void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, - const FunctionProtoType *Proto); + void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); void DiagnoseReachableHLSLCall(CallExpr *CE, const 
hlsl::ShaderModel *SM, hlsl::DXIL::ShaderKind EntrySK, hlsl::DXIL::NodeLaunchType NodeLaunchTy, @@ -8831,8 +8830,6 @@ class Sema { bool AllowOnePastEnd=true, bool IndexNegated=false); // HLSL Change Starts - checking array subscript access to vector or matrix member void CheckHLSLArrayAccess(const Expr *expr); - bool CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall); - bool CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); // HLSL Change ends void CheckArrayAccess(const Expr *E); // Used to grab the relevant information from a FormatAttr and a diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 9e64732336..e3932220f9 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -1426,7 +1426,7 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, CheckMemaccessArguments(TheCall, CMId, FnInfo); #endif // HLSL Change Ends - CheckHLSLFunctionCall(FDecl, TheCall, Proto); // HLSL Change + CheckHLSLFunctionCall(FDecl, TheCall); // HLSL Change return false; } diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index cbc4ac37ab..cccf711126 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -5349,8 +5349,6 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); - if (CheckHLSLFunctionCall(FDecl, TheCall)) - return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index dcb6142858..656dfb401f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -10829,18 +10829,24 @@ HLSLExternalSource::ApplyTypeSpecSignToParsedType(clang::QualType &type, } } -bool DiagnoseIntersectionAttributes(Sema &S, SourceLocation Loc, QualType Ty) { - // 
Must be a UDT +bool CheckIntersectionAttributeArg(Sema &S, Expr *E) { + SourceLocation Loc = E->getExprLoc(); + QualType Ty = E->getType(); + + // Identify problematic fields first (high diagnostic accuracy, may miss some + // invalid cases) + const TypeDiagContext DiagContext = TypeDiagContext::Attributes; + if (DiagnoseTypeElements(S, Loc, Ty, DiagContext, DiagContext)) + return true; + + // Must be a UDT (low diagnostic accuracy, catches remaining invalid cases) if (Ty.isNull() || !hlsl::IsHLSLCopyableAnnotatableRecord(Ty)) { S.Diag(Loc, diag::err_payload_attrs_must_be_udt) << /*payload|attributes|callable*/ 1 << /*parameter %2|type*/ 1; - return false; + return true; } - const TypeDiagContext DiagContext = TypeDiagContext::Attributes; - if (DiagnoseTypeElements(S, Loc, Ty, DiagContext, DiagContext)) - return false; - return true; + return false; } Sema::TemplateDeductionResult @@ -10951,7 +10957,6 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( LPCSTR tableName = cursor.GetTableName(); // Currently only intrinsic we allow for explicit template arguments are // for Load/Store for ByteAddressBuffer/RWByteAddressBuffer - // and HitObject::GetAttributes with user-defined intersection attributes. 
// Check Explicit template arguments UINT intrinsicOp = (*cursor)->Op; @@ -10966,11 +10971,9 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( IsBABLoad = intrinsicOp == (UINT)IntrinsicOp::MOP_Load; IsBABStore = intrinsicOp == (UINT)IntrinsicOp::MOP_Store; } - bool IsHitObjectGetAttributes = - intrinsicOp == (UINT)IntrinsicOp::MOP_DxHitObject_GetAttributes; if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() >= 1) { SourceLocation Loc = ExplicitTemplateArgs->getLAngleLoc(); - if (!IsBABLoad && !IsBABStore && !IsHitObjectGetAttributes) { + if (!IsBABLoad && !IsBABStore) { getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_unsupported) << intrinsicName; return Sema::TemplateDeductionResult::TDK_Invalid; @@ -11000,10 +11003,6 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( return Sema::TemplateDeductionResult::TDK_Invalid; } } - if (IsHitObjectGetAttributes && - !DiagnoseIntersectionAttributes(*getSema(), Loc, - functionTemplateTypeArg)) - return Sema::TemplateDeductionResult::TDK_Invalid; } else if (IsBABStore) { // Prior to HLSL 2018, Store operation only stored scalar uint. 
if (!Is2018) { @@ -12277,9 +12276,78 @@ static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, } #endif +static bool isRelatedDeclMarkedNointerpolation(Expr *E) { + if (!E) + return false; + E = E->IgnoreCasts(); + if (auto *DRE = dyn_cast(E)) + return DRE->getDecl()->hasAttr(); + + if (auto *ME = dyn_cast(E)) + return ME->getMemberDecl()->hasAttr() || + isRelatedDeclMarkedNointerpolation(ME->getBase()); + + if (auto *HVE = dyn_cast(E)) + return isRelatedDeclMarkedNointerpolation(HVE->getBase()); + + if (auto *ASE = dyn_cast(E)) + return isRelatedDeclMarkedNointerpolation(ASE->getBase()); + + return false; +} + +static bool CheckIntrinsicGetAttributeAtVertex(Sema &S, FunctionDecl *FDecl, + CallExpr *TheCall) { + assert(TheCall->getNumArgs() > 0); + auto argument = TheCall->getArg(0)->IgnoreCasts(); + + if (!isRelatedDeclMarkedNointerpolation(argument)) { + S.Diag(argument->getExprLoc(), diag::err_hlsl_parameter_requires_attribute) + << 0 << FDecl->getName() << "nointerpolation"; + return true; + } + + return false; +} + +static bool CheckNoInterpolationParams(Sema &S, FunctionDecl *FDecl, + CallExpr *TheCall) { + // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want + // to limit the scope, and fail gracefully in some cases. 
+ if (!S.getLangOpts().SPIRV) + return false; + + bool error = false; + for (unsigned i = 0; i < FDecl->getNumParams(); i++) { + assert(i < TheCall->getNumArgs()); + + if (!FDecl->getParamDecl(i)->hasAttr()) + continue; + + if (!isRelatedDeclMarkedNointerpolation(TheCall->getArg(i))) { + S.Diag(TheCall->getArg(i)->getExprLoc(), + diag::err_hlsl_parameter_requires_attribute) + << i << FDecl->getName() << "nointerpolation"; + error = true; + } + } + + return error; +} + +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema &S, Expr *Arg) { + const TypeDiagContext DiagContext = + TypeDiagContext::UserDefinedStructParameter; + return DiagnoseTypeElements(S, Arg->getExprLoc(), Arg->getType(), DiagContext, + DiagContext); +} + // Check HLSL call constraints, not fatal to creating the AST. -void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, - const FunctionProtoType *Proto) { +void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { + if (CheckNoInterpolationParams(*this, FDecl, TheCall)) + return; + HLSLIntrinsicAttr *IntrinsicAttr = FDecl->getAttr(); if (!IntrinsicAttr) return; @@ -12307,6 +12375,28 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate: CheckOuterProductAccumulateCall(*this, FDecl, TheCall); break; + case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: + // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want + // to limit the scope, and fail gracefully in some cases. 
+ if (!getLangOpts().SPIRV) + return; + CheckIntrinsicGetAttributeAtVertex(*this, FDecl, TheCall); + break; + case hlsl::IntrinsicOp::IOP_DispatchMesh: + CheckUDTIntrinsicArg(*this, TheCall->getArg(3)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_CallShader: + CheckUDTIntrinsicArg(*this, TheCall->getArg(1)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_TraceRay: + CheckUDTIntrinsicArg(*this, TheCall->getArg(7)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_ReportHit: + CheckIntersectionAttributeArg(*this, TheCall->getArg(2)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::MOP_DxHitObject_GetAttributes: + CheckIntersectionAttributeArg(*this, TheCall->getArg(0)->IgnoreCasts()); + break; #ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: CheckVKBufferPointerCast(*this, FDecl, TheCall, false); @@ -16841,118 +16931,6 @@ QualType Sema::getHLSLDefaultSpecialization(TemplateDecl *Decl) { return QualType(); } -static bool isRelatedDeclMarkedNointerpolation(Expr *E) { - if (!E) - return false; - E = E->IgnoreCasts(); - if (auto *DRE = dyn_cast(E)) - return DRE->getDecl()->hasAttr(); - - if (auto *ME = dyn_cast(E)) - return ME->getMemberDecl()->hasAttr() || - isRelatedDeclMarkedNointerpolation(ME->getBase()); - - if (auto *HVE = dyn_cast(E)) - return isRelatedDeclMarkedNointerpolation(HVE->getBase()); - - if (auto *ASE = dyn_cast(E)) - return isRelatedDeclMarkedNointerpolation(ASE->getBase()); - - return false; -} - -// Verify that user-defined intrinsic struct args contain no long vectors -static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - const TypeDiagContext DiagContext = - TypeDiagContext::UserDefinedStructParameter; - return DiagnoseTypeElements(*S, Arg->getExprLoc(), Arg->getType(), - DiagContext, DiagContext); -} - -static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, - CallExpr *TheCall) { - assert(TheCall->getNumArgs() > 0); - auto argument = 
TheCall->getArg(0)->IgnoreCasts(); - - if (!isRelatedDeclMarkedNointerpolation(argument)) { - S->Diag(argument->getExprLoc(), diag::err_hlsl_parameter_requires_attribute) - << 0 << FDecl->getName() << "nointerpolation"; - return true; - } - - return false; -} - -bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { - auto attr = FDecl->getAttr(); - - if (!attr) - return false; - - if (!IsBuiltinTable(attr->getGroup())) - return false; - - switch (hlsl::IntrinsicOp(attr->getOpcode())) { - case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: - // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want - // to limit the scope, and fail gracefully in some cases. - if (!getLangOpts().SPIRV) - return false; - // This should never happen for SPIR-V. But on the DXIL side, extension can - // be added by inserting new intrinsics, meaning opcodes can collide with - // existing ones. See the ExtensionTest.EvalAttributeCollision test. - assert(FDecl->getName() == "GetAttributeAtVertex"); - return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); - case hlsl::IntrinsicOp::IOP_DispatchMesh: - assert(TheCall->getNumArgs() > 3); - assert(FDecl->getName() == "DispatchMesh"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_CallShader: - assert(TheCall->getNumArgs() > 1); - assert(FDecl->getName() == "CallShader"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_TraceRay: - assert(TheCall->getNumArgs() > 7); - assert(FDecl->getName() == "TraceRay"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_ReportHit: - assert(TheCall->getNumArgs() > 2); - assert(FDecl->getName() == "ReportHit"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); - default: - break; - } - - return false; -} - -bool Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { - if 
(hlsl::IsIntrinsicOp(FDecl) && CheckHLSLIntrinsicCall(FDecl, TheCall)) - return true; - - // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want - // to limit the scope, and fail gracefully in some cases. - if (!getLangOpts().SPIRV) - return false; - - bool error = false; - for (unsigned i = 0; i < FDecl->getNumParams(); i++) { - assert(i < TheCall->getNumArgs()); - - if (!FDecl->getParamDecl(i)->hasAttr()) - continue; - - if (!isRelatedDeclMarkedNointerpolation(TheCall->getArg(i))) { - Diag(TheCall->getArg(i)->getExprLoc(), - diag::err_hlsl_parameter_requires_attribute) - << i << FDecl->getName() << "nointerpolation"; - error = true; - } - } - - return error; -} - namespace hlsl { static bool nodeInputIsCompatible(DXIL::NodeIOKind IOType, diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl index 03cefe8e48..55ef023a2f 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl @@ -20,7 +20,8 @@ CustomAttrs { [shader("raygeneration")] void main() { dx::HitObject hit; - CustomAttrs attrs = hit.GetAttributes(); + CustomAttrs attrs; + hit.GetAttributes(attrs); float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y; outbuf.Store(0, sum); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl index a096bb6f11..59140ab37e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl @@ -5,7 +5,7 @@ // as a template argument to GetAttributes. // For -fcgl, just check the form of the HL call. 
-// FCGL: %{{[^ ]+}} = call %struct.BuiltInTriangleIntersectionAttributes* @"dx.hl.op..%struct.BuiltInTriangleIntersectionAttributes* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.BuiltInTriangleIntersectionAttributes*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}, %struct.BuiltInTriangleIntersectionAttributes* %{{[^ ]+}}) // CHECK: %[[ATTR:[^ ]+]] = alloca %struct.BuiltInTriangleIntersectionAttributes // CHECK: call void @dx.op.hitObject_Attributes.struct.BuiltInTriangleIntersectionAttributes(i32 289, %dx.types.HitObject %{{[^ ]+}}, %struct.BuiltInTriangleIntersectionAttributes* nonnull %[[ATTR]]) @@ -34,7 +34,8 @@ void MyRaygenShader() dx::HitObject hit = dx::HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, ray, payload); - MyAttribs attr = hit.GetAttributes(); + MyAttribs attr; + hit.GetAttributes(attr); payload.color += float4(attr,0,1); // Write the raytraced color to the output texture. 
diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll index 4887be4d58..3488a3df03 100644 --- a/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll @@ -30,30 +30,41 @@ target triple = "dxil-ms-dx" define void @"\01?main@@YAXXZ"() #0 { entry: %hit = alloca %dx.types.HitObject, align 4 - %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !21 ; line:22 col:3 - call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !21 ; line:22 col:3 - %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !25 ; line:22 col:17 - %2 = call %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %hit), !dbg !26 ; line:23 col:23 - %3 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %2, i32 0, i32 0, !dbg !26 ; line:23 col:23 - %4 = load <4 x float>, <4 x float>* %3, !dbg !26 ; line:23 col:23 - %5 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %2, i32 0, i32 1, !dbg !26 ; line:23 col:23 - %6 = load i32, i32* %5, !dbg !26 ; line:23 col:23 - %7 = extractelement <4 x float> %4, i32 0, !dbg !27 ; line:24 col:15 - %8 = extractelement <4 x float> %4, i32 1, !dbg !28 ; line:24 col:27 - %add = fadd float %7, %8, !dbg !29 ; line:24 col:25 - %9 = extractelement <4 x float> %4, i32 2, !dbg !30 ; line:24 col:39 - %add4 = fadd float %add, %9, !dbg !31 ; line:24 col:37 - %10 = extractelement <4 x float> %4, i32 3, !dbg !32 ; line:24 col:51 - %add6 = fadd float %add4, %10, !dbg !33 ; line:24 col:49 - %conv = sitofp i32 %6 to float, !dbg !34 ; line:24 col:63 - %add7 = fadd float %add6, %conv, !dbg !35 ; line:24 col:61 - %11 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !36 ; line:25 
col:3 - %12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %11), !dbg !36 ; line:25 col:3 - %13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %12, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !36 ; line:25 col:3 - call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %13, i32 0, float %add7), !dbg !36 ; line:25 col:3 - %14 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !37 ; line:26 col:1 - call void @llvm.lifetime.end(i64 4, i8* %14) #0, !dbg !37 ; line:26 col:1 - ret void, !dbg !37 ; line:26 col:1 + %attrs = alloca %struct.CustomAttrs, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !21 ; line:29 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !21 ; line:29 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !25 ; line:29 col:17 + %2 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !26 ; line:30 col:3 + call void @llvm.lifetime.start(i64 20, i8* %2) #0, !dbg !26 ; line:30 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32 364, %dx.types.HitObject* %hit, %struct.CustomAttrs* %attrs), !dbg !27 ; line:31 col:3 + %v = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !28 ; line:32 col:21 + %3 = load <4 x float>, <4 x float>* %v, align 4, !dbg !29 ; line:32 col:15 + %4 = extractelement <4 x float> %3, i32 0, !dbg !29 ; line:32 col:15 + %v1 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !30 ; line:32 col:33 + %5 = load <4 x float>, <4 x float>* %v1, align 4, !dbg !31 ; line:32 col:27 + %6 = extractelement <4 x float> %5, 
i32 1, !dbg !31 ; line:32 col:27 + %add = fadd float %4, %6, !dbg !32 ; line:32 col:25 + %v2 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !33 ; line:32 col:45 + %7 = load <4 x float>, <4 x float>* %v2, align 4, !dbg !34 ; line:32 col:39 + %8 = extractelement <4 x float> %7, i32 2, !dbg !34 ; line:32 col:39 + %add3 = fadd float %add, %8, !dbg !35 ; line:32 col:37 + %v4 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !36 ; line:32 col:57 + %9 = load <4 x float>, <4 x float>* %v4, align 4, !dbg !37 ; line:32 col:51 + %10 = extractelement <4 x float> %9, i32 3, !dbg !37 ; line:32 col:51 + %add5 = fadd float %add3, %10, !dbg !38 ; line:32 col:49 + %y = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !39 ; line:32 col:69 + %11 = load i32, i32* %y, align 4, !dbg !39, !tbaa !40 ; line:32 col:69 + %conv = sitofp i32 %11 to float, !dbg !44 ; line:32 col:63 + %add6 = fadd float %add5, %conv, !dbg !45 ; line:32 col:61 + %12 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !46 ; line:33 col:3 + %13 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %12), !dbg !46 ; line:33 col:3 + %14 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %13, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !46 ; line:33 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %14, i32 0, float %add6), !dbg !46 ; line:33 col:3 + %15 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !47 ; line:34 col:1 + call void @llvm.lifetime.end(i64 20, i8* %15) #0, !dbg !47 ; line:34 col:1 + %16 = bitcast 
%dx.types.HitObject* %hit to i8*, !dbg !47 ; line:34 col:1 + call void @llvm.lifetime.end(i64 4, i8* %16) #0, !dbg !47 ; line:34 col:1 + ret void, !dbg !47 ; line:34 col:1 } ; Function Attrs: nounwind @@ -66,7 +77,7 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) #0 declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 ; Function Attrs: nounwind -declare %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, %struct.CustomAttrs*) #0 ; Function Attrs: nounwind declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 @@ -111,20 +122,30 @@ attributes #1 = { nounwind readnone } !18 = !{void ()* @"\01?main@@YAXXZ", i32 7} !19 = !{i32 -2147483584} !20 = !{i32 -1} -!21 = !DILocation(line: 22, column: 3, scope: !22) -!22 = !DISubprogram(name: "main", scope: !23, file: !23, line: 21, type: !24, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") -!23 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl", directory: "") +!21 = !DILocation(line: 29, column: 3, scope: !22) +!22 = !DISubprogram(name: "main", scope: !23, file: !23, line: 28, type: !24, isLocal: false, isDefinition: true, scopeLine: 28, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!23 = !DIFile(filename: "tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl", directory: "") !24 = !DISubroutineType(types: !13) -!25 = !DILocation(line: 22, column: 17, scope: !22) -!26 = !DILocation(line: 23, column: 23, scope: !22) -!27 = !DILocation(line: 24, column: 15, scope: !22) -!28 = !DILocation(line: 24, column: 27, scope: !22) -!29 = !DILocation(line: 
24, column: 25, scope: !22) -!30 = !DILocation(line: 24, column: 39, scope: !22) -!31 = !DILocation(line: 24, column: 37, scope: !22) -!32 = !DILocation(line: 24, column: 51, scope: !22) -!33 = !DILocation(line: 24, column: 49, scope: !22) -!34 = !DILocation(line: 24, column: 63, scope: !22) -!35 = !DILocation(line: 24, column: 61, scope: !22) -!36 = !DILocation(line: 25, column: 3, scope: !22) -!37 = !DILocation(line: 26, column: 1, scope: !22) +!25 = !DILocation(line: 29, column: 17, scope: !22) +!26 = !DILocation(line: 30, column: 3, scope: !22) +!27 = !DILocation(line: 31, column: 3, scope: !22) +!28 = !DILocation(line: 32, column: 21, scope: !22) +!29 = !DILocation(line: 32, column: 15, scope: !22) +!30 = !DILocation(line: 32, column: 33, scope: !22) +!31 = !DILocation(line: 32, column: 27, scope: !22) +!32 = !DILocation(line: 32, column: 25, scope: !22) +!33 = !DILocation(line: 32, column: 45, scope: !22) +!34 = !DILocation(line: 32, column: 39, scope: !22) +!35 = !DILocation(line: 32, column: 37, scope: !22) +!36 = !DILocation(line: 32, column: 57, scope: !22) +!37 = !DILocation(line: 32, column: 51, scope: !22) +!38 = !DILocation(line: 32, column: 49, scope: !22) +!39 = !DILocation(line: 32, column: 69, scope: !22) +!40 = !{!41, !41, i64 0} +!41 = !{!"int", !42, i64 0} +!42 = !{!"omnipotent char", !43, i64 0} +!43 = !{!"Simple C/C++ TBAA"} +!44 = !DILocation(line: 32, column: 63, scope: !22) +!45 = !DILocation(line: 32, column: 61, scope: !22) +!46 = !DILocation(line: 33, column: 3, scope: !22) +!47 = !DILocation(line: 34, column: 1, scope: !22) diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl index 79db78cdaf..609d94f291 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl @@ -1,15 +1,20 @@ // RUN: %dxc -T lib_6_9 -E main %s 
-ast-dump-implicit | FileCheck %s --check-prefix AST // RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL + // AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetAttributes // AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult -// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetAttributes 'TResult () const' -// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetAttributes 'CustomAttrs &()' extern +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAttributes +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetAttributes 'TResult (TAttributes &) const' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Attributes 'TAttributes &' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetAttributes 'void (CustomAttrs &)' extern +// AST-NEXT: | | | |-TemplateArgument type 'void' // AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> GetAttributes 'CustomAttrs &&__restrict' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 364 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" -// FCGL: %{{[^ ]+}} = call %struct.CustomAttrs* @"dx.hl.op..%struct.CustomAttrs* (i32, %dx.types.HitObject*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}, %struct.CustomAttrs* %{{[^ ]+}}) RWByteAddressBuffer outbuf; @@ -22,7 +27,8 @@ CustomAttrs { [shader("raygeneration")] void main() { dx::HitObject hit; - CustomAttrs attrs = hit.GetAttributes(); + CustomAttrs attrs; + hit.GetAttributes(attrs); float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y; outbuf.Store(0, sum); } diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl index 240ccfb9d4..97bb81a7cb 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl @@ -9,6 +9,7 @@ CustomAttrs { [shader("raygeneration")] void main() { dx::HitObject hit; - // expected-error@+1{{vectors of over 4 elements in attributes are not supported}} - CustomAttrs attrs = hit.GetAttributes(); + // expected-error@+2{{vectors of over 4 elements in attributes are not supported}} + CustomAttrs attrs; + hit.GetAttributes(attrs); } diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl index 0f27f089e4..f8935676c5 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl @@ -9,6 +9,8 @@ CustomAttrs { [shader("raygeneration")] void main() { dx::HitObject hit; - // expected-error@+1{{attributes type must be a user-defined type composed of only numeric types}} - CustomAttrs attrs = hit.GetAttributes(); + CustomAttrs attrs; + hit.GetAttributes(attrs); + // expected-error@-1{{vectors of over 4 elements in attributes are not supported}} + // expected-error@-2{{attributes type must be a user-defined type composed of only numeric types}} } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl index b6b28700a9..c852d17a1a 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl @@ -264,7 +264,7 @@ void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; bool bReported = ReportHit(hitT, 0, attr); - // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined 
struct parameter}} + // expected-error@-1{{object 'dx::HitObject' is not allowed in attributes}} // expected-note@16{{'dx::HitObject' field declared here}} } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl index 4ffd53878d..c2303a8608 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl @@ -275,7 +275,7 @@ void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; bool bReported = ReportHit(hitT, 0, attr); - // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-error@-1{{object 'dx::HitObject' is not allowed in attributes}} // expected-note@40{{'dx::HitObject' field declared here}} } diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 0604feeaec..96c5d4b5f4 100644 --- a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -146,7 +146,7 @@ void Miss(inout RTTYPE payload){ // expected-error{{vectors of over 4 elements i void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; - bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in attributes are not supported}} } [shader("callable")] diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index ae8df55a0c..f2c0cc5e2e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1131,7 +1131,7 @@ namespace DxHitObjectMethods { uint [[rn,class_prefix,min_sm=6.9]] GetPrimitiveIndex(); uint 
[[rn,class_prefix,min_sm=6.9]] GetHitKind(); uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); - $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] GetAttributes(out udt Attributes); void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); } namespace From d64d34ced89e2ac407d151b8f04516684fc41ed0 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Wed, 23 Jul 2025 17:33:18 -0700 Subject: [PATCH 93/93] Merge Long Vector Trigonometric Op Exec Tests (#7665) Resolves #7629 Merge the long vector trig op exec tests from staging-sm6.9. Verified locally against WARP: `F:\hlsl.bin\TAEF\x64\te.exe "F:\hlsl.bin\Debug\bin\ExecHLSLTests.dll" /name:LongVector::OpTest::trig* /p:D3D12SDKVersion=1 /p:"ExperimentalShaders=*"` --- .../unittests/HLSLExec/LongVectorOpTable.xml | 94 ++++++++++++++ .../clang/unittests/HLSLExec/LongVectors.cpp | 21 +++ tools/clang/unittests/HLSLExec/LongVectors.h | 49 +++++++ .../clang/unittests/HLSLExec/LongVectors.tpp | 121 ++++++++++++++++++ 4 files changed, 285 insertions(+) diff --git a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml index df8fe250c9..f3b2e62dbc 100644 --- a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml +++ b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml @@ -596,4 +596,98 @@ float64 + + + + String + + String + String + + + + TrigonometricOpType_Acos + float16 + TrigonometricInputValueSet_RangeOne + + + TrigonometricOpType_Asin + float16 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Atan + float16 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Cos + float16 + + + TrigonometricOpType_Cosh + float16 + + + TrigonometricOpType_Sin + float16 + + + TrigonometricOpType_Sinh + float16 + + + TrigonometricOpType_Tan + float16 + + + TrigonometricOpType_Tanh + float16 + + + 
+ TrigonometricOpType_Acos + float32 + TrigonometricInputValueSet_RangeOne + + + TrigonometricOpType_Asin + float32 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Atan + float32 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Cos + float32 + + + TrigonometricOpType_Cosh + float32 + + + TrigonometricOpType_Sin + float32 + + + TrigonometricOpType_Sinh + float32 + + + TrigonometricOpType_Tan + float32 + + + TrigonometricOpType_Tanh + float32 + +
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp index 9c2d3d229c..b9e79cfc5e 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.cpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -16,6 +16,13 @@ LongVector::getUnaryOpType(const std::wstring &OpTypeString) { std::size(unaryOpTypeStringToEnumMap)); } +LongVector::TrigonometricOpType +LongVector::getTrigonometricOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + trigonometricOpTypeStringToEnumMap, OpTypeString, + std::size(trigonometricOpTypeStringToEnumMap)); +} + // These are helper arrays to be used with the TableParameterHandler that parses // the LongVectorOpTable.xml file for us. static TableParameter BinaryOpParameters[] = { @@ -90,6 +97,20 @@ TEST_F(LongVector::OpTest, binaryOpTest) { dispatchTestByDataType(OpType, DataType, Handler); } +TEST_F(LongVector::OpTest, trigonometricOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + const int TableSize = sizeof(UnaryOpParameters) / sizeof(TableParameter); + TableParameterHandler Handler(UnaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getTrigonometricOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + TEST_F(LongVector::OpTest, unaryOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); diff --git a/tools/clang/unittests/HLSLExec/LongVectors.h b/tools/clang/unittests/HLSLExec/LongVectors.h index 9157da679d..0e046d1966 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.h +++ b/tools/clang/unittests/HLSLExec/LongVectors.h @@ -35,6 +35,11 @@ class OpTest { L"Table:LongVectorOpTable.xml#BinaryOpTable") END_TEST_METHOD() + 
BEGIN_TEST_METHOD(trigonometricOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#TrigonometricOpTable") + END_TEST_METHOD() + BEGIN_TEST_METHOD(unaryOpTest) TEST_METHOD_PROPERTY(L"DataSource", L"Table:LongVectorOpTable.xml#UnaryOpTable") @@ -150,6 +155,39 @@ static_assert(_countof(unaryOpTypeStringToEnumMap) == UnaryOpType getUnaryOpType(const std::wstring &OpTypeString); +enum TrigonometricOpType { + TrigonometricOpType_Acos, + TrigonometricOpType_Asin, + TrigonometricOpType_Atan, + TrigonometricOpType_Cos, + TrigonometricOpType_Cosh, + TrigonometricOpType_Sin, + TrigonometricOpType_Sinh, + TrigonometricOpType_Tan, + TrigonometricOpType_Tanh, + TrigonometricOpType_EnumValueCount +}; + +static const LongVectorOpTypeStringToEnumValue + trigonometricOpTypeStringToEnumMap[] = { + {L"TrigonometricOpType_Acos", TrigonometricOpType_Acos}, + {L"TrigonometricOpType_Asin", TrigonometricOpType_Asin}, + {L"TrigonometricOpType_Atan", TrigonometricOpType_Atan}, + {L"TrigonometricOpType_Cos", TrigonometricOpType_Cos}, + {L"TrigonometricOpType_Cosh", TrigonometricOpType_Cosh}, + {L"TrigonometricOpType_Sin", TrigonometricOpType_Sin}, + {L"TrigonometricOpType_Sinh", TrigonometricOpType_Sinh}, + {L"TrigonometricOpType_Tan", TrigonometricOpType_Tan}, + {L"TrigonometricOpType_Tanh", TrigonometricOpType_Tanh}, +}; + +static_assert(_countof(trigonometricOpTypeStringToEnumMap) == + TrigonometricOpType_EnumValueCount, + "trigonometricOpTypeStringToEnumMap size mismatch. 
Did you add " + "a new enum value?"); + +TrigonometricOpType getTrigonometricOpType(const std::wstring &OpTypeString); + template std::vector getInputValueSetByKey(const std::wstring &Key, bool LogKey = true) { @@ -214,6 +252,7 @@ template class TestConfig { TestConfig(UnaryOpType OpType); TestConfig(BinaryOpType OpType); + TestConfig(TrigonometricOpType OpType); bool isBinaryOp() const { return BasicOpType == LongVector::BasicOpType_Binary || @@ -238,9 +277,15 @@ template class TestConfig { DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B, BinaryOpType OpType) const; DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B) const; + DataTypeT computeExpectedValue(const DataTypeT &A, + TrigonometricOpType OpType) const; DataTypeT computeExpectedValue(const DataTypeT &A, UnaryOpType OpType) const; DataTypeT computeExpectedValue(const DataTypeT &A) const; + void setInputArgsArrayName(const std::wstring &InputArgsArrayName) { + this->InputArgsArrayName = InputArgsArrayName; + } + void setInputValueSet1(const std::wstring &InputValueSetName) { this->InputValueSetName1 = InputValueSetName; } @@ -257,6 +302,8 @@ template class TestConfig { return getInputValueSet(2); } + std::vector getInputArgsArray() const; + float getTolerance() const { return Tolerance; } LongVector::ValidationType getValidationType() const { return ValidationType; @@ -278,6 +325,8 @@ template class TestConfig { LongVector::TestConfigTraits OpTypeTraits; std::wstring InputValueSetName1 = L"DefaultInputValueSet1"; std::wstring InputValueSetName2 = L"DefaultInputValueSet2"; + // No default args array + std::wstring InputArgsArrayName = L""; }; // class LongVector::TestConfig }; // namespace LongVector diff --git a/tools/clang/unittests/HLSLExec/LongVectors.tpp b/tools/clang/unittests/HLSLExec/LongVectors.tpp index 331d4452eb..29affa4b2e 100644 --- a/tools/clang/unittests/HLSLExec/LongVectors.tpp +++ b/tools/clang/unittests/HLSLExec/LongVectors.tpp @@ -342,6 +342,59 @@ 
LongVector::TestConfig::TestConfig(LongVector::Bin } } +template +LongVector::TestConfig::TestConfig(LongVector::TrigonometricOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Unary; + + // All trigonometric ops are floating point types. + // These trig functions are defined to have a max absolute error of 0.0008 + // as per the D3D functional specs. An example with this spec for sin and + // cos is available here: + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#22.10.20 + ValidationType = LongVector::ValidationType_Epsilon; + if (std::is_same_v) + Tolerance = 0.0010f; + else if (std::is_same_v) + Tolerance = 0.0008f; + else + VERIFY_FAIL( + "Invalid type for trigonometric op. Expecting half or float."); + + switch (OpType) { + case LongVector::TrigonometricOpType_Acos: + IntrinsicString = "acos"; + break; + case LongVector::TrigonometricOpType_Asin: + IntrinsicString = "asin"; + break; + case LongVector::TrigonometricOpType_Atan: + IntrinsicString = "atan"; + break; + case LongVector::TrigonometricOpType_Cos: + IntrinsicString = "cos"; + break; + case LongVector::TrigonometricOpType_Cosh: + IntrinsicString = "cosh"; + break; + case LongVector::TrigonometricOpType_Sin: + IntrinsicString = "sin"; + break; + case LongVector::TrigonometricOpType_Sinh: + IntrinsicString = "sinh"; + break; + case LongVector::TrigonometricOpType_Tan: + IntrinsicString = "tan"; + break; + case LongVector::TrigonometricOpType_Tanh: + IntrinsicString = "tanh"; + break; + default: + VERIFY_FAIL("Invalid TrigonometricOpType"); + } +} + template bool LongVector::TestConfig::hasFunctionDefinition() const { if constexpr (std::is_same_v) { @@ -463,6 +516,13 @@ DataTypeT LongVector::TestConfig::computeExpectedV template DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A) const { + if constexpr (std::is_same_v) { + const auto OpType = static_cast(OpTypeTraits.OpType); + // HLSLHalf_t 
is a struct. We need to call the constructor to get the + // expected value. + return computeExpectedValue(A, OpType); + } + if constexpr (std::is_same_v) { const auto OpType = static_cast(OpTypeTraits.OpType); // HLSLHalf_t is a struct. We need to call the constructor to get the @@ -477,6 +537,67 @@ DataTypeT LongVector::TestConfig::computeExpectedV return DataTypeT(); } +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, + LongVector::TrigonometricOpType OpType) const { + // The trig functions are only valid on floating point types. The constexpr in + // this case is a relatively easy and clean way to prevent the compiler from + // erroring out trying to resolve these for the non floating point types. We + // won't use them in the first place. + if constexpr (isFloatingPointType()) { + switch (OpType) { + case LongVector::TrigonometricOpType_Acos: + return std::acos(A); + case LongVector::TrigonometricOpType_Asin: + return std::asin(A); + case LongVector::TrigonometricOpType_Atan: + return std::atan(A); + case LongVector::TrigonometricOpType_Cos: + return std::cos(A); + case LongVector::TrigonometricOpType_Cosh: + return std::cosh(A); + case LongVector::TrigonometricOpType_Sin: + return std::sin(A); + case LongVector::TrigonometricOpType_Sinh: + return std::sinh(A); + case LongVector::TrigonometricOpType_Tan: + return std::tan(A); + case LongVector::TrigonometricOpType_Tanh: + return std::tanh(A); + default: + LOG_ERROR_FMT_THROW(L"Unknown TrigonometricOpType: %d", + OpTypeTraits.OpType); + return DataTypeT(); + } + } + + LOG_ERROR_FMT_THROW(L"ComputeExpectedValue(const DataTypeT &A, " + L"LongVectorOpTypeT OpType) called on a " + L"non-float type: %d", + OpType); + + return DataTypeT(); +} + +template +std::vector LongVector::TestConfig::getInputArgsArray() const { + + std::vector InputArgs; + + std::wstring InputArgsArrayName = this->InputArgsArrayName; + + if (InputArgsArrayName.empty()) + VERIFY_FAIL("No args array name 
set."); + + if (std::is_same_v && isClampOp()) + VERIFY_FAIL("Clamp is not supported for bools."); + else + return getInputValueSetByKey(InputArgsArrayName, false); + + VERIFY_FAIL("Invalid type for args array."); + return std::vector(); +} + template std::string LongVector::TestConfig::getCompilerOptionsString(size_t VectorSize) const { std::stringstream CompilerOptions("");