diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7967fa03e3..8f07f59077 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,10 +1,12 @@ trigger: - main - release* + - staging* pr: - main - release* + - staging* resources: - repo: self diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h index 0e37ccdcff..3b6f9d4ec4 100644 --- a/include/dxc/Test/HlslTestUtils.h +++ b/include/dxc/Test/HlslTestUtils.h @@ -258,6 +258,17 @@ inline void LogErrorFmt(const wchar_t *fmt, ...) { WEX::Logging::Log::Error(buf.data()); } +inline void LogErrorFmtThrow(const wchar_t *fmt, ...) { + va_list args; + va_start(args, fmt); + std::wstring buf(vFormatToWString(fmt, args)); + va_end(args); + WEX::Logging::Log::Error(buf.data()); + + // Throws an exception to abort the test. + VERIFY_FAIL(L"Test error"); +} + inline std::wstring GetPathToHlslDataFile(const wchar_t *relative, LPCWSTR paramName = HLSLDATAFILEPARAM, @@ -459,15 +470,17 @@ inline bool GetTestParamUseWARP(bool defaultVal) { #ifdef FP_SUBNORMAL -inline bool isdenorm(float f) { return FP_SUBNORMAL == std::fpclassify(f); } +template inline bool isdenorm(T f) { + return FP_SUBNORMAL == std::fpclassify(f); +} #else -inline bool isdenorm(float f) { - return (std::numeric_limits::denorm_min() <= f && - f < std::numeric_limits::min()) || - (-std::numeric_limits::min() < f && - f <= -std::numeric_limits::denorm_min()); +template inline bool isdenorm(T f) { + return (std::numeric_limits::denorm_min() <= f && + f < std::numeric_limits::min()) || + (-std::numeric_limits::min() < f && + f <= -std::numeric_limits::denorm_min()); } #endif // FP_SUBNORMAL @@ -515,6 +528,31 @@ inline bool isnanFloat16(uint16_t val) { uint16_t ConvertFloat32ToFloat16(float val) throw(); float ConvertFloat16ToFloat32(uint16_t val) throw(); +inline bool CompareDoubleULP( + const double &Src, const double &Ref, int64_t ULPTolerance, + hlsl::DXIL::Float32DenormMode Mode = hlsl::DXIL::Float32DenormMode::Any) { + if 
(Src == Ref) { + return true; + } + if (std::isnan(Src)) { + return std::isnan(Ref); + } + + if (Mode == hlsl::DXIL::Float32DenormMode::Any) { + // If denorm expected, output can be sign preserved zero. Otherwise output + // should pass the regular ulp testing. + if (isdenorm(Ref) && Src == 0 && std::signbit(Src) == std::signbit(Ref)) + return true; + } + + // For FTZ or Preserve mode, we should get the expected number within + // ULPTolerance for any operations. + int64_t Diff = *((const uint64_t *)&Src) - *((const uint64_t *)&Ref); + + uint64_t AbsoluteDiff = Diff < 0 ? -Diff : Diff; + return AbsoluteDiff <= (uint64_t)ULPTolerance; +} + inline bool CompareFloatULP( const float &fsrc, const float &fref, int ULPTolerance, hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { diff --git a/include/dxc/Test/WEXAdapter.h b/include/dxc/Test/WEXAdapter.h index f180c01a99..e8263eb576 100644 --- a/include/dxc/Test/WEXAdapter.h +++ b/include/dxc/Test/WEXAdapter.h @@ -178,8 +178,8 @@ inline void EndGroup(const wchar_t *name) { wprintf(L"END TEST(S): <%ls>\n", name); } inline void Comment(const wchar_t *msg) { - fputws(msg, stdout); - fputwc(L'\n', stdout); + fputws(msg, stderr); + fputwc(L'\n', stderr); } inline void Error(const wchar_t *msg) { fputws(msg, stderr); diff --git a/lib/HLSL/DxilLinker.cpp b/lib/HLSL/DxilLinker.cpp index 75d1bf78e9..c58a2e909a 100644 --- a/lib/HLSL/DxilLinker.cpp +++ b/lib/HLSL/DxilLinker.cpp @@ -1276,6 +1276,10 @@ void DxilLinkJob::RunPreparePass(Module &M) { // Clean up vectors, and run mem2reg again PM.add(createScalarizerPass()); + + // Need dxilelimvector for pre 6.9 + // PM.add(createDxilEliminateVectorPass()); + PM.add(createPromoteMemoryToRegisterPass()); PM.add(createSimplifyInstPass()); diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 6e58c0e872..9839fa42b7 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -387,7 +387,7 @@ enum 
ArBasicKind { #define IS_BPROP_UNSIGNABLE(_Props) \ (IS_BPROP_AINT(_Props) && GET_BPROP_BITS(_Props) != BPROP_BITS12) -#define IS_BPROP_ENUM(_Props) (((_Props)&BPROP_ENUM) != 0) +#define IS_BPROP_ENUM(_Props) (((_Props) & BPROP_ENUM) != 0) const UINT g_uBasicKindProps[] = { BPROP_PRIMITIVE | BPROP_BOOLEAN | BPROP_INTEGER | BPROP_NUMERIC | diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl new file mode 100644 index 0000000000..1910e08a25 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvecs.hlsl @@ -0,0 +1,154 @@ +// RUN: %dxc -Wno-conversion -T cs_6_9 %s | FileCheck %s --check-prefixes=CHECK,F32 +// RUN: %dxc -Wno-conversion -T cs_6_9 -DF64 %s | FileCheck %s --check-prefixes=CHECK,F64 + +RWByteAddressBuffer buf; + +// "TYPE" is the mainly focused test type. +// "UNTYPE" is the other type used for mixed precision testing. +#ifdef F64 +typedef double TYPE; +typedef float UNTYPE; +#else +typedef float TYPE; +typedef double UNTYPE; +#endif + +// Two main test function overloads. One expects matching element types. +// The other uses different types to test ops and overload resolution. +template vector dostuff(vector thing1, vector thing2, vector thing3); +vector dostuff(vector thing1, vector thing2, vector thing3); + +// Just a trick to capture the needed type spellings since the DXC version of FileCheck can't do that explicitly. 
+// F32-DAG: %dx.types.ResRet.[[TY:v8f32]] = type { [[TYPE:<8 x float>]] +// F32-DAG: %dx.types.ResRet.[[UNTY:v8f64]] = type { [[UNTYPE:<8 x double>]] +// F64-DAG: %dx.types.ResRet.[[TY:v8f64]] = type { [[TYPE:<8 x double>]] +// F64-DAG: %dx.types.ResRet.[[UNTY:v8f32]] = type { [[UNTYPE:<8 x float>]] + +// Verify that groupshared vectors are kept as aggregates +// CHECK: @"\01?gs_vec1@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +// CHECK: @"\01?gs_vec2@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +// CHECK: @"\01?gs_vec3@@3V?$vector@{{M|N}}$07@@A" = external addrspace(3) global [[TYPE]] +groupshared vector gs_vec1, gs_vec2, gs_vec3; + +[numthreads(8,1,1)] +void main() { + // CHECK: [[buf:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + + // CHECK: [[vec1_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 0 + // CHECK-DAG: [[vec1:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + // F32-DAG: [[vec1_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + // F64-DAG: [[vec1_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec1_res]], 0 + vector vec1 = buf.Load >(0); + + // CHECK: [[vec2_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 60 + // CHECK-DAG: [[vec2:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + // F32-DAG: [[vec2_32:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + // F64-DAG: [[vec2_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec2_res]], 0 + vector vec2 = buf.Load >(60); + + // CHECK: [[vec3_res:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferVectorLoad.[[TY]](i32 303, %dx.types.Handle [[buf]], i32 120 + // CHECK-DAG: [[vec3:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 
0 + // F64-DAG: [[vec3_64:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[vec3_res]], 0 + vector vec3 = buf.Load >(120); + + // CHECK: [[unvec_res:%.*]] = call %dx.types.ResRet.[[UNTY]] @dx.op.rawBufferVectorLoad.[[UNTY]](i32 303, %dx.types.Handle [[buf]], i32 180 + // CHECK-DAG: [[unvec:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + // F32-DAG: [[unvec_64:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + // F64-DAG: [[unvec_32:%.*]] = extractvalue %dx.types.ResRet.[[UNTY]] [[unvec_res]], 0 + vector unvec = buf.Load >(180); + + vec1 = dostuff(vec1, vec2, vec3); + + // Test mixed type operations + vec2 = dostuff(vec2, unvec, vec3); + + gs_vec2 = dostuff(gs_vec1, gs_vec2, gs_vec3); + + // mix groupshared and non + //vec1 = dostuff(vec1, gs_vec2, vec3); + + buf.Store >(240, vec1 * vec2 - vec3 * gs_vec1 + gs_vec2 / gs_vec3); +} + +// Test the required ops on long vectors and confirm correct lowering. +template +vector dostuff(vector thing1, vector thing2, vector thing3) { + vector res = 0; + + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMin(a,b) + res += min(thing1, thing2); + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec3]]) ; FMax(a,b) + res += max(thing1, thing3); + + // CHECK: [[tmp:%.*]] = call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec1]], [[TYPE]] [[vec2]]) ; FMax(a,b) + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 36, [[TYPE]] [[tmp]], [[TYPE]] [[vec3]]) ; FMin(a,b) + res += clamp(thing1, thing2, thing3); + + // F32: [[vec3_64:%.*]] = fpext <8 x float> [[vec3]] to <8 x double> + // F32: [[vec2_64:%.*]] = fpext <8 x float> [[vec2]] to <8 x double> + // F32: [[vec1_64:%.*]] = fpext <8 x float> [[vec1]] to <8 x double> + // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec1_64]], <8 x double> [[vec2_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c) + res += (vector)fma((vector)thing1, (vector)(thing2), 
(vector)thing3); + + // Even in the double test, these will be downconverted because these builtins only take floats. + // F64: [[vec2_32:%.*]] = fptrunc <8 x double> [[vec2]] to <8 x float> + // F64: [[vec1_32:%.*]] = fptrunc <8 x double> [[vec1]] to <8 x float> + + // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[vec2_32]], [[vec1_32]] + // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]] + res += step(thing1, thing2); + + // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec1_32]], @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value) + res += exp(thing1); + + // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec1_32]]) ; Log(value) + // CHECK: fmul fast <8 x float> [[tmp]], @dx.op.unary.v8f32(i32 20, <8 x float> [[vec1_32]]) ; Htan(value) + res += tanh(thing1); + // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec1_32]]) ; Atan(value) + res += atan(thing1); + + return res; +} + +// A mixed-type overload to test overload resolution and mingle different vector element types in ops +vector dostuff(vector thing1, vector thing2, vector thing3) { + vector res = 0; + + // F64: [[unvec_64:%.*]] = fpext <8 x float> [[unvec]] to <8 x double> + // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMin(a,b) + res += min(thing1, thing2); + + // CHECK: call [[TYPE]] @dx.op.binary.[[TY]](i32 35, [[TYPE]] [[vec2]], [[TYPE]] [[vec3]]) ; FMax(a,b) + res += max(thing1, thing3); + + // CHECK: [[tmp:%.*]] = call <8 x double> @dx.op.binary.v8f64(i32 35, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]]) ; FMax(a,b) + // CHECK: call <8 x double> @dx.op.binary.v8f64(i32 36, <8 x double> [[tmp]], <8 x double> [[vec3_64]]) ; FMin(a,b) + res += clamp(thing1, thing2, thing3); + + // CHECK: call <8 x double> @dx.op.tertiary.v8f64(i32 47, <8 x double> [[vec2_64]], <8 x double> [[unvec_64]], <8 x double> [[vec3_64]]) ; Fma(a,b,c) + res += 
(vector)fma((vector)thing1, (vector)(thing2), (vector)thing3); + + // F32: [[unvec_32:%.*]] = fptrunc <8 x double> [[unvec]] to <8 x float> + // CHECK: [[tmp:%.*]] = fcmp fast olt <8 x float> [[unvec_32]], [[vec2_32]] + // CHECK: select <8 x i1> [[tmp]], [[TYPE]] zeroinitializer, [[TYPE]] + res += step(thing1, thing2); + + // CHECK: [[tmp:%.*]] = fmul fast <8 x float> [[vec2_32]], @dx.op.unary.v8f32(i32 21, <8 x float> [[tmp]]) ; Exp(value) + res += exp(thing1); + + // CHECK: [[tmp:%.*]] = call <8 x float> @dx.op.unary.v8f32(i32 23, <8 x float> [[vec2_32]]) ; Log(value) + // CHECK: fmul fast <8 x float> [[tmp]], @dx.op.unary.v8f32(i32 20, <8 x float> [[vec2_32]]) ; Htan(value) + res += tanh(thing1); + // CHECK: call <8 x float> @dx.op.unary.v8f32(i32 17, <8 x float> [[vec2_32]]) ; Atan(value) + res += atan(thing1); + + return res; +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl index f9c75a9381..82dd3586c1 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/linker/resources/preserve_sb_types.hlsl @@ -155,5 +155,7 @@ export float4 xform(float4 v) { [shader("vertex")] float4 main(float3 pos : Position) : SV_Position { - return xform(float4(pos, 1)) * StructBuf[0].f; + float4 res = xform(float4(pos, 1)); + res *=StructBuf[0].f; + return res ; } diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 3878fa3f34..c047a9be00 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -39,3 +39,10 @@ endif() file(TO_NATIVE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" DOS_STYLE_SOURCE_DIR) file(TO_NATIVE_PATH "${TAEF_BIN_DIR}" DOS_TAEF_BIN_DIR) configure_file(ExecHLSLTests.vcxproj.user.txt ExecHLSLTests.vcxproj.user) + +# Copy the ShaderOpArith.xml file to the output 
directory. It's used by the exec +# tests and it's convenient to have it copied here if you want to easily copy +# the tests to another machine after building. +set(XML_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/ShaderOpArith.xml) +set(XML_DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_BUILD_TYPE}/bin) +file(COPY ${XML_SOURCE} DESTINATION ${XML_DESTINATION}) \ No newline at end of file diff --git a/tools/clang/unittests/HLSLExec/CoopVec.h b/tools/clang/unittests/HLSLExec/CoopVec.h new file mode 100644 index 0000000000..f166c61f67 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/CoopVec.h @@ -0,0 +1,359 @@ +#pragma once + +#if HAVE_COOPVEC_API + +#include +#include +#include + +#include "dxc/Support/microcom.h" + +#include "CoopVecAPI.h" + +struct LinAlgHeaderIncludeHandler : public IDxcIncludeHandler { +private: + DXC_MICROCOM_REF_FIELD(RefCount) + dxc::DxcDllSupport &DxcSupport; + +public: + LinAlgHeaderIncludeHandler() = delete; + LinAlgHeaderIncludeHandler(dxc::DxcDllSupport &DxcSupport) + : RefCount(0), DxcSupport(DxcSupport) {} + + DXC_MICROCOM_ADDREF_RELEASE_IMPL(RefCount) + + HRESULT STDMETHODCALLTYPE LoadSource(LPCWSTR Filename, + IDxcBlob **IncludeSource) { + if (wcscmp(Filename, L"dx/linalg.h") == 0 || + wcscmp(Filename, L".\\dx\\linalg.h") == 0) { + WEX::Common::String ParamValue; + if (FAILED(WEX::TestExecution::RuntimeParameters::TryGetValue( + L"LinAlgHeader", ParamValue))) { + return E_FAIL; + } + if (ParamValue.IsEmpty()) { + return E_FAIL; + } + LPCWSTR RealHeaderPath = + reinterpret_cast(ParamValue.GetBuffer()); + + CComPtr HeaderUtils; + + IFT(DxcSupport.CreateInstance(CLSID_DxcUtils, &HeaderUtils)); + + IDxcBlobEncoding *HeaderBlob; + IFT(HeaderUtils->LoadFile(RealHeaderPath, nullptr, &HeaderBlob)); + + *IncludeSource = HeaderBlob; + + return S_OK; + } + return E_FAIL; + } + + HRESULT STDMETHODCALLTYPE QueryInterface(REFIID IID, void **Object) override { +// FIXME: This is a workaround for a warning-as-error about unused parameters. 
+#pragma warning(push) +#pragma warning(disable : 4100) + return DoBasicQueryInterface(this, IID, Object); +#pragma warning(pop) + } +}; + +namespace CoopVecHelpers { +template +static std::vector CreateAllOnesInputMatrix(uint32_t Width, + uint32_t Height) { + std::vector InputMatrix(Width * Height); + for (uint32_t i = 0; i < Width * Height; i++) { + if constexpr (std::is_same_v || + std::is_same_v) { + InputMatrix[i] = 1; + } else if constexpr (std::is_same_v) { + InputMatrix[i] = ConvertFloat32ToFloat16(1.0f); + } else if constexpr (std::is_same_v) { + InputMatrix[i] = 1.0f; + } else { + WEX::Logging::Log::Error(L"Unsupported input type"); + break; + } + } + + // Convert to uint8_t vector + std::vector Uint8InputMatrix(InputMatrix.size() * sizeof(EltTy)); + std::memcpy(Uint8InputMatrix.data(), InputMatrix.data(), + InputMatrix.size() * sizeof(EltTy)); + return Uint8InputMatrix; +} + +template +static std::vector CreateInputVector(uint32_t NumThreads, + uint32_t EltsPerThread) { + std::vector InputVector(NumThreads * EltsPerThread); + std::fill(InputVector.begin(), InputVector.end(), EltTy(0)); + if (EltsPerThread < 2) { + WEX::Logging::Log::Error(L"EltsPerThread must be at least 2"); + return std::vector(); + } + for (uint32_t TID = 0; TID < NumThreads; TID++) { + if constexpr (std::is_same_v || + std::is_same_v) { + InputVector[TID * EltsPerThread + 0] = 1; + InputVector[TID * EltsPerThread + 1] = 1; + } else if constexpr (std::is_same_v) { + InputVector[TID * EltsPerThread + 0] = ConvertFloat32ToFloat16(1.0f); + InputVector[TID * EltsPerThread + 1] = ConvertFloat32ToFloat16(1.0f); + } else if constexpr (std::is_same_v) { + InputVector[TID * EltsPerThread + 0] = 1.0f; + InputVector[TID * EltsPerThread + 1] = 1.0f; + } else { + WEX::Logging::Log::Error(L"Unsupported input type"); + break; + } + } + + // Convert to uint8_t vector + std::vector Uint8InputVector(InputVector.size() * sizeof(EltTy)); + std::memcpy(Uint8InputVector.data(), InputVector.data(), + 
InputVector.size() * sizeof(EltTy)); + return Uint8InputVector; +} + +template +static std::vector CreateInputBias(uint32_t NumElts) { + std::vector InputBias(NumElts); + if constexpr (std::is_same_v || + std::is_same_v) { + std::fill(InputBias.begin(), InputBias.end(), EltTy(1)); + } else if constexpr (std::is_same_v) { + std::fill(InputBias.begin(), InputBias.end(), + ConvertFloat32ToFloat16(1.0f)); + } else if constexpr (std::is_same_v) { + std::fill(InputBias.begin(), InputBias.end(), 1); + } else { + WEX::Logging::Log::Error(L"Unsupported bias type"); + } + // Convert to uint8_t vector + std::vector Uint8InputBias(InputBias.size() * sizeof(EltTy)); + std::memcpy(Uint8InputBias.data(), InputBias.data(), + InputBias.size() * sizeof(EltTy)); + return Uint8InputBias; +} + +static std::wstring +DataTypeToFilterString(D3D12_LINEAR_ALGEBRA_DATATYPE DataType) { + switch (DataType) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + return L"SINT8_T4_PACKED"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED: + return L"UINT8_T4_PACKED"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + return L"SINT8"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8: + return L"UINT8"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16: + return L"SINT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16: + return L"UINT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32: + return L"SINT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32: + return L"UINT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32: + return L"FLOAT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + return L"FLOAT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3: + return L"FLOAT_E4M3"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2: + return L"FLOAT_E5M2"; + default: + return L""; + } +} + +static bool IsDataTypeInFilter(const wchar_t *FilterKey, + D3D12_LINEAR_ALGEBRA_DATATYPE DataType) { + WEX::Common::String ParamValue; + if (FAILED(WEX::TestExecution::RuntimeParameters::TryGetValue(FilterKey, + ParamValue))) { + // Filter 
not set, so treat as no filter + return true; + } + if (ParamValue.IsEmpty()) { + // Empty filter, so treat as no filter + return true; + } + + // Check if the filter matches the target data type + LPCWSTR FilterString = reinterpret_cast(ParamValue.GetBuffer()); + return DataTypeToFilterString(DataType) == FilterString; +} + +static std::wstring +MatrixLayoutToFilterString(D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT MatrixLayout) { + switch (MatrixLayout) { + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR: + return L"ROW_MAJOR"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR: + return L"COLUMN_MAJOR"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL: + return L"MUL_OPTIMAL"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL: + return L"OUTER_PRODUCT_OPTIMAL"; + default: + return L""; + } +} + +static bool +IsMatrixLayoutInFilter(const wchar_t *FilterKey, + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT MatrixLayout) { + WEX::Common::String ParamValue; + if (FAILED(WEX::TestExecution::RuntimeParameters::TryGetValue(FilterKey, + ParamValue))) { + // Filter not set, so treat as no filter + return true; + } + if (ParamValue.IsEmpty()) { + // Empty filter, so treat as no filter + return true; + } + + // Check if the filter matches the target data type + LPCWSTR FilterString = reinterpret_cast(ParamValue.GetBuffer()); + return MatrixLayoutToFilterString(MatrixLayout) == FilterString; +} + +static std::wstring MatrixLayoutToHlslLayoutString( + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT MatrixLayout) { + switch (MatrixLayout) { + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR: + return L"MATRIX_LAYOUT_ROW_MAJOR"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR: + return L"MATRIX_LAYOUT_COLUMN_MAJOR"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL: + return L"MATRIX_LAYOUT_MUL_OPTIMAL"; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL: + return L"MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL"; + default: + return L""; + } +} + +// This multiplier is 
used to compute the row/column stride for a matrix +// given it's element size. +static int +GetStrideMultiplierForMatrixDataType(D3D12_LINEAR_ALGEBRA_DATATYPE DataType) { + switch (DataType) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3: + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2: + return 1; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16: + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + return 2; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32: + return 4; + default: + WEX::Logging::Log::Error(L"Unsupported matrix data type"); + return 1; + } +} + +static int GetNumPackedElementsForInputDataType( + D3D12_LINEAR_ALGEBRA_DATATYPE InputInterpretation) { + // Int8 packed types are the only ones that have more than 1 element per + // shader variable + switch (InputInterpretation) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED: + return 4; + default: + return 1; + } +} + +// This type is used in generated HLSL source to represent the vector type +// for the given data type. 
+static std::wstring +GetHlslDataTypeForDataType(D3D12_LINEAR_ALGEBRA_DATATYPE DataType) { + switch (DataType) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16: + return L"int16_t"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16: + return L"uint16_t"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32: + return L"int32_t"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32: + return L"uint32_t"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + return L"half"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32: + return L"float"; + default: + WEX::Logging::Log::Error(L"Unsupported input data type"); + return L""; + } +} + +static std::wstring +GetHlslInterpretationForDataType(D3D12_LINEAR_ALGEBRA_DATATYPE Interpretation) { + switch (Interpretation) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + return L"DATA_TYPE_SINT8_T4_PACKED"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED: + return L"DATA_TYPE_UINT8_T4_PACKED"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + return L"DATA_TYPE_SINT8"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8: + return L"DATA_TYPE_UINT8"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16: + return L"DATA_TYPE_SINT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16: + return L"DATA_TYPE_UINT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32: + return L"DATA_TYPE_SINT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32: + return L"DATA_TYPE_UINT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + return L"DATA_TYPE_FLOAT16"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32: + return L"DATA_TYPE_FLOAT32"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3: + return L"DATA_TYPE_FLOAT8_E4M3"; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2: + return L"DATA_TYPE_FLOAT8_E5M2"; + default: + WEX::Logging::Log::Error(L"Unsupported interpretation"); + return L""; + } +} + +// The returned data type is used for matrix conversion. It is hard-coded +// for the test framework where all integer matrices start as SINT8 and +// all FP matrices start as FLOAT32. 
+static D3D12_LINEAR_ALGEBRA_DATATYPE +GetMatrixSrcDataType(D3D12_LINEAR_ALGEBRA_DATATYPE MatrixInterpretation) { + switch (MatrixInterpretation) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32: + case D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32: + return D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8; + default: + return D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32; + } +} +}; // namespace CoopVecHelpers + +#endif // HAVE_COOPVEC_API diff --git a/tools/clang/unittests/HLSLExec/CoopVecAPI.h b/tools/clang/unittests/HLSLExec/CoopVecAPI.h new file mode 100644 index 0000000000..16c1105edc --- /dev/null +++ b/tools/clang/unittests/HLSLExec/CoopVecAPI.h @@ -0,0 +1,178 @@ +#pragma once +// clang-format off + +#if !defined(D3D12_PREVIEW_SDK_VERSION) || D3D12_PREVIEW_SDK_VERSION < 717 + +#ifdef __ID3D12GraphicsCommandList10_INTERFACE_DEFINED__ +#define HAVE_COOPVEC_API 1 + +// This file contains the definitions of the D3D12 cooperative vector API. +// It is used to test the cooperative vector API on older SDKs. + +constexpr int D3D12_FEATURE_D3D12_OPTIONS_EXPERIMENTAL = 9; +constexpr int D3D12_FEATURE_COOPERATIVE_VECTOR = 11; + +// -------------------------------------------------------------------------------------------------------------------------------- +// Experimental Feature: D3D12CooperativeVectorExperiment +// +// Use with D3D12CooperativeVectorExperiment to enable cooperative vector experimental feature. +// +// Enabling D3D12CooperativeVectorExperiment needs no configuration struct, pass NULL in the pConfigurationStructs array. 
+// +// -------------------------------------------------------------------------------------------------------------------------------- +static const UUID D3D12CooperativeVectorExperiment = { /* 384748be-cca5-471e-a125-5cc997e04d39 */ + 0x384748be, + 0xcca5, + 0x471e, + {0xa1, 0x25, 0x5c, 0xc9, 0x97, 0xe0, 0x4d, 0x39} +}; + +/* interface __MIDL_itf_d3d12_0000_0082 */ +/* [local] */ + +typedef +enum D3D12_COOPERATIVE_VECTOR_TIER + { + D3D12_COOPERATIVE_VECTOR_TIER_NOT_SUPPORTED = 0, + D3D12_COOPERATIVE_VECTOR_TIER_1_0 = 0x10, + D3D12_COOPERATIVE_VECTOR_TIER_1_1 = 0x11 + } D3D12_COOPERATIVE_VECTOR_TIER; + +typedef +enum D3D12_LINEAR_ALGEBRA_DATATYPE + { + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT16 = 2, + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT16 = 3, + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32 = 4, + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32 = 5, + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 = 7, + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32 = 8, + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED = 16, + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED = 17, + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8 = 18, + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 = 19, + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 = 20, + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2 = 21 + } D3D12_LINEAR_ALGEBRA_DATATYPE; + +typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS_EXPERIMENTAL + { + _Out_ D3D12_COOPERATIVE_VECTOR_TIER CooperativeVectorTier; + } D3D12_FEATURE_DATA_D3D12_OPTIONS_EXPERIMENTAL; + +typedef struct D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL + { + D3D12_LINEAR_ALGEBRA_DATATYPE InputType; + D3D12_LINEAR_ALGEBRA_DATATYPE InputInterpretation; + D3D12_LINEAR_ALGEBRA_DATATYPE MatrixInterpretation; + D3D12_LINEAR_ALGEBRA_DATATYPE BiasInterpretation; + D3D12_LINEAR_ALGEBRA_DATATYPE OutputType; + BOOL TransposeSupported; + } D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL; + +typedef struct D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE + { + D3D12_LINEAR_ALGEBRA_DATATYPE InputType; + D3D12_LINEAR_ALGEBRA_DATATYPE AccumulationType; + } 
D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE; + +typedef struct D3D12_FEATURE_DATA_COOPERATIVE_VECTOR + { + _Inout_ UINT MatrixVectorMulAddPropCount; + _Out_ D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL *pMatrixVectorMulAddProperties; + _Inout_ UINT OuterProductAccumulatePropCount; + _Out_ D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE *pOuterProductAccumulateProperties; + _Inout_ UINT VectorAccumulatePropCount; + _Out_ D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE *pVectorAccumulateProperties; + } D3D12_FEATURE_DATA_COOPERATIVE_VECTOR; + +typedef +enum D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT + { + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR = 0, + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR = ( D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR + 1 ) , + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL = ( D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR + 1 ) , + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = ( D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL + 1 ) + } D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT; + +typedef struct D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DEST_INFO + { + _Inout_ UINT DestSize; + _In_ D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT DestLayout; + _In_ UINT DestStride; + _In_ UINT NumRows; + _In_ UINT NumColumns; + _In_ D3D12_LINEAR_ALGEBRA_DATATYPE DestDataType; + } D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DEST_INFO; + +typedef struct D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DATA + { + _Inout_ D3D12_GPU_VIRTUAL_ADDRESS DestVA; + _In_ D3D12_GPU_VIRTUAL_ADDRESS SrcVA; + } D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DATA; + +typedef struct D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_SRC_INFO + { + _In_ UINT SrcSize; + _In_ D3D12_LINEAR_ALGEBRA_DATATYPE SrcDataType; + _In_ D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT SrcLayout; + _In_ UINT SrcStride; + } D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_SRC_INFO; + +typedef struct D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO + { + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DEST_INFO DestInfo; + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_SRC_INFO SrcInfo; 
+ D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DATA DataDesc; + } D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO; + + + +#ifndef __ID3D12DevicePreview_INTERFACE_DEFINED__ +#define __ID3D12DevicePreview_INTERFACE_DEFINED__ + +EXTERN_C const IID IID_ID3D12DevicePreview; + +MIDL_INTERFACE("55ea41d3-6bf5-4332-bbf9-905e6b4e2930") +ID3D12DevicePreview : public IUnknown +{ +public: + virtual void STDMETHODCALLTYPE GetLinearAlgebraMatrixConversionDestinationInfo( + _Inout_ D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DEST_INFO *pDesc) = 0; + +}; + +#endif /* __ID3D12DevicePreview_INTERFACE_DEFINED__ */ + + +#ifndef __ID3D12GraphicsCommandList11_INTERFACE_DEFINED__ +#define __ID3D12GraphicsCommandList11_INTERFACE_DEFINED__ + +EXTERN_C const IID IID_ID3D12GraphicsCommandList11; + +MIDL_INTERFACE("f0dcfabc-a84a-4fe3-b3b9-eab26b306c38") +ID3D12GraphicsCommandList11 : public ID3D12GraphicsCommandList10 +{ +public: + virtual void STDMETHODCALLTYPE Reserved0() = 0; + virtual void STDMETHODCALLTYPE Reserved1() = 0; + virtual void STDMETHODCALLTYPE Reserved2() = 0; + + virtual void STDMETHODCALLTYPE ConvertLinearAlgebraMatrix( + _In_ const D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO *pDesc, + _In_ UINT DescCount) = 0; + +}; + +#endif /* __ID3D12GraphicsCommandList11_INTERFACE_DEFINED__ */ + +#else // __ID3D12GraphicsCommandList10_INTERFACE_DEFINED__ +// The used d3d12.h header does not support ID3D12GraphicsCommandList10, +// so we cannot define ID3D12GraphicsCommandList11. 
+#define HAVE_COOPVEC_API 0
+#endif // __ID3D12GraphicsCommandList10_INTERFACE_DEFINED__
+
+#else // D3D12_PREVIEW_SDK_VERSION < 717
+// Preview header has CoopVec support
+#define HAVE_COOPVEC_API 1
+#endif // D3D12_PREVIEW_SDK_VERSION < 717
diff --git a/tools/clang/unittests/HLSLExec/DXRUtil.h b/tools/clang/unittests/HLSLExec/DXRUtil.h
new file mode 100644
index 0000000000..14bbf5bf1b
--- /dev/null
+++ b/tools/clang/unittests/HLSLExec/DXRUtil.h
@@ -0,0 +1,221 @@
+//===------------ DXRUtil.h - DXR Utility Functions ------------*- C++ -*-===//
+///////////////////////////////////////////////////////////////////////////////
+//                                                                           //
+// DXRUtil.h                                                                 //
+// Copyright (C) Nvidia Corporation. All rights reserved.                    //
+// This file is distributed under the University of Illinois Open Source     //
+// License. See LICENSE.TXT for details.                                     //
+//                                                                           //
+// This file contains the utility functions for DXR execution tests.         //
+//                                                                           //
+///////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+//= DXR Utility
+//============================================================================
+// Number of bytes reserved for the shader identifier at the start of every
+// shader record.
+#define SHADER_ID_SIZE_IN_BYTES 32
+
+// Rounds v up to the next multiple of PowerOf2Alignment. The mask arithmetic
+// is only correct when PowerOf2Alignment is a power of two.
+#ifndef ROUND_UP
+#define ROUND_UP(v, PowerOf2Alignment)                                         \
+  (((v) + (PowerOf2Alignment)-1) & ~((PowerOf2Alignment)-1))
+#endif
+// Scene constants; ExecutionTest::RunDXRTest copies this struct byte-for-byte
+// into its constant buffer, so the field order is part of the layout.
+struct SceneConsts {
+  DirectX::XMFLOAT4 Eye;
+  DirectX::XMFLOAT4 U;
+  DirectX::XMFLOAT4 V;
+  DirectX::XMFLOAT4 W;
+  float SceneScale;
+  unsigned WindowSize[2];
+  int RayFlags;
+};
+
+// NOTE(review): presumably describes one acceleration-structure instance
+// (transform, geometry count, bottom-level AS index, ID/mask/flags); it is not
+// referenced by the code visible in this part of the patch - confirm usage.
+struct Instance {
+  D3D12_RAYTRACING_GEOMETRY_TYPE Type;
+  DirectX::XMFLOAT4X4 Matrix;
+  UINT GeometryCount;
+  UINT BottomASIdx;
+  UINT InstanceID;
+  UINT Mask;
+  UINT Flags;
+};
+
+// Builds a shader table for the DXR tests.
+//
+// Records are stored back to back in the order [raygen | miss | hit group].
+// Each record is ShaderRecordSizeInBytes long and consists of a shader
+// identifier (SHADER_ID_SIZE_IN_BYTES) followed by the root table data.
+// The constructor creates a DEFAULT-heap buffer and an UPLOAD-heap buffer of
+// the same size; the upload buffer is mapped once and written through the
+// Get*Ptr() accessors, and Upload() records the copy into the default buffer.
+class ShaderTable {
+public:
+  // MissCount and HitGroupCount are per ray type; the table stores
+  // MissCount * RayTypeCount miss records and HitGroupCount * RayTypeCount
+  // hit-group records. RootTableDwords is the root table size in 32-bit
+  // values. Resource creation and mapping failures are reported through
+  // VERIFY_SUCCEEDED.
+  ShaderTable(ID3D12Device *Device, int RaygenCount, int MissCount,
+              int HitGroupCount, int RayTypeCount, int RootTableDwords)
+      : RayTypeCount(RayTypeCount), RaygenCount(RaygenCount),
+        MissCount(MissCount * RayTypeCount),
+        HitGroupCount(HitGroupCount * RayTypeCount),
+        RootTableSizeInBytes(RootTableDwords * 4),
+        // RootTableSizeInBytes is declared (and therefore initialized) before
+        // ShaderRecordSizeInBytes, so reading the member here is safe.
+        ShaderRecordSizeInBytes(
+            ROUND_UP(RootTableSizeInBytes + SHADER_ID_SIZE_IN_BYTES,
+                     D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT)),
+        // The constructor parameters shadow the members of the same name, so
+        // the already-scaled miss count must be reached through this->.
+        // Using the bare name would place the hit groups on top of the miss
+        // records whenever RayTypeCount > 1.
+        MissStartIdx(RaygenCount),
+        HitGroupStartIdx(MissStartIdx + this->MissCount) {
+
+    // Size the buffers from the scaled member counts, not from the shadowing
+    // per-ray-type parameters.
+    const int TotalSizeInBytes =
+        (this->RaygenCount + this->MissCount + this->HitGroupCount) *
+        ShaderRecordSizeInBytes;
+
+    D3D12_RESOURCE_DESC Desc = CD3DX12_RESOURCE_DESC::Buffer(
+        TotalSizeInBytes, D3D12_RESOURCE_FLAG_NONE,
+        std::max(D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT,
+                 D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT));
+    CD3DX12_HEAP_PROPERTIES Heap =
+        CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
+    VERIFY_SUCCEEDED(Device->CreateCommittedResource(
+        &Heap, D3D12_HEAP_FLAG_NONE, &Desc,
+        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, nullptr,
+        IID_PPV_ARGS(&SBTResource)));
+    SBTResource->SetName(L"SBT Resource Heap");
+    CD3DX12_HEAP_PROPERTIES Upload =
+        CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
+    VERIFY_SUCCEEDED(Device->CreateCommittedResource(
+        &Upload, D3D12_HEAP_FLAG_NONE, &Desc, D3D12_RESOURCE_STATE_GENERIC_READ,
+        nullptr, IID_PPV_ARGS(&SBTUploadResource)));
+    SBTUploadResource->SetName(L"SBT Upload Heap");
+
+    // The upload buffer is mapped here and never unmapped by this class;
+    // HostPtr is the base address used by every Get*Ptr() accessor.
+    VERIFY_SUCCEEDED(SBTUploadResource->Map(0, nullptr, (void **)&HostPtr));
+  }
+
+  // Records, on CmdList, the copy of the whole upload buffer into the
+  // default-heap buffer, bracketed by transitions
+  // NON_PIXEL_SHADER_RESOURCE -> COPY_DEST -> NON_PIXEL_SHADER_RESOURCE.
+  void Upload(ID3D12GraphicsCommandList *CmdList) {
+    CD3DX12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(
+        SBTResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE,
+        D3D12_RESOURCE_STATE_COPY_DEST);
+    CmdList->ResourceBarrier(1, &Barrier);
+    CmdList->CopyResource(SBTResource, SBTUploadResource);
+    CD3DX12_RESOURCE_BARRIER Barrier2 = CD3DX12_RESOURCE_BARRIER::Transition(
+        SBTResource, D3D12_RESOURCE_STATE_COPY_DEST,
+        D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
+    CmdList->ResourceBarrier(1, &Barrier2);
+  }
+
+  // Size of one shader record (identifier + root table, rounded up).
+  int GetShaderRecordSizeInBytes() { return ShaderRecordSizeInBytes; }
+
+  // Record indices, counted in records from the start of the table.
+  int GetRaygenShaderRecordIdx(int Idx) { return Idx; }
+  int GetMissShaderRecordIdx(int Idx, int RayType) {
+    return MissStartIdx + Idx * RayTypeCount + RayType;
+  }
+  int GetHitGroupShaderRecordIdx(int Idx, int RayType) {
+    return HitGroupStartIdx + Idx * RayTypeCount + RayType;
+  }
+
+  // Host pointers to the shader-identifier slot of a record inside the mapped
+  // upload buffer.
+  void *GetRaygenShaderIdPtr(int Idx) {
+    return HostPtr + GetRaygenShaderRecordIdx(Idx) * ShaderRecordSizeInBytes;
+  }
+  void *GetMissShaderIdPtr(int Idx, int RayType) {
+    return HostPtr +
+           GetMissShaderRecordIdx(Idx, RayType) * ShaderRecordSizeInBytes;
+  }
+  void *GetHitGroupShaderIdPtr(int Idx, int RayType) {
+    return HostPtr +
+           GetHitGroupShaderRecordIdx(Idx, RayType) * ShaderRecordSizeInBytes;
+  }
+
+  // Host pointers to the root table of a record; it starts immediately after
+  // the shader identifier.
+  void *GetRaygenRootTablePtr(int Idx) {
+    return (char *)GetRaygenShaderIdPtr(Idx) + SHADER_ID_SIZE_IN_BYTES;
+  }
+  void *GetMissRootTablePtr(int Idx, int RayType) {
+    return (char *)GetMissShaderIdPtr(Idx, RayType) + SHADER_ID_SIZE_IN_BYTES;
+  }
+  void *GetHitGroupRootTablePtr(int Idx, int RayType) {
+    return (char *)GetHitGroupShaderIdPtr(Idx, RayType) +
+           SHADER_ID_SIZE_IN_BYTES;
+  }
+
+  // Byte sizes of the three record ranges.
+  int GetRaygenRangeInBytes() { return RaygenCount * ShaderRecordSizeInBytes; }
+  int GetMissRangeInBytes() { return MissCount * ShaderRecordSizeInBytes; }
+  int GetHitGroupRangeInBytes() {
+    return HitGroupCount * ShaderRecordSizeInBytes;
+  }
+
+  // GPU virtual addresses of the first record of each range in the
+  // default-heap buffer.
+  D3D12_GPU_VIRTUAL_ADDRESS GetRaygenStartGpuVA() {
+    return SBTResource->GetGPUVirtualAddress() +
+           GetRaygenShaderRecordIdx(0) * ShaderRecordSizeInBytes;
+  }
+  D3D12_GPU_VIRTUAL_ADDRESS GetMissStartGpuVA() {
+    return SBTResource->GetGPUVirtualAddress() +
+           GetMissShaderRecordIdx(0, 0) * ShaderRecordSizeInBytes;
+  }
+  D3D12_GPU_VIRTUAL_ADDRESS GetHitGroupStartGpuVA() {
+    return SBTResource->GetGPUVirtualAddress() +
+           GetHitGroupShaderRecordIdx(0, 0) * ShaderRecordSizeInBytes;
+  }
+
+private:
+  // Declaration order matters: the constructor's initializer list reads
+  // earlier members while initializing later ones.
+  CComPtr<ID3D12Resource> SBTResource;       // DEFAULT-heap table
+  CComPtr<ID3D12Resource> SBTUploadResource; // UPLOAD-heap staging copy
+  char *HostPtr = nullptr;                   // mapped SBTUploadResource
+  int RayTypeCount = 0;
+  int RaygenCount = 0;
+  int MissCount = 0;     // total miss records (already * RayTypeCount)
+  int HitGroupCount = 0; // total hit-group records (already * RayTypeCount)
+  int RootTableSizeInBytes = 0;
+  int ShaderRecordSizeInBytes = 0;
+  int MissStartIdx = 0;     // record index of the first miss record
+  int HitGroupStartIdx = 0; // record index of the first hit-group record
+};
+
+//-----------------------------------------------------------------------------
+// Creates a committed buffer of BufferSize bytes on the DEFAULT heap and
+// returns it through *Resource.
+//
+// AllowUAV adds D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS to the buffer.
+// InitialResourceState is passed straight to CreateCommittedResource.
+// ResourceName, when non-null, is applied with SetName.
+// Creation failure is reported through VERIFY_SUCCEEDED.
+//
+// Declared inline because this header defines the function; a non-inline
+// definition would be emitted by every translation unit that includes it.
+inline void AllocateBuffer(
+    ID3D12Device *Device, UINT64 BufferSize, ID3D12Resource **Resource,
+    bool AllowUAV = false,
+    D3D12_RESOURCE_STATES InitialResourceState = D3D12_RESOURCE_STATE_COMMON,
+    const wchar_t *ResourceName = nullptr) {
+  auto DefaultHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT);
+  auto BufferDesc = CD3DX12_RESOURCE_DESC::Buffer(
+      BufferSize, AllowUAV ? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
+                           : D3D12_RESOURCE_FLAG_NONE);
+  VERIFY_SUCCEEDED(Device->CreateCommittedResource(
+      &DefaultHeapProperties, D3D12_HEAP_FLAG_NONE, &BufferDesc,
+      InitialResourceState, nullptr, IID_PPV_ARGS(Resource)));
+  if (ResourceName) {
+    (*Resource)->SetName(ResourceName);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Ensures *Resource is a UAV-capable buffer of at least NBytes: a new buffer
+// is allocated when *Resource is null or its current width is smaller than
+// NBytes; otherwise the existing buffer is kept.
+//
+// NOTE(review): when *Resource is non-null but too small, the old pointer is
+// overwritten without a Release(). The caller appears to be responsible for
+// releasing the previous buffer (once the GPU no longer uses it) before
+// calling this - confirm against the call sites.
+inline void ReallocScratchResource(ID3D12Device *Device,
+                                   ID3D12Resource **Resource, UINT64 NBytes) {
+  if (!(*Resource) || (*Resource)->GetDesc().Width < NBytes) {
+    AllocateBuffer(Device, NBytes, Resource, true,
+                   D3D12_RESOURCE_STATE_UNORDERED_ACCESS, L"scratchResource");
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Creates a committed buffer of DataSize bytes on the UPLOAD heap (state
+// GENERIC_READ), returns it through *Resource and fills it with a copy of
+// Data. The buffer is mapped only for the duration of the copy.
+// ResourceName, when non-null, is applied with SetName.
+inline void AllocateUploadBuffer(ID3D12Device *Device, const void *Data,
+                                 UINT64 DataSize, ID3D12Resource **Resource,
+                                 const wchar_t *ResourceName = nullptr) {
+  auto UploadHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
+  auto BufferDesc = CD3DX12_RESOURCE_DESC::Buffer(DataSize);
+  VERIFY_SUCCEEDED(Device->CreateCommittedResource(
+      &UploadHeapProperties, D3D12_HEAP_FLAG_NONE, &BufferDesc,
+      D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(Resource)));
+  if (ResourceName) {
+    (*Resource)->SetName(ResourceName);
+  }
+  void *MappedData;
+  VERIFY_SUCCEEDED((*Resource)->Map(0, nullptr, &MappedData));
+  memcpy(MappedData, Data, DataSize);
+  (*Resource)->Unmap(0, nullptr);
+}
+
+//-----------------------------------------------------------------------------
+// Creates a DEFAULT-heap buffer with the same width as UploadSource, records a
+// full-resource copy from UploadSource into it on CommandList, and then
+// records a transition from COPY_DEST to TargetResourceState.
+//
+// The new buffer is returned through *Resource. It is created with the UAV
+// flag only when TargetResourceState is exactly
+// D3D12_RESOURCE_STATE_UNORDERED_ACCESS. The copy is only recorded here; it
+// takes effect when the caller submits CommandList.
+//
+// Declared inline because this header defines the function; a non-inline
+// definition would be emitted by every translation unit that includes it.
+inline void AllocateBufferFromUpload(ID3D12Device *Device,
+                                     ID3D12GraphicsCommandList *CommandList,
+                                     ID3D12Resource *UploadSource,
+                                     ID3D12Resource **Resource,
+                                     D3D12_RESOURCE_STATES TargetResourceState,
+                                     const wchar_t *ResourceName = nullptr) {
+  const bool AllowUAV =
+      TargetResourceState == D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
+  AllocateBuffer(Device, UploadSource->GetDesc().Width, Resource, AllowUAV,
+                 D3D12_RESOURCE_STATE_COPY_DEST, ResourceName);
+  CommandList->CopyResource(*Resource, UploadSource);
+  CD3DX12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(
+      *Resource, D3D12_RESOURCE_STATE_COPY_DEST, TargetResourceState);
+  // &Barrier is passed directly, the same way ShaderTable::Upload does; no
+  // cast is needed.
+  CommandList->ResourceBarrier(1, &Barrier);
+}
+
+//= DXR Utility
+//============================================================================
\ No newline at end of file
diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp
index 6db27d7a41..16ccf9f11c 100644
--- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp
+++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp
@@ -35,6 +35,8 @@
 #include
 #include
 #include
+#include
+#include
 #undef _read
 #include "dxc/Test/DxcTestUtils.h"
@@ -49,6 +51,7 @@
 // https://msdn.microsoft.com/en-us/library/windows/desktop/dn899120(v=vs.85).aspx
 // https://developer.microsoft.com/en-US/windows/downloads/windows-10-sdk
 //
+
 #include
 #include
 #include
@@ -60,6 +63,10 @@
 #include "ShaderOpTest.h"
 #include
 #include
+#include "LongVectors.h"
+#include "DXRUtil.h"
+#include "CoopVecAPI.h"
+#include "CoopVec.h"
 // clang-format on
 #pragma comment(lib, "d3dcompiler.lib")
@@ -67,6 +74,9 @@
 #pragma comment(lib, "dxguid.lib")
 #pragma comment(lib, "version.lib")
+// Float values for this were taken from Microsoft online documentation for the
+// DirectX HALF data type. HALF is equivalent to IEEE 754 binary 16 format.
+ // A more recent Windows SDK than currently required is needed for these. typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)( UINT NumFeatures, __in_ecount(NumFeatures) const IID *pIIDs, @@ -501,6 +511,139 @@ class ExecutionTest { L"Table:ShaderOpArithTable.xml#PackUnpackOpTable") END_TEST_METHOD() + // bool binary ops + TEST_METHOD(LongVector_ScalarAdd_bool) + TEST_METHOD(LongVector_ScalarMultiply_bool) + TEST_METHOD(LongVector_Multiply_bool) + TEST_METHOD(LongVector_Add_bool) + TEST_METHOD(LongVector_Min_bool) + TEST_METHOD(LongVector_Max_bool) + // bool unary ops + // Note that clamp doesn't make sense for bools. + TEST_METHOD(LongVector_Initialize_bool); + + // float16 (half) binary ops + TEST_METHOD(LongVector_ScalarAdd_float16) + TEST_METHOD(LongVector_ScalarMultiply_float16) + TEST_METHOD(LongVector_Multiply_float16) + TEST_METHOD(LongVector_Add_float16) + TEST_METHOD(LongVector_Min_float16) + TEST_METHOD(LongVector_Max_float16) + // float16 (half) unary ops + TEST_METHOD(LongVector_Clamp_float16); + TEST_METHOD(LongVector_Initialize_float16); + + // float32 binary ops + TEST_METHOD(LongVector_ScalarAdd_float32) + TEST_METHOD(LongVector_ScalarMultiply_float32) + TEST_METHOD(LongVector_Multiply_float32) + TEST_METHOD(LongVector_Add_float32) + TEST_METHOD(LongVector_Min_float32) + TEST_METHOD(LongVector_Max_float32) + // float32 unary ops + TEST_METHOD(LongVector_Clamp_float32); + TEST_METHOD(LongVector_Initialize_float32); + + // float64 binary ops + TEST_METHOD(LongVector_ScalarAdd_float64) + TEST_METHOD(LongVector_ScalarMultiply_float64) + TEST_METHOD(LongVector_Multiply_float64) + TEST_METHOD(LongVector_Add_float64) + TEST_METHOD(LongVector_Min_float64) + TEST_METHOD(LongVector_Max_float64) + // float64 unary ops + TEST_METHOD(LongVector_Clamp_float64); + TEST_METHOD(LongVector_Initialize_float64); + + // int16 binary ops + TEST_METHOD(LongVector_ScalarAdd_int16) + TEST_METHOD(LongVector_ScalarMultiply_int16) + 
TEST_METHOD(LongVector_Multiply_int16) + TEST_METHOD(LongVector_Add_int16) + TEST_METHOD(LongVector_Min_int16) + TEST_METHOD(LongVector_Max_int16) + // int16 unary ops + TEST_METHOD(LongVector_Clamp_int16); + TEST_METHOD(LongVector_Initialize_int16); + + // int32 binary ops + TEST_METHOD(LongVector_ScalarAdd_int32) + TEST_METHOD(LongVector_ScalarMultiply_int32) + TEST_METHOD(LongVector_Multiply_int32) + TEST_METHOD(LongVector_Add_int32) + TEST_METHOD(LongVector_Min_int32) + TEST_METHOD(LongVector_Max_int32) + // int32 unary ops + TEST_METHOD(LongVector_Clamp_int32); + TEST_METHOD(LongVector_Initialize_int32); + + // int64 binary ops + TEST_METHOD(LongVector_ScalarAdd_int64) + TEST_METHOD(LongVector_ScalarMultiply_int64) + TEST_METHOD(LongVector_Multiply_int64) + TEST_METHOD(LongVector_Add_int64) + TEST_METHOD(LongVector_Min_int64) + TEST_METHOD(LongVector_Max_int64) + // int64 unary ops + TEST_METHOD(LongVector_Clamp_int64); + TEST_METHOD(LongVector_Initialize_int64); + + // uint16 binary ops + TEST_METHOD(LongVector_ScalarAdd_uint16) + TEST_METHOD(LongVector_ScalarMultiply_uint16) + TEST_METHOD(LongVector_Multiply_uint16) + TEST_METHOD(LongVector_Add_uint16) + TEST_METHOD(LongVector_Min_uint16) + TEST_METHOD(LongVector_Max_uint16) + // uint16 unary ops + TEST_METHOD(LongVector_Clamp_uint16); + TEST_METHOD(LongVector_Initialize_uint16); + + // uint32 binary ops + TEST_METHOD(LongVector_ScalarAdd_uint32) + TEST_METHOD(LongVector_ScalarMultiply_uint32) + TEST_METHOD(LongVector_Multiply_uint32) + TEST_METHOD(LongVector_Add_uint32) + TEST_METHOD(LongVector_Min_uint32) + TEST_METHOD(LongVector_Max_uint32) + // uint32 unary ops + TEST_METHOD(LongVector_Clamp_uint32); + TEST_METHOD(LongVector_Initialize_uint32); + + // uint64 binary ops + TEST_METHOD(LongVector_ScalarAdd_uint64) + TEST_METHOD(LongVector_ScalarMultiply_uint64) + TEST_METHOD(LongVector_Multiply_uint64) + TEST_METHOD(LongVector_Add_uint64) + TEST_METHOD(LongVector_Min_uint64) + 
TEST_METHOD(LongVector_Max_uint64) + // uint64 unary ops + TEST_METHOD(LongVector_Clamp_uint64); + TEST_METHOD(LongVector_Initialize_uint64); + + // Shader Execution Reordering tests + TEST_METHOD(SERBasicTest); + TEST_METHOD(SERNOPValuesTest); + TEST_METHOD(SERRayQueryTest); + TEST_METHOD(SERIntersectionTest); + TEST_METHOD(SERGetAttributesTest); + TEST_METHOD(SERTraceHitMissNopTest); + TEST_METHOD(SERIsMissTest); + TEST_METHOD(SERShaderTableIndexTest); + TEST_METHOD(SERLoadLocalRootTableConstantTest); + TEST_METHOD(SERInvokeNoSBTTest); + TEST_METHOD(SERMaybeReorderThreadTest) + TEST_METHOD(SERDynamicHitObjectArrayTest); + TEST_METHOD(SERWaveIncoherentHitTest); + TEST_METHOD(SERReorderCoherentTest); + TEST_METHOD(SERGetterPermutationTest); + TEST_METHOD(SERAttributesPermutationTest); + TEST_METHOD(SERMultiPayloadTest); + + // CoopVec tests + TEST_METHOD(CoopVec_Mul); + TEST_METHOD(CoopVec_OuterProduct); + dxc::DxcDllSupport m_support; bool m_D3DInitCompleted = false; @@ -636,7 +779,7 @@ class ExecutionTest { #endif } - bool UseDebugIfaces() { return true; } + bool UseDebugIfaces() { return false; } bool SaveImages() { return GetTestParamBool(L"SaveImages"); } @@ -659,6 +802,42 @@ class ExecutionTest { void RunResourceTest(ID3D12Device *pDevice, const char *pShader, const wchar_t *sm, bool isDynamic); + void runCoopVecMulTest(); + void runCoopVecOuterProductTest(); + +#if HAVE_COOPVEC_API + struct CoopVecMulSubtestConfig { + int InputPerThread; + int OutputPerThread; + int NumThreads; + int NumLevels; + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT MatrixLayout; + bool Bias; + }; + + void + runCoopVecMulTestConfig(ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL &MulProps); + void runCoopVecMulSubtest(ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL &MulProps, + CoopVecMulSubtestConfig &Config); + + struct CoopVecOuterProductSubtestConfig { + int DimM; // Row Count + int DimN; // Column Count + int NumThreads; + 
D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT MatrixLayout; + }; + + void runCoopVecOuterProductTestConfig( + ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE &AccumulateProps); + void runCoopVecOuterProductSubtest( + ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE &AccumulateProps, + CoopVecOuterProductSubtestConfig &Config); +#endif // HAVE_COOPVEC_API + template void WaveIntrinsicsActivePrefixTest(TableParameter *pParameterList, size_t numParameter, bool isPrefix); @@ -710,11 +889,16 @@ class ExecutionTest { const char *pShaderModelStr, const char *pShader, Ty *pInputDataPairs, unsigned inputDataCount); + template + void LongVectorOpTestBase(LongVectorOpTestConfig &TestConfig); + template void LongVectorOpTestBase(LongVectorOpType OpType); + template const wchar_t *BasicShaderModelTest_GetFormatString(); void CompileFromText(LPCSTR pText, LPCWSTR pEntryPoint, LPCWSTR pTargetProfile, ID3DBlob **ppBlob, - LPCWSTR *pOptions = nullptr, int numOptions = 0) { + LPCWSTR *pOptions = nullptr, int numOptions = 0, + IDxcIncludeHandler *pIncludeHandler = nullptr) { VERIFY_SUCCEEDED(m_support.Initialize()); CComPtr pCompiler; CComPtr pLibrary; @@ -727,7 +911,7 @@ class ExecutionTest { pText, (UINT32)strlen(pText), CP_UTF8, &pTextBlob)); VERIFY_SUCCEEDED(pCompiler->Compile(pTextBlob, L"hlsl.hlsl", pEntryPoint, pTargetProfile, pOptions, numOptions, - nullptr, 0, nullptr, &pResult)); + nullptr, 0, pIncludeHandler, &pResult)); VERIFY_SUCCEEDED(pResult->GetStatus(&resultCode)); if (FAILED(resultCode)) { #ifndef _HLK_CONF @@ -762,7 +946,8 @@ class ExecutionTest { ID3D12RootSignature *pRootSignature, LPCSTR pShader, LPCWSTR pTargetProfile, ID3D12PipelineState **ppComputeState, - LPCWSTR *pOptions = nullptr, int numOptions = 0) { + LPCWSTR *pOptions = nullptr, int numOptions = 0, + IDxcIncludeHandler *pIncludeHandler = nullptr) { CComPtr pComputeShader; // Load and compile shaders. 
@@ -772,7 +957,7 @@ class ExecutionTest { #endif } else { CompileFromText(pShader, L"main", pTargetProfile, &pComputeShader, - pOptions, numOptions); + pOptions, numOptions, pIncludeHandler); } // Describe and create the compute pipeline state object (PSO). @@ -1609,6 +1794,21 @@ class ExecutionTest { #endif } + bool DoesDeviceSupportCooperativeVector(ID3D12Device *Device) { +#if HAVE_COOPVEC_API + D3D12_FEATURE_DATA_D3D12_OPTIONS_EXPERIMENTAL O; + if (FAILED(Device->CheckFeatureSupport( + (D3D12_FEATURE)D3D12_FEATURE_D3D12_OPTIONS_EXPERIMENTAL, &O, + sizeof(O)))) + return false; + return O.CooperativeVectorTier != + D3D12_COOPERATIVE_VECTOR_TIER_NOT_SUPPORTED; +#else + UNREFERENCED_PARAMETER(Device); + return false; +#endif + } + bool IsFallbackPathEnabled() { // Enable fallback paths with: /p:"EnableFallback=1" UINT EnableFallbackValue = 0; @@ -1721,8 +1921,18 @@ class ExecutionTest { if (pD3D12EnableExperimentalFeatures == nullptr) { return HRESULT_FROM_WIN32(GetLastError()); } - return pD3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModelsID, - nullptr, nullptr); + + std::vector Features; + + Features.push_back(D3D12ExperimentalShaderModels); + +#if HAVE_COOPVEC_API + if (GetTestParamBool(L"CooperativeVectorExperimental")) { + Features.push_back(D3D12CooperativeVectorExperiment); + } +#endif + return pD3D12EnableExperimentalFeatures((UINT)Features.size(), + Features.data(), nullptr, nullptr); } static HRESULT EnableExperimentalShaderModels() { @@ -1917,6 +2127,42 @@ class ExecutionTest { CComPtr &pRootSignature, LPCWSTR pTargetProfile, LPCWSTR *pOptions, int numOptions); + bool CreateDXRDevice(ID3D12Device **ppDevice, D3D_SHADER_MODEL testModel, + bool skipUnsupported); + struct DXRRunConfig { + int WindowWidth = 64; + int WindowHeight = 64; + bool UseMesh = true; + bool UseProceduralGeometry = false; + int PayloadCount = 1; + int AttributeCount = 2; + int MaxRecursion = 1; + int NumMissShaders = 1; + int NumHitGroups = 1; + }; + CComPtr + 
RunDXRTest(ID3D12Device *Device0, LPCSTR ShaderSrc, LPCWSTR TargetProfile, + LPCWSTR *Options, int NumOptions, std::vector &TestData, + const DXRRunConfig &Config); + + CComPtr RunDXRTest(ID3D12Device *Device0, LPCSTR ShaderSrc, + LPCWSTR TargetProfile, LPCWSTR *Options, + int NumOptions, std::vector &TestData, + int WindowWidth, int WindowHeight, + bool UseMesh, bool UseProceduralGeometry, + int PayloadCount, int AttributeCount) { + DXRRunConfig Config = {WindowWidth, + WindowHeight, + UseMesh, + UseProceduralGeometry, + PayloadCount, + AttributeCount, + 1, + 1, + 1}; + return RunDXRTest(Device0, ShaderSrc, TargetProfile, Options, NumOptions, + TestData, Config); + } void SetDescriptorHeap(ID3D12GraphicsCommandList *pCommandList, ID3D12DescriptorHeap *pHeap) { @@ -2078,6 +2324,751 @@ void ExecutionTest::RunRWByteBufferComputeTest(ID3D12Device *pDevice, WaitForSignal(pCommandQueue, FO); } +bool ExecutionTest::CreateDXRDevice(ID3D12Device **ppDevice, + D3D_SHADER_MODEL testModel, + bool skipUnsupported) { + bool SupportsSM = CreateDevice(ppDevice, testModel, skipUnsupported); + if (!SupportsSM) + return false; + + if (DoesDeviceSupportRayTracing(*ppDevice)) + return true; + + if (skipUnsupported) { + WEX::Logging::Log::Comment( + L"DXR test skipped: device does not support DXR."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + } + return false; +} + +CComPtr +ExecutionTest::RunDXRTest(ID3D12Device *Device0, LPCSTR ShaderSrc, + LPCWSTR TargetProfile, LPCWSTR *Options, + int NumOptions, std::vector &TestData, + const DXRRunConfig &Config) { + CComPtr Device; + VERIFY_SUCCEEDED(Device0->QueryInterface(IID_PPV_ARGS(&Device))); + + FenceObj FO; + InitFenceObj(Device, &FO); + + // Setup Resources + CComPtr TestBuffer; + CComPtr TestBufferRead; + CComPtr SceneConstantBuffer; + + // Descriptor heap + CComPtr DescriptorHeap; + { + // + // UAV descriptor heap layout: + // 0 - test buffer UAV + // 1 - vertex buffer SRV + // 2 - index buffer SRV + // + 
D3D12_DESCRIPTOR_HEAP_DESC DescriptorHeapDesc = {}; + DescriptorHeapDesc.NumDescriptors = 3; + DescriptorHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + DescriptorHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + Device->CreateDescriptorHeap(&DescriptorHeapDesc, + IID_PPV_ARGS(&DescriptorHeap)); + DescriptorHeap->SetName(L"Descriptor Heap"); + } + int DescriptorSize = Device->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + // Testbuffer + { + auto ResDesc = CD3DX12_RESOURCE_DESC::Buffer( + TestData.size() * sizeof(int), + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + auto DefaultHeapProperties = + CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + VERIFY_SUCCEEDED(Device->CreateCommittedResource( + &DefaultHeapProperties, D3D12_HEAP_FLAG_NONE, &ResDesc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, + IID_PPV_ARGS(&TestBuffer))); + TestBuffer->SetName(L"Test Buffer"); + + const int DescriptorIndex = 0; + D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptorHandle = + CD3DX12_CPU_DESCRIPTOR_HANDLE( + DescriptorHeap->GetCPUDescriptorHandleForHeapStart(), + DescriptorIndex, DescriptorSize); + D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; + UAVDesc.Format = DXGI_FORMAT_UNKNOWN; + UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + UAVDesc.Buffer.FirstElement = 0; + UAVDesc.Buffer.NumElements = (UINT)TestData.size(); + UAVDesc.Buffer.StructureByteStride = sizeof(int); + UAVDesc.Buffer.CounterOffsetInBytes = 0; + UAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + Device->CreateUnorderedAccessView(TestBuffer, nullptr, &UAVDesc, + CPUDescriptorHandle); + } + + // Testbuffer Readback + { + CD3DX12_HEAP_PROPERTIES ReadHeap(D3D12_HEAP_TYPE_READBACK); + CD3DX12_RESOURCE_DESC ReadDesc( + CD3DX12_RESOURCE_DESC::Buffer(TestData.size() * sizeof(int))); + Device->CreateCommittedResource(&ReadHeap, D3D12_HEAP_FLAG_NONE, &ReadDesc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&TestBufferRead)); + } + + // Create CBV 
resource (sceneConstantBuffer), index 1 + { + const int DescriptorIndex = 1; + const UINT ConstantBufferSize = + (sizeof(SceneConsts) + + (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1)) & + ~(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - + 1); // must be a multiple 256 bytes + D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptorHandle = + CD3DX12_CPU_DESCRIPTOR_HANDLE( + DescriptorHeap->GetCPUDescriptorHandleForHeapStart(), + DescriptorIndex, DescriptorSize); + auto ResDesc = CD3DX12_RESOURCE_DESC::Buffer(ConstantBufferSize); + auto UploadHeapProperties = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + Device->CreateCommittedResource(&UploadHeapProperties, D3D12_HEAP_FLAG_NONE, + &ResDesc, D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&SceneConstantBuffer)); + + UINT8 *SceneConstantBufferWO; + CD3DX12_RANGE ReadRange( + 0, 0); // We do not intend to read from this resource on the CPU. + SceneConstantBuffer->Map(0, &ReadRange, + reinterpret_cast(&SceneConstantBufferWO)); + + // Setup Scene Constants + SceneConsts SceneConsts = { + {25.f, -25.f, 700.f, 0.f}, + {536.f, 0.f, 0.f, 0.f}, + {0.f, 301.f, 0.f, 0.f}, + {0.f, 0., -699.f, 0.f}, + 100.f, + {(unsigned int)Config.WindowWidth, (unsigned int)Config.WindowHeight}, + 0x00}; + + memcpy(SceneConstantBufferWO, &SceneConsts, sizeof(SceneConsts)); + SceneConstantBuffer->Unmap(0, nullptr); + + D3D12_CONSTANT_BUFFER_VIEW_DESC Desc = {}; + Desc.SizeInBytes = ConstantBufferSize; + Desc.BufferLocation = SceneConstantBuffer->GetGPUVirtualAddress(); + Device->CreateConstantBufferView(&Desc, CPUDescriptorHandle); + } + + // Local (SBT) root signature + CComPtr LocalRootSignature; + { + CD3DX12_DESCRIPTOR_RANGE BufferRanges[1]; + CD3DX12_ROOT_PARAMETER RootParameters[2]; + BufferRanges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 1, 0, + 2); // vertexBuffer(t1), indexBuffer(t2) + RootParameters[0].InitAsDescriptorTable( + _countof(BufferRanges), BufferRanges, D3D12_SHADER_VISIBILITY_ALL); + 
RootParameters[1].InitAsConstants(4, 1, 0, D3D12_SHADER_VISIBILITY_ALL); + + CD3DX12_ROOT_SIGNATURE_DESC RootSignatureDesc; + RootSignatureDesc.Init(_countof(RootParameters), RootParameters, 0, nullptr, + D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE); + CComPtr Signature; + CComPtr Error; + VERIFY_SUCCEEDED(D3D12SerializeRootSignature( + &RootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &Signature, &Error)); + VERIFY_SUCCEEDED(Device->CreateRootSignature( + 0, Signature->GetBufferPointer(), Signature->GetBufferSize(), + IID_PPV_ARGS(&LocalRootSignature))); + LocalRootSignature->SetName(L"Local Root Signature"); + } + + // Global root signature + CComPtr GlobalRootSignature; + { + CD3DX12_DESCRIPTOR_RANGE BufferRanges[1]; + CD3DX12_ROOT_PARAMETER RootParameters[3]; + BufferRanges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, + 0); // testBuffer(u0) + RootParameters[0].InitAsShaderResourceView( + 0, 0, D3D12_SHADER_VISIBILITY_ALL); // accelStruct(t0) + RootParameters[1].InitAsConstantBufferView(0); // sceneConstants(b0) + RootParameters[2].InitAsDescriptorTable( + _countof(BufferRanges), BufferRanges, D3D12_SHADER_VISIBILITY_ALL); + + CD3DX12_ROOT_SIGNATURE_DESC RootSignatureDesc; + RootSignatureDesc.Init(_countof(RootParameters), RootParameters, 0, nullptr, + D3D12_ROOT_SIGNATURE_FLAG_NONE); + CComPtr Signature; + CComPtr Error; + VERIFY_SUCCEEDED(D3D12SerializeRootSignature( + &RootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &Signature, &Error)); + VERIFY_SUCCEEDED(Device->CreateRootSignature( + 0, Signature->GetBufferPointer(), Signature->GetBufferSize(), + IID_PPV_ARGS(&GlobalRootSignature))); + GlobalRootSignature->SetName(L"Global Root Signature"); + } + + // Create command queue. + CComPtr CommandQueue; + CreateCommandQueue(Device, L"RunDXRTest Command Queue", &CommandQueue, + D3D12_COMMAND_LIST_TYPE_DIRECT); + + // Compile raygen shader. 
+ CComPtr ShaderLib; + CompileFromText(ShaderSrc, L"raygen", TargetProfile, &ShaderLib, Options, + NumOptions); + + // Construct HitGroups + struct HitGroupDesc { + std::wstring ClosestHit; + std::wstring AnyHit; + std::wstring Intersection; + std::wstring HitGroupName; + const bool IsProcedural() const { return !Intersection.empty();} + }; + std::vector HitGroupDescs; + + const bool PrimaryHitGroupsAreAABB = !Config.UseMesh && Config.UseProceduralGeometry; + const bool EnableSecondaryHitGroups = Config.UseMesh && Config.UseProceduralGeometry; + + // Base hit group + HitGroupDesc PrimaryHitGroup{L"closesthit", L"anyhit", L"", L"HitGroup"}; + if (PrimaryHitGroupsAreAABB) + PrimaryHitGroup.Intersection = L"intersection"; + HitGroupDescs.push_back(PrimaryHitGroup); + + for (int i = 1; i < Config.NumHitGroups; i++) { + std::wstring ClosestHit = L"closesthit" + std::to_wstring(i); + std::wstring AnyHit = L"anyhit" + std::to_wstring(i); + std::wstring Intersection = L""; + if (PrimaryHitGroupsAreAABB) + Intersection = L"intersection" + std::to_wstring(i); + std::wstring HitGroupName = L"HitGroup" + std::to_wstring(i); + HitGroupDescs.push_back( + HitGroupDesc{ClosestHit, AnyHit, Intersection, HitGroupName}); + } + + if (EnableSecondaryHitGroups) { + HitGroupDescs.push_back( + HitGroupDesc{L"chAABB", L"ahAABB", L"intersection", L"HitGroupAABB"}); + for (int i = 1; i < Config.NumHitGroups; i++) { + std::wstring ClosestHit = L"chAABB" + std::to_wstring(i); + std::wstring AnyHit = L"ahAABB" + std::to_wstring(i); + std::wstring Intersection = L"intersection" + std::to_wstring(i); + std::wstring HitGroupName = L"HitGroupAABB" + std::to_wstring(i); + HitGroupDescs.push_back( + HitGroupDesc{ClosestHit, AnyHit, Intersection, HitGroupName}); + } + } + + // Collect required shader names from HitGroups + std::vector ShaderNames; + ShaderNames.push_back(L"raygen"); + ShaderNames.push_back(L"miss"); + for (int i = 1; i < Config.NumMissShaders; i++) + ShaderNames.push_back(L"miss" + 
std::to_wstring(i)); + for (const HitGroupDesc &HitGroupDesc : HitGroupDescs) { + ShaderNames.push_back(HitGroupDesc.ClosestHit); + ShaderNames.push_back(HitGroupDesc.AnyHit); + if (HitGroupDesc.IsProcedural()) + ShaderNames.push_back(HitGroupDesc.Intersection); + } + + // Describe and create the RT pipeline state object (RTPSO). + CD3DX12_STATE_OBJECT_DESC StateObjectDesc( + D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE); + auto Lib = StateObjectDesc.CreateSubobject(); + CD3DX12_SHADER_BYTECODE ByteCode(ShaderLib); + Lib->SetDXILLibrary(&ByteCode); + + for (std::wstring Export : ShaderNames) + Lib->DefineExport(Export.c_str()); + + StateObjectDesc.CreateSubobject() + ->Config(Config.PayloadCount * sizeof(float), Config.AttributeCount * sizeof(float)); + StateObjectDesc + .CreateSubobject() + ->Config(Config.MaxRecursion); + + // Set Global Root Signature subobject. + auto GlobalRootSigSubObj = + StateObjectDesc + .CreateSubobject(); + GlobalRootSigSubObj->SetRootSignature(GlobalRootSignature); + // Set Local Root Signature subobject. 
+ StateObjectDesc.CreateSubobject() + ->SetRootSignature(LocalRootSignature); + + auto Exports = StateObjectDesc.CreateSubobject< + CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT>(); + Exports->SetSubobjectToAssociate(*GlobalRootSigSubObj); + for (std::wstring Export : ShaderNames) + Exports->AddExport(Export.c_str()); + + for (const HitGroupDesc &HitGroupDesc : HitGroupDescs) { + auto HitGroup = + StateObjectDesc.CreateSubobject(); + HitGroup->SetClosestHitShaderImport(HitGroupDesc.ClosestHit.c_str()); + HitGroup->SetAnyHitShaderImport(HitGroupDesc.AnyHit.c_str()); + if (HitGroupDesc.IsProcedural()) { + HitGroup->SetIntersectionShaderImport(HitGroupDesc.Intersection.c_str()); + HitGroup->SetHitGroupType(D3D12_HIT_GROUP_TYPE_PROCEDURAL_PRIMITIVE); + } else { + HitGroup->SetHitGroupType(D3D12_HIT_GROUP_TYPE_TRIANGLES); + } + HitGroup->SetHitGroupExport(HitGroupDesc.HitGroupName.c_str()); + } + + CComPtr StateObject; + CComPtr StateObjectProperties; + VERIFY_SUCCEEDED( + Device->CreateStateObject(StateObjectDesc, IID_PPV_ARGS(&StateObject))); + VERIFY_SUCCEEDED(StateObject->QueryInterface(&StateObjectProperties)); + + // Create SBT + ShaderTable ShaderTable( + Device, + 1, // raygen count + Config.NumMissShaders, // miss count + (int) HitGroupDescs.size(), // hit group count + 1, // ray type count + 4 // dwords per root table + ); + + int LocalRootConsts[4] = {12, 34, 56, 78}; + + // raygen + memcpy(ShaderTable.GetRaygenShaderIdPtr(0), + StateObjectProperties->GetShaderIdentifier(L"raygen"), + SHADER_ID_SIZE_IN_BYTES); + memcpy(ShaderTable.GetRaygenRootTablePtr(0), LocalRootConsts, + sizeof(LocalRootConsts)); + + // miss shaders + memcpy(ShaderTable.GetMissShaderIdPtr(0, 0), + StateObjectProperties->GetShaderIdentifier(L"miss"), + SHADER_ID_SIZE_IN_BYTES); + memcpy(ShaderTable.GetMissRootTablePtr(0, 0), LocalRootConsts, + sizeof(LocalRootConsts)); + for (int i = 1; i < Config.NumMissShaders; i++) { + std::wstring MissShaderName = L"miss" + std::to_wstring(i); + 
memcpy(ShaderTable.GetMissShaderIdPtr(i, 0), + StateObjectProperties->GetShaderIdentifier(MissShaderName.c_str()), + SHADER_ID_SIZE_IN_BYTES); + memcpy(ShaderTable.GetMissRootTablePtr(i, 0), LocalRootConsts, + sizeof(LocalRootConsts)); + } + + // hit groups + for (int HitGroupIdx = 0; HitGroupIdx < HitGroupDescs.size(); HitGroupIdx++) { + const HitGroupDesc &HitGroupDesc = HitGroupDescs[HitGroupIdx]; + memcpy( + ShaderTable.GetHitGroupShaderIdPtr(HitGroupIdx, 0), + StateObjectProperties->GetShaderIdentifier(HitGroupDesc.HitGroupName.c_str()), + SHADER_ID_SIZE_IN_BYTES); + memcpy(ShaderTable.GetHitGroupRootTablePtr(HitGroupIdx, 0), LocalRootConsts, + sizeof(LocalRootConsts)); + } + + // Create a command allocator and list. + CComPtr CommandAllocator; + CComPtr CommandList; + VERIFY_SUCCEEDED(Device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&CommandAllocator))); + VERIFY_SUCCEEDED(Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + CommandAllocator, nullptr, + IID_PPV_ARGS(&CommandList))); + CommandList->SetName(L"ExecutionTest::RunDXRTest Command List"); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, nullptr)); + + // Create scene geometry. 
+ CComPtr TLASResource; + CComPtr BLASMeshResource; + CComPtr BLASProceduralGeometryResource; + CComPtr ScratchResource; + + if (Config.UseMesh) { + CComPtr VertexBuffer; + CComPtr VertexBufferUpload; + CComPtr IndexBuffer; + CComPtr IndexBufferUpload; + + // Define a Quad + const float Verts[] = { + -50.5f, 50.5f, 0.5f, // top left + 50.5f, -50.5f, 0.5f, // bottom right + -50.5f, -50.5f, 0.5f, // bottom left + 50.5f, 50.5f, 0.5f // top right + }; + const int Indices[] = { + 0, 1, 2, // first triangle + 0, 3, 1 // second triangle + }; + + const UINT64 VertexDataSize = sizeof(Verts); + const UINT64 IndexDataSize = sizeof(Indices); + + AllocateUploadBuffer(Device, Verts, VertexDataSize, &VertexBufferUpload, + L"VertexBufferUpload"); + AllocateUploadBuffer(Device, Indices, IndexDataSize, &IndexBufferUpload, + L"IndexBufferUpload"); + + AllocateBufferFromUpload( + Device, CommandList, VertexBufferUpload, &VertexBuffer, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, L"VertexBuffer"); + AllocateBufferFromUpload( + Device, CommandList, IndexBufferUpload, &IndexBuffer, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, L"IndexBuffer"); + + { + const int DescriptorIndex = 1; + D3D12_CPU_DESCRIPTOR_HANDLE CpuDescriptorHandle = + CD3DX12_CPU_DESCRIPTOR_HANDLE( + DescriptorHeap->GetCPUDescriptorHandleForHeapStart(), + DescriptorIndex, DescriptorSize); + D3D12_SHADER_RESOURCE_VIEW_DESC SrvDesc = {}; + SrvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + SrvDesc.Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + SrvDesc.Buffer.NumElements = + UINT(VertexDataSize / sizeof(DirectX::XMFLOAT3)); + SrvDesc.Format = DXGI_FORMAT_UNKNOWN; + SrvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + SrvDesc.Buffer.StructureByteStride = sizeof(DirectX::XMFLOAT3); + Device->CreateShaderResourceView(VertexBuffer, &SrvDesc, + CpuDescriptorHandle); + } + { + const int DescriptorIndex = 2; + D3D12_CPU_DESCRIPTOR_HANDLE CpuDescriptorHandle = + CD3DX12_CPU_DESCRIPTOR_HANDLE( + 
DescriptorHeap->GetCPUDescriptorHandleForHeapStart(), + DescriptorIndex, DescriptorSize); + D3D12_SHADER_RESOURCE_VIEW_DESC SrvDesc = {}; + SrvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + SrvDesc.Shader4ComponentMapping = + D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + SrvDesc.Buffer.NumElements = UINT(IndexDataSize / sizeof(int)); + SrvDesc.Format = DXGI_FORMAT_UNKNOWN; + SrvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + SrvDesc.Buffer.StructureByteStride = sizeof(int); + Device->CreateShaderResourceView(IndexBuffer, &SrvDesc, + CpuDescriptorHandle); + } + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, nullptr)); + + // Build triangle BLAS. + D3D12_RAYTRACING_GEOMETRY_DESC GeometryDesc = {}; + GeometryDesc.Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES; + GeometryDesc.Triangles.IndexBuffer = IndexBuffer->GetGPUVirtualAddress(); + GeometryDesc.Triangles.IndexCount = + static_cast(IndexBuffer->GetDesc().Width) / sizeof(int); + GeometryDesc.Triangles.IndexFormat = DXGI_FORMAT_R32_UINT; + GeometryDesc.Triangles.Transform3x4 = 0; + GeometryDesc.Triangles.VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT; + GeometryDesc.Triangles.VertexCount = + static_cast(VertexBuffer->GetDesc().Width) / + sizeof(DirectX::XMFLOAT3); + GeometryDesc.Triangles.VertexBuffer.StartAddress = + VertexBuffer->GetGPUVirtualAddress(); + GeometryDesc.Triangles.VertexBuffer.StrideInBytes = + sizeof(DirectX::XMFLOAT3); + GeometryDesc.Flags = D3D12_RAYTRACING_GEOMETRY_FLAG_NONE; // Non-opaque to + // trigger anyhit. 
+ + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS BuildFlags = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE; + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS AccelInputs = {}; + AccelInputs.Type = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + AccelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + AccelInputs.pGeometryDescs = &GeometryDesc; + AccelInputs.NumDescs = 1; + AccelInputs.Flags = BuildFlags; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {}; + Device->GetRaytracingAccelerationStructurePrebuildInfo(&AccelInputs, + &PrebuildInfo); + + ScratchResource.Release(); + ReallocScratchResource(Device, &ScratchResource, + PrebuildInfo.ScratchDataSizeInBytes); + AllocateBuffer( + Device, PrebuildInfo.ResultDataMaxSizeInBytes, &BLASMeshResource, true, + D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, L"blasMesh"); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC BuildDesc = {}; + BuildDesc.Inputs = AccelInputs; + BuildDesc.ScratchAccelerationStructureData = + ScratchResource->GetGPUVirtualAddress(); + BuildDesc.DestAccelerationStructureData = + BLASMeshResource->GetGPUVirtualAddress(); + + CommandList->BuildRaytracingAccelerationStructure(&BuildDesc, 0, nullptr); + CD3DX12_RESOURCE_BARRIER Barrier = + CD3DX12_RESOURCE_BARRIER::UAV(BLASMeshResource); + CommandList->ResourceBarrier(1, (const D3D12_RESOURCE_BARRIER *)&Barrier); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, nullptr)); + } + + if (Config.UseProceduralGeometry) { + // Define procedural geometry AABB for a plane + CComPtr AabbBuffer; + CComPtr AabbBufferUpload; + + // Define the AABB for the plane, matching the size of the quad defined by + // verts[] + const float BoxSize = 500.f; + const D3D12_RAYTRACING_AABB Aabb = { + -BoxSize, -BoxSize, 
-BoxSize, // Min corner (x, y, z) + BoxSize, BoxSize, BoxSize // Max corner (x, y, z) + }; + const UINT64 AabbDataSize = sizeof(Aabb); + + // Create an upload buffer for the AABB + AllocateUploadBuffer(Device, &Aabb, AabbDataSize, &AabbBufferUpload, + L"AabbBufferUpload"); + + // Create a GPU buffer for the AABB + AllocateBufferFromUpload(Device, CommandList, AabbBufferUpload, &AabbBuffer, + D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, + L"AabbBuffer"); + + // Describe the procedural geometry + D3D12_RAYTRACING_GEOMETRY_DESC ProcGeometryDesc = {}; + ProcGeometryDesc.Type = + D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS; + ProcGeometryDesc.AABBs.AABBs.StartAddress = + AabbBuffer->GetGPUVirtualAddress(); + ProcGeometryDesc.AABBs.AABBs.StrideInBytes = sizeof(D3D12_RAYTRACING_AABB); + ProcGeometryDesc.AABBs.AABBCount = 1; + + // Build the BLAS for the procedural geometry + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS BLASInputs = {}; + BLASInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL; + BLASInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + BLASInputs.NumDescs = 1; + BLASInputs.pGeometryDescs = &ProcGeometryDesc; + BLASInputs.Flags = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {}; + Device->GetRaytracingAccelerationStructurePrebuildInfo(&BLASInputs, + &PrebuildInfo); + + // Allocate scratch and result buffers for the BLAS + ScratchResource.Release(); + ReallocScratchResource(Device, &ScratchResource, + PrebuildInfo.ScratchDataSizeInBytes); + AllocateBuffer(Device, PrebuildInfo.ResultDataMaxSizeInBytes, + &BLASProceduralGeometryResource, true, + D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, + L"BlasProceduralGeometry"); + + // Build the BLAS + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC BLASDesc = {}; + BLASDesc.Inputs = BLASInputs; + BLASDesc.ScratchAccelerationStructureData = + 
ScratchResource->GetGPUVirtualAddress(); + BLASDesc.DestAccelerationStructureData = + BLASProceduralGeometryResource->GetGPUVirtualAddress(); + + CommandList->BuildRaytracingAccelerationStructure(&BLASDesc, 0, nullptr); + + // Add a UAV barrier to ensure the BLAS is built before using it + CD3DX12_RESOURCE_BARRIER Barrier = + CD3DX12_RESOURCE_BARRIER::UAV(BLASProceduralGeometryResource); + CommandList->ResourceBarrier(1, &Barrier); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, nullptr)); + } + + // Build TLAS. + CComPtr InstanceDescs; + { + D3D12_RAYTRACING_INSTANCE_DESC CPUInstanceDescs[2] = {}; + const int MeshIdx = 0; + const int ProcGeoIdx = Config.UseMesh && Config.UseProceduralGeometry ? 1 : 0; + const int NumInstanceDescs = ProcGeoIdx + 1; + + for (int i = 0; i < NumInstanceDescs; ++i) { + D3D12_RAYTRACING_INSTANCE_DESC &InstanceDesc = CPUInstanceDescs[i]; + InstanceDesc.Transform[0][0] = InstanceDesc.Transform[1][1] = + InstanceDesc.Transform[2][2] = 1; + InstanceDesc.InstanceID = i; + InstanceDesc.InstanceContributionToHitGroupIndex = + i * Config.NumHitGroups; + InstanceDesc.InstanceMask = 1; + InstanceDesc.Flags = D3D12_RAYTRACING_INSTANCE_FLAG_NONE; + } + + if (Config.UseMesh) + CPUInstanceDescs[MeshIdx].AccelerationStructure = + BLASMeshResource->GetGPUVirtualAddress(); + if (Config.UseProceduralGeometry) + CPUInstanceDescs[ProcGeoIdx].AccelerationStructure = + BLASProceduralGeometryResource->GetGPUVirtualAddress(); + + AllocateUploadBuffer(Device, &CPUInstanceDescs, + NumInstanceDescs * + sizeof(D3D12_RAYTRACING_INSTANCE_DESC), + &InstanceDescs, L"InstanceDescs"); + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS BuildFlags = + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD; + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS AccelInputs = {}; + 
AccelInputs.Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL; + AccelInputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + AccelInputs.NumDescs = NumInstanceDescs; + AccelInputs.Flags = BuildFlags; + AccelInputs.InstanceDescs = InstanceDescs->GetGPUVirtualAddress(); + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO PrebuildInfo = {}; + Device->GetRaytracingAccelerationStructurePrebuildInfo(&AccelInputs, + &PrebuildInfo); + + ScratchResource.Release(); + ReallocScratchResource(Device, &ScratchResource, + PrebuildInfo.ScratchDataSizeInBytes); + AllocateBuffer(Device, PrebuildInfo.ResultDataMaxSizeInBytes, &TLASResource, + true, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, + L"TLAS"); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC BuildDesc = {}; + BuildDesc.Inputs = AccelInputs; + BuildDesc.ScratchAccelerationStructureData = + ScratchResource->GetGPUVirtualAddress(); + BuildDesc.DestAccelerationStructureData = + TLASResource->GetGPUVirtualAddress(); + + CommandList->BuildRaytracingAccelerationStructure(&BuildDesc, 0, 0); + + CD3DX12_RESOURCE_BARRIER Barrier = + CD3DX12_RESOURCE_BARRIER::UAV(TLASResource); + CommandList->ResourceBarrier(1, (const D3D12_RESOURCE_BARRIER *)&Barrier); + } + + // Set the local root constants. 
+ CommandList->SetComputeRootSignature(LocalRootSignature); + CommandList->SetComputeRoot32BitConstant(1, 12, 0); + CommandList->SetComputeRoot32BitConstant(1, 34, 1); + CommandList->SetComputeRoot32BitConstant(1, 56, 2); + CommandList->SetComputeRoot32BitConstant(1, 78, 3); + + ShaderTable.Upload(CommandList); + + ID3D12DescriptorHeap *const Heaps[1] = {DescriptorHeap}; + CommandList->SetDescriptorHeaps(1, Heaps); + CommandList->SetComputeRootSignature(GlobalRootSignature); + CommandList->SetComputeRootShaderResourceView( + 0, TLASResource->GetGPUVirtualAddress()); + CommandList->SetComputeRootConstantBufferView( + 1, SceneConstantBuffer->GetGPUVirtualAddress()); + CommandList->SetComputeRootDescriptorTable( + 2, DescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + + D3D12_DISPATCH_RAYS_DESC DispatchDesc = {}; + DispatchDesc.RayGenerationShaderRecord.StartAddress = + ShaderTable.GetRaygenStartGpuVA(); + DispatchDesc.RayGenerationShaderRecord.SizeInBytes = + ShaderTable.GetRaygenRangeInBytes(); + DispatchDesc.MissShaderTable.StartAddress = ShaderTable.GetMissStartGpuVA(); + DispatchDesc.MissShaderTable.SizeInBytes = ShaderTable.GetMissRangeInBytes(); + DispatchDesc.MissShaderTable.StrideInBytes = + ShaderTable.GetShaderRecordSizeInBytes(); + DispatchDesc.HitGroupTable.StartAddress = ShaderTable.GetHitGroupStartGpuVA(); + DispatchDesc.HitGroupTable.SizeInBytes = + ShaderTable.GetHitGroupRangeInBytes(); + DispatchDesc.HitGroupTable.StrideInBytes = + ShaderTable.GetShaderRecordSizeInBytes(); + DispatchDesc.Width = Config.WindowWidth; + DispatchDesc.Height = Config.WindowHeight; + DispatchDesc.Depth = 1; + CommandList->SetPipelineState1(StateObject); + CommandList->DispatchRays(&DispatchDesc); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, nullptr)); + + // Copy the testBuffer contents to CPU + 
D3D12_RESOURCE_BARRIER Barriers[1]; + Barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition( + TestBuffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + CommandList->ResourceBarrier(1, Barriers); + CommandList->CopyResource(TestBufferRead, TestBuffer); + Barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition( + TestBuffer, D3D12_RESOURCE_STATE_COPY_SOURCE, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + CommandList->ResourceBarrier(1, Barriers); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + // Copy the shader test data into 'testData'. + MappedData Data(TestBufferRead, (UINT32)TestData.size() * sizeof(int)); + const int *DataPtr = (int *)Data.data(); + + for (int i = 0; i < TestData.size(); i++) + TestData[i] = *DataPtr++; + + // Cleanup resources + TestBuffer.Release(); + TestBufferRead.Release(); + SceneConstantBuffer.Release(); + DescriptorHeap.Release(); + CommandQueue.Release(); + CommandAllocator.Release(); + CommandList.Release(); + StateObject.Release(); + StateObjectProperties.Release(); + TLASResource.Release(); + BLASMeshResource.Release(); + BLASProceduralGeometryResource.Release(); + InstanceDescs.Release(); + ScratchResource.Release(); + + return TestBufferRead; +} + +// SER TESTS +#include "ExecutionTest_SER.h" +// + void ExecutionTest::RunLifetimeIntrinsicComputeTest( ID3D12Device *pDevice, LPCSTR pShader, CComPtr &pUavHeap, @@ -11096,138 +12087,2202 @@ TEST_F(ExecutionTest, PackUnpackTest) { } } -// This test expects a that retrieves a signal value from each of a -// few resources that are initialized here. determines if it uses -// the 6.6 Dynamic Resources feature. 
Values are read back from the result UAV -// and compared to the expected signals -void ExecutionTest::RunResourceTest(ID3D12Device *pDevice, const char *pShader, - const wchar_t *sm, bool isDynamic) { +TEST_F(ExecutionTest, LongVector_ScalarAdd_bool) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} - const int NumSRVs = 3; - const int NumUAVs = 4; - const int NumResources = NumSRVs + NumUAVs; - const int NumSamplers = 2; - const int valueSize = 16; +TEST_F(ExecutionTest, LongVector_ScalarMultiply_bool) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} - static const int DispatchGroupX = 1; - static const int DispatchGroupY = 1; - static const int DispatchGroupZ = 1; +TEST_F(ExecutionTest, LongVector_Multiply_bool) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} - CComPtr pCommandList; - CComPtr pCommandQueue; - CComPtr pCommandAllocator; - FenceObj FO; +TEST_F(ExecutionTest, LongVector_Add_bool) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} - UINT valueSizeInBytes = valueSize * sizeof(float); - CreateComputeCommandQueue(pDevice, L"DynamicResourcesTest Command Queue", - &pCommandQueue); - InitFenceObj(pDevice, &FO); +TEST_F(ExecutionTest, LongVector_Min_bool) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} - // Create root signature. 
- CComPtr pRootSignature; - if (!isDynamic) { - // Not dynamic, create a range for each resource and from them, the root - // signature - CD3DX12_DESCRIPTOR_RANGE ranges[NumResources]; - CD3DX12_DESCRIPTOR_RANGE srange[NumSamplers]; - for (int i = 0; i < NumSRVs; i++) - ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, i, 0); +TEST_F(ExecutionTest, LongVector_Max_bool) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} - for (int i = NumSRVs; i < NumResources; i++) - ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, i - NumSRVs, 0); +TEST_F(ExecutionTest, LongVector_ScalarAdd_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} - for (int i = 0; i < NumSamplers; i++) - srange[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, i, 0); +TEST_F(ExecutionTest, LongVector_ScalarMultiply_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} - CreateRootSignatureFromRanges(pDevice, &pRootSignature, ranges, - NumResources, srange, NumSamplers); - } else { - // Dynamic just requires the flags indicating that the builtin arrays should - // be accessible -#if !defined(D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) -#define D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED \ - (D3D12_ROOT_SIGNATURE_FLAGS)0x400 -#define D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED \ - (D3D12_ROOT_SIGNATURE_FLAGS)0x800 -#endif - CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; - rootSignatureDesc.Init( - 0, nullptr, 0, nullptr, - D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | - D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED); - CreateRootSignatureFromDesc(pDevice, 
&rootSignatureDesc, &pRootSignature); - } +TEST_F(ExecutionTest, LongVector_Multiply_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} - // Create pipeline state object. - CComPtr pComputeState; - CreateComputePSO(pDevice, pRootSignature, pShader, sm, &pComputeState); +TEST_F(ExecutionTest, LongVector_Add_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} - // Create a command allocator and list for compute. - VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator( - D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); - VERIFY_SUCCEEDED(pDevice->CreateCommandList( - 0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, - IID_PPV_ARGS(&pCommandList))); +TEST_F(ExecutionTest, LongVector_Min_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} - // Set up SRV resources - CComPtr pSRVResources[NumSRVs]; - CComPtr pUAVResources[NumUAVs]; - CComPtr pUploadResources[NumResources]; - { - D3D12_RESOURCE_DESC bufDesc = - CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes); - float values[valueSize]; - for (int i = 0; i < NumSRVs - 1; i++) { - for (int j = 0; j < valueSize; j++) - values[j] = 10.0f + i; - CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, - bufDesc, &pSRVResources[i], &pUploadResources[i]); - } - D3D12_RESOURCE_DESC tex2dDesc = - CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 4, 4); - for (int j = 0; j < valueSize; j++) - values[j] = 10.0 + (NumSRVs - 1); - CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, - tex2dDesc, &pSRVResources[NumSRVs - 1], - &pUploadResources[NumSRVs - 1]); - } +TEST_F(ExecutionTest, 
LongVector_Max_float16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} - // Set up UAV resources - CComPtr pReadBuffer; - float values[valueSize]; - for (int i = 0; i < NumUAVs - 2; i++) { - for (int j = 0; j < valueSize; j++) - values[j] = 20.0f + i; - CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, - &pUAVResources[i], &pUploadResources[NumSRVs + i]); - } - for (int j = 0; j < valueSize; j++) - values[j] = 20.0 + (NumUAVs - 1); - CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, - &pUAVResources[NumUAVs - 2], - &pUploadResources[NumResources - 2], &pReadBuffer); +TEST_F(ExecutionTest, LongVector_ScalarAdd_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} - for (int j = 0; j < valueSize; j++) - values[j] = 20.0 + (NumUAVs - 2); - D3D12_RESOURCE_DESC tex1dDesc = - CD3DX12_RESOURCE_DESC::Tex1D(DXGI_FORMAT_R32_FLOAT, valueSize, 1, 0, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, - tex1dDesc, &pUAVResources[NumUAVs - 1], - &pUploadResources[NumResources - 1]); +TEST_F(ExecutionTest, LongVector_ScalarMultiply_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} - // Close the command list and execute it to perform the GPU setup. 
- pCommandList->Close(); - ExecuteCommandList(pCommandQueue, pCommandList); - WaitForSignal(pCommandQueue, FO); - VERIFY_SUCCEEDED(pCommandAllocator->Reset()); - VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); +TEST_F(ExecutionTest, LongVector_Multiply_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} - CComPtr pResHeap; - CComPtr pSampHeap; - CreateDefaultDescHeaps(pDevice, NumSRVs + NumUAVs, NumSamplers, &pResHeap, +TEST_F(ExecutionTest, LongVector_Add_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_float32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, LongVector_Add_float64) { + 
WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, LongVector_Add_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_int32) { + 
WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, LongVector_Add_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, 
LongVector_Add_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, LongVector_Add_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, 
LongVector_ScalarAdd_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + +TEST_F(ExecutionTest, LongVector_Add_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Add); +} + +TEST_F(ExecutionTest, LongVector_Min_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Min); +} + +TEST_F(ExecutionTest, LongVector_Max_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Max); +} + +TEST_F(ExecutionTest, LongVector_ScalarAdd_uint64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarAdd); +} + +TEST_F(ExecutionTest, LongVector_ScalarMultiply_uint64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_ScalarMultiply); +} + +TEST_F(ExecutionTest, LongVector_Multiply_uint64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Multiply); +} + 
+// The LongVector_<Op>_<Type> cases in this group all have the same shape:
+// construct a SetVerifyOutput object with LogOnlyFailures, then pass the
+// operation enum to LongVectorOpTestBase.
+// NOTE(review): the element type appears only in the test name; no explicit
+// template argument is visible on the LongVectorOpTestBase calls here.
+// Confirm against the original file that the type matches the name.
+
+// Add operation; test name indicates uint64 elements.
+TEST_F(ExecutionTest, LongVector_Add_uint64) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Add);
+}
+
+// Min operation; test name indicates uint64 elements.
+TEST_F(ExecutionTest, LongVector_Min_uint64) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Min);
+}
+
+// Max operation; test name indicates uint64 elements.
+TEST_F(ExecutionTest, LongVector_Max_uint64) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Max);
+}
+
+// Initialize operation; test name indicates bool elements. This is the only
+// bool case in this group: there is no Clamp_bool test.
+TEST_F(ExecutionTest, LongVector_Initialize_bool) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Initialize);
+}
+
+// Clamp operation; test name indicates float16 elements.
+TEST_F(ExecutionTest, LongVector_Clamp_float16) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Clamp);
+}
+
+// Initialize operation; test name indicates float16 elements.
+TEST_F(ExecutionTest, LongVector_Initialize_float16) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Initialize);
+}
+
+// Clamp operation; test name indicates float32 elements.
+TEST_F(ExecutionTest, LongVector_Clamp_float32) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Clamp);
+}
+
+// Initialize operation; test name indicates float32 elements.
+TEST_F(ExecutionTest, LongVector_Initialize_float32) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Initialize);
+}
+
+// Clamp operation; test name indicates float64 elements.
+TEST_F(ExecutionTest, LongVector_Clamp_float64) {
+  WEX::TestExecution::SetVerifyOutput verifySettings(
+      WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);
+  LongVectorOpTestBase(LongVectorOpType_Clamp);
+}
+
+TEST_F(ExecutionTest, LongVector_Initialize_float64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_int16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_int32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_int64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_uint16) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + 
LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_uint32) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +TEST_F(ExecutionTest, LongVector_Clamp_uint64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Clamp); +} + +TEST_F(ExecutionTest, LongVector_Initialize_uint64) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + LongVectorOpTestBase(LongVectorOpType_Initialize); +} + +template +void ExecutionTest::LongVectorOpTestBase(LongVectorOpType opType) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + LongVectorOpTestConfig TestConfig(opType); + + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); + LongVectorOpTestBase(TestConfig); +} + +template +void ExecutionTest::LongVectorOpTestBase( + LongVectorOpTestConfig &TestConfig) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + LogCommentFmt(L"Running LongVectorOpTestBase<%S, %zu>", typeid(T).name(), N); + + CComPtr D3DDevice; + if (!CreateDevice(&D3DDevice, D3D_SHADER_MODEL_6_9)) { +#ifdef _HLK_CONF + LogErrorFmtThrow(L"Device does not support SM 6.9. 
Can't run these tests."); + } +#else + WEX::Logging::Log::Comment( + "Device does not support SM 6.9. Can't run these tests."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; +#endif +} + +DeterministicNumberGenerator NumberGenerator(1337); +std::array InputVector1; +std::array InputVector2; +std::array ScalarInput; +ScalarInput[0] = NumberGenerator.generate(); +const bool IsVectorBinaryOp = TestConfig.IsBinaryOp && !TestConfig.IsScalarOp; + +// Fill the vector inputs with values. +for (size_t Index = 0; Index < N; Index++) { + // Always generate input. + InputVector1[Index] = NumberGenerator.generate(); + + if (IsVectorBinaryOp) + InputVector2[Index] = NumberGenerator.generate(); +} + +// We pass these values into the shader and they're requried to compile. So +// they need to set to something. +T ClampArgMin = 0; +T ClampArgMax = 0; +if (TestConfig.OpType == LongVectorOpType_Clamp) { + if constexpr (std::is_same_v) { + // Attempting to generate a clamp value for HLSLBool_t will result in an + // infinite loop in the below while. We don't have a test case for clamp + // with bools anyways. But adding this check to prevent the mistake. + LogErrorFmtThrow(L"Clamp is not supported for HLSLBool_t."); + } + + ClampArgMin = NumberGenerator.generate(); + ClampArgMax = NumberGenerator.generate(); + while (ClampArgMin >= ClampArgMax) { + // Generate a new value for ClampArgMin. It needs to be smaller than + // or equal to ClampArgMax. + ClampArgMax = NumberGenerator.generate(); + } +} + +std::array ExpectedVector; +for (size_t Index = 0; Index < N; Index++) { + if (TestConfig.IsBinaryOp) { + T Input1 = InputVector1[Index]; + T Input2 = TestConfig.IsScalarOp ? 
ScalarInput[0] : InputVector2[Index]; + if (TestConfig.OperatorString == "*") { + ExpectedVector[Index] = Input1 * Input2; + } else if (TestConfig.OperatorString == "+") { + ExpectedVector[Index] = Input1 + Input2; + } else if (TestConfig.OperatorString == ",") { + if (TestConfig.OpType == LongVectorOpType_Min) + ExpectedVector[Index] = std::min(Input1, Input2); + else if (TestConfig.OpType == LongVectorOpType_Max) + ExpectedVector[Index] = std::max(Input1, Input2); + else + LogErrorFmtThrow(L"Unrecognized Binary LongVectorOpType: %d", + TestConfig.OpType); + } else { + LogErrorFmtThrow( + L"Don't know how to compute expected value for operatorString: %s", + TestConfig.OperatorString.c_str()); + } + } else // Unary op logic + { + if (TestConfig.OpType == LongVectorOpType_Clamp) { + ExpectedVector[Index] = + std::clamp(InputVector1[Index], ClampArgMin, ClampArgMax); + } else if (TestConfig.OpType = LongVectorOpType_Initialize) { + ExpectedVector[Index] = InputVector1[Index]; + } else { + LogErrorFmtThrow(L"Unrecognized Unary LongVectorOpType: %d", + TestConfig.OpType); + } + } +} + +// Set up the compiler options string. +std::stringstream CompilerOptions(""); +std::string HLSLType = TestConfig.GetHLSLTypeString(); +CompilerOptions << "-DTYPE="; +CompilerOptions << HLSLType; +CompilerOptions << " -DNUM="; +CompilerOptions << N; +const bool Is16BitType = + (HLSLType == "int16_t" || HLSLType == "uint16_t" || HLSLType == "half"); +CompilerOptions << (Is16BitType ? " -enable-16bit-types" : ""); +CompilerOptions << " -DOPERATOR="; +CompilerOptions << TestConfig.OperatorString; +if (TestConfig.IsBinaryOp) { + CompilerOptions << " -DOPERAND2="; + CompilerOptions << (TestConfig.IsScalarOp ? 
"InputScalar" : "InputVector2"); + + if (TestConfig.IsScalarOp) { + CompilerOptions << " -DIS_SCALAR_OP=1"; + } else { + CompilerOptions << " -DIS_BINARY_VECTOR_OP=1"; + } + CompilerOptions << " -DFUNC="; + CompilerOptions << TestConfig.IntrinsicString; +} else { + CompilerOptions << " -DFUNC="; + CompilerOptions << TestConfig.IntrinsicString; + CompilerOptions << " -DOPERAND2="; + switch (TestConfig.OpType) { + case LongVectorOpType_Clamp: + CompilerOptions << "ClampArgMinMax"; + CompilerOptions << " -DFUNC_CLAMP=1"; + break; + case LongVectorOpType_Initialize: + CompilerOptions << " -DFUNC_INITIALIZE=1"; + break; + } +} + +// We have to construct the string outside of the lambda. Otherwise it's +// cleaned up when the lambda finishes executing but before the shader runs. +std::string CompilerOptionsString = CompilerOptions.str(); + +// ShaderOpArith.xml defines the input/output resources and the shader source. +CComPtr TestXML; +ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML); + +// RunShaderOpTest is a helper function that handles resource creation +// and setup. It also handles the shader compilation and execution. It takes a +// callback that is called when the shader is compiled, but before it is +// executed. +std::shared_ptr TestResult = RunShaderOpTest( + D3DDevice, m_support, TestXML, "LongVectorOp", + [&](LPCSTR Name, std::vector &ShaderData, st::ShaderOp *ShaderOp) { + LogCommentFmt(L"RunShaderOpTest CallBack. Resource Name: %S", Name); + + // This callback is called once for each resource defined for + // "LongVectorOp" in ShaderOpArith.xml. All callbacks are fired for each + // resource. We determine whether they are applicable to the test case + // when they run. + + // Process the callback for the OutputVector resource. + if (0 == _stricmp(Name, "OutputVector")) { + // We only need to set the compiler options string once. So this is a + // convenient place to do it. 
+ ShaderOp->Shaders.at(0).Arguments = CompilerOptionsString.c_str(); + + return; + } + + // Process the callback for the InputFuncArgs resource. + if (0 == _stricmp(Name, "InputFuncArgs")) { + if (TestConfig.IsScalarOp) { + FillShaderBufferFromLongVectorData(ShaderData, ScalarInput); + } else if (TestConfig.OpType == LongVectorOpType_Clamp) { + std::array ClampArgs = {ClampArgMin, ClampArgMax}; + FillShaderBufferFromLongVectorData(ShaderData, ClampArgs); + } + + return; + } + + // Process the callback for the InputVector1 resource. + if (0 == _stricmp(Name, "InputVector1")) { + FillShaderBufferFromLongVectorData(ShaderData, InputVector1); + return; + } + + // Process the callback for the InputVector2 resource. + if (0 == _stricmp(Name, "InputVector2")) { + if (IsVectorBinaryOp) { + FillShaderBufferFromLongVectorData(ShaderData, InputVector2); + } + return; + } + + LogErrorFmtThrow( + L"RunShaderOpTest CallBack. Unexpected Resource Name: %S", Name); + }); + +// Map the data from GPU to CPU memory so we can verify our expectations. +MappedData ShaderOutData; +TestResult->Test->GetReadBackData("OutputVector", &ShaderOutData); + +std::array OutputVector; +FillLongVectorDataFromShaderBuffer(ShaderOutData, OutputVector); + +VERIFY_SUCCEEDED(DoArraysMatch(OutputVector, ExpectedVector, + TestConfig.Tolerance)); +} + +// Runs a set of tests for the Cooperative Vector Mul and MulAdd operations. +// The device will be queried for supported configurations and then each +// supported configuration will be tested against multiple matrix and vector +// sizes. To help reproduce individual test failures, the test will log the +// configuration it is running and the results of each test. The following +// filters can be used to limit test execution to a specific set of +// configurations: +// +// - CoopVecMatrixInterp: SINT8, FLOAT16, FLOAT_E4M3, ... 
+// - CoopVecMatrixLayout: ROW_MAJOR, COLUMN_MAJOR, MUL_OPTIMAL,
+// OUTER_PRODUCT_OPTIMAL
+// - CoopVecBiasInterp: SINT32, FLOAT16, FLOAT_E4M3, ...
+// - CoopVecInputInterp: SINT8, FLOAT16, FLOAT_E4M3, ...
+// - CoopVecInputType: SINT8, UINT8, SINT16, UINT16, SINT32, UINT32, FLOAT16,
+// FLOAT32, ...
+// - CoopVecOutputType: SINT32, UINT32, FLOAT16, FLOAT32, ...
+//
+// Filter example:
+// TE.exe ... -p:CoopVecMatrixInterp=FLOAT16
+// -p:CoopVecMatrixLayout=MUL_OPTIMAL
+//
+// The current implementation will always write the final output data as float.
+//
+// Only the five data-type combinations recognized below (the "preview"
+// configurations) are run; every other combination the device reports is
+// skipped before the filters are applied.
+void ExecutionTest::runCoopVecMulTest() {
+#if !HAVE_COOPVEC_API
+  // Built without the cooperative vector API: record the test as skipped.
+  WEX::Logging::Log::Comment(
+      "Cooperative vector API not supported in build configuration. Skipping.");
+  WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
+  return;
+#else
+  // Create device and verify coopvec support
+  CComPtr D3DDevice;
+  if (!CreateDevice(&D3DDevice, D3D_SHADER_MODEL_6_9)) {
+    // NOTE(review): returns without setting a Skipped result here;
+    // presumably CreateDevice reports the reason itself -- confirm.
+    return;
+  }
+  if (!DoesDeviceSupportCooperativeVector(D3DDevice)) {
+    WEX::Logging::Log::Comment(
+        "Device does not support cooperative vector. Skipping.");
+    WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
+    return;
+  }
+
+  // Query coopvec feature data. First call gets the size of the arrays. The
+  // second call populates the arrays using memory we allocate.
+  D3D12_FEATURE_DATA_COOPERATIVE_VECTOR DevOptions = {};
+  VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport(
+      (D3D12_FEATURE)D3D12_FEATURE_COOPERATIVE_VECTOR, &DevOptions,
+      sizeof(DevOptions)));
+
+  // Allocate memory for the arrays in DevOptions
+  std::vector MulAddProps(
+      DevOptions.MatrixVectorMulAddPropCount);
+  DevOptions.pMatrixVectorMulAddProperties = MulAddProps.data();
+
+  VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport(
+      (D3D12_FEATURE)D3D12_FEATURE_COOPERATIVE_VECTOR, &DevOptions,
+      sizeof(DevOptions)));
+
+  // Test each supported data type and matrix layout
+  for (auto MulAddConfig : MulAddProps) {
+    // Filter on preview test support
+    bool PreviewConfig = false;
+    // Preview combination 1: FLOAT16 for input type, input interpretation,
+    // bias, matrix and output.
+    if (MulAddConfig.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.InputInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.BiasInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.MatrixInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.OutputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16) {
+      PreviewConfig = true;
+    }
+
+    // Preview combination 2: FLOAT16 input/bias/output with FLOAT_E4M3 input
+    // and matrix interpretation.
+    if (MulAddConfig.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.InputInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 &&
+        MulAddConfig.BiasInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.MatrixInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 &&
+        MulAddConfig.OutputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16) {
+      PreviewConfig = true;
+    }
+
+    // Preview combination 3: FLOAT16 input/bias/output with FLOAT_E5M2 input
+    // and matrix interpretation.
+    if (MulAddConfig.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.InputInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2 &&
+        MulAddConfig.BiasInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 &&
+        MulAddConfig.MatrixInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2 &&
+        MulAddConfig.OutputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16) {
+      PreviewConfig = true;
+    }
+
+    // Preview combination 4: UINT32 input interpreted as SINT8_T4_PACKED,
+    // SINT8 matrix, SINT32 bias and output.
+    if (MulAddConfig.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32 &&
+        MulAddConfig.InputInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED &&
+        MulAddConfig.BiasInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32 &&
+        MulAddConfig.MatrixInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 &&
+        MulAddConfig.OutputType == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32) {
+      PreviewConfig = true;
+    }
+
+    // Preview combination 5: FLOAT32 input interpreted as SINT8, SINT8
+    // matrix, SINT32 bias and output.
+    if (MulAddConfig.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32 &&
+        MulAddConfig.InputInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 &&
+        MulAddConfig.BiasInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32 &&
+        MulAddConfig.MatrixInterpretation ==
+            D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 &&
+        MulAddConfig.OutputType == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32) {
+      PreviewConfig = true;
+    }
+
+    if (!PreviewConfig) {
+      continue;
+    }
+
+    // Apply filters
+    // A configuration runs only if every one of the five data-type filters
+    // accepts it.
+    bool IsInFilter =
+        CoopVecHelpers::IsDataTypeInFilter(L"CoopVecMatrixInterp",
+                                           MulAddConfig.MatrixInterpretation) &&
+        CoopVecHelpers::IsDataTypeInFilter(L"CoopVecBiasInterp",
+                                           MulAddConfig.BiasInterpretation) &&
+        CoopVecHelpers::IsDataTypeInFilter(L"CoopVecInputInterp",
+                                           MulAddConfig.InputInterpretation) &&
+        CoopVecHelpers::IsDataTypeInFilter(L"CoopVecInputType",
+                                           MulAddConfig.InputType) &&
+        CoopVecHelpers::IsDataTypeInFilter(L"CoopVecOutputType",
+                                           MulAddConfig.OutputType);
+    if (!IsInFilter) {
+      continue;
+    }
+
+    // Run the test
+    runCoopVecMulTestConfig(D3DDevice, MulAddConfig);
+  }
+#endif // HAVE_COOPVEC_API
+}
+
+#if HAVE_COOPVEC_API
+// Runs every subtest in the TestConfigs table for one data-type combination
+// (MulProps). Logs the combination first, then for each table entry:
+//   - skips FLOAT_E4M3 / FLOAT_E5M2 matrix interpretations when the entry
+//     uses the ROW_MAJOR or COLUMN_MAJOR layout,
+//   - skips entries whose layout is rejected by the CoopVecMatrixLayout
+//     filter,
+//   - otherwise calls runCoopVecMulSubtest.
+void ExecutionTest::runCoopVecMulTestConfig(
+    ID3D12Device *D3DDevice,
+    D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL &MulProps) {
+
+  LogCommentFmt(
+      L"Running test for MatrixInterpretation: %s, BiasInterpretation: %s, "
+      L"InputInterpretation: %s, InputType: %s, OutputType: %s",
+      CoopVecHelpers::DataTypeToFilterString(MulProps.MatrixInterpretation)
+          .c_str(),
+      CoopVecHelpers::DataTypeToFilterString(MulProps.BiasInterpretation)
+          .c_str(),
+      CoopVecHelpers::DataTypeToFilterString(MulProps.InputInterpretation)
+          .c_str(),
+      CoopVecHelpers::DataTypeToFilterString(MulProps.InputType).c_str(),
+      CoopVecHelpers::DataTypeToFilterString(MulProps.OutputType).c_str());
+
+  // Eight size/bias variants for each of the four matrix layouts.
+  // NOTE(review): the initializer order is presumably {InputPerThread,
+  // OutputPerThread, NumThreads, NumLevels, MatrixLayout, Bias}; confirm
+  // against the CoopVecMulSubtestConfig declaration.
+  constexpr CoopVecMulSubtestConfig TestConfigs[] = {
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, false},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, true},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, false},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, true},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, false},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, true},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, false},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR, true},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, false},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, true},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, false},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, true},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, false},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, true},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, false},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR, true},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, false},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, true},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, false},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, true},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, false},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, true},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, false},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_MUL_OPTIMAL, true},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       false},
+      {16, 16, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       true},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       false},
+      {16, 16, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       true},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       false},
+      {32, 8, 16, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       true},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       false},
+      {32, 8, 32, 1, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL,
+       true},
+  };
+
+  // Each entry is copied into a mutable local because runCoopVecMulSubtest
+  // takes its config by non-const reference.
+  for (auto Config : TestConfigs) {
+    // FP8 matrix interpretations are not run with the row-major or
+    // column-major layouts.
+    if ((MulProps.MatrixInterpretation ==
+             D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 ||
+         MulProps.MatrixInterpretation ==
+             D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) &&
+        (Config.MatrixLayout == D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR ||
+         Config.MatrixLayout ==
+             D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR)) {
+      continue;
+    }
+
+    bool IsInFilter = CoopVecHelpers::IsMatrixLayoutInFilter(
+        L"CoopVecMatrixLayout", Config.MatrixLayout);
+    if (!IsInFilter) {
+      continue;
+    }
+
+    runCoopVecMulSubtest(D3DDevice, MulProps, Config);
+  }
+}
+
+void ExecutionTest::runCoopVecMulSubtest(
+    ID3D12Device *D3DDevice, D3D12_COOPERATIVE_VECTOR_PROPERTIES_MUL &MulProps,
+    CoopVecMulSubtestConfig &Config) {
+
+  LogCommentFmt(
+      L"Running test for InputPerThread: %d, OutputPerThread: %d, NumThreads: "
+      L"%d, NumLevels: %d, Bias: %s, MatrixLayout: %s",
+      Config.InputPerThread, Config.OutputPerThread, Config.NumThreads,
+      Config.NumLevels, Config.Bias ?
L"true" : L"false", + CoopVecHelpers::MatrixLayoutToFilterString(Config.MatrixLayout).c_str()); + + const int OutputBufferSize = (Config.OutputPerThread * Config.NumThreads * 4); + + // Create root signature with a single root entry for all SRVs and UAVs + CComPtr RootSignature; + { + CD3DX12_DESCRIPTOR_RANGE Ranges[2]; + Ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3, 0, + 0); // InputVector, InputMatrix, InputBias + Ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0); // OutputBuffer + CreateRootSignatureFromRanges(D3DDevice, &RootSignature, Ranges, 2, nullptr, + 0); + } + + // Create descriptor heap with space for 4 descriptors: 3 SRVs and 1 UAV + CComPtr DescriptorHeap; + { + D3D12_DESCRIPTOR_HEAP_DESC Desc = {}; + Desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + Desc.NumDescriptors = 4; + Desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + VERIFY_SUCCEEDED( + D3DDevice->CreateDescriptorHeap(&Desc, IID_PPV_ARGS(&DescriptorHeap))); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE BaseHandle( + DescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + + // Create the compute pipeline state for the CoopVec shader + CComPtr ComputePipelineState; + { + std::string ShaderSource = R"( +#include "dx/linalg.h" + +ByteAddressBuffer InputVector : register(t0); +ByteAddressBuffer InputBias : register(t1); +ByteAddressBuffer InputMatrix : register(t2); +RWByteAddressBuffer OutputBuffer: register(u0); + +[shader("compute")] +[numthreads(NUM_THREADS, 1, 1)] +void main(uint threadIdx : SV_GroupThreadID) +{ + using namespace dx::linalg; + + // Ensure 4-byte alignment for vector loads + uint inputOffset = (INPUT_PER_THREAD * threadIdx * (sizeof(INPUT_DATA_TYPE) / INPUT_DIVISOR)); + inputOffset = (inputOffset + 3) & ~3; // Align to 4 bytes + vector input = InputVector.Load >(inputOffset); + + MatrixRef mat = { InputMatrix, 0, STRIDE }; + + vector accum; + + if (USE_BIAS) { + VectorRef biasVec = { InputBias, 0 }; + accum = MulAdd(mat, MakeInterpretedVector(input), biasVec); + 
} else { + accum = Mul(mat, MakeInterpretedVector(input)); + } + + vector result = (vector)accum; + + // Ensure 4-byte alignment for vector store + uint outputOffset = OUTPUT_PER_THREAD * threadIdx * sizeof(float); + outputOffset = (outputOffset + 3) & ~3; // Align to 4 bytes + OutputBuffer.Store >(outputOffset, result); +} + )"; + + auto CreateDefineFromInt = [](const wchar_t *Name, int Value) { + std::wstringstream Stream; + Stream << L"-D" << Name << L"=" << Value; + return Stream.str(); + }; + + auto CreateDefineFromString = [](const wchar_t *Name, + const std::wstring &Value) { + std::wstringstream Stream; + Stream << L"-D" << Name << L"=" << Value; + return Stream.str(); + }; + + int Stride = 0; + const std::wstring HlslMatrixLayout = + CoopVecHelpers::MatrixLayoutToHlslLayoutString(Config.MatrixLayout); + int StrideMultiplier = CoopVecHelpers::GetStrideMultiplierForMatrixDataType( + MulProps.MatrixInterpretation); + switch (Config.MatrixLayout) { + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR: + Stride = Config.InputPerThread * StrideMultiplier; + break; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR: + Stride = Config.OutputPerThread * StrideMultiplier; + break; + } + + const int InputDivisor = + CoopVecHelpers::GetNumPackedElementsForInputDataType( + MulProps.InputInterpretation); + const std::wstring InputDataType = + CoopVecHelpers::GetHlslDataTypeForDataType(MulProps.InputType); + const std::wstring AccumDataType = + CoopVecHelpers::GetHlslDataTypeForDataType(MulProps.BiasInterpretation); + const std::wstring MatrixDataTypeEnum = + CoopVecHelpers::GetHlslInterpretationForDataType( + MulProps.MatrixInterpretation); + const std::wstring InputInterpretationEnum = + CoopVecHelpers::GetHlslInterpretationForDataType( + MulProps.InputInterpretation); + const std::wstring AccumInterpretationEnum = + CoopVecHelpers::GetHlslInterpretationForDataType( + MulProps.BiasInterpretation); + + auto InputPerThreadDefine = + 
CreateDefineFromInt(L"INPUT_PER_THREAD", Config.InputPerThread); + auto OutputPerThreadDefine = + CreateDefineFromInt(L"OUTPUT_PER_THREAD", Config.OutputPerThread); + auto NumThreadsDefine = + CreateDefineFromInt(L"NUM_THREADS", Config.NumThreads); + auto StrideDefine = CreateDefineFromInt(L"STRIDE", Stride); + auto InputDataTypeDefine = + CreateDefineFromString(L"INPUT_DATA_TYPE", InputDataType); + auto InputDivisorDefine = + CreateDefineFromInt(L"INPUT_DIVISOR", InputDivisor); + auto AccumDataTypeDefine = + CreateDefineFromString(L"ACCUM_DATA_TYPE", AccumDataType); + auto InputInterpretationEnumDefine = CreateDefineFromString( + L"INPUT_INTERPRETATION_ENUM", InputInterpretationEnum); + auto HlslMatrixLayoutDefine = + CreateDefineFromString(L"HLSL_MATRIX_LAYOUT", HlslMatrixLayout); + auto MatrixDataTypeEnumDefine = + CreateDefineFromString(L"MATRIX_DATA_TYPE_ENUM", MatrixDataTypeEnum); + auto UseBiasDefine = CreateDefineFromInt(L"USE_BIAS", Config.Bias ? 1 : 0); + auto AccumInterpretationEnumDefine = CreateDefineFromString( + L"ACCUM_INTERPRETATION_ENUM", AccumInterpretationEnum); + + LPCWSTR Options[] = { + L"-enable-16bit-types", + InputPerThreadDefine.c_str(), + OutputPerThreadDefine.c_str(), + NumThreadsDefine.c_str(), + StrideDefine.c_str(), + InputDataTypeDefine.c_str(), + InputDivisorDefine.c_str(), + AccumDataTypeDefine.c_str(), + InputInterpretationEnumDefine.c_str(), + HlslMatrixLayoutDefine.c_str(), + MatrixDataTypeEnumDefine.c_str(), + UseBiasDefine.c_str(), + AccumInterpretationEnumDefine.c_str(), + }; + + CComPtr IncludeHandler = + new LinAlgHeaderIncludeHandler(m_support); + + CreateComputePSO(D3DDevice, RootSignature, ShaderSource.c_str(), L"cs_6_9", + &ComputePipelineState, Options, _countof(Options), + IncludeHandler); + } + + // Create a command list for the compute shader. 
+ CComPtr CommandList; + CComPtr CommandAllocator; + CComPtr CommandQueue; + FenceObj FO; + CreateCommandQueue(D3DDevice, L"CoopVec Test Command Queue", &CommandQueue, + D3D12_COMMAND_LIST_TYPE_DIRECT); + InitFenceObj(D3DDevice, &FO); + VERIFY_SUCCEEDED(D3DDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&CommandAllocator))); + VERIFY_SUCCEEDED(D3DDevice->CreateCommandList( + 0, D3D12_COMMAND_LIST_TYPE_DIRECT, CommandAllocator, ComputePipelineState, + IID_PPV_ARGS(&CommandList))); + + // Setup input data + auto ExpectedOutputBuffer = + std::make_unique(Config.OutputPerThread * Config.NumThreads); + + // Setup input matrix as all-ones in sint8 format. This will later be + // converted to the appropriate data type by the matrix conversion API. + CComPtr InputMatrixSRVResource, InputMatrixSRVUploadResource; + std::vector InputMatrix; + if (MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED || + MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED || + MulProps.MatrixInterpretation == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 || + MulProps.MatrixInterpretation == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8) { + InputMatrix = CoopVecHelpers::CreateAllOnesInputMatrix( + Config.InputPerThread, Config.OutputPerThread); + } else if (MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + // Matrix source data is fp32, which gets converted to fp16 during matrix + // conversion + InputMatrix = CoopVecHelpers::CreateAllOnesInputMatrix( + Config.InputPerThread, Config.OutputPerThread); + } else { + WEX::Logging::Log::Error(L"Unsupported matrix data type"); + return; + } + + CreateTestResources(D3DDevice, CommandList, InputMatrix.data(), + InputMatrix.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputMatrix.size()), + 
&InputMatrixSRVResource, &InputMatrixSRVUploadResource); + + // Create input vector of an appropriate type. All integer types start as + // SINT8 for now. + CComPtr InputVecSRVResource, InputVecSRVUploadResource; + std::vector InputVector; + + if ((MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32 && + (MulProps.InputInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED || + MulProps.InputInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED)) || + MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 || + MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8) { + InputVector = CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.InputPerThread); + } else if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + InputVector = + CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.InputPerThread); + } else if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + InputVector = CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.InputPerThread); + } else if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32) { + InputVector = CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.InputPerThread); + } else if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32) { + InputVector = CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.InputPerThread); + } else { + WEX::Logging::Log::Error(L"Unsupported input data type"); + return; + } + if (InputVector.size() % 4 != 0) { + // Align size to 4 bytes for ByteAddressBuffer + InputVector.resize(InputVector.size() + 4 - (InputVector.size() % 4)); + } + CreateTestResources(D3DDevice, CommandList, InputVector.data(), + InputVector.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputVector.size()), + &InputVecSRVResource, &InputVecSRVUploadResource); + + // This increments 
baseHandle + CreateRawSRV(D3DDevice, BaseHandle, + (UINT)(InputVector.size() / sizeof(int32_t)), + InputVecSRVResource); + + // Create input bias + CComPtr InputBiasSRVResource, InputBiasSRVUploadResource; + std::vector InputBias; + + if (MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED || + MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8_T4_PACKED || + MulProps.BiasInterpretation == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 || + MulProps.BiasInterpretation == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8) { + InputBias = CoopVecHelpers::CreateInputBias(Config.OutputPerThread); + } else if (MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_SINT32) { + InputBias = + CoopVecHelpers::CreateInputBias(Config.OutputPerThread); + } else if (MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_UINT32) { + InputBias = + CoopVecHelpers::CreateInputBias(Config.OutputPerThread); + } else if (MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16) { + InputBias = CoopVecHelpers::CreateInputBias( + Config.OutputPerThread); + } else if (MulProps.BiasInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + InputBias = CoopVecHelpers::CreateInputBias(Config.OutputPerThread); + } else { + WEX::Logging::Log::Error(L"Unsupported bias data type"); + return; + } + + if (InputBias.size() % 4 != 0) { + // Align size to 4 bytes for ByteAddressBuffer + InputBias.resize(InputBias.size() + 4 - (InputBias.size() % 4)); + } + CreateTestResources(D3DDevice, CommandList, InputBias.data(), + InputBias.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputBias.size()), + &InputBiasSRVResource, &InputBiasSRVUploadResource); + + // This increments baseHandle + CreateRawSRV(D3DDevice, BaseHandle, + (UINT)(InputBias.size() / sizeof(int32_t)), + InputBiasSRVResource); + + // Calculate reference output + // FIXME: This does not capture all cases, but is sufficient for the preview + // feature set + if (MulProps.MatrixInterpretation == 
D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8) { + // The input bias is really an array of int32_t + std::vector InputBiasI32(InputBias.size() / sizeof(int32_t)); + std::memcpy(InputBiasI32.data(), InputBias.data(), InputBias.size()); + + // The input vector is really an array of float if our vector input type is + // FLOAT32 + std::vector InputVectorF32(InputVector.size() / sizeof(int32_t)); + if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + std::memcpy(InputVectorF32.data(), InputVector.data(), + InputVector.size()); + } + + for (int ThreadIdx = 0; ThreadIdx < Config.NumThreads; ++ThreadIdx) { + for (int OutputIdx = 0; OutputIdx < Config.OutputPerThread; ++OutputIdx) { + int Acc = 0; + + for (int InputIdx = 0; InputIdx < Config.InputPerThread; ++InputIdx) { + int InputElem; + if (MulProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + InputElem = (int) + InputVectorF32[ThreadIdx * Config.InputPerThread + InputIdx]; + } else { + InputElem = + InputVector[ThreadIdx * Config.InputPerThread + InputIdx]; + } + int const MatrixElem = + InputMatrix[OutputIdx * Config.InputPerThread + InputIdx]; + Acc += InputElem * MatrixElem; + } + + if (Config.Bias) { + Acc += InputBiasI32[OutputIdx]; + } + + float Result = float(Acc); + ExpectedOutputBuffer[ThreadIdx * Config.OutputPerThread + OutputIdx] = + Result; + } + } + } else if (MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + MulProps.MatrixInterpretation == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + // The input bias/vector is really an array of float16 + std::vector InputVectorFP16( + InputVector.size() / sizeof(DirectX::PackedVector::HALF)); + std::memcpy(InputVectorFP16.data(), InputVector.data(), InputVector.size()); + + std::vector InputBiasFP16( + InputBias.size() / sizeof(DirectX::PackedVector::HALF)); + std::memcpy(InputBiasFP16.data(), InputBias.data(), InputBias.size()); + + // The 
CPU reference matrix is float + std::vector InputMatrixFP32(InputMatrix.size() / sizeof(float)); + std::memcpy(InputMatrixFP32.data(), InputMatrix.data(), InputMatrix.size()); + + for (int ThreadIdx = 0; ThreadIdx < Config.NumThreads; ++ThreadIdx) { + for (int OutputIdx = 0; OutputIdx < Config.OutputPerThread; ++OutputIdx) { + float Acc = 0; + + for (int InputIdx = 0; InputIdx < Config.InputPerThread; ++InputIdx) { + float const InputElem = ConvertFloat16ToFloat32( + InputVectorFP16[ThreadIdx * Config.InputPerThread + InputIdx]); + float const MatrixElem = + InputMatrixFP32[OutputIdx * Config.InputPerThread + InputIdx]; + Acc += InputElem * MatrixElem; + } + + if (Config.Bias) { + Acc += ConvertFloat16ToFloat32(InputBiasFP16[OutputIdx]); + } + + float Result = Acc; + ExpectedOutputBuffer[ThreadIdx * Config.OutputPerThread + OutputIdx] = + Result; + } + } + } + + CComPtr ConvertedMatrixResource; + { + // Create source matrix info + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO ConvertInfo = {}; + ConvertInfo.SrcInfo.SrcDataType = + CoopVecHelpers::GetMatrixSrcDataType(MulProps.MatrixInterpretation); + ConvertInfo.SrcInfo.SrcLayout = + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR; + + // Create destination matrix info + ConvertInfo.DestInfo.DestSize = 0; // Will be populated by driver + int SrcEltSize = 0; + int DestEltSize = 0; + switch (MulProps.MatrixInterpretation) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + ConvertInfo.DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8; + SrcEltSize = 1; + DestEltSize = 1; + break; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + ConvertInfo.DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16; + SrcEltSize = 4; // FP32 + DestEltSize = 2; // FP16 + break; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3: + ConvertInfo.DestInfo.DestDataType = + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3; + SrcEltSize = 4; // FP32 + DestEltSize = 1; // FP8 + break; + case 
D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2: + ConvertInfo.DestInfo.DestDataType = + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2; + SrcEltSize = 4; // FP32 + DestEltSize = 1; // FP8 + break; + } + ConvertInfo.SrcInfo.SrcStride = Config.InputPerThread * SrcEltSize; + ConvertInfo.SrcInfo.SrcSize = + Config.InputPerThread * Config.OutputPerThread * SrcEltSize; + + ConvertInfo.DestInfo.DestLayout = Config.MatrixLayout; + ConvertInfo.DestInfo.DestStride = 0; + ConvertInfo.DestInfo.NumRows = Config.OutputPerThread; + ConvertInfo.DestInfo.NumColumns = Config.InputPerThread; + + if (Config.MatrixLayout == D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR) { + ConvertInfo.DestInfo.DestStride = Config.InputPerThread * DestEltSize; + } else if (Config.MatrixLayout == + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR) { + ConvertInfo.DestInfo.DestStride = Config.OutputPerThread * DestEltSize; + } + + // Get destination size using preview interface + { + CComPtr PreviewDevice; + VERIFY_SUCCEEDED(D3DDevice->QueryInterface(__uuidof(ID3D12DevicePreview), + (void **)&PreviewDevice)); + + // Query required destination size + PreviewDevice->GetLinearAlgebraMatrixConversionDestinationInfo( + &ConvertInfo.DestInfo); + } + + // Create resource to hold matrix copy + CreateTestResources( + D3DDevice, CommandList, nullptr, 0, + CD3DX12_RESOURCE_DESC::Buffer(ConvertInfo.DestInfo.DestSize), + &ConvertedMatrixResource, nullptr); + + // Set up data descriptors + ConvertInfo.DataDesc.DestVA = + ConvertedMatrixResource->GetGPUVirtualAddress(); + ConvertInfo.DataDesc.SrcVA = InputMatrixSRVResource->GetGPUVirtualAddress(); + + // Get command list interface and perform conversion + CComPtr CommandList11; + VERIFY_SUCCEEDED(CommandList->QueryInterface( + __uuidof(ID3D12GraphicsCommandList11), (void **)&CommandList11)); + CommandList11->ConvertLinearAlgebraMatrix(&ConvertInfo, 1); + + // This increments baseHandle + if ((ConvertInfo.DestInfo.DestSize % 4) != 0) { + WEX::Logging::Log::Error(L"DestSize is not 
aligned to 4 bytes"); + return; + } + CreateRawSRV(D3DDevice, BaseHandle, + ConvertInfo.DestInfo.DestSize / sizeof(int32_t), + ConvertedMatrixResource); + } + + CComPtr UavResource; + CComPtr UavUploadResource; + CComPtr UavReadResource; + + // Create buffer for output and fill with 0xFF to make it obvious if it's not + // written in the shader. + std::vector OutputBufferInit(OutputBufferSize); + std::fill(OutputBufferInit.begin(), OutputBufferInit.end(), (uint8_t)0xFF); + + CreateTestUavs(D3DDevice, CommandList, OutputBufferInit.data(), + OutputBufferSize, &UavResource, &UavUploadResource, + &UavReadResource); + CreateRawUAV(D3DDevice, BaseHandle, OutputBufferSize / 4, UavResource); + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, ComputePipelineState)); + + SetDescriptorHeap(CommandList, DescriptorHeap); + + CD3DX12_GPU_DESCRIPTOR_HANDLE ResHandle( + DescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + + CommandList->SetComputeRootSignature(RootSignature); + CommandList->SetComputeRootDescriptorTable(0, ResHandle); + CommandList->SetPipelineState(ComputePipelineState); + CommandList->Dispatch(1, 1, 1); + RecordTransitionBarrier(CommandList, UavResource, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + CommandList->CopyResource(UavReadResource, UavResource); + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + { + MappedData MappedData(UavReadResource, OutputBufferSize); + + float *ResultBuffer = (float *)MappedData.data(); + bool Equal = true; + for (int i = 0; i < OutputBufferSize / sizeof(float); i++) { + if (isnan(ResultBuffer[i]) || isnan(ExpectedOutputBuffer[i]) || + fabs(ResultBuffer[i] - ExpectedOutputBuffer[i]) > 0.00001) { + LogErrorFmt(L"Result mismatch at index %d", i); + 
LogErrorFmt(L"ResultBuffer[%d]: %f, ExpectedOutputBuffer[%d]: %f", i, + ResultBuffer[i], i, ExpectedOutputBuffer[i]); + Equal = false; + break; + } + } + VERIFY_IS_TRUE(Equal); + } +} +#endif // HAVE_COOPVEC_API + +TEST_F(ExecutionTest, CoopVec_Mul) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + runCoopVecMulTest(); +} + +void ExecutionTest::runCoopVecOuterProductTest() { +#if !HAVE_COOPVEC_API + WEX::Logging::Log::Comment( + "Cooperative vector API not supported in build configuration. Skipping."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; +#else + // Create device and verify coopvec support + CComPtr D3DDevice; + if (!CreateDevice(&D3DDevice, D3D_SHADER_MODEL_6_9)) { + return; + } + if (!DoesDeviceSupportCooperativeVector(D3DDevice)) { + WEX::Logging::Log::Comment( + "Device does not support cooperative vector. Skipping."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; + } + + // Query coopvec feature data. First call gets the size of the arrays. The + // second call populates the arrays using memory we allocate. 
+ D3D12_FEATURE_DATA_COOPERATIVE_VECTOR DevOptions = {}; + VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( + (D3D12_FEATURE)D3D12_FEATURE_COOPERATIVE_VECTOR, &DevOptions, + sizeof(DevOptions))); + + // Allocate memory for the arrays in DevOptions + std::vector AccumulateProps( + DevOptions.OuterProductAccumulatePropCount); + DevOptions.pOuterProductAccumulateProperties = AccumulateProps.data(); + + VERIFY_SUCCEEDED(D3DDevice->CheckFeatureSupport( + (D3D12_FEATURE)D3D12_FEATURE_COOPERATIVE_VECTOR, &DevOptions, + sizeof(DevOptions))); + + // Test each supported data type and matrix layout + for (auto AccumulateConfig : AccumulateProps) { + // Run the test + runCoopVecOuterProductTestConfig(D3DDevice, AccumulateConfig); + } +#endif // HAVE_COOPVEC_API +} + +#if HAVE_COOPVEC_API +void ExecutionTest::runCoopVecOuterProductTestConfig( + ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE &AccumulateProps) { + LogCommentFmt( + L"Running test for InputType: %s, AccumulationType: %s", + CoopVecHelpers::DataTypeToFilterString(AccumulateProps.InputType).c_str(), + CoopVecHelpers::DataTypeToFilterString(AccumulateProps.AccumulationType) + .c_str()); + + constexpr CoopVecOuterProductSubtestConfig TestConfigs[] = { + {4, 4, 2, D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL}, + }; + + for (auto Config : TestConfigs) { + if ((AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) && + (Config.MatrixLayout == D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR || + Config.MatrixLayout == + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR)) { + continue; + } + + runCoopVecOuterProductSubtest(D3DDevice, AccumulateProps, Config); + } +} + +void ExecutionTest::runCoopVecOuterProductSubtest( + ID3D12Device *D3DDevice, + D3D12_COOPERATIVE_VECTOR_PROPERTIES_ACCUMULATE &AccumulateProps, + CoopVecOuterProductSubtestConfig &Config) { + + LogCommentFmt( + 
L"Running test for DimM: %d, DimN: %d, NumThreads: %d, MatrixLayout: %s", + Config.DimM, Config.DimN, Config.NumThreads, + CoopVecHelpers::MatrixLayoutToFilterString(Config.MatrixLayout).c_str()); + + // Create root signature with a single root entry for all SRVs and UAVs + CComPtr RootSignature; + { + CD3DX12_DESCRIPTOR_RANGE ranges[2]; + ranges[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 0, + 0); // InputVector1, InputVector2 + ranges[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0); // AccumMatrix + CreateRootSignatureFromRanges(D3DDevice, &RootSignature, ranges, 2, nullptr, + 0); + } + + // Create descriptor heap with space for 3 descriptors: 2 SRVs and 1 UAV + CComPtr DescriptorHeap; + { + D3D12_DESCRIPTOR_HEAP_DESC Desc = {}; + Desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + Desc.NumDescriptors = 3; + Desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + VERIFY_SUCCEEDED( + D3DDevice->CreateDescriptorHeap(&Desc, IID_PPV_ARGS(&DescriptorHeap))); + } + CD3DX12_CPU_DESCRIPTOR_HANDLE BaseHandle( + DescriptorHeap->GetCPUDescriptorHandleForHeapStart()); + + // Create a compute pipeline state object. 
+ CComPtr ComputePipelineState; + { + std::string ShaderSource = R"( +#include "dx/linalg.h" + +ByteAddressBuffer InputVector1 : register(t0); +ByteAddressBuffer InputVector2 : register(t1); +RWByteAddressBuffer AccumMatrix : register(u0); + +[shader("compute")] +[numthreads(NUM_THREADS, 1, 1)] +void main(uint threadIdx : SV_GroupThreadID) +{ +#if 1 + using namespace dx::linalg; + + // Ensure 4-byte alignment for vector loads + uint inputOffset1 = (DIM_M * threadIdx * sizeof(INPUT_DATA_TYPE)); + inputOffset1 = (inputOffset1 + 3) & ~3; // Align to 4 bytes + vector input1 = InputVector1.Load >(inputOffset1); + + uint inputOffset2 = (DIM_N * threadIdx * sizeof(INPUT_DATA_TYPE)); + inputOffset2 = (inputOffset2 + 3) & ~3; // Align to 4 bytes + vector input2 = InputVector2.Load >(inputOffset2); + + RWMatrixRef mat = { AccumMatrix, 0, STRIDE }; + + OuterProductAccumulate(input1, input2, mat); +#endif +} + )"; + + auto CreateDefineFromInt = [](const wchar_t *Name, int Value) { + std::wstringstream Stream; + Stream << L"-D" << Name << L"=" << Value; + return Stream.str(); + }; + + auto CreateDefineFromString = [](const wchar_t *Name, + const wchar_t *Value) { + std::wstringstream Stream; + Stream << L"-D" << Name << L"=" << Value; + return Stream.str(); + }; + + int Stride = 0; + const std::wstring HlslMatrixLayout = + CoopVecHelpers::MatrixLayoutToHlslLayoutString(Config.MatrixLayout); + int StrideMultiplier = CoopVecHelpers::GetStrideMultiplierForMatrixDataType( + AccumulateProps.AccumulationType); + switch (Config.MatrixLayout) { + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR: + Stride = Config.DimN * StrideMultiplier; + break; + case D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR: + Stride = Config.DimM * StrideMultiplier; + break; + } + + const int InputDivisor = + CoopVecHelpers::GetNumPackedElementsForInputDataType( + AccumulateProps.InputType); + const std::wstring InputDataType = + CoopVecHelpers::GetHlslDataTypeForDataType(AccumulateProps.InputType); + const 
std::wstring AccumDataType = + CoopVecHelpers::GetHlslDataTypeForDataType( + AccumulateProps.AccumulationType); + const std::wstring MatrixDataTypeEnum = + CoopVecHelpers::GetHlslInterpretationForDataType( + AccumulateProps.AccumulationType); + const std::wstring InputInterpretationEnum = + CoopVecHelpers::GetHlslInterpretationForDataType( + AccumulateProps.InputType); + + auto DimMDefine = CreateDefineFromInt(L"DIM_M", Config.DimM); + auto DimNDefine = CreateDefineFromInt(L"DIM_N", Config.DimN); + auto NumThreadsDefine = + CreateDefineFromInt(L"NUM_THREADS", Config.NumThreads); + auto StrideDefine = CreateDefineFromInt(L"STRIDE", Stride); + auto InputDataTypeDefine = + CreateDefineFromString(L"INPUT_DATA_TYPE", InputDataType.c_str()); + auto InputDivisorDefine = + CreateDefineFromInt(L"INPUT_DIVISOR", InputDivisor); + auto AccumDataTypeDefine = + CreateDefineFromString(L"ACCUM_DATA_TYPE", AccumDataType.c_str()); + auto InputInterpretationEnumDefine = CreateDefineFromString( + L"INPUT_INTERPRETATION_ENUM", InputInterpretationEnum.c_str()); + auto HlslMatrixLayoutDefine = + CreateDefineFromString(L"HLSL_MATRIX_LAYOUT", HlslMatrixLayout.c_str()); + auto MatrixDataTypeEnumDefine = CreateDefineFromString( + L"MATRIX_DATA_TYPE_ENUM", MatrixDataTypeEnum.c_str()); + + LPCWSTR Options[] = { + L"-enable-16bit-types", + DimMDefine.c_str(), + DimNDefine.c_str(), + NumThreadsDefine.c_str(), + StrideDefine.c_str(), + InputDataTypeDefine.c_str(), + InputDivisorDefine.c_str(), + AccumDataTypeDefine.c_str(), + InputInterpretationEnumDefine.c_str(), + HlslMatrixLayoutDefine.c_str(), + MatrixDataTypeEnumDefine.c_str(), + }; + + CComPtr IncludeHandler = + new LinAlgHeaderIncludeHandler(m_support); + + CreateComputePSO(D3DDevice, RootSignature, ShaderSource.c_str(), L"cs_6_9", + &ComputePipelineState, Options, _countof(Options), + IncludeHandler); + } + + // Create a command list for the compute shader. 
+ CComPtr CommandList; + CComPtr CommandAllocator; + CComPtr CommandQueue; + FenceObj FO; + CreateCommandQueue(D3DDevice, L"CoopVec Test Command Queue", &CommandQueue, + D3D12_COMMAND_LIST_TYPE_DIRECT); + InitFenceObj(D3DDevice, &FO); + VERIFY_SUCCEEDED(D3DDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&CommandAllocator))); + VERIFY_SUCCEEDED(D3DDevice->CreateCommandList( + 0, D3D12_COMMAND_LIST_TYPE_DIRECT, CommandAllocator, ComputePipelineState, + IID_PPV_ARGS(&CommandList))); + + // Setup input matrix as all-ones in sint8/fp32 format. This will later be + // converted to the appropriate data type by the matrix conversion API. + CComPtr InputMatrixSRVResource, InputMatrixSRVUploadResource; + std::vector InputMatrix; + if (AccumulateProps.AccumulationType == D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 || + AccumulateProps.AccumulationType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8) { + InputMatrix = CoopVecHelpers::CreateAllOnesInputMatrix(Config.DimN, + Config.DimM); + } else if (AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + // Matrix source data is fp32, which gets converted to fp16 during matrix + // conversion + InputMatrix = CoopVecHelpers::CreateAllOnesInputMatrix(Config.DimN, + Config.DimM); + } else { + WEX::Logging::Log::Error(L"Unsupported matrix data type"); + return; + } + + CreateTestResources(D3DDevice, CommandList, InputMatrix.data(), + InputMatrix.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputMatrix.size()), + &InputMatrixSRVResource, &InputMatrixSRVUploadResource); + + // Create input vectors + CComPtr InputVecSRVResource1, InputVecSRVUploadResource1; + std::vector InputVector1; + CComPtr InputVecSRVResource2, InputVecSRVUploadResource2; + std::vector InputVector2; + + if (AccumulateProps.InputType == 
D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8 || + AccumulateProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_UINT8) { + InputVector1 = CoopVecHelpers::CreateInputVector(Config.NumThreads, + Config.DimM); + InputVector2 = CoopVecHelpers::CreateInputVector(Config.NumThreads, + Config.DimN); + } else if (AccumulateProps.InputType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + AccumulateProps.InputType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + AccumulateProps.InputType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + InputVector1 = + CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.DimM); + InputVector2 = + CoopVecHelpers::CreateInputVector( + Config.NumThreads, Config.DimN); + } else if (AccumulateProps.InputType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + InputVector1 = CoopVecHelpers::CreateInputVector(Config.NumThreads, + Config.DimM); + InputVector2 = CoopVecHelpers::CreateInputVector(Config.NumThreads, + Config.DimN); + } else { + WEX::Logging::Log::Error(L"Unsupported input data type"); + return; + } + if (InputVector1.size() % 4 != 0) { + // Align size to 4 bytes for ByteAddressBuffer + InputVector1.resize(InputVector1.size() + 4 - (InputVector1.size() % 4)); + } + if (InputVector2.size() % 4 != 0) { + // Align size to 4 bytes for ByteAddressBuffer + InputVector2.resize(InputVector2.size() + 4 - (InputVector2.size() % 4)); + } + CreateTestResources(D3DDevice, CommandList, InputVector1.data(), + InputVector1.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputVector1.size()), + &InputVecSRVResource1, &InputVecSRVUploadResource1); + CreateTestResources(D3DDevice, CommandList, InputVector2.data(), + InputVector2.size(), + CD3DX12_RESOURCE_DESC::Buffer(InputVector2.size()), + &InputVecSRVResource2, &InputVecSRVUploadResource2); + + // This increments baseHandle + CreateRawSRV(D3DDevice, BaseHandle, + (UINT)(InputVector1.size() / sizeof(int32_t)), + InputVecSRVResource1); + CreateRawSRV(D3DDevice, BaseHandle, + (UINT)(InputVector2.size() / 
sizeof(int32_t)), + InputVecSRVResource2); + + // Calculate reference output + auto ExpectedOutputBufferI8 = + CoopVecHelpers::CreateAllOnesInputMatrix(Config.DimN, Config.DimM); + std::vector ExpectedOutputBuffer(ExpectedOutputBufferI8.size() / + sizeof(float)); + std::memcpy(ExpectedOutputBuffer.data(), ExpectedOutputBufferI8.data(), + ExpectedOutputBufferI8.size()); + + if (AccumulateProps.InputType == D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16) { + std::vector InputVector1FP16( + InputVector1.size() / sizeof(DirectX::PackedVector::HALF)); + std::memcpy(InputVector1FP16.data(), InputVector1.data(), + InputVector1.size()); + + std::vector InputVector2FP16( + InputVector2.size() / sizeof(DirectX::PackedVector::HALF)); + std::memcpy(InputVector2FP16.data(), InputVector2.data(), + InputVector2.size()); + + for (int ThreadIdx = 0; ThreadIdx < Config.NumThreads; ++ThreadIdx) { + for (int M = 0; M < Config.DimM; ++M) { + for (int N = 0; N < Config.DimN; ++N) { + float acc = ConvertFloat16ToFloat32(InputVector1FP16[M]) * + ConvertFloat16ToFloat32(InputVector2FP16[N]); + ExpectedOutputBuffer[M * Config.DimN + N] += acc; + } + } + } + } else if (AccumulateProps.InputType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32) { + std::vector InputVector1FP32(InputVector1.size() / sizeof(float)); + std::memcpy(InputVector1FP32.data(), InputVector1.data(), + InputVector1.size()); + + std::vector InputVector2FP32(InputVector2.size() / sizeof(float)); + std::memcpy(InputVector2FP32.data(), InputVector2.data(), + InputVector2.size()); + + for (int ThreadIdx = 0; ThreadIdx < Config.NumThreads; ++ThreadIdx) { + for (int M = 0; M < Config.DimM; ++M) { + for (int N = 0; N < Config.DimN; ++N) { + float Acc = InputVector1FP32[ThreadIdx * Config.DimM + M] * + InputVector2FP32[ThreadIdx * Config.DimN + N]; + ExpectedOutputBuffer[M * Config.DimN + N] += Acc; + } + } + } + } + + CComPtr ConvertedMatrixResource, ConvertedMatrixReadResource; + int ConvertedMatrixSize = 0; + { + // Create source matrix 
info + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_SRC_INFO SrcInfo = {}; + SrcInfo.SrcDataType = + CoopVecHelpers::GetMatrixSrcDataType(AccumulateProps.AccumulationType); + SrcInfo.SrcLayout = D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR; + + // Create destination matrix info + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DEST_INFO DestInfo = {}; + DestInfo.DestSize = 0; // Will be populated by driver + int SrcEltSize = 0; + int DestEltSize = 0; + switch (AccumulateProps.AccumulationType) { + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8: + case D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8_T4_PACKED: + DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8; + SrcEltSize = 1; + DestEltSize = 1; + break; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16: + DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16; + SrcEltSize = 4; // FP32 + DestEltSize = 2; // FP16 + break; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3: + DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3; + SrcEltSize = 4; // FP32 + DestEltSize = 1; // FP8 + break; + case D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2: + DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2; + SrcEltSize = 4; // FP32 + DestEltSize = 1; // FP8 + break; + } + SrcInfo.SrcStride = Config.DimM * SrcEltSize; + SrcInfo.SrcSize = Config.DimM * Config.DimN * SrcEltSize; + + DestInfo.DestLayout = Config.MatrixLayout; + DestInfo.DestStride = 0; + DestInfo.NumRows = Config.DimM; + DestInfo.NumColumns = Config.DimN; + + if (Config.MatrixLayout == D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR) { + DestInfo.DestStride = Config.DimM * DestEltSize; + } else if (Config.MatrixLayout == + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_COLUMN_MAJOR) { + DestInfo.DestStride = Config.DimM * DestEltSize; + } + + // Create conversion info + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO ConvertInfo = {}; + ConvertInfo.SrcInfo = SrcInfo; + ConvertInfo.DestInfo = DestInfo; + + // Get preview device interface + { + CComPtr PreviewDevice; + 
VERIFY_SUCCEEDED(D3DDevice->QueryInterface(__uuidof(ID3D12DevicePreview), + (void **)&PreviewDevice)); + + // Query required destination size + PreviewDevice->GetLinearAlgebraMatrixConversionDestinationInfo( + &ConvertInfo.DestInfo); + } + + ConvertedMatrixSize = ConvertInfo.DestInfo.DestSize; + + // Hack to prevent read resource from being created with size 0 + std::vector TempData(ConvertInfo.DestInfo.DestSize); + CreateTestUavs(D3DDevice, CommandList, TempData.data(), TempData.size(), + &ConvertedMatrixResource, nullptr, + &ConvertedMatrixReadResource); + + // Set up data descriptors + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_DATA DataDesc = {}; + DataDesc.DestVA = ConvertedMatrixResource->GetGPUVirtualAddress(); + DataDesc.SrcVA = InputMatrixSRVResource->GetGPUVirtualAddress(); + ConvertInfo.DataDesc = DataDesc; + + // Get command list interface and perform conversion + CComPtr CommandList11; + VERIFY_SUCCEEDED(CommandList->QueryInterface( + __uuidof(ID3D12GraphicsCommandList11), (void **)&CommandList11)); + CommandList11->ConvertLinearAlgebraMatrix(&ConvertInfo, 1); + + // This increments baseHandle + if ((ConvertInfo.DestInfo.DestSize % 4) != 0) { + WEX::Logging::Log::Error(L"DestSize is not aligned to 4 bytes"); + return; + } + CreateRawUAV(D3DDevice, BaseHandle, + ConvertInfo.DestInfo.DestSize / sizeof(int32_t), + ConvertedMatrixResource); + } + + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, ComputePipelineState)); + + SetDescriptorHeap(CommandList, DescriptorHeap); + + CD3DX12_GPU_DESCRIPTOR_HANDLE ResHandle( + DescriptorHeap->GetGPUDescriptorHandleForHeapStart()); + + CommandList->SetComputeRootSignature(RootSignature); + CommandList->SetComputeRootDescriptorTable(0, ResHandle); + CommandList->SetPipelineState(ComputePipelineState); + CommandList->Dispatch(1, 1, 1); + CommandList->Close(); + 
ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + VERIFY_SUCCEEDED(CommandAllocator->Reset()); + VERIFY_SUCCEEDED(CommandList->Reset(CommandAllocator, ComputePipelineState)); + + // Convert matrix to sint8/fp32 row-major format before reading back to the + // CPU. A new resource is created, along with a readback resource, for the + // matrix copy. + CComPtr MatrixRowMajorResource, MatrixRowMajorReadResource; + { + // Create source matrix info + D3D12_LINEAR_ALGEBRA_MATRIX_CONVERSION_INFO ConvertInfo = {}; + ConvertInfo.SrcInfo.SrcLayout = Config.MatrixLayout; + ConvertInfo.SrcInfo.SrcSize = ConvertedMatrixSize; + ConvertInfo.SrcInfo.SrcDataType = AccumulateProps.AccumulationType; + ConvertInfo.SrcInfo.SrcStride = 0; // OUTER_PRODUCT_OPTIMAL + + // Create destination matrix info + ConvertInfo.DestInfo.DestSize = 0; // Will be populated by driver + ConvertInfo.DestInfo.DestLayout = + D3D12_LINEAR_ALGEBRA_MATRIX_LAYOUT_ROW_MAJOR; + ConvertInfo.DestInfo.NumRows = Config.DimM; + ConvertInfo.DestInfo.NumColumns = Config.DimN; + + if (AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT16 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E4M3 || + AccumulateProps.AccumulationType == + D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT_E5M2) { + ConvertInfo.DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_FLOAT32; + ConvertInfo.DestInfo.DestStride = Config.DimN * sizeof(float); + } else { + ConvertInfo.DestInfo.DestDataType = D3D12_LINEAR_ALGEBRA_DATATYPE_SINT8; + ConvertInfo.DestInfo.DestStride = Config.DimN * sizeof(int8_t); + } + + // Get destination size using preview interface + { + CComPtr PreviewDevice; + VERIFY_SUCCEEDED(D3DDevice->QueryInterface(__uuidof(ID3D12DevicePreview), + (void **)&PreviewDevice)); + + // Query required destination size + PreviewDevice->GetLinearAlgebraMatrixConversionDestinationInfo( + 
&ConvertInfo.DestInfo); + } + + // Create resource to hold matrix copy and a readback resource for it + // Init vector is a hack to prevent read resource from being created with + // size 0 + // TODO: Fix CreateTestUavs to allow creating readback resource without init + // data + std::vector TempData(ConvertInfo.DestInfo.DestSize); + CreateTestUavs(D3DDevice, CommandList, TempData.data(), TempData.size(), + &MatrixRowMajorResource, nullptr, + &MatrixRowMajorReadResource); + + // Set up data descriptors + ConvertInfo.DataDesc.DestVA = + MatrixRowMajorResource->GetGPUVirtualAddress(); + ConvertInfo.DataDesc.SrcVA = + ConvertedMatrixResource->GetGPUVirtualAddress(); + + // Get command list interface and perform conversion + CComPtr CommandList11; + VERIFY_SUCCEEDED(CommandList->QueryInterface( + __uuidof(ID3D12GraphicsCommandList11), (void **)&CommandList11)); + CommandList11->ConvertLinearAlgebraMatrix(&ConvertInfo, 1); + } + + RecordTransitionBarrier(CommandList, MatrixRowMajorResource, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + D3D12_RESOURCE_STATE_COPY_SOURCE); + CommandList->CopyResource(MatrixRowMajorReadResource, MatrixRowMajorResource); + CommandList->Close(); + ExecuteCommandList(CommandQueue, CommandList); + WaitForSignal(CommandQueue, FO); + + { + MappedData MappedData(MatrixRowMajorReadResource, (UINT)InputMatrix.size()); + + float *ResultBuffer = (float *)MappedData.data(); + bool Equal = true; + for (int i = 0; i < (UINT)InputMatrix.size() / sizeof(float); i++) { + if (isnan(ResultBuffer[i]) || isnan(ExpectedOutputBuffer[i]) || + fabs(ResultBuffer[i] - ExpectedOutputBuffer[i]) > 0.00001) { + LogErrorFmt(L"Result mismatch at index %d", i); + LogErrorFmt(L"ResultBuffer[%d]: %f, ExpectedOutputBuffer[%d]: %f", i, + ResultBuffer[i], i, ExpectedOutputBuffer[i]); + Equal = false; + break; + } + } + VERIFY_IS_TRUE(Equal); + } +} +#endif // HAVE_COOPVEC_API + +TEST_F(ExecutionTest, CoopVec_OuterProduct) { + WEX::TestExecution::SetVerifyOutput verifySettings( + 
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + runCoopVecOuterProductTest(); +} + +// This test expects a shader (pShader) that retrieves a signal value from each of a +// few resources that are initialized here. isDynamic determines if it uses +// the 6.6 Dynamic Resources feature. Values are read back from the result UAV +// and compared to the expected signals +void ExecutionTest::RunResourceTest(ID3D12Device *pDevice, const char *pShader, + const wchar_t *sm, bool isDynamic) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + const int NumSRVs = 3; + const int NumUAVs = 4; + const int NumResources = NumSRVs + NumUAVs; + const int NumSamplers = 2; + const int valueSize = 16; + + static const int DispatchGroupX = 1; + static const int DispatchGroupY = 1; + static const int DispatchGroupZ = 1; + + CComPtr pCommandList; + CComPtr pCommandQueue; + CComPtr pCommandAllocator; + FenceObj FO; + + UINT valueSizeInBytes = valueSize * sizeof(float); + CreateComputeCommandQueue(pDevice, L"DynamicResourcesTest Command Queue", + &pCommandQueue); + InitFenceObj(pDevice, &FO); + + // Create root signature. 
+ CComPtr pRootSignature; + if (!isDynamic) { + // Not dynamic, create a range for each resource and from them, the root + // signature + CD3DX12_DESCRIPTOR_RANGE ranges[NumResources]; + CD3DX12_DESCRIPTOR_RANGE srange[NumSamplers]; + for (int i = 0; i < NumSRVs; i++) + ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, i, 0); + + for (int i = NumSRVs; i < NumResources; i++) + ranges[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, i - NumSRVs, 0); + + for (int i = 0; i < NumSamplers; i++) + srange[i].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, i, 0); + + CreateRootSignatureFromRanges(pDevice, &pRootSignature, ranges, + NumResources, srange, NumSamplers); + } else { + // Dynamic just requires the flags indicating that the builtin arrays should + // be accessible +#if !defined(D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED) +#define D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED \ + (D3D12_ROOT_SIGNATURE_FLAGS)0x400 +#define D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED \ + (D3D12_ROOT_SIGNATURE_FLAGS)0x800 +#endif + CD3DX12_ROOT_SIGNATURE_DESC rootSignatureDesc; + rootSignatureDesc.Init( + 0, nullptr, 0, nullptr, + D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | + D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED); + CreateRootSignatureFromDesc(pDevice, &rootSignatureDesc, &pRootSignature); + } + + // Create pipeline state object. + CComPtr pComputeState; + CreateComputePSO(pDevice, pRootSignature, pShader, sm, &pComputeState); + + // Create a command allocator and list for compute. 
+ VERIFY_SUCCEEDED(pDevice->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_COMPUTE, IID_PPV_ARGS(&pCommandAllocator))); + VERIFY_SUCCEEDED(pDevice->CreateCommandList( + 0, D3D12_COMMAND_LIST_TYPE_COMPUTE, pCommandAllocator, pComputeState, + IID_PPV_ARGS(&pCommandList))); + + // Set up SRV resources + CComPtr pSRVResources[NumSRVs]; + CComPtr pUAVResources[NumUAVs]; + CComPtr pUploadResources[NumResources]; + { + D3D12_RESOURCE_DESC bufDesc = + CD3DX12_RESOURCE_DESC::Buffer(valueSizeInBytes); + float values[valueSize]; + for (int i = 0; i < NumSRVs - 1; i++) { + for (int j = 0; j < valueSize; j++) + values[j] = 10.0f + i; + CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, + bufDesc, &pSRVResources[i], &pUploadResources[i]); + } + D3D12_RESOURCE_DESC tex2dDesc = + CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_FLOAT, 4, 4); + for (int j = 0; j < valueSize; j++) + values[j] = 10.0 + (NumSRVs - 1); + CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, + tex2dDesc, &pSRVResources[NumSRVs - 1], + &pUploadResources[NumSRVs - 1]); + } + + // Set up UAV resources + CComPtr pReadBuffer; + float values[valueSize]; + for (int i = 0; i < NumUAVs - 2; i++) { + for (int j = 0; j < valueSize; j++) + values[j] = 20.0f + i; + CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, + &pUAVResources[i], &pUploadResources[NumSRVs + i]); + } + for (int j = 0; j < valueSize; j++) + values[j] = 20.0 + (NumUAVs - 1); + CreateTestUavs(pDevice, pCommandList, values, valueSizeInBytes, + &pUAVResources[NumUAVs - 2], + &pUploadResources[NumResources - 2], &pReadBuffer); + + for (int j = 0; j < valueSize; j++) + values[j] = 20.0 + (NumUAVs - 2); + D3D12_RESOURCE_DESC tex1dDesc = + CD3DX12_RESOURCE_DESC::Tex1D(DXGI_FORMAT_R32_FLOAT, valueSize, 1, 0, + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + CreateTestResources(pDevice, pCommandList, values, valueSizeInBytes, + tex1dDesc, &pUAVResources[NumUAVs - 1], + &pUploadResources[NumResources - 1]); + + 
// Close the command list and execute it to perform the GPU setup. + pCommandList->Close(); + ExecuteCommandList(pCommandQueue, pCommandList); + WaitForSignal(pCommandQueue, FO); + VERIFY_SUCCEEDED(pCommandAllocator->Reset()); + VERIFY_SUCCEEDED(pCommandList->Reset(pCommandAllocator, pComputeState)); + + CComPtr pResHeap; + CComPtr pSampHeap; + CreateDefaultDescHeaps(pDevice, NumSRVs + NumUAVs, NumSamplers, &pResHeap, &pSampHeap); // Create Rootsignature and descriptor tables @@ -11641,20 +14696,23 @@ void ExecuteWaveSizeRangeInstance(UINT minWaveSize, UINT maxWaveSize, })"; // format compiler args - char compilerOptions[64]; + char compilerOptions[70]; if (usePreferred) { // putting spaces in between the %d's below will cause compilation issues. - VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), - "-D WAVE_SIZE_ATTR=[wavesize(%d,%d,%d)]", - minShaderWaveSize, maxShaderWaveSize, - prefShaderWaveSize) != -1); + VERIFY_IS_TRUE( + sprintf_s( + compilerOptions, sizeof(compilerOptions), + "-D WAVE_SIZE_ATTR=[wavesize(%d,%d,%d)] -select-validator internal", + minShaderWaveSize, maxShaderWaveSize, prefShaderWaveSize) != -1); LogCommentFmt(L"Verifying wave size range test results for (min, max, " L"preferred): (%d, %d, %d)", minShaderWaveSize, maxShaderWaveSize, prefShaderWaveSize); } else { - VERIFY_IS_TRUE(sprintf_s(compilerOptions, sizeof(compilerOptions), - "-D WAVE_SIZE_ATTR=[wavesize(%d,%d)]", - minShaderWaveSize, maxShaderWaveSize) != -1); + VERIFY_IS_TRUE( + sprintf_s( + compilerOptions, sizeof(compilerOptions), + "-D WAVE_SIZE_ATTR=[wavesize(%d,%d)] -select-validator internal", + minShaderWaveSize, maxShaderWaveSize) != -1); LogCommentFmt( L"Verifying wave size range test results for (min, max): (%d, %d)", minShaderWaveSize, maxShaderWaveSize); diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest_SER.h b/tools/clang/unittests/HLSLExec/ExecutionTest_SER.h new file mode 100644 index 0000000000..553a913fa5 --- /dev/null +++ 
b/tools/clang/unittests/HLSLExec/ExecutionTest_SER.h @@ -0,0 +1,3216 @@ +//===--------- ExecutionTest_SER.h - SER Execution Tests -------*- C++ -*-===// +/////////////////////////////////////////////////////////////////////////////// +// // +// ExecutionTest_SER.h // +// Copyright (C) Nvidia Corporation. All rights reserved. // +// This file is distributed under the University of Illinois Open Source // +// License. See LICENSE.TXT for details. // +// // +// This file contains the execution tests for SER. // +// // +/////////////////////////////////////////////////////////////////////////////// + +#pragma once + +struct SERAccessor { + enum ScalarTypes { + UINT = 0, + FLOAT = 1, + }; + + ScalarTypes ScalarType; + int ValRows; + int ValCols; + + LPCWSTR HitObjectGetter; + LPCWSTR CHGetter; + LPCWSTR MSGetter; + LPCWSTR NOPGetter; + + LPCWSTR getScalarTypeName() const { + switch (ScalarType) { + case UINT: + return L"uint"; + case FLOAT: + return L"float"; + default: + return L"UNKOWN TYPE"; + } + } + + void addCompileArgs(std::vector &OwnedArgs, + std::vector &ArgVec) const { + // Value dimensions + OwnedArgs.emplace_back(L"-DM_ROWS=" + std::to_wstring(ValRows)); + ArgVec.push_back(OwnedArgs.back().c_str()); + OwnedArgs.emplace_back(L"-DM_COLS=" + std::to_wstring(ValCols)); + ArgVec.push_back(OwnedArgs.back().c_str()); + + LPCWSTR ScalarTypeName = getScalarTypeName(); + OwnedArgs.emplace_back(L"-DSCALAR_TYPE=" + std::wstring(ScalarTypeName)); + ArgVec.push_back(OwnedArgs.back().c_str()); + if (ValRows == 0 && ValCols == 0) { + // Scalar + OwnedArgs.emplace_back(L"-DRESULT_TYPE=" + std::wstring(ScalarTypeName)); + ArgVec.push_back(OwnedArgs.back().c_str()); + } else if (ValRows >= 1 && ValCols == 0) { + // Vector + OwnedArgs.emplace_back(L"-DRESULT_TYPE=" + std::wstring(ScalarTypeName) + + std::to_wstring(ValRows)); + ArgVec.push_back(OwnedArgs.back().c_str()); + } else if (ValRows > 0 && ValCols > 0) { + // Matrix + OwnedArgs.emplace_back(L"-DMATRIX_ELEMENT_TYPE=" + 
+ std::wstring(ScalarTypeName)); + ArgVec.push_back(OwnedArgs.back().c_str()); + } + + OwnedArgs.emplace_back(L"-DHITOBJECT_GET_RESULT=" + + std::wstring(HitObjectGetter)); + ArgVec.push_back(OwnedArgs.back().c_str()); + OwnedArgs.emplace_back(L"-DCH_GET_RESULT=" + std::wstring(CHGetter)); + ArgVec.push_back(OwnedArgs.back().c_str()); + OwnedArgs.emplace_back(L"-DMS_GET_RESULT=" + std::wstring(MSGetter)); + ArgVec.push_back(OwnedArgs.back().c_str()); + OwnedArgs.emplace_back(L"-DNOP_GET_RESULT=" + std::wstring(NOPGetter)); + ArgVec.push_back(OwnedArgs.back().c_str()); + } +}; + +struct SERTestConfig { + // Source of the hit object or reference value under test. + enum Method { + TraceRay = + 0, // Source queried in closesthit, miss shaders called by TraceRay + RayQuery = 1, // Source is HitObject::FromRayQuery + HitObject_TraceRay = 2, // Source is HitObject::TraceRay + HitObject_Invoke = 3, // [only used for recursion] + }; + // Returns the textual name of a Method value; every enumerator is covered so + // that only out-of-range values map to "UNKNOWN". + std::wstring getMethodStr(Method src) const { + switch (src) { + case TraceRay: + return L"TraceRay"; + case RayQuery: + return L"RayQuery"; + case HitObject_TraceRay: + return L"HitObject_TraceRay"; + case HitObject_Invoke: + return L"HitObject_Invoke"; + default: + return L"UNKNOWN"; + } + } + + enum ResultFrom { + FromShaders = 0, // Call getters in CH, MS + FromHitObject = 1, // Call getters on HitObject + }; + // Returns the textual name of a ResultFrom value. + std::wstring getResultFromStr(ResultFrom resultFrom) const { + switch (resultFrom) { + case FromShaders: + return L"FromShaders"; + case FromHitObject: + return L"FromHitObject"; + default: + return L"UNKNOWN"; + } + } + + // Where the hit object code is located. 
+ enum TestLocation { + RayGen = 0, // In raygen shader + ClosestHit = 1, // In closesthit shader + Miss = 2, // In miss shader + }; + // Returns the textual name of a TestLocation value. + std::wstring getTestLocationStr(TestLocation loc) const { + switch (loc) { + case RayGen: + return L"RayGen"; + case ClosestHit: + return L"ClosestHit"; + case Miss: + return L"Miss"; + default: + return L"UNKNOWN"; + } + } + + bool UseTriangles; + bool UseProceduralGeometry; + + bool ReorderHitObject; + TestLocation TestLoc; + + Method TraceMethod; + ResultFrom ResultSrc; + + Method RecMethod; // only used if TestLoc != RayGen + + // TestLocation TestLocation; + // + // True when the hit object code is not located in the raygen shader. + bool hasRecursion() const { return TestLoc != TestLocation::RayGen; } + + // Appends to ArgVec the -D defines that select this configuration: trace + // method, result source, optional reorder, test location and, when + // hasRecursion() is true, the recursion method. Unexpected enum values fail + // the test via VERIFY_IS_TRUE(false). + void addCompileArgs(std::vector &ArgVec) const { + // How to produce the hit object and get the value from it + switch (TraceMethod) { + case TraceRay: + // Plain TraceRay(): values are queried in the closesthit/miss shaders + ArgVec.push_back(L"-DMETHOD_TRACERAY=1"); + break; + case HitObject_TraceRay: + // Getter called on HitObject produced by HitObject::TraceRay + ArgVec.push_back(L"-DMETHOD_HITOBJECT_TRACERAY=1"); + break; + case RayQuery: + // Getter called on HitObject produced by HitObject::FromRayQuery + ArgVec.push_back(L"-DMETHOD_HITOBJECT_FROMRQ=1"); + break; + default: + VERIFY_IS_TRUE(false); + break; + } + + switch (ResultSrc) { + case FromShaders: + ArgVec.push_back(L"-DRESULT_FROM_SHADERS=1"); + break; + case FromHitObject: + ArgVec.push_back(L"-DRESULT_FROM_HITOBJECT=1"); + break; + default: + VERIFY_IS_TRUE(false); + break; + } + + if (ReorderHitObject) + ArgVec.push_back(L"-DREORDER_HITOBJECT=1"); + + switch (TestLoc) { + case TestLocation::RayGen: + ArgVec.push_back(L"-DTESTLOC_RAYGEN=1"); + break; + case TestLocation::ClosestHit: + ArgVec.push_back(L"-DTESTLOC_CLOSESTHIT=1"); + // Do not fall through: a ClosestHit config must not also define + // TESTLOC_MISS. + break; + case TestLocation::Miss: + ArgVec.push_back(L"-DTESTLOC_MISS=1"); + break; + default: + VERIFY_IS_TRUE(false); + break; + } + + if (hasRecursion()) { + ArgVec.push_back(L"-DENABLE_RECURSION=1"); + + // 
Primary shading call to test HitObject in CH/MS + switch (RecMethod) { + case TraceRay: + ArgVec.push_back(L"-DRECMETHOD_TRACERAY=1"); + break; + case HitObject_Invoke: + ArgVec.push_back(L"-DRECMETHOD_HITOBJECT_INVOKE=1"); + break; + default: + VERIFY_IS_TRUE(false); + break; + } + } + } + + std::wstring str() const { + std::wstring txt; + if (UseTriangles) + txt += L"tris;"; + if (UseProceduralGeometry) + txt += L"aabbs;"; + txt += L"trace=" + getMethodStr(TraceMethod) + L";"; + txt += L"result=" + getResultFromStr(ResultSrc) + L";"; + txt += L"loc=" + getTestLocationStr(TestLoc) + L";"; + if (ReorderHitObject) { + txt += L"reorder;"; + } + if (hasRecursion()) { + txt += L"rec;"; + } + return txt; + } +}; + +// clang-format off +static constexpr SERAccessor Accessors[] = { + // Scalar + {SERAccessor::FLOAT, 0, 0, L"GetRayTMin", L"RayTMin", L"RayTMin", L"getFloatZero"}, + {SERAccessor::FLOAT, 0, 0, L"GetRayTCurrent", L"RayTCurrent", L"RayTCurrent", L"getFloatZero"}, + {SERAccessor::UINT, 0, 0, L"GetRayFlags", L"RayFlags", L"RayFlags", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"GetHitKind", L"HitKind", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"GetGeometryIndex", L"GeometryIndex", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"GetInstanceIndex", L"InstanceIndex", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"GetInstanceID", L"InstanceID", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"GetPrimitiveIndex", L"PrimitiveIndex", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"IsHit", L"getIntOne", L"getIntZero", L"getIntZero"}, + {SERAccessor::UINT, 0, 0, L"IsNop", L"getIntZero", L"getIntZero", L"getIntOne"}, + {SERAccessor::UINT, 0, 0, L"IsMiss", L"getIntZero", L"getIntOne", L"getIntZero"}, + // Vector + {SERAccessor::FLOAT, 3, 0, L"GetWorldRayOrigin", L"WorldRayOrigin", L"WorldRayOrigin", L"getVec3Zero"}, + {SERAccessor::FLOAT, 3, 0, L"GetWorldRayDirection", L"WorldRayDirection", 
L"WorldRayDirection", L"getVec3Zero"}, + {SERAccessor::FLOAT, 3, 0, L"GetObjectRayOrigin", L"ObjectRayOrigin", L"WorldRayOrigin", L"getVec3Zero"}, + {SERAccessor::FLOAT, 3, 0, L"GetObjectRayDirection", L"ObjectRayDirection", L"WorldRayDirection", L"getVec3Zero"}, + // Matrix + {SERAccessor::FLOAT, 3, 4, L"GetWorldToObject3x4", L"WorldToObject3x4", L"getOneDiagonalMat", L"getOneDiagonalMat"}, + {SERAccessor::FLOAT, 4, 3, L"GetWorldToObject4x3", L"WorldToObject4x3", L"getOneDiagonalMat", L"getOneDiagonalMat"}, + {SERAccessor::FLOAT, 3, 4, L"GetObjectToWorld3x4", L"ObjectToWorld3x4", L"getOneDiagonalMat", L"getOneDiagonalMat"}, + {SERAccessor::FLOAT, 4, 3, L"GetObjectToWorld4x3", L"ObjectToWorld4x3", L"getOneDiagonalMat", L"getOneDiagonalMat"}, +}; +// clang-format on + +static const char *SERPermutationTestShaderSrc = R"( + +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +#ifdef MATRIX_ELEMENT_TYPE +typedef matrix ValueType; +#else +typedef RESULT_TYPE ValueType; +#endif + +struct [raypayload] PerRayData +{ + int recursionDepth : read(caller,closesthit,miss) : write(caller,closesthit,miss); +}; + +struct TriangleAttrs +{ + float2 barycentrics; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +#ifdef MATRIX_ELEMENT_TYPE +typedef matrix MatrixType; + +matrix getOneDiagonalMat() { + matrix mat = 0; + mat[0][0] = 1.f; + mat[1][1] = 1.f; + 
mat[2][2] = 1.f; + return mat; +} +#endif + +void StoreResult(ValueType result) { + const int numRows = M_ROWS > 0 ? M_ROWS : 1; + const int numCols = M_COLS > 0 ? M_COLS : 1; + const int numResultElements = numRows * numCols; + + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + const int id = numResultElements * (launchIndex.x + launchIndex.y * launchDim.x); + +#ifdef MATRIX_ELEMENT_TYPE +#if M_ROWS == 0 || M_COLS == 0 +#error "Zero-sized matrix dimension" +#endif + + // Matrix + for (int r = 0; r < M_ROWS; r++) { + for (int c = 0; c < M_COLS; c++) { + testBuffer[id + (r * M_COLS + c)] = result[r][c]; + } + } + +#elif M_ROWS +#if M_COLS +#error "Rows specified for vector" +#endif + // Vector + for (int r = 0; r < M_ROWS; r++) { + testBuffer[id + r] = result[r]; + } +#else + testBuffer[id] = result; +#endif +} + +// Procedural geometry for use by RayQuery and intersection shader +static const int ProceduralHitKind = 11; + +struct CustomAttrs +{ + float dist; +}; + +bool evalIntersection(float3 objRayOrigin, float3 objRayDir, float rayTMax, float rayTMin, out CustomAttrs attrs, out float rayT) +{ + rayT = 0; + // Intersection with circle on a plane (base, n, radius) + // hitPos is intersection point with plane (base, n) + float3 base = {0.0f,0.0f,0.5f}; + float3 n = normalize(float3(0.0f,0.5f,0.5f)); + float radius = 500.f; + // Plane hit + float t = dot(n, base - objRayOrigin) / dot(n, objRayDir); + if (t > rayTMax || t < rayTMin) { + return false; + } + float3 hitPos = objRayOrigin + t * objRayDir; + float3 relHitPos = hitPos - base; + // Circle hit + float hitDist = length(relHitPos); + if (hitDist > radius) + return false; + + attrs.dist = hitDist; + rayT = t; + return true; +} + +#if ATTRIBUTES_TEST +void StoreTriangleAttributes(TriangleAttrs attrs) { + float2 resValue = attrs.barycentrics; + StoreResult(resValue); +} + +void StoreProceduralAttributes(CustomAttrs attrs) { + float2 resValue = {attrs.dist, 0}; + 
StoreResult(resValue); +} +#endif + + +static dx::HitObject hitObjectTraceFromRQ(RayDesc ray) { + RayQuery rayQ; + rayQ.TraceRayInline(topObject, RAY_FLAG_NONE, 0xFF, ray); + + float tHit = 0; + CustomAttrs customAttrs = {0}; + + while (rayQ.Proceed()) { + switch (rayQ.CandidateType()) { + + // Acccept all triangle hits + case CANDIDATE_NON_OPAQUE_TRIANGLE: { + rayQ.CommitNonOpaqueTriangleHit(); + break; + } + + // Use same decision logic as intersection shader + case CANDIDATE_PROCEDURAL_PRIMITIVE: { + if (evalIntersection(rayQ.CandidateObjectRayOrigin(), rayQ.CandidateObjectRayDirection(), rayQ.CommittedRayT(), rayQ.RayTMin(), customAttrs, tHit)) { + rayQ.CommitProceduralPrimitiveHit(tHit); + } + break; + } + + default: + break; + } + } + + switch (rayQ.CommittedStatus()) { + case COMMITTED_NOTHING: + return dx::HitObject::MakeMiss(RAY_FLAG_NONE, 0, ray); + case COMMITTED_TRIANGLE_HIT: { + TriangleAttrs attrs; + attrs.barycentrics = rayQ.CommittedTriangleBarycentrics(); + uint HitKind = rayQ.CommittedTriangleFrontFace() ? HIT_KIND_TRIANGLE_FRONT_FACE : HIT_KIND_TRIANGLE_BACK_FACE; + dx::HitObject hitObject = dx::HitObject::FromRayQuery(rayQ, HitKind, attrs); + hitObject.SetShaderTableIndex(0); + return hitObject; + } + case COMMITTED_PROCEDURAL_PRIMITIVE_HIT: { + dx::HitObject hitObject = dx::HitObject::FromRayQuery(rayQ, ProceduralHitKind, customAttrs); + hitObject.SetShaderTableIndex(0); + return hitObject; + } + default: + return dx::HitObject(); + } +} + +void CallTraceMethod(int recursionDepth) { + const int numRows = M_ROWS > 0 ? M_ROWS : 1; + const int numCols = M_COLS > 0 ? 
M_COLS : 1; + const int numResultElements = numRows * numCols; + + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + const int id = numResultElements * (launchIndex.x + launchIndex.y * launchDim.x); + + RayDesc ray = ComputeRay(); + + PerRayData payload; +#ifdef ENABLE_RECURSION + payload.recursionDepth = recursionDepth; +#endif + + +#if METHOD_TRACERAY +///// Reference result + TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); +#if !RESULT_FROM_SHADERS + #error "TraceRay() implicitly gets results from shaders" +#endif + +///// Produce hit object +#elif METHOD_HITOBJECT_TRACERAY + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + +#elif METHOD_HITOBJECT_FROMRQ + dx::HitObject hitObject = hitObjectTraceFromRQ(ray); +#endif + +#if REORDER_HITOBJECT + dx::MaybeReorderThread(hitObject); +#endif + +///// Query hit object getter directly +#if RESULT_FROM_HITOBJECT +#if ATTRIBUTES_TEST + if (hitObject.IsMiss()) { + // Test for zero-init of miss + TriangleAttrs attrs; +#if NEW_GETATTRIBUTES_API + hitObject.GetAttributes(attrs); +#else + attrs = hitObject.GetAttributes(); +#endif + StoreTriangleAttributes(attrs); + } else if (hitObject.GetHitKind() == ProceduralHitKind) { + CustomAttrs attrs; +#if NEW_GETATTRIBUTES_API + hitObject.GetAttributes(attrs); +#else + attrs = hitObject.GetAttributes(); +#endif + StoreProceduralAttributes(attrs); + } else { + TriangleAttrs attrs; +#if NEW_GETATTRIBUTES_API + hitObject.GetAttributes(attrs); +#else + attrs = hitObject.GetAttributes(); +#endif + StoreTriangleAttributes(attrs); + } +#else + StoreResult(hitObject.HITOBJECT_GET_RESULT()); +#endif + +#elif RESULT_FROM_SHADERS +#if !METHOD_TRACERAY + // Already invoked in TraceRay() + dx::HitObject::Invoke(hitObject, payload); +#endif +#endif +} + +[shader("raygeneration")] +void raygen() +{ +#if ENABLE_RECURSION + RayDesc ray = ComputeRay(); + PerRayData recPayload; + 
recPayload.recursionDepth = 1; +#if RECMETHOD_TRACERAY + TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, recPayload); +#elif RECMETHOD_HITOBJECT_INVOKE + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, recPayload); + dx::HitObject::Invoke(hitObject, recPayload); +#else +#error "Unsupported shading method in recursive tests" +#endif + return; +#endif + +#if TESTLOC_RAYGEN + CallTraceMethod(1); + return; +#if ENABLE_RECURSION +#error "Must disable recursion when testing in raygen" +#endif +#endif +} + +float getFloatZero() { return 0.0f; } +int getIntZero() { return 0; } +int getIntOne() { return 1; } + +[shader("miss")] +void miss(inout PerRayData payload) +{ +#if TESTLOC_MISS + if (payload.recursionDepth == 1) + { + CallTraceMethod(payload.recursionDepth + 1); + return; + } +#endif + +#if ATTRIBUTES_TEST + StoreResult(float2(0,0)); +#else + StoreResult(MS_GET_RESULT()); +#endif +} + +///// Triangle hit group +[shader("anyhit")] +void anyhit(inout PerRayData payload, in TriangleAttrs attrs) +{ + // UNUSED +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in TriangleAttrs attrs) +{ +#if TESTLOC_CLOSESTHIT + if (payload.recursionDepth == 1) + { + CallTraceMethod(payload.recursionDepth + 1); + return; + } +#endif + +#if ATTRIBUTES_TEST + StoreTriangleAttributes(attrs); +#else + StoreResult(CH_GET_RESULT()); +#endif +} + +///// AABB hit group +[shader("closesthit")] +void chAABB(inout PerRayData payload, in CustomAttrs customAttrs) +{ +#if TESTLOC_CLOSESTHIT + if (payload.recursionDepth == 1) + { + CallTraceMethod(payload.recursionDepth + 1); + return; + } +#endif + +#if ATTRIBUTES_TEST + StoreProceduralAttributes(customAttrs); +#else + StoreResult(CH_GET_RESULT()); +#endif +} + +[shader("intersection")] +void intersection() +{ + CustomAttrs attrs = {0}; + float rayT; + if (evalIntersection(ObjectRayOrigin(), ObjectRayDirection(), RayTCurrent(), RayTMin(), attrs, rayT)) { + ReportHit(rayT, 
ProceduralHitKind, attrs); + } +} + +[shader("anyhit")] +void ahAABB(inout PerRayData payload, in CustomAttrs attrs) +{ + // UNUSED +} + +)"; + +template +static void VerifyTestArray(const T* RefData, const T* TestData, int NumElements); + +template<> +void VerifyTestArray(const int* RefData, const int* TestData, int NumElements) { + for (int i = 0; i < NumElements; i++) { + if (RefData[i] != TestData[i]) { + VERIFY_ARE_EQUAL(RefData[i], TestData[i]); + } + } +} + +template<> +void VerifyTestArray(const float* RefData, const float* TestData, int NumElements) { + for (int i = 0; i < NumElements; i++) { + const float RefVal = RefData[i]; + const float TestVal = TestData[i]; + if (!CompareFloatEpsilon(TestVal, RefVal, 0.0008f)) { + VERIFY_ARE_EQUAL(TestVal, RefVal); + } + } +} + +TEST_F(ExecutionTest, SERGetterPermutationTest) { + // SER: Test basic function of HitObject getters. + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + SERTestConfig RefConfig = {true, + true, + false, + SERTestConfig::RayGen, + SERTestConfig::TraceRay, + SERTestConfig::FromShaders, + SERTestConfig::TraceRay}; + + std::vector TestConfigs; + for (SERTestConfig::TestLocation TestLoc : + {SERTestConfig::RayGen, SERTestConfig::Miss, + SERTestConfig::ClosestHit}) { + for (bool Reorder : {true, false}) { + // MaybeReorderThreads only supported in RayGens + if (TestLoc != SERTestConfig::RayGen && Reorder) + continue; + + for (SERTestConfig::Method TestMethod : + {SERTestConfig::HitObject_TraceRay, SERTestConfig::RayQuery}) { + for (SERTestConfig::ResultFrom ResultSrc : + {SERTestConfig::FromShaders, SERTestConfig::FromHitObject}) { + SERTestConfig TestConfig = RefConfig; + TestConfig.TestLoc = TestLoc; + TestConfig.TraceMethod = TestMethod; + TestConfig.ReorderHitObject = Reorder; + TestConfig.ResultSrc = ResultSrc; + + if (TestLoc == SERTestConfig::RayGen) { + TestConfigs.push_back(TestConfig); + continue; + } + + // Variations on primary shading call to 
test HitObject in CH/MS + for (SERTestConfig::Method RecMethod : + {SERTestConfig::TraceRay, SERTestConfig::HitObject_Invoke}) { + TestConfig.RecMethod = RecMethod; + TestConfigs.push_back(TestConfig); + } + } + } + } + } + + // 64 x 64 test window size + const int WindowSize = 64; + + for (const auto &Accessor : Accessors) { + const int NumResultRows = Accessor.ValRows > 0 ? Accessor.ValRows : 1; + const int NumResultCols = Accessor.ValCols > 0 ? Accessor.ValCols : 1; + const int NumResultElements = NumResultRows * NumResultCols; + const int RefMaxRecursion = RefConfig.hasRecursion() ? 2 : 1; + + // Query reference result + std::vector RefData(WindowSize * WindowSize * NumResultElements); + std::vector RefArgs; + std::vector OwnedRefArgs; + RefArgs.push_back(L"-HV 2021"); + RefArgs.push_back(L"-Vd"); + Accessor.addCompileArgs(OwnedRefArgs, RefArgs); + RefConfig.addCompileArgs(RefArgs); + + const int ExtraRec = 0; + DXRRunConfig RefRunConfig = { + WindowSize, + WindowSize, + RefConfig.UseTriangles, + RefConfig.UseProceduralGeometry, + RefMaxRecursion + ExtraRec, + }; + RunDXRTest(Device, SERPermutationTestShaderSrc, L"lib_6_9", RefArgs.data(), + (int)RefArgs.size(), RefData, RefRunConfig); + + // Test permutations + for (const auto &TestConfig : TestConfigs) { + DXRRunConfig TestRunConfig(RefRunConfig); + TestRunConfig.MaxRecursion = + ExtraRec + (TestConfig.hasRecursion() ? 
2 : 1); + + std::wstring TestConfigTxt = L"HitObject::"; + TestConfigTxt += Accessor.HitObjectGetter; + TestConfigTxt += L"() with config " + TestConfig.str(); + + { + std::wstring TestingMsg = L"Testing " + TestConfigTxt; + WEX::Logging::Log::Comment(TestingMsg.c_str()); + } + + std::vector Args; + std::vector OwnedArgs; + Args.push_back(L"-HV 2021"); + Args.push_back(L"-Vd"); + Accessor.addCompileArgs(OwnedArgs, Args); + TestConfig.addCompileArgs(Args); + + std::vector TestData(WindowSize * WindowSize * NumResultElements, 0); + + RunDXRTest(Device, SERPermutationTestShaderSrc, L"lib_6_9", Args.data(), + (int)Args.size(), TestData, TestRunConfig); + + const int NumArrayElems = WindowSize * WindowSize * NumResultElements; + switch (Accessor.ScalarType) { + case SERAccessor::FLOAT: + VerifyTestArray(reinterpret_cast(RefData.data()), + reinterpret_cast(TestData.data()), + NumArrayElems); + break; + case SERAccessor::UINT: + VerifyTestArray(reinterpret_cast(RefData.data()), + reinterpret_cast(TestData.data()), + NumArrayElems); + break; + } + } + } +} + +TEST_F(ExecutionTest, SERAttributesPermutationTest) { + // SER: Test that hit attributes produced by each test configuration match + // those stored by the reference TraceRay configuration (ATTRIBUTES_TEST=1). 
+ + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Reference configuration; all test variations below are copies of it with + // individual fields overridden. + SERTestConfig RefConfig = {true, + true, + false, + SERTestConfig::RayGen, + SERTestConfig::TraceRay, + SERTestConfig::FromShaders, + SERTestConfig::TraceRay}; + + std::vector TestConfigs; + for (SERTestConfig::TestLocation TestLoc : + {SERTestConfig::RayGen, SERTestConfig::Miss, + SERTestConfig::ClosestHit}) { + for (bool Reorder : {true, false}) { + // MaybeReorderThread only supported in RayGens + if (TestLoc != SERTestConfig::RayGen && Reorder) + continue; + + for (SERTestConfig::Method TestMethod : + {SERTestConfig::HitObject_TraceRay, SERTestConfig::RayQuery}) { + for (SERTestConfig::ResultFrom ResultSrc : + {SERTestConfig::FromShaders, SERTestConfig::FromHitObject}) { + SERTestConfig TestConfig = RefConfig; + TestConfig.TestLoc = TestLoc; + TestConfig.TraceMethod = TestMethod; + TestConfig.ReorderHitObject = Reorder; + TestConfig.ResultSrc = ResultSrc; + + if (TestLoc == SERTestConfig::RayGen) { + TestConfigs.push_back(TestConfig); + continue; + } + + // Variations on primary shading call to test HitObject in CH/MS + for (SERTestConfig::Method RecMethod : + {SERTestConfig::TraceRay, SERTestConfig::HitObject_Invoke}) { + TestConfig.RecMethod = RecMethod; + TestConfigs.push_back(TestConfig); + } + } + } + } + } + + // 64 x 64 test window size + const int WindowSize = 64; + + const int NumResultElements = 2; // Just for Attrs + const int RefMaxRecursion = RefConfig.hasRecursion() ? 
2 : 1; + + std::vector BaseArgs; + BaseArgs.push_back(L"-HV 2021"); + BaseArgs.push_back(L"-Vd"); + BaseArgs.push_back(L"-DSCALAR_TYPE=float"); + BaseArgs.push_back(L"-DRESULT_TYPE=float2"); + BaseArgs.push_back(L"-DM_ROWS=2"); + BaseArgs.push_back(L"-DM_COLS=0"); + BaseArgs.push_back(L"-DATTRIBUTES_TEST=1"); + + // Query reference result + std::vector RefData(WindowSize * WindowSize * NumResultElements); + std::vector RefArgs(BaseArgs); + RefConfig.addCompileArgs(RefArgs); + + DXRRunConfig RunConfig = { + WindowSize, + WindowSize, + RefConfig.UseTriangles, + RefConfig.UseProceduralGeometry, + RefMaxRecursion, + }; + RunDXRTest(Device, SERPermutationTestShaderSrc, L"lib_6_9", RefArgs.data(), + (int)RefArgs.size(), RefData, RunConfig); + + // Test permutations + for (const auto &TestConfig : TestConfigs) { + DXRRunConfig TestRunConfig(RunConfig); + TestRunConfig.MaxRecursion = TestConfig.hasRecursion() ? 2 : 1; + + std::wstring TestConfigTxt = + L"HitObject attributes with config " + TestConfig.str(); + + { + std::wstring TestingMsg = L"Testing " + TestConfigTxt; + WEX::Logging::Log::Comment(TestingMsg.c_str()); + } + + std::vector Args(BaseArgs); + TestConfig.addCompileArgs(Args); + + std::vector TestData(WindowSize * WindowSize * NumResultElements, 0); + + RunDXRTest(Device, SERPermutationTestShaderSrc, L"lib_6_9", Args.data(), + (int)Args.size(), TestData, TestRunConfig); + + const int NumArrayElems = WindowSize * WindowSize * NumResultElements; + VerifyTestArray(reinterpret_cast(RefData.data()), + reinterpret_cast(TestData.data()), + NumArrayElems); + } +} + +TEST_F(ExecutionTest, SERNOPValuesTest) { + // SER: Test NOP HitObject default values + static const char *ShaderSrc = R"( + +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : 
register(b0); + +float getFloatZero() { return 0.0f; } +int getIntZero() { return 0; } +int getIntOne() { return 1; } +float3 getVec3Zero() { return (float3)0; } + +struct [raypayload] PerRayData +{ + int unused : read() : write(); +}; + +#ifdef MATRIX_ELEMENT_TYPE +matrix getOneDiagonalMat() { + matrix mat = 0; + mat[0][0] = 1.f; + mat[1][1] = 1.f; + mat[2][2] = 1.f; + return mat; +} +#endif + +#if TEST_ATTRIBUTES +struct CustomAttrs { + uint x; + uint y; + uint z; + uint w; +}; +#endif + +[shader("raygeneration")] +void raygen() +{ + dx::HitObject hitObject = dx::HitObject::MakeNop(); +#if TEST_ATTRIBUTES + CustomAttrs attrs; +#if NEW_GETATTRIBUTES_API + hitObject.GetAttributes(attrs); +#else + attrs = hitObject.GetAttributes(); +#endif + testBuffer[0] = attrs.x; + testBuffer[1] = attrs.y; + testBuffer[2] = attrs.z; + testBuffer[3] = attrs.w; +#else + const bool pass = hitObject.HITOBJECT_GET_RESULT() == NOP_GET_RESULT(); + testBuffer[0] = pass ? 1 : 0; + PerRayData pld; + dx::HitObject::Invoke(hitObject, pld); +#endif +} + + +[shader("miss")] +void miss(inout PerRayData payload) +{ + testBuffer[1] = 1; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + testBuffer[3] = 1; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + testBuffer[2] = 1; +} + + +)"; + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Test GetAttributes<> on NOP HitObject + { + WEX::Logging::Log::Comment(L"Testing NOPHitObject::GetAttributes"); + + LPCWSTR Args[] = { + L"-HV 2021", + L"-Vd", + L"-DTEST_ATTRIBUTES=1", + }; + + std::vector TestData(4, 0); + DXRRunConfig RunConfig = {1, 1, true, false, 1}; + RunConfig.AttributeCount = 4; + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, (int)std::size(Args), + TestData, RunConfig); + + // Expect zero-init of attribute structure + VERIFY_ARE_EQUAL(TestData[0], 0); + 
VERIFY_ARE_EQUAL(TestData[1], 0); + VERIFY_ARE_EQUAL(TestData[2], 0); + VERIFY_ARE_EQUAL(TestData[3], 0); + } + + for (const auto &Accessor : Accessors) { + std::wstring TestConfigTxt = L"NOPHitObject::"; + TestConfigTxt += Accessor.HitObjectGetter; + + { + std::wstring TestingMsg = L"Testing " + TestConfigTxt; + WEX::Logging::Log::Comment(TestingMsg.c_str()); + } + + std::vector Args; + std::vector OwnedArgs; + Args.push_back(L"-HV 2021"); + Args.push_back(L"-Vd"); + Accessor.addCompileArgs(OwnedArgs, Args); + + std::vector TestData(4, 0); + DXRRunConfig RunConfig = {1, 1, true, false, 1}; + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args.data(), (int)Args.size(), + TestData, RunConfig); + + VERIFY_ARE_EQUAL(TestData[0], 1); // hitObject.GET == expected nop value + VERIFY_ARE_EQUAL(TestData[1], 0); // miss NOT called + VERIFY_ARE_EQUAL(TestData[2], 0); // closesthit NOT called + VERIFY_ARE_EQUAL(TestData[3], 0); // anyhit NOT called + } +} + +TEST_F(ExecutionTest, SERMultiPayloadTest) { + static const char *ShaderSrc = R"( + +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +// Procedural geometry for use by RayQuery and intersection shader +static const int ProceduralHitKind = 11; + +struct CustomAttrs +{ + float dist; +}; + +bool evalIntersection(float3 objRayOrigin, float3 objRayDir, float rayTMax, 
float rayTMin, out CustomAttrs attrs, out float rayT) +{ + rayT = 0; + // Intersection with circle on a plane (base, n, radius) + // hitPos is intersection point with plane (base, n) + float3 base = {0.0f,0.0f,0.5f}; + float3 n = normalize(float3(0.0f,0.5f,0.5f)); + float radius = 500.f; + // Plane hit + float t = dot(n, base - objRayOrigin) / dot(n, objRayDir); + if (t > rayTMax || t < rayTMin) { + return false; + } + float3 hitPos = objRayOrigin + t * objRayDir; + float3 relHitPos = hitPos - base; + // Circle hit + float hitDist = length(relHitPos); + if (hitDist > radius) + return false; + + attrs.dist = hitDist; + rayT = t; + return true; +} + +#if ENABLE_PAQS +#define READ_PAQS(X, ...) : read(X, __VA_ARGS__) +#define WRITE_PAQS(X, ...) : write(X, __VA_ARGS__) +#else +#define READ_PAQS(X, ...) +#define WRITE_PAQS(X, ...) +#endif + +struct +#if ENABLE_PAQS +[raypayload] +#endif +PayloadA +{ + float unusedPad READ_PAQS(caller, anyhit, closesthit) WRITE_PAQS(anyhit, closesthit, caller); + uint ahCounter READ_PAQS(caller,anyhit) WRITE_PAQS(anyhit,caller); + float unusedPad2 READ_PAQS(caller) WRITE_PAQS(closesthit,miss); + uint chCounter READ_PAQS(caller,closesthit) WRITE_PAQS(closesthit,caller); +#if ENABLE_RECURSION + int recursionDepth READ_PAQS(caller,miss,closesthit) WRITE_PAQS(caller); +#endif + uint aabbCHCounter READ_PAQS(caller,closesthit) WRITE_PAQS(closesthit,caller); + uint aabbAHCounter READ_PAQS(caller,anyhit) WRITE_PAQS(anyhit,caller); + uint missCounter READ_PAQS(caller,miss) WRITE_PAQS(miss,caller); +}; + +struct +#if ENABLE_PAQS +[raypayload] +#endif +PayloadB +{ + uint ahCounter READ_PAQS(caller,anyhit) WRITE_PAQS(anyhit,caller); + float unusedPad READ_PAQS(caller, anyhit, closesthit) WRITE_PAQS(anyhit, closesthit, caller); + float unusedPad2 READ_PAQS(caller) WRITE_PAQS(closesthit,miss); + uint chCounter READ_PAQS(caller,closesthit) WRITE_PAQS(closesthit,caller); + uint aabbCHCounter READ_PAQS(caller,closesthit) WRITE_PAQS(closesthit,caller); + 
uint aabbAHCounter READ_PAQS(caller,anyhit) WRITE_PAQS(anyhit,caller); + uint missCounter READ_PAQS(caller,miss) WRITE_PAQS(miss,caller); +#if ENABLE_RECURSION + int recursionDepth READ_PAQS(caller,miss,closesthit) WRITE_PAQS(caller); +#endif +}; + +/// Result tracking +static const uint NumRayResults = 10; +static void storeRayResult(int resIdx, uint value) { + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + int baseIdx = NumRayResults * (launchIndex.x + launchIndex.y * launchDim.x); + testBuffer[baseIdx + resIdx] += value; +} + +void RunTest(int recursionDepth) +{ + RayDesc baseRay = ComputeRay(); + + PayloadA pldA; +#if ENABLE_RECURSION + pldA.recursionDepth = recursionDepth; +#endif + pldA.ahCounter = 0; + pldA.chCounter = 0; + pldA.aabbCHCounter = 0; + pldA.aabbAHCounter = 0; + pldA.missCounter = 0; + + // First HitObject::TraceRay() + dx::HitObject hitA = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_TRIANGLES, 0xFF, 0, 1, 0, baseRay, pldA); + + // Second HitObject::TraceRay() while other HitObject is live + PayloadB pldB; +#if ENABLE_RECURSION + pldB.recursionDepth = recursionDepth; +#endif + pldB.ahCounter = 0; + pldB.chCounter = 0; + pldB.aabbCHCounter = 0; + pldB.aabbAHCounter = 0; + pldB.missCounter = 0; + RayDesc rayB = baseRay; + rayB.Origin.x += 0.1f; + dx::HitObject hitB = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, 1, 1, 1, rayB, pldB); + + // TraceRay() while HitObject is live + TraceRay(topObject, RAY_FLAG_SKIP_TRIANGLES, 0xFF, 0, 1, 0, baseRay, pldA); + + // Concurrent HitObject with complex control flow + dx::HitObject loopHit; + int dynamicBound = hitA.GetGeometryIndex(); + for (int i = 0; i < dynamicBound + 5; ++i) { + RayDesc loopRay = baseRay; + loopRay.Origin.y += 0.001f * i; + loopHit = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_TRIANGLES, 0xFF, 1, 1, 1, loopRay, pldB); +#if !ENABLE_RECURSION + dx::MaybeReorderThread(loopHit); +#endif + } + + // 
Invoke all HitObject (repeatedly) + loopHit.SetShaderTableIndex(0); // pldA <- pldB + dx::HitObject::Invoke(loopHit, pldA); + hitA.SetShaderTableIndex(1); // pldB <- pldA + int differentDynamicBound = hitA.GetInstanceIndex(); + for (int i = 0; i < differentDynamicBound + 3; ++i) { + dx::HitObject::Invoke(hitA, pldB); + } + dx::HitObject::Invoke(hitB, pldB); + + // Write individual counters to distinct result slots + // PayloadA + storeRayResult(0, pldA.ahCounter); + storeRayResult(1, pldA.chCounter); + storeRayResult(2, pldA.aabbCHCounter); + storeRayResult(3, pldA.aabbAHCounter); + storeRayResult(4, pldA.missCounter); + // PayloadB + storeRayResult(5, pldB.ahCounter); + storeRayResult(6, pldB.chCounter); + storeRayResult(7, pldB.aabbCHCounter); + storeRayResult(8, pldB.aabbAHCounter); + storeRayResult(9, pldB.missCounter); +} + +[shader("raygeneration")] +void raygen() +{ +#if ENABLE_RECURSION + RayDesc ray = ComputeRay(); + PayloadA recPayload; + recPayload.recursionDepth = 1; + dx::HitObject missObject = dx::HitObject::MakeMiss(RAY_FLAG_NONE, 2, ray); + dx::HitObject::Invoke(missObject, recPayload); + +#else + RunTest(1); +#endif +} + + +///// Miss shaders +[shader("miss")] +void miss(inout PayloadA payload) +{ + payload.missCounter++; +} + +[shader("miss")] +void miss1(inout PayloadB payload) +{ + payload.missCounter++; +} + +#if ENABLE_RECURSION +[shader("miss")] +void miss2(inout PayloadA payload) +{ + if (payload.recursionDepth == 1) + { + RunTest(payload.recursionDepth + 1); + return; + } +} +#endif + +///// Triangle HitGroup 0 +[shader("anyhit")] +void anyhit(inout PayloadA payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + payload.ahCounter++; +} + +[shader("closesthit")] +void closesthit(inout PayloadA payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + payload.chCounter++; +} + +///// Triangle HitGroup 1 +[shader("anyhit")] +void anyhit1(inout PayloadB payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + 
payload.ahCounter++; +} + +[shader("closesthit")] +void closesthit1(inout PayloadB payload, in BuiltInTriangleIntersectionAttributes attrs) +{ + payload.chCounter++; +} + + +///// Procedural HitGroup 0 +[shader("closesthit")] +void chAABB(inout PayloadA payload, in CustomAttrs customAttrs) +{ + payload.aabbCHCounter++; +} + +[shader("anyhit")] +void ahAABB(inout PayloadA payload, in CustomAttrs attrs) +{ + payload.aabbAHCounter++; +} + +[shader("intersection")] +void intersection() +{ + CustomAttrs attrs = {0}; + float rayT; + if (evalIntersection(ObjectRayOrigin(), ObjectRayDirection(), RayTCurrent(), RayTMin(), attrs, rayT)) { + ReportHit(rayT, ProceduralHitKind, attrs); + } +} + + +///// Procedural HitGroup 1 +[shader("closesthit")] +void chAABB1(inout PayloadB payload, in CustomAttrs customAttrs) +{ + payload.aabbCHCounter++; +} + +[shader("anyhit")] +void ahAABB1(inout PayloadB payload, in CustomAttrs attrs) +{ + payload.aabbAHCounter++; +} + +[shader("intersection")] +void intersection1() +{ + CustomAttrs attrs = {0}; + float rayT; + if (evalIntersection(ObjectRayOrigin(), ObjectRayDirection(), RayTCurrent(), RayTMin(), attrs, rayT)) { + ReportHit(rayT, ProceduralHitKind, attrs); + } +} + +)"; + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + struct PayloadTestConfig { + bool EnablePAQs; + bool EnableRecursion; + + void addCompileArgs(std::vector &OwnedArgs, + std::vector &ArgVec) const { + (void) OwnedArgs; + if (EnablePAQs) { + ArgVec.push_back(L"-DENABLE_PAQS=1"); + } else { + ArgVec.push_back(L"-disable-payload-qualifiers"); + } + if (EnableRecursion) { + ArgVec.push_back(L"-DENABLE_RECURSION=1"); + } + } + }; + + // Expected histogram results for each result key, as {value, count} pairs. 
+ static const std::map ExpectedResults[10] = { + // result key 0 + {{0, 4096}}, + // result key 1 + {{0, 847}, {1, 3249}}, + // result key 2 + {{0, 847}, {1, 3249}}, + // result key 3 + {{0, 847}, {2, 3249}}, + // result key 4 + {{0, 3249}, {2, 847}}, + // result key 5 + {{0, 4030}, {1, 66}}, + // result key 6 + {{0, 847}, {4, 3183}, {5, 66}}, + // result key 7 + {{0, 4096}}, + // result key 8 + {{0, 847}, {5, 3249}}, + // result key 9 + {{0, 66}, {1, 3183}, {4, 847}}}; + + const int WindowSize = 64; + const int NumRayResults = 10; + + std::vector TestConfigs; + for (bool EnablePAQs : {false, true}) { + for (bool EnableRecursion : {false, true}) { + PayloadTestConfig TestConfig; + TestConfig.EnablePAQs = EnablePAQs; + TestConfig.EnableRecursion = EnableRecursion; + TestConfigs.push_back(TestConfig); + } + } + + for (const auto &TestConfig : TestConfigs) { + std::vector TestData(WindowSize * WindowSize * NumRayResults, 0); + DXRRunConfig RunConfig = {WindowSize, WindowSize, true, true, 1}; + RunConfig.PayloadCount = 7 + TestConfig.EnableRecursion; + RunConfig.NumMissShaders = 2 + TestConfig.EnableRecursion; + RunConfig.NumHitGroups = 2; + RunConfig.MaxRecursion = 1 + TestConfig.EnableRecursion; + + std::vector Args; + std::vector OwnedArgs; + Args.push_back(L"-HV 2021"); + Args.push_back(L"-Vd"); + TestConfig.addCompileArgs(OwnedArgs, Args); + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args.data(), (int)Args.size(), + TestData, RunConfig); + + for (int ResIdx = 0; ResIdx < NumRayResults; ++ResIdx) { + std::map Histo; + for (int RayIdx = 0; RayIdx < WindowSize * WindowSize; ++RayIdx) { + int Val = TestData[ResIdx + (NumRayResults * RayIdx)]; + ++Histo[Val]; + } + for (auto [Key, Value] : Histo) { + VERIFY_IS_TRUE(ExpectedResults[ResIdx].count(Key)); + const int ExpectedValue = ExpectedResults[ResIdx].at(Key); + VERIFY_ARE_EQUAL(Value, ExpectedValue); + } + } + } +} + +TEST_F(ExecutionTest, SERBasicTest) { + // SER: Test basic functionality. 
+ static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // SER Test + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + dx::MaybeReorderThread(hitObject); + dx::HitObject::Invoke(hitObject, payload); + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 
2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL(Histo[2], 4030); + VERIFY_ARE_EQUAL(Histo[5], 66); +} + +TEST_F(ExecutionTest, SERShaderTableIndexTest) { + // Test SER with HitObject::SetShaderTableIndex and + // HitObject::GetShaderTableIndex + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +struct CustomAttrs +{ + float dist; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, 0, 1, 0, ray, payload); + dx::MaybeReorderThread(hitObject); + + // Invoke hit/miss for triangle + dx::HitObject::Invoke( hitObject, payload 
); + + if (hitObject.IsHit()) + { + // Transform to an 'aabb' hit. + hitObject.SetShaderTableIndex( 1 ); + } + + // Invoke hit/miss for aabb + dx::HitObject::Invoke( hitObject, payload ); + + if (hitObject.IsHit()) + { + // Poison the test data if GetShaderTableIndex does not match SetShaderTableIndex. + if (hitObject.GetShaderTableIndex() != 1) + payload.visited = 12345; + } + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + if ((payload.visited & 4U) == 0) + payload.visited |= 4U; // First 'miss' invocation + else + payload.visited |= 8U; // Second 'miss' invocation +} + +// Triangles +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + AcceptHitAndEndSearch(); +} + +// Triangle closest hit +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; +} + +// AABB closest hit +[shader("closesthit")] +void chAABB(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 2U; +} + +// Procedural +[shader("intersection")] +void intersection() +{ + // UNUSED +} + +[shader("anyhit")] +void ahAABB(inout PerRayData payload, in CustomAttrs attrs) +{ + // UNUSED +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*mesh*/, + true /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL( + Histo[3], + 66); // 'closesthit' invoked at index 0, then 'chAABB' invoked at index 1 + VERIFY_ARE_EQUAL(Histo[12], 4030); // Miss shader invoked twice +} + +TEST_F(ExecutionTest, SERLoadLocalRootTableConstantTest) { + // Test SER with HitObject::LoadLocalRootTableConstant + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint res : read(caller) : write(miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +struct LocalConstants +{ + int c0; + int c1; + int c2; + int c3; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); +ConstantBuffer localConstants : register(b1); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.res = 0; + + // SER Test +#if 1 + 
dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + dx::MaybeReorderThread(hitObject); + int c0 = hitObject.LoadLocalRootTableConstant(0); + int c1 = hitObject.LoadLocalRootTableConstant(4); + int c2 = hitObject.LoadLocalRootTableConstant(8); + int c3 = hitObject.LoadLocalRootTableConstant(12); + int res = c0 | c1 | c2 | c3; +#else + TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + int res = payload.res; +#endif + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = res; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.res = localConstants.c0 | localConstants.c1 | localConstants.c2 | localConstants.c3; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + // UNUSED +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.res = localConstants.c0 | localConstants.c1 | localConstants.c2 | localConstants.c3; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 1); + VERIFY_ARE_EQUAL(Histo[126], 4096); +} + +TEST_F(ExecutionTest, SERRayQueryTest) { + // Test SER RayQuery + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // Template parameter set at runtime before compilation + RayQuery rayQ; + + // Funtion parameter set at runtime before compilation + rayQ.TraceRayInline(topObject, RAY_FLAG_NONE, 0xFF, ray); + + // Storage for procedural primitive hit attributes + Attrs attrs; + attrs.barycentrics = float2(1, 1); + + while (rayQ.Proceed()) + { + switch (rayQ.CandidateType()) + { + + case 
CANDIDATE_NON_OPAQUE_TRIANGLE: + { + // The system has already determined that the candidate would be the closest + // hit so far in the ray extents + rayQ.CommitNonOpaqueTriangleHit(); + } + } + } + +#if 0 + switch (rayQ.CommittedStatus()) + { + case COMMITTED_TRIANGLE_HIT: + { + if (rayQ.CommittedTriangleFrontFace()) + { + // Hit + payload.visited |= 4U; + } + break; + } + case COMMITTED_PROCEDURAL_PRIMITIVE_HIT: + { + // Unused + break; + } + case COMMITTED_NOTHING: + { + // Miss + payload.visited |= 2U; + break; + } + } +#else + dx::HitObject hit; + if (rayQ.CommittedStatus() == COMMITTED_NOTHING) + { + hit = dx::HitObject::MakeMiss(RAY_FLAG_NONE, 0, ray); + } + else + { + hit = dx::HitObject::FromRayQuery(rayQ); + } + dx::MaybeReorderThread(hit); + dx::HitObject::Invoke(hit, payload); +#endif + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL(Histo[0], 66); + VERIFY_ARE_EQUAL(Histo[2], 4030); +} + +TEST_F(ExecutionTest, SERIntersectionTest) { + // Test SER with Intersection and procedural geometry + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit, closesthit, miss, caller) : write(anyhit, miss, closesthit, caller); +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x * sceneConstants.U.xyz + d.y * sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + +#if 0 + dx::HitObject hitObject; + TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); +#else + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, 
payload); + dx::MaybeReorderThread(hitObject); + dx::HitObject::Invoke(hitObject, payload); +#endif + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +[shader("intersection")] +void intersection() +{ + Attrs attrs; + + ReportHit(0.1, 0, attrs); +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. + const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, false /*mesh*/, + true /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 1); + VERIFY_ARE_EQUAL(Histo[5], 4096); // All rays hitting the procedural geometry +} + +TEST_F(ExecutionTest, SERGetAttributesTest) { + // Test SER with HitObject::GetAttributes + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct CustomAttrs +{ + float dist; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit, closesthit, miss, caller) : write(anyhit, miss, closesthit, caller); +}; + +// reordercoherent // Requires #7250 +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = 
DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x * sceneConstants.U.xyz + d.y * sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + dx::MaybeReorderThread(hitObject); + + // Check Attributes for hit detection. + CustomAttrs customAttrs; +#if NEW_GETATTRIBUTES_API + hitObject.GetAttributes(customAttrs); +#else + customAttrs = hitObject.GetAttributes(); +#endif + bool isHit = hitObject.IsHit(); + + int testVal = 0; + if (isHit) { + if (int(floor(customAttrs.dist)) % 2 == 0) + testVal = hitObject.GetHitKind(); + } + else + { + // Use 255 to keep outside the HitKind range [0,15] we passthru for hits. 
+ testVal = 255; + } + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = testVal; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + // UNUSED +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in CustomAttrs attrs) +{ + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in CustomAttrs attrs) +{ + // UNUSED +} + +[shader("intersection")] +void intersection() +{ + // Intersection with circle on a plane (base, n, radius) + // hitPos is intersection point with plane (base, n) + float3 base = {0.0f,0.0f,0.5f}; + float3 n = normalize(float3(0.0f,0.5f,0.5f)); + float radius = 500.f; + // Plane hit + float t = dot(n, base - ObjectRayOrigin()) / dot(n, ObjectRayDirection()); + if (t > RayTCurrent() || t < RayTMin()) + return; + float3 hitPos = ObjectRayOrigin() + t * ObjectRayDirection(); + float3 relHitPos = hitPos - base; + // Circle hit + float hitDist = length(relHitPos); + if (hitDist > radius) + return; + + CustomAttrs attrs; + attrs.dist = hitDist; + + // Generate wave-incoherent hitKind + uint2 launchIndex = DispatchRaysIndex().xy; + uint hitKind = 1U; + if (launchIndex.x >= 32) + hitKind |= 2U; + if (launchIndex.y >= 32) + hitKind |= 4U; + if ((launchIndex.x + launchIndex.y) % 2 == 0) + hitKind |= 8U; + + ReportHit(t, hitKind, attrs); +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, false /*mesh*/, + true /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + + VERIFY_ARE_EQUAL(Histo.size(), 10); + VERIFY_ARE_EQUAL(Histo[0], 1587); + VERIFY_ARE_EQUAL(Histo[1], 277); + VERIFY_ARE_EQUAL(Histo[3], 256); + VERIFY_ARE_EQUAL(Histo[5], 167); + VERIFY_ARE_EQUAL(Histo[7], 153); + VERIFY_ARE_EQUAL(Histo[9], 249); + VERIFY_ARE_EQUAL(Histo[11], 260); + VERIFY_ARE_EQUAL(Histo[13], 158); + VERIFY_ARE_EQUAL(Histo[15], 142); + VERIFY_ARE_EQUAL(Histo[255], 847); +} + +TEST_F(ExecutionTest, SERTraceHitMissNopTest) { + // Test SER with conditional HitObject::TraceRay, HitObject::IsHit, + // HitObject::IsMiss, HitObject::IsNop + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 
launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // SER Test + dx::HitObject hitObject; + if (launchIndex.x % 2 == 0) { + hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + } + dx::MaybeReorderThread(hitObject); + + // Check hitObject for hit detection. + if (hitObject.IsHit()) + payload.visited |= 4U; + if (hitObject.IsMiss()) + payload.visited |= 2U; + if (hitObject.IsNop()) + payload.visited |= 1U; + + dx::HitObject::Invoke(hitObject, payload); + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 16U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 8U; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 32U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*mesh*/, + false /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 3); + VERIFY_ARE_EQUAL( + Histo[1], + 2048); // isNop && !isMiss && !isHit && !anyhit && !closesthit && !miss + VERIFY_ARE_EQUAL( + Histo[18], + 2015); // !isNop && isMiss && !isHit && !anyhit && !closesthit && miss + VERIFY_ARE_EQUAL( + Histo[44], + 33); // !isNop && !isMiss && isHit && anyhit && closesthit && !miss +} + +TEST_F(ExecutionTest, SERIsMissTest) { + // Test SER with HitObject::IsMiss + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // 
SER Test + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + dx::MaybeReorderThread(hitObject); + dx::HitObject::Invoke(hitObject, payload); + + // Check hitObject for hit detection. + if (hitObject.IsMiss()) + payload.visited |= 2U; + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + // UNUSED +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, true)) + return; + + // Initialize test data. + const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*mesh*/, + false /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL(Histo[2], 4030); + VERIFY_ARE_EQUAL(Histo[5], 66); +} + +TEST_F(ExecutionTest, SERInvokeNoSBTTest) { + // Test SER RayQuery with Invoke + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 WindowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + float2 d = float2(DispatchRaysIndex().xy) / 
float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // Template parameter set at runtime before compilation + RayQuery rayQ; + + // Funtion parameter set at runtime before compilation + rayQ.TraceRayInline(topObject, RAY_FLAG_NONE, 0xFF, ray); + + // Storage for procedural primitive hit attributes + Attrs attrs; + attrs.barycentrics = float2(1, 1); + + while (rayQ.Proceed()) + { + switch (rayQ.CandidateType()) + { + case CANDIDATE_NON_OPAQUE_TRIANGLE: + { + // The system has already determined that the candidate would be the closest + // hit so far in the ray extents + rayQ.CommitNonOpaqueTriangleHit(); + } + } + } + + dx::HitObject hit = dx::HitObject::FromRayQuery(rayQ); + dx::MaybeReorderThread(hit); + // Set the payload based on the HitObject. + if (hit.IsHit()) + payload.visited |= 8U; + else + payload.visited |= 16U; + // Invoke should not trigger any shader. + dx::HitObject::Invoke(hit, payload); + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, false)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL(Histo[8], 66); + VERIFY_ARE_EQUAL(Histo[16], 4030); +} + +TEST_F(ExecutionTest, SERMaybeReorderThreadTest) { + // SER: Test MaybeReorderThread variants. + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 windowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + + if (launchIndex.x % 3 == 0) { + dx::MaybeReorderThread(hitObject); + } + else if (launchIndex.x % 3 == 1) { + 
dx::MaybeReorderThread(hitObject, 0xFF, 7); + } + else { + dx::MaybeReorderThread(0xFFF, 5); + } + + dx::HitObject::Invoke(hitObject, payload); + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, false)) + return; + + // Initialize test data. + const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 2); + VERIFY_ARE_EQUAL(Histo[2], 4030); + VERIFY_ARE_EQUAL(Histo[5], 66); +} + +TEST_F(ExecutionTest, SERDynamicHitObjectArrayTest) { + // Test SER with dynamic access to local HitObject array + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 windowSize; + int rayFlags; +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +struct[raypayload] PerRayData +{ + uint dummy : read(caller) : write(miss, closesthit); +}; + +struct LocalConstants +{ + int c0; + int c1; + int c2; + int c3; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); +ConstantBuffer localConstants : register(b1); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = 
DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x * sceneConstants.U.xyz + d.y * sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + int constants[4] = { localConstants.c0, localConstants.c1, localConstants.c2, localConstants.c3 }; + + const int NUM_SAMPLES = 64; + const int NUM_HITOBJECTS = 8; + + // Generate wave-incoerent sample positions + int sampleIndices[NUM_SAMPLES]; + int threadOffset = launchIndex.x; + for (int i = 0; i < NUM_SAMPLES; i++) + { + int baseIndex = i % 4; // Cycle through the 4 constants + sampleIndices[i] = abs(constants[baseIndex] + threadOffset + i * 3) % NUM_HITOBJECTS; + } + + // Define an array of ray flags + uint rayFlagsArray[NUM_HITOBJECTS] = { + RAY_FLAG_NONE, + RAY_FLAG_FORCE_OPAQUE, + RAY_FLAG_FORCE_NON_OPAQUE, + RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH, + RAY_FLAG_SKIP_CLOSEST_HIT_SHADER, + RAY_FLAG_CULL_BACK_FACING_TRIANGLES, + RAY_FLAG_CULL_FRONT_FACING_TRIANGLES, + RAY_FLAG_CULL_OPAQUE + }; + + // Create a local array of HitObjects with TraceRay + dx::HitObject hitObjects[NUM_HITOBJECTS]; + for (uint i = 0; i < NUM_HITOBJECTS; ++i) + { + PerRayData payload; + uint expectedRayFlags = rayFlagsArray[i]; + hitObjects[i] = dx::HitObject::TraceRay( + topObject, // Acceleration structure + expectedRayFlags, // Unique ray flag + 0xFF, // Instance mask + 0, // Ray contribution to hit group index + 1, // Multiplier for geometry contribution + 0, // Miss shader index + ray, // Ray description + payload // Payload + ); + } + + // Evaluate at sample positions. 
+ int testVal = 0; + + for (uint i = 0; i < NUM_SAMPLES; i++) + { + int idx = sampleIndices[i]; + // Verify that the rayFlags match + uint actualRayFlags = hitObjects[idx].GetRayFlags(); + uint expectedRayFlags = rayFlagsArray[idx]; + if (expectedRayFlags != actualRayFlags) + { + testVal = 1; // Mark as failure if flags do not match + } + } + + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = testVal; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + // UNUSED +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + // UNUSED +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, false)) + return; + + // Initialize test data. + const int WindowSize = 64; + + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*mesh*/, + false /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 1); + VERIFY_ARE_EQUAL(Histo[0], 4096); +} + +TEST_F(ExecutionTest, SERWaveIncoherentHitTest) { + // Test SER with wave incoherent conditional assignment of HitObject values + // with and without procedural attributes. 
+ static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 windowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +struct CustomAttrs +{ + float dist; +}; + +RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +static const uint ProceduralHitKind = 11; + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + dx::HitObject hitObject; + + int cat = (launchIndex.x + launchIndex.y) % 4; + + // Use wave incoherence to decide how to create the HitObject + if (cat == 1) + { + // Turn this into an expected miss by moving eye behind triangles + ray.Origin.z -= 1000.0f; + hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, 0, 0, 0, ray, payload); + } + else if (cat == 2) + { + hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, 0, 0, 0, ray, payload); + } + else if (cat == 3) + { + hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_SKIP_TRIANGLES, 0xFF, 0, 0, 0, ray, payload); + } + + dx::MaybeReorderThread(hitObject); + + if (hitObject.IsNop()) + 
payload.visited |= 1U; + if (hitObject.IsMiss()) + payload.visited |= 2U; + + if (hitObject.GetHitKind() == ProceduralHitKind) + payload.visited |= 8U; + else if (hitObject.IsHit()) + payload.visited |= 4U; + + dx::HitObject::Invoke(hitObject, payload); + + // Store the result in the buffer + int id = launchIndex.x + launchIndex.y * launchDim.x; + testBuffer[id] = payload.visited; +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + // UNUSED +} + +// Triangles +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + AcceptHitAndEndSearch(); +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 16U; +} + +// Procedural +[shader("closesthit")] +void chAABB(inout PerRayData payload, in CustomAttrs attrs) +{ + payload.visited |= 32U; +} + +[shader("anyhit")] +void ahAABB(inout PerRayData payload, in CustomAttrs attrs) +{ + // UNUSED +} + +[shader("intersection")] +void intersection() +{ + // Intersection with circle on a plane (base, n, radius) + // hitPos is intersection point with plane (base, n) + float3 base = {0.0f,0.0f,0.5f}; + float3 n = normalize(float3(0.0f,0.5f,0.5f)); + float radius = 500.f; + // Plane hit + float t = dot(n, base - ObjectRayOrigin()) / dot(n, ObjectRayDirection()); + if (t > RayTCurrent() || t < RayTMin()) { + return; + } + float3 hitPos = ObjectRayOrigin() + t * ObjectRayDirection(); + float3 relHitPos = hitPos - base; + // Circle hit + float hitDist = length(relHitPos); + if (hitDist > radius) + return; + + CustomAttrs attrs; + attrs.dist = hitDist; + ReportHit(t, ProceduralHitKind, attrs); +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, false)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*mesh*/, + true /*procedural geometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + + VERIFY_ARE_EQUAL(Histo.size(), 4); + VERIFY_ARE_EQUAL(Histo[1], 1024); // nop + VERIFY_ARE_EQUAL(Histo[2], 2243); // miss + VERIFY_ARE_EQUAL(Histo[20], 16); // triangle hit + VERIFY_ARE_EQUAL(Histo[40], 813); // procedural hit +} + +TEST_F(ExecutionTest, SERReorderCoherentTest) { + // SER: Test reordercoherent + static const char *ShaderSrc = R"( +struct SceneConstants +{ + float4 eye; + float4 U; + float4 V; + float4 W; + float sceneScale; + uint2 windowSize; + int rayFlags; +}; + +struct[raypayload] PerRayData +{ + uint visited : read(anyhit,closesthit,miss,caller) : write(anyhit,miss,closesthit,caller); +}; + +struct Attrs +{ + float2 barycentrics : BARYCENTRICS; +}; + +reordercoherent RWStructuredBuffer testBuffer : register(u0); +RaytracingAccelerationStructure topObject : register(t0); +ConstantBuffer sceneConstants : register(b0); + +RayDesc ComputeRay() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + + float2 d = float2(DispatchRaysIndex().xy) / float2(DispatchRaysDimensions().xy) * 2.0f - 1.0f; + RayDesc ray; + ray.Origin = sceneConstants.eye.xyz; + ray.Direction = normalize(d.x*sceneConstants.U.xyz + d.y*sceneConstants.V.xyz + sceneConstants.W.xyz); + ray.TMin = 0; + ray.TMax = 1e18; + + return ray; +} + +[shader("raygeneration")] +void raygen() +{ + uint2 launchIndex = DispatchRaysIndex().xy; + uint2 launchDim = DispatchRaysDimensions().xy; + uint threadId = launchIndex.x + launchIndex.y * launchDim.x; + + RayDesc ray = ComputeRay(); + + PerRayData payload; + payload.visited = 0; + + // Initial test value. 
+ testBuffer[threadId] = threadId; + + dx::HitObject hitObject = dx::HitObject::TraceRay(topObject, RAY_FLAG_NONE, 0xFF, 0, 1, 0, ray, payload); + + // Conditionally update the test value. + if (hitObject.IsHit()) + { + testBuffer[threadId] += 10; // Add 10 to hits + } + else + { + testBuffer[threadId] += 20; // Add 20 to misses + } + + Barrier(UAV_MEMORY, REORDER_SCOPE); + dx::MaybeReorderThread(hitObject); + + // Conditionally update the test value. + if (threadId % 2 == 0) + { + testBuffer[threadId] += 1000; // Add 1000 to even threads + } + else + { + testBuffer[threadId] += 2000; // Add 2000 to odd threads + } + + // Verify test value. + uint expectedValue = (hitObject.IsHit() ? threadId + 10 : threadId + 20); + expectedValue += (threadId % 2 == 0 ? 1000 : 2000); + if (testBuffer[threadId] != expectedValue) + { + // Mark failure in the buffer if the result does not match + testBuffer[threadId] = 0; + } + else + { + testBuffer[threadId] = 1; + } +} + +[shader("miss")] +void miss(inout PerRayData payload) +{ + payload.visited |= 2U; +} + +[shader("anyhit")] +void anyhit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 1U; +} + +[shader("closesthit")] +void closesthit(inout PerRayData payload, in Attrs attrs) +{ + payload.visited |= 4U; +} + +)"; + + CComPtr Device; + if (!CreateDXRDevice(&Device, D3D_SHADER_MODEL_6_9, false)) + return; + + // Initialize test data. 
+ const int WindowSize = 64; + std::vector TestData(WindowSize * WindowSize, 0); + LPCWSTR Args[] = {L"-HV 2021", L"-Vd"}; + + RunDXRTest(Device, ShaderSrc, L"lib_6_9", Args, _countof(Args), TestData, + WindowSize, WindowSize, true /*useMesh*/, + false /*useProceduralGeometry*/, 1 /*payloadCount*/, + 2 /*attributeCount*/); + std::map Histo; + for (int Val : TestData) + ++Histo[Val]; + VERIFY_ARE_EQUAL(Histo.size(), 1); + VERIFY_ARE_EQUAL(Histo[1], 4096); +} \ No newline at end of file diff --git a/tools/clang/unittests/HLSLExec/LongVectors.h b/tools/clang/unittests/HLSLExec/LongVectors.h new file mode 100644 index 0000000000..eb3b37a570 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.h @@ -0,0 +1,414 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +template struct LongVectorOpTestConfig; // Forward declaration +enum LongVectorOpType; // Forward declaration + +// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes. +// Take int32_t as a constuctor argument and convert it to bool when needed. +// Comparisons cast to a bool because we only care if the bool representation is +// true or false. 
struct HLSLBool_t {
  HLSLBool_t() = default;
  // Implicit on purpose: values read back from a shader buffer (int32_t) and
  // native C++ bools are assigned straight into HLSLBool_t elements.
  HLSLBool_t(int32_t val) : val(val) {}
  HLSLBool_t(bool val) : val(val) {}

  // All comparisons operate on the bool representation: any non-zero raw
  // value is 'true'. Ordering follows C++ bool ordering (false < true), so
  // the relational operators always agree with == and !=.
  bool operator==(const HLSLBool_t &other) const {
    return static_cast<bool>(val) == static_cast<bool>(other.val);
  }

  bool operator!=(const HLSLBool_t &other) const {
    return static_cast<bool>(val) != static_cast<bool>(other.val);
  }

  bool operator<(const HLSLBool_t &other) const {
    return static_cast<bool>(val) < static_cast<bool>(other.val);
  }

  bool operator>(const HLSLBool_t &other) const {
    return static_cast<bool>(val) > static_cast<bool>(other.val);
  }

  bool operator<=(const HLSLBool_t &other) const {
    return static_cast<bool>(val) <= static_cast<bool>(other.val);
  }

  bool operator>=(const HLSLBool_t &other) const {
    return static_cast<bool>(val) >= static_cast<bool>(other.val);
  }

  // Arithmetic is done on the normalized 0/1 values. This keeps the truth
  // value of the result correct for any raw encoding of 'true' (e.g. -1 + 1
  // must not collapse to false) and rules out signed overflow.
  HLSLBool_t operator*(const HLSLBool_t &other) const {
    return HLSLBool_t(static_cast<int32_t>(static_cast<bool>(val)) *
                      static_cast<int32_t>(static_cast<bool>(other.val)));
  }

  HLSLBool_t operator+(const HLSLBool_t &other) const {
    return HLSLBool_t(static_cast<int32_t>(static_cast<bool>(val)) +
                      static_cast<int32_t>(static_cast<bool>(other.val)));
  }

  // So we can construct std::wstrings using std::wostream
  friend std::wostream &operator<<(std::wostream &os, const HLSLBool_t &obj) {
    os << static_cast<bool>(obj.val);
    return os;
  }

  // So we can construct std::strings using std::ostream
  friend std::ostream &operator<<(std::ostream &os, const HLSLBool_t &obj) {
    os << static_cast<bool>(obj.val);
    return os;
  }

  // Raw 32-bit value, copied to and from shader buffers as-is.
  int32_t val = 0;
};

// No native float16 type in C++ until C++23. So we use uint16_t to represent
// it. Simple little wrapping struct to help handle the right behavior.
+struct HLSLHalf_t { + HLSLHalf_t() : val(0) {} + HLSLHalf_t(DirectX::PackedVector::HALF val) : val(val) {} + HLSLHalf_t(const HLSLHalf_t &other) : val(other.val) {} + + bool operator==(const HLSLHalf_t &other) const { return val == other.val; } + + bool operator<(const HLSLHalf_t &other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(val) < + DirectX::PackedVector::XMConvertHalfToFloat(other.val); + } + + bool operator>(const HLSLHalf_t &other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(val) > + DirectX::PackedVector::XMConvertHalfToFloat(other.val); + } + + // Used by tolerance checks in the tests. + bool operator>(float d) const { + float a = DirectX::PackedVector::XMConvertHalfToFloat(val); + return a > d; + } + + bool operator<(float d) const { + float a = DirectX::PackedVector::XMConvertHalfToFloat(val); + return a < d; + } + + bool operator<=(const HLSLHalf_t &other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(val) <= + DirectX::PackedVector::XMConvertHalfToFloat(other.val); + } + + bool operator>=(const HLSLHalf_t &other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(val) >= + DirectX::PackedVector::XMConvertHalfToFloat(other.val); + } + + bool operator!=(const HLSLHalf_t &other) const { return val != other.val; } + + HLSLHalf_t operator*(const HLSLHalf_t &other) const { + float a = DirectX::PackedVector::XMConvertHalfToFloat(val); + float b = DirectX::PackedVector::XMConvertHalfToFloat(other.val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(a * b)); + } + + HLSLHalf_t operator+(const HLSLHalf_t &other) const { + float a = DirectX::PackedVector::XMConvertHalfToFloat(val); + float b = DirectX::PackedVector::XMConvertHalfToFloat(other.val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(a + b)); + } + + HLSLHalf_t operator-(const HLSLHalf_t &other) const { + float a = DirectX::PackedVector::XMConvertHalfToFloat(val); + float b = 
DirectX::PackedVector::XMConvertHalfToFloat(other.val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(a - b)); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &os, const HLSLHalf_t &obj) { + os << DirectX::PackedVector::XMConvertHalfToFloat(obj.val); + return os; + } + + // So we can construct std::wstrings using std::wostream + friend std::ostream &operator<<(std::ostream &os, const HLSLHalf_t &obj) { + os << DirectX::PackedVector::XMConvertHalfToFloat(obj.val); + return os; + } + + // HALF is an alias to uint16_t + DirectX::PackedVector::HALF val = 0; +}; + +// Helper to fill the shader buffer based on type. Convenient to be used when +// copying HLSL*_t types so we can copy the underlying type directly instead of +// the struct. +template +void FillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, + std::array &TestData) { + + // Note: DataSize for HLSLHalf_t and HLSLBool_t may be larger than the + // underlying type in some cases. Thats fine. Resize just makes sure we have + // enough space. + const size_t DataSize = sizeof(T) * N; + ShaderBuffer.resize(DataSize); + + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + ShaderBufferPtr[i] = TestData[i].val; + } + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + ShaderBufferPtr[i] = TestData[i].val; + } + } else { + T *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + ShaderBufferPtr[i] = TestData[i]; + } + } +} + +// Helper to fill the test data from the shader buffer based on type. Convenient +// to be used when copying HLSL*_t types so we can use the underlying type. 
+template +void FillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, + std::array &TestData) { + + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + // HLSLHalf_t has a DirectX::PackedVector::HALF based constructor. + TestData[i] = ShaderBufferPtr[i]; + } + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + // HLSLBool_t has a int32_t based constructor. + TestData[i] = ShaderBufferPtr[i]; + } + } else { + T *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < N; ++i) { + TestData[i] = ShaderBufferPtr[i]; + } + } +} + +enum LongVectorOpType { + LongVectorOpType_ScalarAdd, + LongVectorOpType_ScalarMultiply, + LongVectorOpType_Multiply, + LongVectorOpType_Add, + LongVectorOpType_Min, + LongVectorOpType_Max, + LongVectorOpType_Clamp, + LongVectorOpType_Initialize, + LongVectorOpType_UnInitialized +}; + +// Used to pass into LongVectorOpTestBase +template struct LongVectorOpTestConfig { + LongVectorOpTestConfig() = default; + + LongVectorOpTestConfig(LongVectorOpType OpType) : OpType(OpType) { + IntrinsicString = ""; + + if (IsFloatingPointType()) + Tolerance = 1; + + switch (OpType) { + case LongVectorOpType_ScalarAdd: + OperatorString = "+"; + IsScalarOp = true; + break; + case LongVectorOpType_ScalarMultiply: + OperatorString = "*"; + IsScalarOp = true; + break; + case LongVectorOpType_Multiply: + OperatorString = "*"; + break; + case LongVectorOpType_Add: + OperatorString = "+"; + break; + case LongVectorOpType_Min: + OperatorString = ","; + IntrinsicString = "min"; + break; + case LongVectorOpType_Max: + OperatorString = ","; + IntrinsicString = "max"; + break; + case LongVectorOpType_Clamp: + OperatorString = ","; + IntrinsicString = "TestClamp"; + IsBinaryOp = false; + break; + case LongVectorOpType_Initialize: + 
IntrinsicString = "TestInitialize"; + IsBinaryOp = false; + break; + default: + VERIFY_FAIL("Invalid LongVectorOpType"); + } + } + + bool IsFloatingPointType() const { + return std::is_same_v || std::is_same_v || + std::is_same_v; + } + + // A helper to get the hlsl type as a string for a given C++ type. + // Used in the long vector tests. + std::string GetHLSLTypeString() { + if (std::is_same_v) + return "bool"; + if (std::is_same_v) + return "half"; + if (std::is_same_v) + return "float"; + if (std::is_same_v) + return "double"; + if (std::is_same_v) + return "int16_t"; + if (std::is_same_v) + return "int"; + if (std::is_same_v) + return "int64_t"; + if (std::is_same_v) + return "uint16_t"; + if (std::is_same_v) + return "uint32_t"; + if (std::is_same_v) + return "uint64_t"; + + std::string ErrStr("GetHLSLTypeString() Unsupported type: "); + ErrStr.append(typeid(T).name()); + VERIFY_IS_TRUE(false, ErrStr.c_str()); + return "UnknownType"; + } + + // To be used for the value of -DOPERATOR + std::string OperatorString; + // To be used for the value of -DFUNC + std::string IntrinsicString; + // Optional, can be used to override shader code. + bool IsScalarOp = false; + bool IsBinaryOp = true; + float Tolerance = 0.0; + LongVectorOpType OpType = LongVectorOpType_UnInitialized; +}; + +template struct LongVectorTestTraits { + std::uniform_int_distribution UD = std::uniform_int_distribution( + std::numeric_limits::min(), std::numeric_limits::max()); +}; + +template <> struct LongVectorTestTraits { + // Float values for this were taken from Microsoft online documentation for + // the DirectX HALF data type. HALF is equivalent to IEEE 754 binary 16 + // format. 
+ std::uniform_int_distribution UD = + std::uniform_int_distribution( + DirectX::PackedVector::XMConvertFloatToHalf(float(6.10e-5f)), + DirectX::PackedVector::XMConvertFloatToHalf(float(65504.0f))); +}; + +template <> struct LongVectorTestTraits { + std::uniform_int_distribution UD = + std::uniform_int_distribution(0u, 1u); +}; + +template <> struct LongVectorTestTraits { + // The ranges for generation. A std::uniform_real_distribution can only + // have a range that is equal to the types largest value. This is due to + // precision issues. So instead we define some large values. + std::uniform_real_distribution UD = + std::uniform_real_distribution(-1e20f, 1e20f); +}; + +template <> struct LongVectorTestTraits { + // The ranges for generation. A std::uniform_real_distribution can only + // have a range that is equal to the types largest value. This is due to + // precision issues. So instead we define some large values. + std::uniform_real_distribution UD = + std::uniform_real_distribution(-1e100, 1e100); +}; + +template class DeterministicNumberGenerator { + // Mersenne Twister 'random' number generator. Generated numbers are based + // on the seed value and are deterministic for any given seed. + std::mt19937 Generator; + + LongVectorTestTraits UD; + +public: + DeterministicNumberGenerator(unsigned SeedValue) : Generator(SeedValue) {} + + T generate() { return UD.UD(Generator); } +}; + +template +bool DoArraysMatch(const std::array &ActualValues, + const std::array &ExpectedValues, float Tolerance) { + // Stash mismatched indexes for easy failure logging later + std::vector MismatchedIndexes; + for (size_t Index = 0; Index < N; ++Index) { + if constexpr (std::is_same_v) { + // Compiler was very picky and wanted an explicit case for any T that + // doesn't implement the operators in the below else. ( > and -). It + // wouldn't accept putting this constexpr as an or case with other + // statements. 
+ if (ActualValues[Index] != ExpectedValues[Index]) { + MismatchedIndexes.push_back(Index); + } + } else if constexpr (std::is_same_v) { + const DirectX::PackedVector::HALF a = ActualValues[Index].val; + const DirectX::PackedVector::HALF b = ExpectedValues[Index].val; + if (!CompareHalfULP(a, b, Tolerance)) { + MismatchedIndexes.push_back(Index); + } + } else if constexpr (std::is_same_v) { + const int IntTolerance = static_cast(Tolerance); + if (!CompareFloatULP(ActualValues[Index], ExpectedValues[Index], + IntTolerance)) { + MismatchedIndexes.push_back(Index); + } + } else if constexpr (std::is_same_v) { + const int64_t IntTolerance = static_cast(Tolerance); + if (!CompareDoubleULP(ActualValues[Index], ExpectedValues[Index], + IntTolerance)) { + MismatchedIndexes.push_back(Index); + } + } else if (Tolerance == 0 && ActualValues[Index] != ExpectedValues[Index]) { + MismatchedIndexes.push_back(Index); + } else { + T Diff = ActualValues[Index] > ExpectedValues[Index] + ? ActualValues[Index] - ExpectedValues[Index] + : ExpectedValues[Index] - ActualValues[Index]; + if (Diff > Tolerance) { + MismatchedIndexes.push_back(Index); + } + } + } + + if (MismatchedIndexes.empty()) + return true; + + if (!MismatchedIndexes.empty()) { + for (size_t Index : MismatchedIndexes) { + std::wstringstream Wss(L""); + Wss << L"Mismatch at Index: " << Index; + Wss << L" Actual Value:" << ActualValues[Index] << ","; + Wss << L" Expected Value:" << ExpectedValues[Index]; + WEX::Logging::Log::Error(Wss.str().c_str()); + } + } + + return false; +} diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index e768f205f1..5e95ad2502 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -3750,4 +3750,80 @@ void MSMain(uint GID : SV_GroupIndex, + + RootFlags(0), UAV(u0), UAV(u1), UAV(u2), + UAV(u3) + + + + + + + + + + + + + + + TestInitialize(vector Vector) + { + vector 
VectorCopy = Vector; + return VectorCopy; + } + #endif + + #ifdef FUNC_CLAMP + vector TestClamp(vector Vector, vector ClampArgMinMax) + { + TYPE ClampArgMin = ClampArgMinMax[0]; + TYPE ClampArgMax = ClampArgMinMax[1]; + return clamp(Vector, ClampArgMin, ClampArgMax); + } + #endif + + RWByteAddressBuffer g_InputFuncArgs : register(u0); + RWByteAddressBuffer g_InputVector1 : register(u1); + RWByteAddressBuffer g_InputVector2 : register(u2); + RWByteAddressBuffer g_OutputVector : register(u3); + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + + vector InputVector1 = g_InputVector1.Load< vector >(0); + + #ifdef IS_BINARY_VECTOR_OP + vector InputVector2 = g_InputVector2.Load< vector >(0); + #endif + + #ifdef IS_SCALAR_OP + TYPE InputScalar = g_InputFuncArgs.Load(0); + #endif + + #ifdef FUNC_CLAMP + TYPE Clamp_ArgMin = g_InputFuncArgs.Load(0); + TYPE Clamp_ArgMax = g_InputFuncArgs.Load(sizeof(TYPE)); + vector ClampArgMinMax = {Clamp_ArgMin, Clamp_ArgMax}; + #endif + + vector OutputVector = FUNC(InputVector1 OPERATOR OPERAND2); + + g_OutputVector.Store< vector >(0, OutputVector); + }; + ]]> + + diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp index 8dde3faa0b..e2ea375097 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp @@ -86,6 +86,9 @@ static void ShaderOpLogFmt(const wchar_t *fmt, ...) { // Check the specified HRESULT and return the success value. 
static HRESULT CHECK_HR_RET(HRESULT hr) { + if (FAILED(hr)) { + DebugBreak(); + } CHECK_HR(hr); return hr; } @@ -866,6 +869,11 @@ void ShaderOpTest::CreateShaders() { CHECK_HR(pLibrary->CreateBlobWithEncodingFromPinned( pText, (UINT32)strlen(pText), CP_UTF8, &pTextBlob)); CHECK_HR(m_pDxcSupport->CreateInstance(CLSID_DxcCompiler, &pCompiler)); + WEX::Logging::Log::Comment(L"Compiling shader:"); + ShaderOpLogFmt(L"\tTarget profile: %S", S.Target); + if (argumentsWList.size() > 0) { + ShaderOpLogFmt(L"\tArguments: %S", pArguments); + } CHECK_HR(pCompiler->Compile(pTextBlob, nameW, entryPointW, targetW, (LPCWSTR *)argumentsWList.data(), (UINT32)argumentsWList.size(), nullptr, 0,