diff --git a/CMakeLists.txt b/CMakeLists.txt index 4af6f90b753486..416a8182627a88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ option(ENABLE_HEVC "Enable HEVC encoders" ON) add_subdirectory(libobs) if(OS_WINDOWS) add_subdirectory(libobs-d3d11) + add_subdirectory(libobs-d3d12) add_subdirectory(libobs-winrt) endif() add_subdirectory(libobs-opengl) diff --git a/cmake/windows/helpers.cmake b/cmake/windows/helpers.cmake index b4d0bf4ddd5658..31922cec4d5e65 100644 --- a/cmake/windows/helpers.cmake +++ b/cmake/windows/helpers.cmake @@ -61,7 +61,12 @@ function(set_target_properties_obs target) elseif(target_type STREQUAL MODULE_LIBRARY) set_target_properties(${target} PROPERTIES VERSION 0 SOVERSION ${OBS_VERSION_CANONICAL}) - if(target STREQUAL libobs-d3d11 OR target STREQUAL libobs-opengl OR target STREQUAL libobs-winrt) + if( + target STREQUAL libobs-d3d11 + OR target STREQUAL libobs-d3d12 + OR target STREQUAL libobs-opengl + OR target STREQUAL libobs-winrt + ) set(target_destination "${OBS_EXECUTABLE_DESTINATION}") elseif(target STREQUAL "obspython" OR target STREQUAL "obslua") set(target_destination "${OBS_SCRIPT_PLUGIN_DESTINATION}") diff --git a/frontend/CMakeLists.txt b/frontend/CMakeLists.txt index 6f677d4fe53eb3..71cb29c00664a7 100644 --- a/frontend/CMakeLists.txt +++ b/frontend/CMakeLists.txt @@ -85,7 +85,7 @@ elseif(OS_FREEBSD OR OS_OPENBSD) include(cmake/os-freebsd.cmake) endif() -foreach(graphics_library IN ITEMS opengl metal d3d11) +foreach(graphics_library IN ITEMS opengl metal d3d11 d3d12) string(TOUPPER ${graphics_library} graphics_library_U) if(TARGET OBS::libobs-${graphics_library}) target_compile_definitions( diff --git a/frontend/OBSApp.cpp b/frontend/OBSApp.cpp index 3e66fba22bc02b..dcf51ac9209404 100644 --- a/frontend/OBSApp.cpp +++ b/frontend/OBSApp.cpp @@ -1090,12 +1090,23 @@ void OBSApp::checkForUncleanShutdown() const char *OBSApp::GetRenderModule() const { #if defined(_WIN32) + // open PIX for windows support + /* if 
(GetModuleHandle(L"WinPixGpuCapturer.dll") == 0) { + HMODULE hModule = LoadLibraryW(L"C:\\Program Files\\Microsoft PIX\\2509.25\\WinPixGpuCapturer.dll"); + if (hModule) { + blog(LOG_INFO, "Load Pixel"); + } + }*/ const char *renderer = config_get_string(appConfig, "Video", "Renderer"); - - return (astrcmpi(renderer, "Direct3D 11") == 0) ? DL_D3D11 : DL_OPENGL; + if (astrcmpi(renderer, "Direct3D 12") == 0) { + return DL_D3D12; + } else if (astrcmpi(renderer, "Direct3D 11") == 0) { + return DL_D3D11; + } else { + return DL_OPENGL; + } #elif defined(__APPLE__) && defined(__aarch64__) const char *renderer = config_get_string(appConfig, "Video", "Renderer"); - return (astrcmpi(renderer, "Metal") == 0) ? DL_METAL : DL_OPENGL; #else return DL_OPENGL; diff --git a/frontend/settings/OBSBasicSettings.cpp b/frontend/settings/OBSBasicSettings.cpp index 6e47f93ac924df..8dacf2ff520cfb 100644 --- a/frontend/settings/OBSBasicSettings.cpp +++ b/frontend/settings/OBSBasicSettings.cpp @@ -1397,6 +1397,7 @@ void OBSBasicSettings::LoadRendererList() const char *renderer = config_get_string(App()->GetAppConfig(), "Video", "Renderer"); #ifdef _WIN32 ui->renderer->addItem(QString("Direct3D 11"), QString("Direct3D 11")); + ui->renderer->addItem(QString("Direct3D 12"), QString("Direct3D 12")); if (opt_allow_opengl || strcmp(renderer, "OpenGL") == 0) { ui->renderer->addItem(QString("OpenGL"), QString("OpenGL")); } diff --git a/libobs-d3d11/d3d11-subsystem.cpp b/libobs-d3d11/d3d11-subsystem.cpp index 1ef40576327a11..e747f6217f4897 100644 --- a/libobs-d3d11/d3d11-subsystem.cpp +++ b/libobs-d3d11/d3d11-subsystem.cpp @@ -2189,6 +2189,8 @@ void device_begin_frame(gs_device_t *device) reset_duplicators(); } +void device_end_frame(gs_device_t *device) {} + void device_begin_scene(gs_device_t *device) { clear_textures(device); diff --git a/libobs-d3d12/CMakeLists.txt b/libobs-d3d12/CMakeLists.txt new file mode 100644 index 00000000000000..d3ba12118e0227 --- /dev/null +++ 
b/libobs-d3d12/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.28...3.30) + +add_library(libobs-d3d12 MODULE) +add_library(OBS::libobs-d3d12 ALIAS libobs-d3d12) + +target_sources( + libobs-d3d12 + PRIVATE # cmake-format: unsort + d3d12-command-context.cpp + d3d12-command-context.hpp + d3d12-duplicator.cpp + d3d12-indexbuffer.cpp + d3d12-samplerstate.cpp + d3d12-shader.cpp + d3d12-shaderprocessor.cpp + d3d12-shaderprocessor.hpp + d3d12-stagesurf.cpp + d3d12-subsystem.cpp + d3d12-subsystem.hpp + d3d12-texture2d.cpp + d3d12-texture3d.cpp + d3d12-vertexbuffer.cpp + d3d12-zstencilbuffer.cpp +) + +configure_file(cmake/windows/obs-module.rc.in libobs-d3d12.rc) +target_sources(libobs-d3d12 PRIVATE libobs-d3d12.rc) + +target_compile_definitions( + libobs-d3d12 + PRIVATE + $<$:USE_GPU_PRIORITY> + "$,GPU_PRIORITY_VAL=${GPU_PRIORITY_VAL},GPU_PRIORITY_VAL=0>" +) + +target_link_libraries( + libobs-d3d12 + PRIVATE OBS::libobs d3d11 d3d12 dxguid d3dcompiler dxgi shcore +) + +target_enable_feature(libobs "Direct3D 12 renderer") + +set_target_properties_obs( + libobs-d3d12 + PROPERTIES FOLDER core + VERSION 0 + SOVERSION ${OBS_VERSION_MAJOR} COMPILE_WARNING_AS_ERROR FALSE +) diff --git a/libobs-d3d12/Common.h b/libobs-d3d12/Common.h new file mode 100644 index 00000000000000..65ea374bccd499 --- /dev/null +++ b/libobs-d3d12/Common.h @@ -0,0 +1,217 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. 
+// +// Developed by Minigraph +// +// Author: James Stanard +// + +#pragma once + +#include + +#define INLINE __forceinline + +namespace Math { +template __forceinline T AlignUpWithMask(T value, size_t mask) +{ + return (T)(((size_t)value + mask) & ~mask); +} + +template __forceinline T AlignDownWithMask(T value, size_t mask) +{ + return (T)((size_t)value & ~mask); +} + +template __forceinline T AlignUp(T value, size_t alignment) +{ + return AlignUpWithMask(value, alignment - 1); +} + +template __forceinline T AlignDown(T value, size_t alignment) +{ + return AlignDownWithMask(value, alignment - 1); +} + +template __forceinline bool IsAligned(T value, size_t alignment) +{ + return 0 == ((size_t)value & (alignment - 1)); +} + +template __forceinline T DivideByMultiple(T value, size_t alignment) +{ + return (T)((value + alignment - 1) / alignment); +} + +template __forceinline bool IsPowerOfTwo(T value) +{ + return 0 == (value & (value - 1)); +} + +template __forceinline bool IsDivisible(T value, T divisor) +{ + return (value / divisor) * divisor == value; +} + +__forceinline uint8_t Log2(uint64_t value) +{ + unsigned long mssb; // most significant set bit + unsigned long lssb; // least significant set bit + + // If perfect power of two (only one set bit), return index of bit. Otherwise round up + // fractional log by adding 1 to most signicant set bit's index. + if (_BitScanReverse64(&mssb, value) > 0 && _BitScanForward64(&lssb, value) > 0) + return uint8_t(mssb + (mssb == lssb ? 0 : 1)); + else + return 0; +} + +template __forceinline T AlignPowerOfTwo(T value) +{ + return value == 0 ? 0 : 1 << Log2(value); +} + +} // namespace Math + +// A faster version of memcopy that uses SSE instructions. TODO: Write an ARM variant if necessary. 
+inline static void SIMDMemCopy(void *__restrict _Dest, const void *__restrict _Source, size_t NumQuadwords) +{ + //ASSERT(Math::IsAligned(_Dest, 16)); + //ASSERT(Math::IsAligned(_Source, 16)); + + __m128i *__restrict Dest = (__m128i *__restrict)_Dest; + const __m128i *__restrict Source = (const __m128i *__restrict)_Source; + + // Discover how many quadwords precede a cache line boundary. Copy them separately. + size_t InitialQuadwordCount = (4 - ((size_t)Source >> 4) & 3) & 3; + if (InitialQuadwordCount > NumQuadwords) + InitialQuadwordCount = NumQuadwords; + + switch (InitialQuadwordCount) { + case 3: + _mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2)); // Fall through + case 2: + _mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1)); // Fall through + case 1: + _mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0)); // Fall through + default: + break; + } + + if (NumQuadwords == InitialQuadwordCount) + return; + + Dest += InitialQuadwordCount; + Source += InitialQuadwordCount; + NumQuadwords -= InitialQuadwordCount; + + size_t CacheLines = NumQuadwords >> 2; + + switch (CacheLines) { + default: + case 10: + _mm_prefetch((char *)(Source + 36), _MM_HINT_NTA); // Fall through + case 9: + _mm_prefetch((char *)(Source + 32), _MM_HINT_NTA); // Fall through + case 8: + _mm_prefetch((char *)(Source + 28), _MM_HINT_NTA); // Fall through + case 7: + _mm_prefetch((char *)(Source + 24), _MM_HINT_NTA); // Fall through + case 6: + _mm_prefetch((char *)(Source + 20), _MM_HINT_NTA); // Fall through + case 5: + _mm_prefetch((char *)(Source + 16), _MM_HINT_NTA); // Fall through + case 4: + _mm_prefetch((char *)(Source + 12), _MM_HINT_NTA); // Fall through + case 3: + _mm_prefetch((char *)(Source + 8), _MM_HINT_NTA); // Fall through + case 2: + _mm_prefetch((char *)(Source + 4), _MM_HINT_NTA); // Fall through + case 1: + _mm_prefetch((char *)(Source + 0), _MM_HINT_NTA); // Fall through + + // Do four quadwords per loop to minimize stalls. 
+ for (size_t i = CacheLines; i > 0; --i) { + // If this is a large copy, start prefetching future cache lines. This also prefetches the + // trailing quadwords that are not part of a whole cache line. + if (i >= 10) + _mm_prefetch((char *)(Source + 40), _MM_HINT_NTA); + + _mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0)); + _mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1)); + _mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2)); + _mm_stream_si128(Dest + 3, _mm_load_si128(Source + 3)); + + Dest += 4; + Source += 4; + } + + case 0: // No whole cache lines to read + break; + } + + // Copy the remaining quadwords + switch (NumQuadwords & 3) { + case 3: + _mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2)); // Fall through + case 2: + _mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1)); // Fall through + case 1: + _mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0)); // Fall through + default: + break; + } + + _mm_sfence(); +} + +inline static void SIMDMemFill(void *__restrict _Dest, __m128 FillVector, size_t NumQuadwords) +{ + // ASSERT(Math::IsAligned(_Dest, 16)); + + const __m128i Source = _mm_castps_si128(FillVector); + __m128i *__restrict Dest = (__m128i *__restrict)_Dest; + + switch (((size_t)Dest >> 4) & 3) { + case 1: + _mm_stream_si128(Dest++, Source); + --NumQuadwords; // Fall through + case 2: + _mm_stream_si128(Dest++, Source); + --NumQuadwords; // Fall through + case 3: + _mm_stream_si128(Dest++, Source); + --NumQuadwords; // Fall through + default: + break; + } + + size_t WholeCacheLines = NumQuadwords >> 2; + + // Do four quadwords per loop to minimize stalls. 
+ while (WholeCacheLines--) { + _mm_stream_si128(Dest++, Source); + _mm_stream_si128(Dest++, Source); + _mm_stream_si128(Dest++, Source); + _mm_stream_si128(Dest++, Source); + } + + // Copy the remaining quadwords + switch (NumQuadwords & 3) { + case 3: + _mm_stream_si128(Dest++, Source); // Fall through + case 2: + _mm_stream_si128(Dest++, Source); // Fall through + case 1: + _mm_stream_si128(Dest++, Source); // Fall through + default: + break; + } + + _mm_sfence(); +} diff --git a/libobs-d3d12/Hash.h b/libobs-d3d12/Hash.h new file mode 100644 index 00000000000000..dfb8d8c517acd2 --- /dev/null +++ b/libobs-d3d12/Hash.h @@ -0,0 +1,65 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. +// +// Developed by Minigraph +// +// Author: James Stanard + +#pragma once + +#include + +// This requires SSE4.2 which is present on Intel Nehalem (Nov. 2008) +// and AMD Bulldozer (Oct. 2011) processors. I could put a runtime +// check for this, but I'm just going to assume people playing with +// DirectX 12 on Windows 10 have fairly recent machines. 
+//#ifdef _M_X64 +//#define ENABLE_SSE_CRC32 1 +//#else +#define ENABLE_SSE_CRC32 0 +//#endif + +#if ENABLE_SSE_CRC32 +#pragma intrinsic(_mm_crc32_u32) +#pragma intrinsic(_mm_crc32_u64) +#endif + +namespace Utility { +inline size_t HashRange(const uint32_t *const Begin, const uint32_t *const End, size_t Hash) +{ +#if ENABLE_SSE_CRC32 + const uint64_t *Iter64 = (const uint64_t *)Math::AlignUp(Begin, 8); + const uint64_t *const End64 = (const uint64_t *const)Math::AlignDown(End, 8); + + // If not 64-bit aligned, start with a single u32 + if ((uint32_t *)Iter64 > Begin) + Hash = _mm_crc32_u32((uint32_t)Hash, *Begin); + + // Iterate over consecutive u64 values + while (Iter64 < End64) + Hash = _mm_crc32_u64((uint64_t)Hash, *Iter64++); + + // If there is a 32-bit remainder, accumulate that + if ((uint32_t *)Iter64 < End) + Hash = _mm_crc32_u32((uint32_t)Hash, *(uint32_t *)Iter64); +#else + // An inexpensive hash for CPUs lacking SSE4.2 + for (const uint32_t *Iter = Begin; Iter < End; ++Iter) + Hash = 16777619U * Hash ^ *Iter; +#endif + + return Hash; +} + +template inline size_t HashState(const T *StateDesc, size_t Count = 1, size_t Hash = 2166136261U) +{ + static_assert((sizeof(T) & 3) == 0 && alignof(T) >= 4, "State object is not word-aligned"); + return HashRange((uint32_t *)StateDesc, (uint32_t *)(StateDesc + Count), Hash); +} + +} // namespace Utility diff --git a/libobs-d3d12/cmake/windows/obs-module.rc.in b/libobs-d3d12/cmake/windows/obs-module.rc.in new file mode 100644 index 00000000000000..1233fd329fc629 --- /dev/null +++ b/libobs-d3d12/cmake/windows/obs-module.rc.in @@ -0,0 +1,24 @@ +1 VERSIONINFO +FILEVERSION ${OBS_VERSION_MAJOR},${OBS_VERSION_MINOR},${OBS_VERSION_PATCH},0 +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904B0" + BEGIN + VALUE "CompanyName", "${OBS_COMPANY_NAME}" + VALUE "FileDescription", "OBS Library D3D12 wrapper" + VALUE "FileVersion", "${OBS_VERSION_CANONICAL}" + VALUE "ProductName", "${OBS_PRODUCT_NAME}" + VALUE "ProductVersion", 
"${OBS_VERSION_CANONICAL}" + VALUE "Comments", "${OBS_COMMENTS}" + VALUE "LegalCopyright", "${OBS_LEGAL_COPYRIGHT}" + VALUE "InternalName", "libobs-d3d12" + VALUE "OriginalFilename", "libobs-d3d12" + END + END + + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x0409, 0x04B0 + END +END diff --git a/libobs-d3d12/d3d12-command-context.cpp b/libobs-d3d12/d3d12-command-context.cpp new file mode 100644 index 00000000000000..a37d050a1c7d98 --- /dev/null +++ b/libobs-d3d12/d3d12-command-context.cpp @@ -0,0 +1,4558 @@ +#include "d3d12-command-context.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +static const uint32_t vendorID_Nvidia = 0x10DE; +static const uint32_t vendorID_AMD = 0x1002; +static const uint32_t vendorID_Intel = 0x8086; + +namespace D3D12Graphics { + +void RootSignature::InitStaticSampler(UINT Register, const D3D12_SAMPLER_DESC &NonStaticSamplerDesc, + D3D12_SHADER_VISIBILITY Visibility) +{ + if (!(m_NumInitializedStaticSamplers < m_NumSamplers)) { + throw HRError("RootSignature: m_NumInitializedStaticSamplers < m_NumSamplers"); + } + D3D12_STATIC_SAMPLER_DESC &StaticSamplerDesc = m_SamplerArray[m_NumInitializedStaticSamplers++]; + + StaticSamplerDesc.Filter = NonStaticSamplerDesc.Filter; + StaticSamplerDesc.AddressU = NonStaticSamplerDesc.AddressU; + StaticSamplerDesc.AddressV = NonStaticSamplerDesc.AddressV; + StaticSamplerDesc.AddressW = NonStaticSamplerDesc.AddressW; + StaticSamplerDesc.MipLODBias = NonStaticSamplerDesc.MipLODBias; + StaticSamplerDesc.MaxAnisotropy = NonStaticSamplerDesc.MaxAnisotropy; + StaticSamplerDesc.ComparisonFunc = NonStaticSamplerDesc.ComparisonFunc; + StaticSamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; + StaticSamplerDesc.MinLOD = NonStaticSamplerDesc.MinLOD; + StaticSamplerDesc.MaxLOD = NonStaticSamplerDesc.MaxLOD; + StaticSamplerDesc.ShaderRegister = Register; + StaticSamplerDesc.RegisterSpace = 0; + StaticSamplerDesc.ShaderVisibility = 
Visibility; + + if (StaticSamplerDesc.AddressU == D3D12_TEXTURE_ADDRESS_MODE_BORDER || + StaticSamplerDesc.AddressV == D3D12_TEXTURE_ADDRESS_MODE_BORDER || + StaticSamplerDesc.AddressW == D3D12_TEXTURE_ADDRESS_MODE_BORDER) { + bool checkedStaticSample = + (NonStaticSamplerDesc.BorderColor[0] == 0.0f && NonStaticSamplerDesc.BorderColor[1] == 0.0f && + NonStaticSamplerDesc.BorderColor[2] == 0.0f && + NonStaticSamplerDesc.BorderColor[3] == 0.0f || + NonStaticSamplerDesc.BorderColor[0] == 0.0f && NonStaticSamplerDesc.BorderColor[1] == 0.0f && + NonStaticSamplerDesc.BorderColor[2] == 0.0f && + NonStaticSamplerDesc.BorderColor[3] == 1.0f || + NonStaticSamplerDesc.BorderColor[0] == 1.0f && NonStaticSamplerDesc.BorderColor[1] == 1.0f && + NonStaticSamplerDesc.BorderColor[2] == 1.0f && + NonStaticSamplerDesc.BorderColor[3] == 1.0f); + if (!checkedStaticSample) { + throw HRError("Sampler border color does not match static sampler limitations"); + } + + if (NonStaticSamplerDesc.BorderColor[3] == 1.0f) { + if (NonStaticSamplerDesc.BorderColor[0] == 1.0f) + StaticSamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE; + else + StaticSamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK; + } else + StaticSamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + } +} + +void RootSignature::Finalize(const std::wstring &name, D3D12_ROOT_SIGNATURE_FLAGS Flags) +{ + if (m_Finalized) + return; + + if (m_NumInitializedStaticSamplers != m_NumSamplers) { + throw HRError("RootSignature: m_NumInitializedStaticSamplers != m_NumSamplers"); + } + + D3D12_ROOT_SIGNATURE_DESC RootDesc; + RootDesc.NumParameters = m_NumParameters; + RootDesc.pParameters = (const D3D12_ROOT_PARAMETER *)m_ParamArray.get(); + RootDesc.NumStaticSamplers = m_NumSamplers; + RootDesc.pStaticSamplers = (const D3D12_STATIC_SAMPLER_DESC *)m_SamplerArray.get(); + RootDesc.Flags = Flags; + + m_DescriptorTableBitMap = 0; + m_SamplerTableBitMap = 0; + + size_t HashCode = 
Utility::HashState(&RootDesc.Flags); + HashCode = Utility::HashState(RootDesc.pStaticSamplers, m_NumSamplers, HashCode); + + for (UINT Param = 0; Param < m_NumParameters; ++Param) { + const D3D12_ROOT_PARAMETER &RootParam = RootDesc.pParameters[Param]; + m_DescriptorTableSize[Param] = 0; + + if (RootParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { + if (RootParam.DescriptorTable.pDescriptorRanges == nullptr) { + throw HRError("RootSignature: DescriptorTable with null pDescriptorRanges"); + } + + HashCode = Utility::HashState(RootParam.DescriptorTable.pDescriptorRanges, + RootParam.DescriptorTable.NumDescriptorRanges, HashCode); + + // We keep track of sampler descriptor tables separately from CBV_SRV_UAV descriptor tables + if (RootParam.DescriptorTable.pDescriptorRanges->RangeType == + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER) + m_SamplerTableBitMap |= (1 << Param); + else + m_DescriptorTableBitMap |= (1 << Param); + + for (UINT TableRange = 0; TableRange < RootParam.DescriptorTable.NumDescriptorRanges; + ++TableRange) + m_DescriptorTableSize[Param] += + RootParam.DescriptorTable.pDescriptorRanges[TableRange].NumDescriptors; + } else + HashCode = Utility::HashState(&RootParam, 1, HashCode); + } + + ComPtr RSRef = nullptr; + bool firstCompile = false; + { + auto iter = m_DeviceInstance->GetRootSignatureHashMap().find(HashCode); + + // Reserve space so the next inquiry will find that someone got here first. 
+ if (iter == m_DeviceInstance->GetRootSignatureHashMap().end()) { + RSRef = m_DeviceInstance->GetRootSignatureHashMap()[HashCode]; + firstCompile = true; + } else { + RSRef = iter->second; + // A null entry is a placeholder left behind by a creation attempt that threw before storing a + // signature. RSRef is only a copy of the entry, so spinning on it could never end; rebuild instead. + firstCompile = (RSRef == nullptr); + } + } + if (firstCompile) { + ComPtr pOutBlob, pErrorBlob; + + HRESULT hr = + (D3D12SerializeRootSignature(&RootDesc, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + if (FAILED(hr)) { + throw HRError("D3D12SerializeRootSignature failed", hr); + } + hr = (m_DeviceInstance->GetDevice()->CreateRootSignature( + 1, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_Signature))); + + if (FAILED(hr)) { + auto DeviceRemoveReason = m_DeviceInstance->GetDevice()->GetDeviceRemovedReason(); + throw HRError("CreateRootSignature failed", hr); + } + m_Signature->SetName(name.c_str()); + + m_DeviceInstance->GetRootSignatureHashMap()[HashCode].Set(m_Signature); + } else { + // Cache hit: reuse the signature stored by an earlier Finalize with the same hash. + m_Signature = RSRef; + } + + m_Finalized = TRUE; +} + +ID3D12RootSignature *RootSignature::GetSignature() const +{ + return m_Signature; +} + +ID3D12CommandSignature *CommandSignature::GetSignature() const +{ + return m_Signature.Get(); +} + +void CommandSignature::Finalize(const RootSignature *RootSignature) +{ + if (m_Finalized) + return; + + UINT ByteStride = 0; + bool RequiresRootSignature = false; + + for (UINT i = 0; i < m_NumParameters; ++i) { + switch (m_ParamArray[i].GetDesc().Type) { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + ByteStride += sizeof(D3D12_DRAW_ARGUMENTS); + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + ByteStride += sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + ByteStride += sizeof(D3D12_DISPATCH_ARGUMENTS); + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + ByteStride += m_ParamArray[i].GetDesc().Constant.Num32BitValuesToSet * 4; + RequiresRootSignature = true; + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: + ByteStride += 
sizeof(D3D12_VERTEX_BUFFER_VIEW); + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + ByteStride += sizeof(D3D12_INDEX_BUFFER_VIEW); + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: + ByteStride += 8; + RequiresRootSignature = true; + break; + } + } + + D3D12_COMMAND_SIGNATURE_DESC CommandSignatureDesc; + CommandSignatureDesc.ByteStride = ByteStride; + CommandSignatureDesc.NumArgumentDescs = m_NumParameters; + CommandSignatureDesc.pArgumentDescs = (const D3D12_INDIRECT_ARGUMENT_DESC *)m_ParamArray.get(); + CommandSignatureDesc.NodeMask = 1; + + ComPtr pOutBlob, pErrorBlob; + + ID3D12RootSignature *pRootSig = RootSignature ? RootSignature->GetSignature() : nullptr; + if (!RequiresRootSignature) { + pRootSig = nullptr; + } + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommandSignature(&CommandSignatureDesc, pRootSig, + IID_PPV_ARGS(&m_Signature)); + if (FAILED(hr)) { + throw HRError("CreateCommandSignature failed", hr); + } + m_Signature->SetName(L"CommandSignature"); + + m_Finalized = TRUE; +} + +DescriptorAllocator::DescriptorAllocator(D3D12DeviceInstance *DeviceInstance, D3D12_DESCRIPTOR_HEAP_TYPE Type) + : m_DeviceInstance(DeviceInstance), + m_Type(Type), + m_CurrentHeap(nullptr), + m_DescriptorSize(0), + m_RemainingFreeHandles(0) +{ + m_CurrentHandle.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; +} + +D3D12_CPU_DESCRIPTOR_HANDLE DescriptorAllocator::Allocate(uint32_t Count) +{ + if (m_CurrentHeap == nullptr || m_RemainingFreeHandles < Count) { + m_CurrentHeap = m_DeviceInstance->RequestCommonHeap(m_Type); + m_CurrentHandle = m_CurrentHeap->GetCPUDescriptorHandleForHeapStart(); + m_RemainingFreeHandles = kMaxNumDescriptors; + + if (m_DescriptorSize == 0) + m_DescriptorSize = m_DeviceInstance->GetDevice()->GetDescriptorHandleIncrementSize(m_Type); + + for (int32_t i = 0; i < kMaxNumDescriptors; ++i) { + 
m_DescriptorPoolNodes[i].index = i; + if (i != kMaxNumDescriptors - 1) { + m_DescriptorPoolNodes[i].next = &m_DescriptorPoolNodes[i + 1]; + } + } + + m_DescriptorPoolHead = &m_DescriptorPoolNodes[0]; + } + + if (m_RemainingFreeHandles <= 0) { + throw HRError("DescriptorAllocator: No remaining free handles"); + } + + D3D12_CPU_DESCRIPTOR_HANDLE ret = m_CurrentHandle; + m_CurrentHandle.ptr += Count * m_DescriptorSize; + m_RemainingFreeHandles -= Count; + return ret; +} + +UINT64 DescriptorAllocator::GetAvailableIndex() +{ + if (m_DescriptorPoolHead) { + SIZE_T index = m_DescriptorPoolHead->index; + m_DescriptorPoolHead = (DescriptorHandleNode *)(m_DescriptorPoolHead->next); + return index; + } else { + return D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } +} + +void DescriptorAllocator::FreeIndex(UINT64 index) +{ + m_DescriptorPoolNodes[index].next = m_DescriptorPoolHead; + m_DescriptorPoolHead = &m_DescriptorPoolNodes[index]; +} + +DescriptorHandle::DescriptorHandle() +{ + m_CpuHandle.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_GpuHandle.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; +} + +DescriptorHandle::DescriptorHandle(D3D12_CPU_DESCRIPTOR_HANDLE CpuHandle, D3D12_GPU_DESCRIPTOR_HANDLE GpuHandle) + : m_CpuHandle(CpuHandle), + m_GpuHandle(GpuHandle) +{ +} + +DescriptorHandleCache::DescriptorHandleCache(D3D12DeviceInstance *DeviceInstance) : m_DeviceInstance(DeviceInstance) +{ + ClearCache(); +} + +void DescriptorHandleCache::ClearCache() +{ + m_RootDescriptorTablesBitMap = 0; + m_StaleRootParamsBitMap = 0; + m_MaxCachedDescriptors = 0; +} + +uint32_t DescriptorHandleCache::ComputeStagedSize() +{ + // Sum the maximum assigned offsets of stale descriptor tables to determine total needed space. 
+ uint32_t NeededSpace = 0; + uint32_t RootIndex; + uint32_t StaleParams = m_StaleRootParamsBitMap; + while (_BitScanForward((unsigned long *)&RootIndex, StaleParams)) { + StaleParams ^= (1 << RootIndex); + + uint32_t MaxSetHandle; + if (TRUE != _BitScanReverse((unsigned long *)&MaxSetHandle, + m_RootDescriptorTable[RootIndex].AssignedHandlesBitMap)) { + throw HRError("Root entry marked as stale but has no stale descriptors"); + } + + NeededSpace += MaxSetHandle + 1; + } + return NeededSpace; +} + +void DescriptorHandleCache::CopyAndBindStaleTables( + D3D12_DESCRIPTOR_HEAP_TYPE Type, uint32_t DescriptorSize, DescriptorHandle DestHandleStart, + ID3D12GraphicsCommandList *CmdList, + void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*SetFunc)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE)) +{ + uint32_t StaleParamCount = 0; + uint32_t TableSize[kMaxNumDescriptorTables]; + uint32_t RootIndices[kMaxNumDescriptorTables]; + uint32_t NeededSpace = 0; + uint32_t RootIndex; + + // Sum the maximum assigned offsets of stale descriptor tables to determine total needed space. 
+ uint32_t StaleParams = m_StaleRootParamsBitMap; + while (_BitScanForward((unsigned long *)&RootIndex, StaleParams)) { + // Check capacity before writing: RootIndices and TableSize only hold kMaxNumDescriptorTables entries. + if (StaleParamCount >= kMaxNumDescriptorTables) { + throw HRError("We're only equipped to handle so many descriptor tables"); + } + RootIndices[StaleParamCount] = RootIndex; + StaleParams ^= (1 << RootIndex); + + uint32_t MaxSetHandle; + if (TRUE != _BitScanReverse((unsigned long *)&MaxSetHandle, + m_RootDescriptorTable[RootIndex].AssignedHandlesBitMap)) { + throw HRError("Root entry marked as stale but has no stale descriptors"); + } + + NeededSpace += MaxSetHandle + 1; + TableSize[StaleParamCount] = MaxSetHandle + 1; + + ++StaleParamCount; + } + + m_StaleRootParamsBitMap = 0; + + UINT NumDestDescriptorRanges = 0; + D3D12_CPU_DESCRIPTOR_HANDLE pDestDescriptorRangeStarts[kMaxNumDescriptorTables]; + UINT pDestDescriptorRangeSizes[kMaxNumDescriptorTables]; + + UINT NumSrcDescriptorRanges = 0; + D3D12_CPU_DESCRIPTOR_HANDLE pSrcDescriptorRangeStarts[kMaxNumDescriptorTables]; + UINT pSrcDescriptorRangeSizes[kMaxNumDescriptorTables]; + + for (uint32_t i = 0; i < StaleParamCount; ++i) { + RootIndex = RootIndices[i]; + (CmdList->*SetFunc)(RootIndex, DestHandleStart); + + DescriptorTableCache &RootDescTable = m_RootDescriptorTable[RootIndex]; + + D3D12_CPU_DESCRIPTOR_HANDLE *SrcHandles = RootDescTable.TableStart; + uint64_t SetHandles = (uint64_t)RootDescTable.AssignedHandlesBitMap; + D3D12_CPU_DESCRIPTOR_HANDLE CurDest = DestHandleStart; + DestHandleStart += TableSize[i] * DescriptorSize; + + unsigned long SkipCount; + while (_BitScanForward64(&SkipCount, SetHandles)) { + // Skip over unset descriptor handles + SetHandles >>= SkipCount; + SrcHandles += SkipCount; + CurDest.ptr += SkipCount * DescriptorSize; + + unsigned long DescriptorCount; + _BitScanForward64(&DescriptorCount, ~SetHandles); + SetHandles >>= DescriptorCount; + + // If we run out of temp room, copy what we've got so far + if (NumSrcDescriptorRanges + DescriptorCount > 
kMaxNumDescriptorTables) { + m_DeviceInstance->GetDevice()->CopyDescriptors( + NumDestDescriptorRanges, pDestDescriptorRangeStarts, pDestDescriptorRangeSizes, + NumSrcDescriptorRanges, pSrcDescriptorRangeStarts, pSrcDescriptorRangeSizes, + Type); + + NumSrcDescriptorRanges = 0; + NumDestDescriptorRanges = 0; + } + + // Setup destination range + pDestDescriptorRangeStarts[NumDestDescriptorRanges] = CurDest; + pDestDescriptorRangeSizes[NumDestDescriptorRanges] = DescriptorCount; + ++NumDestDescriptorRanges; + + // Setup source ranges (one descriptor each because we don't assume they are contiguous) + for (uint32_t j = 0; j < DescriptorCount; ++j) { + pSrcDescriptorRangeStarts[NumSrcDescriptorRanges] = SrcHandles[j]; + pSrcDescriptorRangeSizes[NumSrcDescriptorRanges] = 1; + ++NumSrcDescriptorRanges; + } + + // Move the destination pointer forward by the number of descriptors we will copy + SrcHandles += DescriptorCount; + CurDest.ptr += DescriptorCount * DescriptorSize; + } + } + + m_DeviceInstance->GetDevice()->CopyDescriptors(NumDestDescriptorRanges, pDestDescriptorRangeStarts, + pDestDescriptorRangeSizes, NumSrcDescriptorRanges, + pSrcDescriptorRangeStarts, pSrcDescriptorRangeSizes, Type); +} + +void DescriptorHandleCache::UnbindAllValid() +{ + m_StaleRootParamsBitMap = 0; + + unsigned long TableParams = m_RootDescriptorTablesBitMap; + unsigned long RootIndex; + while (_BitScanForward(&RootIndex, TableParams)) { + TableParams ^= (1 << RootIndex); + if (m_RootDescriptorTable[RootIndex].AssignedHandlesBitMap != 0) + m_StaleRootParamsBitMap |= (1 << RootIndex); + } +} + +void DescriptorHandleCache::StageDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]) +{ + if (((1 << RootIndex) & m_RootDescriptorTablesBitMap) == 0) { + throw HRError("Root parameter is not a CBV_SRV_UAV descriptor table"); + } + + if (Offset + NumHandles > m_RootDescriptorTable[RootIndex].TableSize) { + throw HRError("Attempting to stage 
more descriptors than exist in the descriptor table"); + } + + DescriptorTableCache &TableCache = m_RootDescriptorTable[RootIndex]; + D3D12_CPU_DESCRIPTOR_HANDLE *CopyDest = TableCache.TableStart + Offset; + for (UINT i = 0; i < NumHandles; ++i) + CopyDest[i] = Handles[i]; + TableCache.AssignedHandlesBitMap |= ((1 << NumHandles) - 1) << Offset; + m_StaleRootParamsBitMap |= (1 << RootIndex); +} + +void DescriptorHandleCache::ParseRootSignature(D3D12_DESCRIPTOR_HEAP_TYPE Type, const RootSignature &RootSig) +{ + UINT CurrentOffset = 0; + + if (RootSig.m_NumParameters > kMaxNumDescriptorTables) { + throw HRError("Maybe we need to support something greater"); + } + + m_StaleRootParamsBitMap = 0; + m_RootDescriptorTablesBitMap = (Type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? RootSig.m_SamplerTableBitMap + : RootSig.m_DescriptorTableBitMap); + + unsigned long TableParams = m_RootDescriptorTablesBitMap; + unsigned long RootIndex; + while (_BitScanForward(&RootIndex, TableParams)) { + TableParams ^= (1 << RootIndex); + + UINT TableSize = RootSig.m_DescriptorTableSize[RootIndex]; + + if (TableSize == 0) { + throw HRError("Descriptor table has zero size"); + } + + DescriptorTableCache &RootDescriptorTable = m_RootDescriptorTable[RootIndex]; + RootDescriptorTable.AssignedHandlesBitMap = 0; + RootDescriptorTable.TableStart = m_HandleCache + CurrentOffset; + RootDescriptorTable.TableSize = TableSize; + + CurrentOffset += TableSize; + } + + m_MaxCachedDescriptors = CurrentOffset; + + if (m_MaxCachedDescriptors > kMaxNumDescriptors) { + throw HRError("Exceeded user-supplied maximum cache size"); + } +} + +DynamicDescriptorHeap::DynamicDescriptorHeap(D3D12DeviceInstance *DeviceInstance, CommandContext &OwningContext, + D3D12_DESCRIPTOR_HEAP_TYPE HeapType) + : m_DeviceInstance(DeviceInstance), + m_OwningContext(OwningContext), + m_DescriptorType(HeapType), + m_GraphicsHandleCache(std::make_unique(DeviceInstance)), + m_ComputeHandleCache(std::make_unique(DeviceInstance)) +{ + 
m_CurrentHeapPtr = nullptr; + m_CurrentOffset = 0; + m_DescriptorSize = m_DeviceInstance->GetDevice()->GetDescriptorHandleIncrementSize(HeapType); +} + +DynamicDescriptorHeap::~DynamicDescriptorHeap() {} + +void DynamicDescriptorHeap::CleanupUsedHeaps(uint64_t fenceValue) +{ + RetireCurrentHeap(); + RetireUsedHeaps(fenceValue); + m_GraphicsHandleCache->ClearCache(); + m_ComputeHandleCache->ClearCache(); +} + +void DynamicDescriptorHeap::SetGraphicsDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]) +{ + m_GraphicsHandleCache->StageDescriptorHandles(RootIndex, Offset, NumHandles, Handles); +} + +void DynamicDescriptorHeap::SetComputeDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]) +{ + m_ComputeHandleCache->StageDescriptorHandles(RootIndex, Offset, NumHandles, Handles); +} + +D3D12_GPU_DESCRIPTOR_HANDLE DynamicDescriptorHeap::UploadDirect(D3D12_CPU_DESCRIPTOR_HANDLE Handle) +{ + if (!HasSpace(1)) { + RetireCurrentHeap(); + UnbindAllValid(); + } + + m_OwningContext.SetDescriptorHeap(m_DescriptorType, GetHeapPointer()); + + DescriptorHandle DestHandle = m_FirstDescriptor + m_CurrentOffset * m_DescriptorSize; + m_CurrentOffset += 1; + + m_DeviceInstance->GetDevice()->CopyDescriptorsSimple(1, DestHandle, Handle, m_DescriptorType); + + return DestHandle; +} +void DynamicDescriptorHeap::ParseGraphicsRootSignature(const RootSignature &RootSig) +{ + m_GraphicsHandleCache->ParseRootSignature(m_DescriptorType, RootSig); +} + +void DynamicDescriptorHeap::ParseComputeRootSignature(const RootSignature &RootSig) +{ + m_ComputeHandleCache->ParseRootSignature(m_DescriptorType, RootSig); +} + +void DynamicDescriptorHeap::CommitGraphicsRootDescriptorTables(ID3D12GraphicsCommandList *CmdList) +{ + if (m_GraphicsHandleCache->m_StaleRootParamsBitMap != 0) + CopyAndBindStagedTables(*m_GraphicsHandleCache, CmdList, + 
&ID3D12GraphicsCommandList::SetGraphicsRootDescriptorTable); +} + +void DynamicDescriptorHeap::CommitComputeRootDescriptorTables(ID3D12GraphicsCommandList *CmdList) +{ + if (m_ComputeHandleCache->m_StaleRootParamsBitMap != 0) + CopyAndBindStagedTables(*m_ComputeHandleCache, CmdList, + &ID3D12GraphicsCommandList::SetComputeRootDescriptorTable); +} + +bool DynamicDescriptorHeap::HasSpace(uint32_t Count) +{ + return (m_CurrentHeapPtr != nullptr && m_CurrentOffset + Count <= kMaxNumDescriptors); +} + +void DynamicDescriptorHeap::RetireCurrentHeap(void) +{ + if (m_CurrentOffset == 0) { + return; + } + + if (m_CurrentHeapPtr == nullptr) { + throw HRError("No current heap to retire"); + } + m_RetiredHeaps.push_back(m_CurrentHeapPtr); + m_CurrentHeapPtr = nullptr; + m_CurrentOffset = 0; +} + +void DynamicDescriptorHeap::RetireUsedHeaps(uint64_t fenceValue) +{ + m_DeviceInstance->DiscardDynamicDescriptorHeaps(m_DescriptorType, fenceValue, m_RetiredHeaps); + m_RetiredHeaps.clear(); +} + +ID3D12DescriptorHeap *DynamicDescriptorHeap::GetHeapPointer() +{ + if (m_CurrentHeapPtr == nullptr) { + m_CurrentHeapPtr = m_DeviceInstance->RequestDynamicDescriptorHeap(m_DescriptorType); + m_FirstDescriptor = DescriptorHandle(m_CurrentHeapPtr->GetCPUDescriptorHandleForHeapStart(), + m_CurrentHeapPtr->GetGPUDescriptorHandleForHeapStart()); + } + + return m_CurrentHeapPtr; +} + +DescriptorHandle DynamicDescriptorHeap::Allocate(UINT Count) +{ + DescriptorHandle ret = m_FirstDescriptor + m_CurrentOffset * m_DescriptorSize; + m_CurrentOffset += Count; + return ret; +} + +void DynamicDescriptorHeap::CopyAndBindStagedTables( + DescriptorHandleCache &HandleCache, ID3D12GraphicsCommandList *CmdList, + void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*SetFunc)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE)) +{ + uint32_t NeededSize = HandleCache.ComputeStagedSize(); + if (!HasSpace(NeededSize)) { + RetireCurrentHeap(); + UnbindAllValid(); + NeededSize = HandleCache.ComputeStagedSize(); + } + + // This can 
trigger the creation of a new heap + m_OwningContext.SetDescriptorHeap(m_DescriptorType, GetHeapPointer()); + HandleCache.CopyAndBindStaleTables(m_DescriptorType, m_DescriptorSize, Allocate(NeededSize), CmdList, SetFunc); +} + +void DynamicDescriptorHeap::UnbindAllValid(void) +{ + m_GraphicsHandleCache->UnbindAllValid(); + m_ComputeHandleCache->UnbindAllValid(); +} + +ContextManager::ContextManager(D3D12DeviceInstance *DeviceInstance) : m_DeviceInstance(DeviceInstance) {} + +CommandContext *ContextManager::AllocateContext(D3D12_COMMAND_LIST_TYPE Type) +{ + auto &AvailableContexts = m_AvailableContexts[Type]; + + CommandContext *ret = nullptr; + if (AvailableContexts.empty()) { + ret = new CommandContext(m_DeviceInstance, Type); + m_ContextPool[Type].emplace_back(ret); + ret->Initialize(); + } else { + ret = AvailableContexts.front(); + AvailableContexts.pop(); + ret->Reset(); + } + + return ret; +} + +void ContextManager::FreeContext(CommandContext *UsedContext) +{ + if (UsedContext == nullptr) { + throw HRError("ContextManager: FreeContext called with null UsedContext"); + } + m_AvailableContexts[UsedContext->m_Type].push(UsedContext); +} + +void ContextManager::DestroyAllContexts() +{ + for (uint32_t i = 0; i < 4; ++i) + m_ContextPool[i].clear(); +} + +UploadBuffer::UploadBuffer(D3D12DeviceInstance *DeviceInstance) : m_DeviceInstance(DeviceInstance), m_BufferSize(0) {} + +UploadBuffer::~UploadBuffer() +{ + Destroy(); +} + +void UploadBuffer::Create(const std::wstring &name, size_t BufferSize) +{ + Destroy(); + + m_BufferSize = BufferSize; + + // Create an upload buffer. This is CPU-visible, but it's write combined memory, so + // avoid reading back from it. 
+ D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + // Upload buffers must be 1-dimensional + D3D12_RESOURCE_DESC ResourceDesc = {}; + ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + ResourceDesc.Width = m_BufferSize; + ResourceDesc.Height = 1; + ResourceDesc.DepthOrArraySize = 1; + ResourceDesc.MipLevels = 1; + ResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + ResourceDesc.SampleDesc.Count = 1; + ResourceDesc.SampleDesc.Quality = 0; + ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommittedResource(&HeapProps, D3D12_HEAP_FLAG_NONE, + &ResourceDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + throw HRError("UploadBuffer: CreateCommittedResource failed", hr); + } + + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + +#ifdef RELEASE + (name); +#else + m_pResource->SetName(name.c_str()); +#endif +} + +void *UploadBuffer::Map(void) +{ + void *Memory; + CD3DX12_RANGE temp = CD3DX12_RANGE(0, m_BufferSize); + m_pResource->Map(0, &temp, &Memory); + return Memory; +} + +void UploadBuffer::Unmap(size_t begin, size_t end) +{ + CD3DX12_RANGE temp = CD3DX12_RANGE(begin, std::min(end, m_BufferSize)); + m_pResource->Unmap(0, &temp); +} + +size_t UploadBuffer::GetBufferSize() const +{ + return m_BufferSize; +} + +GpuBuffer::GpuBuffer(D3D12DeviceInstance *DeviceInstance) + : m_DeviceInstance(DeviceInstance), + m_BufferSize(0), + m_ElementCount(0), + m_ElementSize(0) +{ + m_ResourceFlags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + m_UAV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_SRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; +} + +GpuBuffer::~GpuBuffer() +{ + Destroy(); +} + +void 
GpuBuffer::Destroy() +{ + if (m_UAV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_UAV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_SRV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_SRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + GpuResource::Destroy(); +} + +void GpuBuffer::Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize, const void *initialData) +{ + Destroy(); + + m_ElementCount = NumElements; + m_ElementSize = ElementSize; + m_BufferSize = NumElements * ElementSize; + + D3D12_RESOURCE_DESC ResourceDesc = DescribeBuffer(); + + m_UsageState = D3D12_RESOURCE_STATE_COMMON; + + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &ResourceDesc, m_UsageState, nullptr, IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + throw HRError("GpuBuffer: CreateCommittedResource failed", hr); + } + + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + + if (initialData) + m_DeviceInstance->InitializeBuffer(*this, initialData, m_BufferSize); + +#ifdef RELEASE + (name); +#else + m_pResource->SetName(name.c_str()); +#endif + + CreateDerivedViews(); +} + +void GpuBuffer::Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize, + const UploadBuffer &srcData, uint32_t srcOffset) +{ + Destroy(); + + m_ElementCount = NumElements; + m_ElementSize = ElementSize; + m_BufferSize = NumElements * ElementSize; + + D3D12_RESOURCE_DESC ResourceDesc = DescribeBuffer(); + + m_UsageState = D3D12_RESOURCE_STATE_COMMON; + + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = 
D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &ResourceDesc, m_UsageState, nullptr, IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + throw HRError("GpuBuffer: CreateCommittedResource failed", hr); + } + + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + + m_DeviceInstance->InitializeBuffer(*this, srcData, srcOffset); + +#ifdef RELEASE + (name); +#else + m_pResource->SetName(name.c_str()); +#endif + + CreateDerivedViews(); +} + +// Sub-Allocate a buffer out of a pre-allocated heap. If initial data is provided, it will be copied into the buffer using the default command context. +void GpuBuffer::CreatePlaced(const std::wstring &name, ID3D12Heap *pBackingHeap, uint32_t HeapOffset, + uint32_t NumElements, uint32_t ElementSize, const void *initialData) +{ + m_ElementCount = NumElements; + m_ElementSize = ElementSize; + m_BufferSize = NumElements * ElementSize; + + D3D12_RESOURCE_DESC ResourceDesc = DescribeBuffer(); + + m_UsageState = D3D12_RESOURCE_STATE_COMMON; + + HRESULT hr = m_DeviceInstance->GetDevice()->CreatePlacedResource( + pBackingHeap, HeapOffset, &ResourceDesc, m_UsageState, nullptr, IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + throw HRError("GpuBuffer: CreatePlacedResource failed", hr); + } + + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + + if (initialData) + m_DeviceInstance->InitializeBuffer(*this, initialData, m_BufferSize); + +#ifdef RELEASE + (name); +#else + m_pResource->SetName(name.c_str()); +#endif + + CreateDerivedViews(); +} + +const D3D12_CPU_DESCRIPTOR_HANDLE &GpuBuffer::GetUAV(void) const +{ + return m_UAV; +} + +const D3D12_CPU_DESCRIPTOR_HANDLE &GpuBuffer::GetSRV(void) const +{ + return m_SRV; +} + +D3D12_GPU_VIRTUAL_ADDRESS GpuBuffer::RootConstantBufferView(void) const +{ + return m_GpuVirtualAddress; +} + +D3D12_CPU_DESCRIPTOR_HANDLE 
GpuBuffer::CreateConstantBufferView(uint32_t Offset, uint32_t Size) const +{ + if (Offset + Size > m_BufferSize) { + throw HRError("GpuBuffer: CreateConstantBufferView out of bounds"); + } + + Size = Math::AlignUp(Size, 16); + + D3D12_CONSTANT_BUFFER_VIEW_DESC CBVDesc; + CBVDesc.BufferLocation = m_GpuVirtualAddress + (size_t)Offset; + CBVDesc.SizeInBytes = Size; + + D3D12_CPU_DESCRIPTOR_HANDLE hCBV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_DeviceInstance->GetDevice()->CreateConstantBufferView(&CBVDesc, hCBV); + return hCBV; +} + +D3D12_VERTEX_BUFFER_VIEW GpuBuffer::VertexBufferView(size_t Offset, uint32_t Size, uint32_t Stride) const +{ + D3D12_VERTEX_BUFFER_VIEW VBView; + VBView.BufferLocation = m_GpuVirtualAddress + Offset; + VBView.SizeInBytes = Size; + VBView.StrideInBytes = Stride; + return VBView; +} + +D3D12_VERTEX_BUFFER_VIEW GpuBuffer::VertexBufferView(size_t BaseVertexIndex) const +{ + size_t Offset = BaseVertexIndex * m_ElementSize; + return VertexBufferView(Offset, (uint32_t)(m_BufferSize - Offset), m_ElementSize); +} + +D3D12_INDEX_BUFFER_VIEW GpuBuffer::IndexBufferView(size_t Offset, uint32_t Size, bool b32Bit) const +{ + D3D12_INDEX_BUFFER_VIEW IBView; + IBView.BufferLocation = m_GpuVirtualAddress + Offset; + IBView.Format = b32Bit ? 
DXGI_FORMAT_R32_UINT : DXGI_FORMAT_R16_UINT; + IBView.SizeInBytes = Size; + return IBView; +} + +D3D12_INDEX_BUFFER_VIEW GpuBuffer::IndexBufferView(size_t StartIndex) const +{ + size_t Offset = StartIndex * m_ElementSize; + return IndexBufferView(Offset, (uint32_t)(m_BufferSize - Offset), m_ElementSize == 4); +} + +size_t GpuBuffer::GetBufferSize() const +{ + return m_BufferSize; +} + +uint32_t GpuBuffer::GetElementCount() const +{ + return m_ElementCount; +} + +uint32_t GpuBuffer::GetElementSize() const +{ + return m_ElementSize; +} + +D3D12_RESOURCE_DESC GpuBuffer::DescribeBuffer(void) +{ + D3D12_RESOURCE_DESC Desc = {}; + Desc.Alignment = 0; + Desc.DepthOrArraySize = 1; + Desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + Desc.Flags = m_ResourceFlags; + Desc.Format = DXGI_FORMAT_UNKNOWN; + Desc.Height = 1; + Desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + Desc.MipLevels = 1; + Desc.SampleDesc.Count = 1; + Desc.SampleDesc.Quality = 0; + Desc.Width = (UINT64)m_BufferSize; + return Desc; +} + +void GpuBuffer::CreateDerivedViews(void) +{ + D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + SRVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + SRVDesc.Buffer.NumElements = (UINT)m_BufferSize / 4; + SRVDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + + if (m_SRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_SRV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_DeviceInstance->GetDevice()->CreateShaderResourceView(m_pResource.Get(), &SRVDesc, m_SRV); + + D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; + UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + UAVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + UAVDesc.Buffer.NumElements = (UINT)m_BufferSize / 4; + UAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + + if (m_UAV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_UAV = 
m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_DeviceInstance->GetDevice()->CreateUnorderedAccessView(m_pResource.Get(), nullptr, &UAVDesc, m_UAV); +} + +ByteAddressBuffer::ByteAddressBuffer(D3D12DeviceInstance *DeviceInstance) : GpuBuffer(DeviceInstance) {} + +void ByteAddressBuffer::CreateDerivedViews(void) +{ + D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + SRVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + SRVDesc.Buffer.NumElements = (UINT)m_BufferSize / 4; + SRVDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + + if (m_SRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_SRV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_DeviceInstance->GetDevice()->CreateShaderResourceView(m_pResource.Get(), &SRVDesc, m_SRV); + + D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc = {}; + UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + UAVDesc.Format = DXGI_FORMAT_R32_TYPELESS; + UAVDesc.Buffer.NumElements = (UINT)m_BufferSize / 4; + UAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + + if (m_UAV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_UAV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + m_DeviceInstance->GetDevice()->CreateUnorderedAccessView(m_pResource.Get(), nullptr, &UAVDesc, m_UAV); +} + +ReadbackBuffer::ReadbackBuffer(D3D12DeviceInstance *DeviceInstance) : GpuBuffer(DeviceInstance) {} + +ReadbackBuffer::~ReadbackBuffer() +{ + Destroy(); +} + +void ReadbackBuffer::Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize) +{ + Destroy(); + + m_ElementCount = NumElements; + m_ElementSize = ElementSize; + m_BufferSize = NumElements * ElementSize; + m_UsageState = D3D12_RESOURCE_STATE_COPY_DEST; + + // Create a readback buffer large enough to hold all texel data + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = 
D3D12_HEAP_TYPE_READBACK; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + // Readback buffers must be 1-dimensional, i.e. "buffer" not "texture2d" + D3D12_RESOURCE_DESC ResourceDesc = {}; + ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + ResourceDesc.Width = m_BufferSize; + ResourceDesc.Height = 1; + ResourceDesc.DepthOrArraySize = 1; + ResourceDesc.MipLevels = 1; + ResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + ResourceDesc.SampleDesc.Count = 1; + ResourceDesc.SampleDesc.Quality = 0; + ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommittedResource(&HeapProps, D3D12_HEAP_FLAG_NONE, + &ResourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, nullptr, + IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + auto remoteReason = m_DeviceInstance->GetDevice()->GetDeviceRemovedReason(); + throw HRError("ReadbackBuffer: CreateCommittedResource failed", remoteReason); + } + + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + +#ifdef RELEASE + (name); +#else + m_pResource->SetName(name.c_str()); +#endif +} + +void *ReadbackBuffer::Map(void) +{ + void *Memory; + CD3DX12_RANGE temp = CD3DX12_RANGE(0, m_BufferSize); + m_pResource->Map(0, &temp, &Memory); + return Memory; +} + +void ReadbackBuffer::Unmap(void) +{ + CD3DX12_RANGE temp = CD3DX12_RANGE(0, 0); + m_pResource->Unmap(0, &temp); +} + +void ReadbackBuffer::CreateDerivedViews(void) {} + +PixelBuffer::PixelBuffer() : m_Width(0), m_Height(0), m_ArraySize(0), m_Format(DXGI_FORMAT_UNKNOWN), m_BankRotation(0) +{ +} + +uint32_t PixelBuffer::GetWidth(void) const +{ + return m_Width; +} + +uint32_t PixelBuffer::GetHeight(void) const +{ + return m_Height; +} + +uint32_t PixelBuffer::GetDepth(void) const +{ + return m_ArraySize; +} + +const DXGI_FORMAT 
&PixelBuffer::GetFormat(void) const +{ + return m_Format; +} + +void PixelBuffer::SetBankRotation(uint32_t RotationAmount) +{ + (RotationAmount); +} + +D3D12_RESOURCE_DESC PixelBuffer::DescribeTex2D(uint32_t Width, uint32_t Height, uint32_t DepthOrArraySize, + uint32_t NumMips, DXGI_FORMAT Format, UINT Flags) +{ + m_Width = Width; + m_Height = Height; + m_ArraySize = DepthOrArraySize; + m_Format = Format; + + D3D12_RESOURCE_DESC Desc = {}; + Desc.Alignment = 0; + Desc.DepthOrArraySize = (UINT16)DepthOrArraySize; + Desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + Desc.Flags = (D3D12_RESOURCE_FLAGS)Flags; + Desc.Format = GetBaseFormat(); + Desc.Height = (UINT)Height; + Desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + Desc.MipLevels = (UINT16)NumMips; + Desc.SampleDesc.Count = 1; + Desc.SampleDesc.Quality = 0; + Desc.Width = (UINT64)Width; + return Desc; +} + +void PixelBuffer::AssociateWithResource(ID3D12Device *Device, const std::wstring &Name, ID3D12Resource *Resource, + D3D12_RESOURCE_STATES CurrentState) +{ + (Device); // Unused until we support multiple adapters + D3D12_RESOURCE_DESC ResourceDesc = Resource->GetDesc(); + + m_pResource.Set(Resource); + m_UsageState = CurrentState; + + m_Width = (uint32_t)ResourceDesc.Width; // We don't care about large virtual textures yet + m_Height = ResourceDesc.Height; + m_ArraySize = ResourceDesc.DepthOrArraySize; + m_Format = ResourceDesc.Format; + +#ifndef RELEASE + m_pResource->SetName(Name.c_str()); +#else + (Name); +#endif +} + +void PixelBuffer::CreateTextureResource(ID3D12Device *Device, const std::wstring &Name, + const D3D12_RESOURCE_DESC &ResourceDesc, D3D12_CLEAR_VALUE ClearValue, + D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr) +{ + Destroy(); + + (void)VidMemPtr; + + CD3DX12_HEAP_PROPERTIES HeapProps(D3D12_HEAP_TYPE_DEFAULT); + HRESULT hr = Device->CreateCommittedResource(&HeapProps, D3D12_HEAP_FLAG_NONE, &ResourceDesc, + D3D12_RESOURCE_STATE_COMMON, &ClearValue, + IID_PPV_ARGS(&m_pResource)); + if (FAILED(hr)) { + 
throw HRError("PixelBuffer: CreateCommittedResource failed", hr); + } + + m_UsageState = D3D12_RESOURCE_STATE_COMMON; + m_GpuVirtualAddress = D3D12_GPU_VIRTUAL_ADDRESS_NULL; + +#ifndef RELEASE + m_pResource->SetName(Name.c_str()); +#else + (Name); +#endif +} + +DXGI_FORMAT PixelBuffer::GetBaseFormat() +{ + switch (m_Format) { + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + return DXGI_FORMAT_R8G8B8A8_TYPELESS; + + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + return DXGI_FORMAT_B8G8R8A8_TYPELESS; + + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + return DXGI_FORMAT_B8G8R8X8_TYPELESS; + + // 32-bit Z w/ Stencil + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return DXGI_FORMAT_R32G8X24_TYPELESS; + + // No Stencil + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + return DXGI_FORMAT_R32_TYPELESS; + + // 24-bit Z + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return DXGI_FORMAT_R24G8_TYPELESS; + + // 16-bit Z w/o Stencil + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + return DXGI_FORMAT_R16_TYPELESS; + + default: + return m_Format; + } +} + +DXGI_FORMAT PixelBuffer::GetUAVFormat() +{ + switch (m_Format) { + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + return DXGI_FORMAT_R8G8B8A8_UNORM; + + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + return DXGI_FORMAT_B8G8R8A8_UNORM; + + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + return DXGI_FORMAT_B8G8R8X8_UNORM; + + case 
DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_R32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_D16_UNORM: + throw HRError("PixelBuffer: GetUAVFormat called with a depth stencil format."); + + default: + return m_Format; + } +} + +DXGI_FORMAT PixelBuffer::GetDSVFormat() +{ + switch (m_Format) { + // 32-bit Z w/ Stencil + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + + // No Stencil + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + return DXGI_FORMAT_D32_FLOAT; + + // 24-bit Z + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + + // 16-bit Z w/o Stencil + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + return DXGI_FORMAT_D16_UNORM; + + default: + return m_Format; + } +} + +DXGI_FORMAT PixelBuffer::GetDepthFormat() +{ + switch (m_Format) { + // 32-bit Z w/ Stencil + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + + // No Stencil + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + return DXGI_FORMAT_R32_FLOAT; + + // 24-bit Z + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case 
DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + + // 16-bit Z w/o Stencil + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + return DXGI_FORMAT_R16_UNORM; + + default: + return DXGI_FORMAT_UNKNOWN; + } +} + +DXGI_FORMAT PixelBuffer::GetStencilFormat() +{ + switch (m_Format) { + // 32-bit Z w/ Stencil + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return DXGI_FORMAT_X32_TYPELESS_G8X24_UINT; + + // 24-bit Z + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + return DXGI_FORMAT_X24_TYPELESS_G8_UINT; + + default: + return DXGI_FORMAT_UNKNOWN; + } +} + +size_t PixelBuffer::BytesPerPixel(DXGI_FORMAT Format) +{ + switch (Format) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 16; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 12; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + return 8; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case 
DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + return 4; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_A8P8: + case DXGI_FORMAT_B4G4R4A4_UNORM: + return 2; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_P8: + return 1; + + default: + return 0; + } +} + +DepthBuffer::DepthBuffer(D3D12DeviceInstance *DeviceInstance, float ClearDepth, uint8_t 
ClearStencil) + : m_DeviceInstance(DeviceInstance), + m_ClearDepth(ClearDepth), + m_ClearStencil(ClearStencil) +{ + m_hDSV[0].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_hDSV[1].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_hDSV[2].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_hDSV[3].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_hDepthSRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + m_hStencilSRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; +} + +DepthBuffer::~DepthBuffer() +{ + Destroy(); +} + +void DepthBuffer::Create(const std::wstring &Name, uint32_t Width, uint32_t Height, DXGI_FORMAT Format, + D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr) +{ + Create(Name, Width, Height, 1, Format, VidMemPtr); +} + +void DepthBuffer::Create(const std::wstring &Name, uint32_t Width, uint32_t Height, uint32_t Samples, + DXGI_FORMAT Format, D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr) +{ + D3D12_RESOURCE_DESC ResourceDesc = + DescribeTex2D(Width, Height, 1, 1, Format, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + ResourceDesc.SampleDesc.Count = Samples; + + D3D12_CLEAR_VALUE ClearValue = {}; + ClearValue.Format = Format; + CreateTextureResource(m_DeviceInstance->GetDevice(), Name, ResourceDesc, ClearValue, VidMemPtr); + CreateDerivedViews(m_DeviceInstance->GetDevice()); +} + +void DepthBuffer::Destroy() +{ + if (m_hDSV[0].ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[0].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_hDSV[1].ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[1].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_hDSV[2].ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[2].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_hDSV[3].ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[3].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_hDepthSRV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDepthSRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + if (m_hStencilSRV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hStencilSRV.ptr = 
D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + } + + GpuResource::Destroy(); +} + +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetDSV() const +{ + return m_hDSV[0]; +} +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetDSV_DepthReadOnly() const +{ + return m_hDSV[1]; +} +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetDSV_StencilReadOnly() const +{ + return m_hDSV[2]; +} +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetDSV_ReadOnly() const +{ + return m_hDSV[3]; +} +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetDepthSRV() const +{ + return m_hDepthSRV; +} +const D3D12_CPU_DESCRIPTOR_HANDLE &DepthBuffer::GetStencilSRV() const +{ + return m_hStencilSRV; +} + +float DepthBuffer::GetClearDepth() const +{ + return m_ClearDepth; +} +uint8_t DepthBuffer::GetClearStencil() const +{ + return m_ClearStencil; +} + +void DepthBuffer::CreateDerivedViews(ID3D12Device *Device) +{ + ID3D12Resource *Resource = m_pResource.Get(); + + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc; + dsvDesc.Format = GetDSVFormat(); + if (Resource->GetDesc().SampleDesc.Count == 1) { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2D.MipSlice = 0; + } else { + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + } + + if (m_hDSV[0].ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[0] = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + m_hDSV[1] = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + } + + dsvDesc.Flags = D3D12_DSV_FLAG_NONE; + Device->CreateDepthStencilView(Resource, &dsvDesc, m_hDSV[0]); + + dsvDesc.Flags = D3D12_DSV_FLAG_READ_ONLY_DEPTH; + Device->CreateDepthStencilView(Resource, &dsvDesc, m_hDSV[1]); + + DXGI_FORMAT stencilReadFormat = GetStencilFormat(); + if (stencilReadFormat != DXGI_FORMAT_UNKNOWN) { + if (m_hDSV[2].ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { + m_hDSV[2] = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + m_hDSV[3] = 
m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + } + + dsvDesc.Flags = D3D12_DSV_FLAG_READ_ONLY_STENCIL; + Device->CreateDepthStencilView(Resource, &dsvDesc, m_hDSV[2]); + + dsvDesc.Flags = D3D12_DSV_FLAG_READ_ONLY_DEPTH | D3D12_DSV_FLAG_READ_ONLY_STENCIL; + Device->CreateDepthStencilView(Resource, &dsvDesc, m_hDSV[3]); + } else { + m_hDSV[2] = m_hDSV[0]; + m_hDSV[3] = m_hDSV[1]; + } + + if (m_hDepthSRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_hDepthSRV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + // Create the shader resource view + D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; + SRVDesc.Format = GetDepthFormat(); + if (dsvDesc.ViewDimension == D3D12_DSV_DIMENSION_TEXTURE2D) { + SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + SRVDesc.Texture2D.MipLevels = 1; + } else { + SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; + } + SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + Device->CreateShaderResourceView(Resource, &SRVDesc, m_hDepthSRV); + + if (stencilReadFormat != DXGI_FORMAT_UNKNOWN) { + if (m_hStencilSRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_hStencilSRV = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + SRVDesc.Format = stencilReadFormat; + SRVDesc.Texture2D.PlaneSlice = 1; + + Device->CreateShaderResourceView(Resource, &SRVDesc, m_hStencilSRV); + } +} + +LinearAllocatorPageManager::LinearAllocatorPageManager(D3D12DeviceInstance *DeviceInstance, LinearAllocatorType Type) + : m_DeviceInstance(DeviceInstance), + m_AllocationType(Type) +{ +} + +LinearAllocationPage *LinearAllocatorPageManager::RequestPage(void) +{ + while (!m_RetiredPages.empty() && + m_DeviceInstance->GetCommandManager().IsFenceComplete(m_RetiredPages.front().first)) { + m_AvailablePages.push(m_RetiredPages.front().second); + m_RetiredPages.pop(); + } + + LinearAllocationPage *PagePtr = nullptr; + + if (!m_AvailablePages.empty()) { + PagePtr = 
m_AvailablePages.front(); + m_AvailablePages.pop(); + } else { + PagePtr = CreateNewPage(); + m_PagePool.emplace_back(PagePtr); + } + + return PagePtr; +} + +LinearAllocationPage *LinearAllocatorPageManager::CreateNewPage(size_t PageSize) +{ + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC ResourceDesc; + ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + ResourceDesc.Alignment = 0; + ResourceDesc.Height = 1; + ResourceDesc.DepthOrArraySize = 1; + ResourceDesc.MipLevels = 1; + ResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + ResourceDesc.SampleDesc.Count = 1; + ResourceDesc.SampleDesc.Quality = 0; + ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_RESOURCE_STATES DefaultUsage; + + if (m_AllocationType == kGpuExclusive) { + HeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + ResourceDesc.Width = PageSize == 0 ? kGpuAllocatorPageSize : PageSize; + ResourceDesc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + DefaultUsage = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } else { + HeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + ResourceDesc.Width = PageSize == 0 ? 
kCpuAllocatorPageSize : PageSize; + ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + DefaultUsage = D3D12_RESOURCE_STATE_GENERIC_READ; + } + + ID3D12Resource *pBuffer; + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &ResourceDesc, DefaultUsage, nullptr, IID_PPV_ARGS(&pBuffer)); + if (FAILED(hr)) { + throw HRError("LinearAllocatorPageManager: Failed to create a new allocation page."); + } + pBuffer->SetName(L"LinearAllocator Page"); + + return new LinearAllocationPage(pBuffer, DefaultUsage); +} + +void LinearAllocatorPageManager::DiscardPages(uint64_t FenceValue, const std::vector &UsedPages) +{ + for (auto iter = UsedPages.begin(); iter != UsedPages.end(); ++iter) + m_RetiredPages.push(std::make_pair(FenceValue, *iter)); +} + +void LinearAllocatorPageManager::FreeLargePages(uint64_t FenceValue, + const std::vector &LargePages) +{ + while (!m_DeletionQueue.empty() && + m_DeviceInstance->GetCommandManager().IsFenceComplete(m_DeletionQueue.front().first)) { + delete m_DeletionQueue.front().second; + m_DeletionQueue.pop(); + } + + for (auto iter = LargePages.begin(); iter != LargePages.end(); ++iter) { + (*iter)->Unmap(); + m_DeletionQueue.push(std::make_pair(FenceValue, *iter)); + } +} + +void LinearAllocatorPageManager::Destroy(void) +{ + m_PagePool.clear(); +} + +LinearAllocator::LinearAllocator(D3D12DeviceInstance *DeviceInstance, LinearAllocatorType Type) + : m_DeviceInstance(DeviceInstance), + m_AllocationType(Type), + m_PageSize(0), + m_CurOffset(~(size_t)0), + m_CurPage(nullptr) +{ + m_PageSize = (Type == kGpuExclusive ? kGpuAllocatorPageSize : kCpuAllocatorPageSize); +} + +DynAlloc LinearAllocator::Allocate(size_t SizeInBytes, size_t Alignment) +{ + const size_t AlignmentMask = Alignment - 1; + + // Assert that it's a power of two. 
+ if ((AlignmentMask & Alignment) != 0) { + throw HRError("LinearAllocator: Alignment must be a power of two."); + } + + // Align the allocation + const size_t AlignedSize = Math::AlignUpWithMask(SizeInBytes, AlignmentMask); + + if (AlignedSize > m_PageSize) + return AllocateLargePage(AlignedSize); + + m_CurOffset = Math::AlignUp(m_CurOffset, Alignment); + + if (m_CurOffset + AlignedSize > m_PageSize) { + if (m_CurPage == nullptr) { + throw HRError("LinearAllocator: Current page is null when trying to allocate a new page."); + } + m_RetiredPages.push_back(m_CurPage); + m_CurPage = nullptr; + } + + if (m_CurPage == nullptr) { + m_CurPage = m_DeviceInstance->GetPageManager(m_AllocationType)->RequestPage(); + m_CurOffset = 0; + } + + DynAlloc ret(*m_CurPage, m_CurOffset, AlignedSize); + ret.DataPtr = (uint8_t *)m_CurPage->m_CpuVirtualAddress + m_CurOffset; + ret.GpuAddress = m_CurPage->m_GpuVirtualAddress + m_CurOffset; + + m_CurOffset += AlignedSize; + + return ret; +} + +void LinearAllocator::CleanupUsedPages(uint64_t FenceID) +{ + if (m_CurPage != nullptr) { + m_RetiredPages.push_back(m_CurPage); + m_CurPage = nullptr; + m_CurOffset = 0; + } + + m_DeviceInstance->GetPageManager(m_AllocationType)->DiscardPages(FenceID, m_RetiredPages); + m_RetiredPages.clear(); + + m_DeviceInstance->GetPageManager(m_AllocationType)->FreeLargePages(FenceID, m_LargePageList); + m_LargePageList.clear(); +} + +DynAlloc LinearAllocator::AllocateLargePage(size_t SizeInBytes) +{ + LinearAllocationPage *OneOff = m_DeviceInstance->GetPageManager(m_AllocationType)->CreateNewPage(SizeInBytes); + m_LargePageList.push_back(OneOff); + + DynAlloc ret(*OneOff, 0, SizeInBytes); + ret.DataPtr = OneOff->m_CpuVirtualAddress; + ret.GpuAddress = OneOff->m_GpuVirtualAddress; + + return ret; +} + +GraphicsPSO::GraphicsPSO(D3D12DeviceInstance *DeviceInstance, const wchar_t *Name) : PSO(DeviceInstance, Name) +{ + ZeroMemory(&m_PSODesc, sizeof(m_PSODesc)); + m_PSODesc.NodeMask = 1; + m_PSODesc.SampleMask = 
0xFFFFFFFFu; + m_PSODesc.SampleDesc.Count = 1; + m_PSODesc.InputLayout.NumElements = 0; +} + +void GraphicsPSO::SetBlendState(const D3D12_BLEND_DESC &BlendDesc) +{ + m_PSODesc.BlendState = BlendDesc; +} + +void GraphicsPSO::SetRasterizerState(const D3D12_RASTERIZER_DESC &RasterizerDesc) +{ + m_PSODesc.RasterizerState = RasterizerDesc; +} + +void GraphicsPSO::SetDepthStencilState(const D3D12_DEPTH_STENCIL_DESC &DepthStencilDesc) +{ + m_PSODesc.DepthStencilState = DepthStencilDesc; +} + +void GraphicsPSO::SetSampleMask(UINT SampleMask) +{ + m_PSODesc.SampleMask = SampleMask; +} + +void GraphicsPSO::SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE TopologyType) +{ + if (TopologyType == D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED) { + throw HRError("GraphicsPSO: Can't draw with undefined topology."); + } + m_PSODesc.PrimitiveTopologyType = TopologyType; +} + +void GraphicsPSO::SetDepthTargetFormat(DXGI_FORMAT DSVFormat, UINT MsaaCount, UINT MsaaQuality) +{ + SetRenderTargetFormats(0, nullptr, DSVFormat, MsaaCount, MsaaQuality); +} + +void GraphicsPSO::SetRenderTargetFormat(DXGI_FORMAT RTVFormat, DXGI_FORMAT DSVFormat, UINT MsaaCount, UINT MsaaQuality) +{ + SetRenderTargetFormats(1, &RTVFormat, DSVFormat, MsaaCount, MsaaQuality); +} + +void GraphicsPSO::SetRenderTargetFormats(UINT NumRTVs, const DXGI_FORMAT *RTVFormats, DXGI_FORMAT DSVFormat, + UINT MsaaCount, UINT MsaaQuality) +{ + if (!(NumRTVs == 0 || RTVFormats != nullptr)) { + throw HRError("GraphicsPSO: RTVFormats pointer is null with non-zero NumRTVs."); + } + for (UINT i = 0; i < NumRTVs; ++i) { + if (RTVFormats[i] == DXGI_FORMAT_UNKNOWN) { + throw HRError("GraphicsPSO: RTVFormats contains an UNKNOWN format."); + } + m_PSODesc.RTVFormats[i] = RTVFormats[i]; + } + for (UINT i = NumRTVs; i < m_PSODesc.NumRenderTargets; ++i) + m_PSODesc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; + m_PSODesc.NumRenderTargets = NumRTVs; + m_PSODesc.DSVFormat = DSVFormat; + m_PSODesc.SampleDesc.Count = MsaaCount; + 
m_PSODesc.SampleDesc.Quality = MsaaQuality; +} + +void GraphicsPSO::SetInputLayout(UINT NumElements, const D3D12_INPUT_ELEMENT_DESC *pInputElementDescs) +{ + m_PSODesc.InputLayout.NumElements = NumElements; + + if (NumElements > 0) { + D3D12_INPUT_ELEMENT_DESC *NewElements = + (D3D12_INPUT_ELEMENT_DESC *)malloc(sizeof(D3D12_INPUT_ELEMENT_DESC) * NumElements); + memcpy(NewElements, pInputElementDescs, NumElements * sizeof(D3D12_INPUT_ELEMENT_DESC)); + m_InputLayouts.reset((const D3D12_INPUT_ELEMENT_DESC *)NewElements); + } else + m_InputLayouts = nullptr; +} + +void GraphicsPSO::SetPrimitiveRestart(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBProps) +{ + m_PSODesc.IBStripCutValue = IBProps; +} + +void GraphicsPSO::SetVertexShader(const void *Binary, size_t Size) +{ + m_PSODesc.VS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void GraphicsPSO::SetPixelShader(const void *Binary, size_t Size) +{ + m_PSODesc.PS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void GraphicsPSO::SetGeometryShader(const void *Binary, size_t Size) +{ + m_PSODesc.GS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void GraphicsPSO::SetHullShader(const void *Binary, size_t Size) +{ + m_PSODesc.HS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void GraphicsPSO::SetDomainShader(const void *Binary, size_t Size) +{ + m_PSODesc.DS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void GraphicsPSO::SetVertexShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.VS = Binary; +} + +void GraphicsPSO::SetPixelShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.PS = Binary; +} + +void GraphicsPSO::SetGeometryShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.GS = Binary; +} + +void GraphicsPSO::SetHullShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.HS = Binary; +} + +void GraphicsPSO::SetDomainShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.DS = Binary; +} + +void GraphicsPSO::Finalize() +{ + m_PSODesc.pRootSignature 
= m_RootSignature->GetSignature(); + + m_PSODesc.InputLayout.pInputElementDescs = nullptr; + size_t HashCode = Utility::HashState(&m_PSODesc); + HashCode = Utility::HashState(m_InputLayouts.get(), m_PSODesc.InputLayout.NumElements, HashCode); + m_PSODesc.InputLayout.pInputElementDescs = m_InputLayouts.get(); + + ComPtr PSORef = nullptr; + bool firstCompile = false; + { + auto iter = m_DeviceInstance->GetGraphicsPSOHashMap().find(HashCode); + + // Reserve space so the next inquiry will find that someone got here first. + if (iter == m_DeviceInstance->GetGraphicsPSOHashMap().end()) { + firstCompile = true; + PSORef = m_DeviceInstance->GetGraphicsPSOHashMap()[HashCode]; + } else + PSORef = iter->second; + } + if (firstCompile) { + HRESULT hr = + m_DeviceInstance->GetDevice()->CreateGraphicsPipelineState(&m_PSODesc, IID_PPV_ARGS(&m_PSO)); + if (FAILED(hr)) { + throw HRError("GraphicsPSO: Failed to create graphics pipeline state object."); + } + m_DeviceInstance->GetGraphicsPSOHashMap()[HashCode].Set(m_PSO); + m_PSO->SetName(m_Name); + } else { + while (PSORef == nullptr) + std::this_thread::yield(); + m_PSO = PSORef; + } +} + +ComputePSO::ComputePSO(D3D12DeviceInstance *DeviceInstance, const wchar_t *Name) : PSO(DeviceInstance, Name) +{ + ZeroMemory(&m_PSODesc, sizeof(m_PSODesc)); + m_PSODesc.NodeMask = 1; +} + +void ComputePSO::SetComputeShader(const void *Binary, size_t Size) +{ + m_PSODesc.CS = CD3DX12_SHADER_BYTECODE(const_cast(Binary), Size); +} + +void ComputePSO::SetComputeShader(const D3D12_SHADER_BYTECODE &Binary) +{ + m_PSODesc.CS = Binary; +} + +void ComputePSO::Finalize() +{ + m_PSODesc.pRootSignature = m_RootSignature->GetSignature(); + size_t HashCode = Utility::HashState(&m_PSODesc); + + ComPtr PSORef = nullptr; + bool firstCompile = false; + { + auto iter = m_DeviceInstance->GetComputePSOHashMap().find(HashCode); + + // Reserve space so the next inquiry will find that someone got here first. 
+ if (iter == m_DeviceInstance->GetComputePSOHashMap().end()) { + firstCompile = true; + PSORef = m_DeviceInstance->GetComputePSOHashMap()[HashCode]; + } else + PSORef = iter->second; + } + + if (firstCompile) { + HRESULT hr = + m_DeviceInstance->GetDevice()->CreateComputePipelineState(&m_PSODesc, IID_PPV_ARGS(&m_PSO)); + if (FAILED(hr)) { + throw HRError("ComputePSO: Failed to create compute pipeline state object."); + } + m_DeviceInstance->GetComputePSOHashMap()[HashCode].Set(m_PSO); + m_PSO->SetName(m_Name); + } else { + while (PSORef == nullptr) + std::this_thread::yield(); + m_PSO = PSORef; + } +} + +CommandAllocatorPool::CommandAllocatorPool(D3D12_COMMAND_LIST_TYPE Type) + : m_cCommandListType(Type), + m_DeviceInstance(nullptr) +{ +} + +CommandAllocatorPool::~CommandAllocatorPool() +{ + Shutdown(); +} + +void CommandAllocatorPool::Create(D3D12DeviceInstance *DeviceInstance) +{ + m_DeviceInstance = DeviceInstance; +} + +void CommandAllocatorPool::Shutdown() +{ + for (size_t i = 0; i < m_AllocatorPool.size(); ++i) + m_AllocatorPool[i]->Release(); + + m_AllocatorPool.clear(); +} + +ID3D12CommandAllocator *CommandAllocatorPool::RequestAllocator(uint64_t CompletedFenceValue) +{ + ID3D12CommandAllocator *pAllocator = nullptr; + + if (!m_ReadyAllocators.empty()) { + std::pair &AllocatorPair = m_ReadyAllocators.front(); + + if (AllocatorPair.first <= CompletedFenceValue) { + pAllocator = AllocatorPair.second; + HRESULT hr = pAllocator->Reset(); + if (FAILED(hr)) { + throw HRError("CommandAllocatorPool: Failed to reset command allocator for reuse.", hr); + } + m_ReadyAllocators.pop(); + } + } + + // If no allocator's were ready to be reused, create a new one + if (pAllocator == nullptr) { + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommandAllocator(m_cCommandListType, + IID_PPV_ARGS(&pAllocator)); + if (FAILED(hr)) { + throw HRError("CommandAllocatorPool: Failed to create a new command allocator.", hr); + } + + wchar_t AllocatorName[32]; + 
swprintf(AllocatorName, 32, L"CommandAllocator %zu", m_AllocatorPool.size()); + pAllocator->SetName(AllocatorName); + m_AllocatorPool.push_back(pAllocator); + } + + return pAllocator; +} + +void CommandAllocatorPool::DiscardAllocator(uint64_t FenceValue, ID3D12CommandAllocator *Allocator) +{ + m_ReadyAllocators.push(std::make_pair(FenceValue, Allocator)); +} + +inline size_t CommandAllocatorPool::Size() +{ + return m_AllocatorPool.size(); +} + +CommandQueue::CommandQueue(D3D12_COMMAND_LIST_TYPE Type) + : m_Type(Type), + m_CommandQueue(nullptr), + m_pFence(nullptr), + m_NextFenceValue((uint64_t)Type << 56 | 1), + m_LastCompletedFenceValue((uint64_t)Type << 56), + m_AllocatorPool(std::make_unique(m_Type)) +{ +} + +CommandQueue::~CommandQueue() +{ + Shutdown(); +} + +void CommandQueue::Create(D3D12DeviceInstance *DeviceInstance) +{ + m_DeviceInstance = DeviceInstance; + ID3D12Device *pDevice = m_DeviceInstance->GetDevice(); + + D3D12_COMMAND_QUEUE_DESC QueueDesc = {}; + QueueDesc.Type = m_Type; + QueueDesc.NodeMask = 1; + HRESULT hr = pDevice->CreateCommandQueue(&QueueDesc, IID_PPV_ARGS(&m_CommandQueue)); + if (FAILED(hr)) { + throw HRError("CommandQueue: Failed to create command queue.", hr); + } + + m_CommandQueue->SetName(L"CommandListManager::m_CommandQueue"); + + hr = pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_pFence)); + if (FAILED(hr)) { + throw HRError("CommandQueue: Failed to create fence for command queue.", hr); + } + + m_pFence->SetName(L"CommandListManager::m_pFence"); + m_pFence->Signal((uint64_t)m_Type << 56); + + m_FenceEventHandle = CreateEvent(nullptr, false, false, nullptr); + if (m_FenceEventHandle == NULL) { + throw HRError("CommandQueue: Failed to create fence event handle."); + } + + m_AllocatorPool->Create(m_DeviceInstance); +} + +void CommandQueue::Shutdown() +{ + if (m_CommandQueue == nullptr) + return; + + m_AllocatorPool->Shutdown(); + + CloseHandle(m_FenceEventHandle); + + m_pFence->Release(); + m_pFence = nullptr; + + 
m_CommandQueue->Release(); + m_CommandQueue = nullptr; +} + +inline bool CommandQueue::IsReady() +{ + return m_CommandQueue != nullptr; +} + +uint64_t CommandQueue::IncrementFence(void) +{ + m_CommandQueue->Signal(m_pFence, m_NextFenceValue); + return m_NextFenceValue++; +} + +bool CommandQueue::IsFenceComplete(uint64_t FenceValue) +{ // Avoid querying the fence value by testing against the last one seen. + // The max() is to protect against an unlikely race condition that could cause the last + // completed fence value to regress. + if (FenceValue > m_LastCompletedFenceValue) + m_LastCompletedFenceValue = std::max(m_LastCompletedFenceValue, m_pFence->GetCompletedValue()); + + return FenceValue <= m_LastCompletedFenceValue; +} + +void CommandQueue::StallForFence(uint64_t FenceValue) +{ + CommandQueue &Producer = + m_DeviceInstance->GetCommandManager().GetQueue((D3D12_COMMAND_LIST_TYPE)(FenceValue >> 56)); + m_CommandQueue->Wait(Producer.m_pFence, FenceValue); +} + +void CommandQueue::StallForProducer(CommandQueue &Producer) +{ + m_CommandQueue->Wait(Producer.m_pFence, Producer.m_NextFenceValue - 1); +} + +void CommandQueue::WaitForFence(uint64_t FenceValue) +{ + if (IsFenceComplete(FenceValue)) + return; + + // TODO: Think about how this might affect a multi-threaded situation. Suppose thread A + // wants to wait for fence 100, then thread B comes along and wants to wait for 99. If + // the fence can only have one event set on completion, then thread B has to wait for + // 100 before it knows 99 is ready. Maybe insert sequential events? 
+ { + m_pFence->SetEventOnCompletion(FenceValue, m_FenceEventHandle); + WaitForSingleObject(m_FenceEventHandle, INFINITE); + m_LastCompletedFenceValue = FenceValue; + } +} + +void CommandQueue::WaitForIdle(void) +{ + WaitForFence(IncrementFence()); +} + +ID3D12CommandQueue *CommandQueue::GetCommandQueue() +{ + return m_CommandQueue; +} + +uint64_t CommandQueue::GetNextFenceValue() +{ + return m_NextFenceValue; +} + +uint64_t CommandQueue::ExecuteCommandList(ID3D12CommandList *List) +{ + HRESULT hr = ((ID3D12GraphicsCommandList *)List)->Close(); + if (FAILED(hr)) { + auto removeReason = m_DeviceInstance->GetDevice()->GetDeviceRemovedReason(); + throw HRError("CommandQueue: Failed to close command list before execution.", removeReason); + } + + // Kickoff the command list + m_CommandQueue->ExecuteCommandLists(1, &List); + + // Signal the next fence value (with the GPU) + m_CommandQueue->Signal(m_pFence, m_NextFenceValue); + + // And increment the fence value. + return m_NextFenceValue++; +} + +ID3D12CommandAllocator *CommandQueue::RequestAllocator(void) +{ + uint64_t CompletedFence = m_pFence->GetCompletedValue(); + + return m_AllocatorPool->RequestAllocator(CompletedFence); +} + +void CommandQueue::DiscardAllocator(uint64_t FenceValueForReset, ID3D12CommandAllocator *Allocator) +{ + m_AllocatorPool->DiscardAllocator(FenceValueForReset, Allocator); +} + +CommandListManager::CommandListManager() : m_DeviceInstance(nullptr) {} + +CommandListManager::~CommandListManager() +{ + Shutdown(); +} + +void CommandListManager::Create(D3D12DeviceInstance *DeviceInstance) +{ + m_DeviceInstance = DeviceInstance; + ID3D12Device *pDevice = m_DeviceInstance->GetDevice(); + m_GraphicsQueue = std::make_unique(D3D12_COMMAND_LIST_TYPE::D3D12_COMMAND_LIST_TYPE_DIRECT); + m_GraphicsQueue->Create(m_DeviceInstance); + + m_ComputeQueue = std::make_unique(D3D12_COMMAND_LIST_TYPE::D3D12_COMMAND_LIST_TYPE_COMPUTE); + m_ComputeQueue->Create(m_DeviceInstance); + + m_CopyQueue = 
std::make_unique(D3D12_COMMAND_LIST_TYPE::D3D12_COMMAND_LIST_TYPE_COPY); + m_CopyQueue->Create(m_DeviceInstance); +} + +void CommandListManager::Shutdown() +{ + m_GraphicsQueue->Shutdown(); + m_ComputeQueue->Shutdown(); + m_CopyQueue->Shutdown(); +} + +CommandQueue &CommandListManager::GetGraphicsQueue(void) +{ + return *m_GraphicsQueue; +} + +CommandQueue &CommandListManager::GetComputeQueue(void) +{ + return *m_ComputeQueue; +} + +CommandQueue &CommandListManager::GetCopyQueue(void) +{ + return *m_CopyQueue; +} + +CommandQueue &CommandListManager::GetQueue(D3D12_COMMAND_LIST_TYPE Type) +{ + switch (Type) { + case D3D12_COMMAND_LIST_TYPE_COMPUTE: + return *m_ComputeQueue; + case D3D12_COMMAND_LIST_TYPE_COPY: + return *m_CopyQueue; + default: + return *m_GraphicsQueue; + } +} + +ID3D12CommandQueue *CommandListManager::GetCommandQueue() +{ + return m_GraphicsQueue->GetCommandQueue(); +} + +void CommandListManager::CreateNewCommandList(D3D12_COMMAND_LIST_TYPE Type, ID3D12GraphicsCommandList **List, + ID3D12CommandAllocator **Allocator) +{ + switch (Type) { + case D3D12_COMMAND_LIST_TYPE_DIRECT: + *Allocator = m_GraphicsQueue->RequestAllocator(); + break; + case D3D12_COMMAND_LIST_TYPE_BUNDLE: + break; + case D3D12_COMMAND_LIST_TYPE_COMPUTE: + *Allocator = m_ComputeQueue->RequestAllocator(); + break; + case D3D12_COMMAND_LIST_TYPE_COPY: + *Allocator = m_CopyQueue->RequestAllocator(); + break; + } + + HRESULT hr = m_DeviceInstance->GetDevice()->CreateCommandList(1, Type, *Allocator, nullptr, IID_PPV_ARGS(List)); + if (FAILED(hr)) { + auto removeReason = m_DeviceInstance->GetDevice()->GetDeviceRemovedReason(); + __debugbreak(); + throw HRError("CommandListManager: Failed to create a new command list.", hr); + } + + (*List)->SetName(L"CommandList"); +} + +bool CommandListManager::IsFenceComplete(uint64_t FenceValue) +{ + return GetQueue(D3D12_COMMAND_LIST_TYPE(FenceValue >> 56)).IsFenceComplete(FenceValue); +} + +void CommandListManager::WaitForFence(uint64_t FenceValue) 
+{ + CommandQueue &Producer = + m_DeviceInstance->GetCommandManager().GetQueue((D3D12_COMMAND_LIST_TYPE)(FenceValue >> 56)); + Producer.WaitForFence(FenceValue); +} + +void CommandListManager::IdleGPU(void) +{ + m_GraphicsQueue->WaitForIdle(); + m_ComputeQueue->WaitForIdle(); + m_CopyQueue->WaitForIdle(); +} + +CommandContext::CommandContext(D3D12DeviceInstance *DeviceInstance, D3D12_COMMAND_LIST_TYPE Type) + : m_DeviceInstance(DeviceInstance), + m_Type(Type), + m_DynamicViewDescriptorHeap(m_DeviceInstance, *this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV), + m_DynamicSamplerDescriptorHeap(m_DeviceInstance, *this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER), + m_CpuLinearAllocator(std::make_unique(DeviceInstance, kCpuWritable)), + m_GpuLinearAllocator(std::make_unique(DeviceInstance, kGpuExclusive)) +{ + m_CommandList = nullptr; + m_CurrentAllocator = nullptr; + ZeroMemory(m_CurrentDescriptorHeaps, sizeof(m_CurrentDescriptorHeaps)); + + m_CurGraphicsRootSignature = nullptr; + m_CurComputeRootSignature = nullptr; + m_CurPipelineState = nullptr; + m_NumBarriersToFlush = 0; +} + +CommandContext::~CommandContext(void) +{ + if (m_CommandList != nullptr) + m_CommandList->Release(); +} + +void CommandContext::Reset(void) +{ + if (m_CommandList == nullptr) { + throw HRError("Trying to reset a command list that is null."); + } + if (m_CurrentAllocator != nullptr) { + throw HRError("Trying to reset a command list with no allocator set."); + } + + m_CurrentAllocator = m_DeviceInstance->GetCommandManager().GetQueue(m_Type).RequestAllocator(); + m_CommandList->Reset(m_CurrentAllocator, nullptr); + + m_CurGraphicsRootSignature = nullptr; + m_CurComputeRootSignature = nullptr; + m_CurPipelineState = nullptr; + m_NumBarriersToFlush = 0; + + BindDescriptorHeaps(); +} + +uint64_t CommandContext::Flush(bool WaitForCompletion) +{ + FlushResourceBarriers(); + + if (m_CurrentAllocator == nullptr) { + throw HRError("Trying to flush a command list with no allocator set."); + } + + uint64_t 
FenceValue = m_DeviceInstance->GetCommandManager().GetQueue(m_Type).ExecuteCommandList(m_CommandList); + + if (WaitForCompletion) + m_DeviceInstance->GetCommandManager().WaitForFence(FenceValue); + + m_CommandList->Reset(m_CurrentAllocator, nullptr); + + if (m_CurGraphicsRootSignature) { + m_CommandList->SetGraphicsRootSignature(m_CurGraphicsRootSignature); + } + if (m_CurComputeRootSignature) { + m_CommandList->SetComputeRootSignature(m_CurComputeRootSignature); + } + if (m_CurPipelineState) { + m_CommandList->SetPipelineState(m_CurPipelineState); + } + + BindDescriptorHeaps(); + + return FenceValue; +} + +uint64_t CommandContext::Finish(bool WaitForCompletion) +{ + FlushResourceBarriers(); + + if (m_CurrentAllocator == nullptr) { + throw HRError("Trying to finish a command list with no allocator set."); + } + + CommandQueue &Queue = m_DeviceInstance->GetCommandManager().GetQueue(m_Type); + + uint64_t FenceValue = Queue.ExecuteCommandList(m_CommandList); + Queue.DiscardAllocator(FenceValue, m_CurrentAllocator); + m_CurrentAllocator = nullptr; + + m_CpuLinearAllocator->CleanupUsedPages(FenceValue); + m_GpuLinearAllocator->CleanupUsedPages(FenceValue); + m_DynamicViewDescriptorHeap.CleanupUsedHeaps(FenceValue); + m_DynamicSamplerDescriptorHeap.CleanupUsedHeaps(FenceValue); + + if (WaitForCompletion) + m_DeviceInstance->GetCommandManager().WaitForFence(FenceValue); + + m_DeviceInstance->GetContextManager().FreeContext(this); + + return FenceValue; +} + +void CommandContext::Initialize(void) +{ + m_DeviceInstance->GetCommandManager().CreateNewCommandList(m_Type, &m_CommandList, &m_CurrentAllocator); +} + +ID3D12GraphicsCommandList *CommandContext::GetCommandList() +{ + return m_CommandList; +} + +void CommandContext::CopyBuffer(GpuResource &Dest, GpuResource &Src) +{ + TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST); + TransitionResource(Src, D3D12_RESOURCE_STATE_COPY_SOURCE); + FlushResourceBarriers(); + m_CommandList->CopyResource(Dest.GetResource(), 
Src.GetResource()); +} + +void CommandContext::CopyBufferRegion(GpuResource &Dest, size_t DestOffset, GpuResource &Src, size_t SrcOffset, + size_t NumBytes) +{ + TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST); + TransitionResource(Src, D3D12_RESOURCE_STATE_COPY_SOURCE); + FlushResourceBarriers(); + m_CommandList->CopyBufferRegion(Dest.GetResource(), DestOffset, Src.GetResource(), SrcOffset, NumBytes); +} + +void CommandContext::CopySubresource(GpuResource &Dest, UINT DestSubIndex, GpuResource &Src, UINT SrcSubIndex) +{ + FlushResourceBarriers(); + + D3D12_TEXTURE_COPY_LOCATION DestLocation = {Dest.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + DestSubIndex}; + + D3D12_TEXTURE_COPY_LOCATION SrcLocation = {Src.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + SrcSubIndex}; + + m_CommandList->CopyTextureRegion(&DestLocation, 0, 0, 0, &SrcLocation, nullptr); +} + +void CommandContext::CopyTextureRegion(GpuResource &Dest, UINT x, UINT y, UINT z, GpuResource &Source, RECT &Rect) +{ + TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST); + TransitionResource(Source, D3D12_RESOURCE_STATE_COPY_SOURCE); + FlushResourceBarriers(); + + D3D12_TEXTURE_COPY_LOCATION destLoc = CD3DX12_TEXTURE_COPY_LOCATION(Dest.GetResource(), 0); + D3D12_TEXTURE_COPY_LOCATION srcLoc = CD3DX12_TEXTURE_COPY_LOCATION(Source.GetResource(), 0); + + D3D12_BOX box = {}; + box.back = 1; + box.left = Rect.left; + box.right = Rect.right; + box.top = Rect.top; + box.bottom = Rect.bottom; + + m_CommandList->CopyTextureRegion(&destLoc, x, y, z, &srcLoc, &box); +} + +void CommandContext::UpdateTexture(GpuResource &Dest, UploadBuffer &buffer) +{ + TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST); + FlushResourceBarriers(); + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedTextureDesc; + D3D12_TEXTURE_COPY_LOCATION srcLocation; + D3D12_TEXTURE_COPY_LOCATION dstLocation; + UINT NumRows, RowPitch; + UINT64 RowLength; + + auto texDesc = Dest.GetResource()->GetDesc(); + UINT64 
TotalBytes; + + m_DeviceInstance->GetDevice()->GetCopyableFootprints(&texDesc, 0, 1, 0, &placedTextureDesc, &NumRows, + &RowLength, &TotalBytes); + RowPitch = placedTextureDesc.Footprint.RowPitch; + + dstLocation.pResource = Dest.GetResource(); + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLocation.SubresourceIndex = 0; + + srcLocation.pResource = buffer.GetResource(); + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + srcLocation.PlacedFootprint = placedTextureDesc; + + m_CommandList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, NULL); +} + +uint32_t CommandContext::ReadbackTexture(ReadbackBuffer &DstBuffer, GpuResource &SrcBuffer) +{ + uint64_t CopySize = 0; + + // The footprint may depend on the device of the resource, but we assume there is only one device. + D3D12_PLACED_SUBRESOURCE_FOOTPRINT PlacedFootprint; + + auto resourceDesc = SrcBuffer.GetResource()->GetDesc(); + m_DeviceInstance->GetDevice()->GetCopyableFootprints(&resourceDesc, 0, 1, 0, &PlacedFootprint, nullptr, nullptr, + &CopySize); + + DstBuffer.Create(L"Readback", (uint32_t)CopySize, 1); + + TransitionResource(SrcBuffer, D3D12_RESOURCE_STATE_COPY_SOURCE, true); + + auto DescLocation = CD3DX12_TEXTURE_COPY_LOCATION(DstBuffer.GetResource(), PlacedFootprint); + auto SrcLocation = CD3DX12_TEXTURE_COPY_LOCATION(SrcBuffer.GetResource(), 0); + m_CommandList->CopyTextureRegion(&DescLocation, 0, 0, 0, &SrcLocation, nullptr); + + return PlacedFootprint.Footprint.RowPitch; +} + +DynAlloc CommandContext::ReserveUploadMemory(size_t SizeInBytes) +{ + return m_CpuLinearAllocator->Allocate(SizeInBytes); +} + +void CommandContext::WriteBuffer(GpuResource &Dest, size_t DestOffset, const void *BufferData, size_t NumBytes) +{ + if (BufferData == nullptr) { + return; + } + + if (!Math::IsAligned(BufferData, 16)) { + throw HRError("BufferData pointer passed to WriteBuffer must be 16-byte aligned."); + } + DynAlloc TempSpace = m_CpuLinearAllocator->Allocate(NumBytes, 512); 
+ SIMDMemCopy(TempSpace.DataPtr, BufferData, Math::DivideByMultiple(NumBytes, 16)); + CopyBufferRegion(Dest, DestOffset, TempSpace.Buffer, TempSpace.Offset, NumBytes); +} + +void CommandContext::TransitionResource(GpuResource &Resource, D3D12_RESOURCE_STATES NewState, bool FlushImmediate) +{ + D3D12_RESOURCE_STATES OldState = Resource.m_UsageState; + + if (m_Type == D3D12_COMMAND_LIST_TYPE_COMPUTE) { + if ((OldState & VALID_COMPUTE_QUEUE_RESOURCE_STATES) != OldState) { + throw HRError("Invalid resource state for compute command queue."); + } + + if ((NewState & VALID_COMPUTE_QUEUE_RESOURCE_STATES) != NewState) { + throw HRError("Invalid resource state for compute command queue."); + } + } + + if (OldState != NewState) { + if (m_NumBarriersToFlush >= kMaxNumDescriptorTables) { + throw HRError("Exceeded arbitrary limit on buffered barriers"); + } + D3D12_RESOURCE_BARRIER &BarrierDesc = m_ResourceBarrierBuffer[m_NumBarriersToFlush++]; + + BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + BarrierDesc.Transition.pResource = Resource.GetResource(); + BarrierDesc.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + BarrierDesc.Transition.StateBefore = OldState; + BarrierDesc.Transition.StateAfter = NewState; + + // Check to see if we already started the transition + if (NewState == Resource.m_TransitioningState) { + BarrierDesc.Flags = D3D12_RESOURCE_BARRIER_FLAG_END_ONLY; + Resource.m_TransitioningState = (D3D12_RESOURCE_STATES)-1; + } else + BarrierDesc.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + + Resource.m_UsageState = NewState; + } else if (NewState == D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + InsertUAVBarrier(Resource, FlushImmediate); + + if (FlushImmediate || m_NumBarriersToFlush == kMaxNumDescriptorTables) + FlushResourceBarriers(); +} + +void CommandContext::BeginResourceTransition(GpuResource &Resource, D3D12_RESOURCE_STATES NewState, bool FlushImmediate) +{ + // If it's already transitioning, finish that transition + if 
(Resource.m_TransitioningState != (D3D12_RESOURCE_STATES)-1)
+		TransitionResource(Resource, Resource.m_TransitioningState);
+
+	D3D12_RESOURCE_STATES OldState = Resource.m_UsageState;
+
+	if (OldState != NewState) {
+		if (m_NumBarriersToFlush >= kMaxNumDescriptorTables) {
+			throw HRError("Exceeded arbitrary limit on buffered barriers");
+		}
+		D3D12_RESOURCE_BARRIER &BarrierDesc = m_ResourceBarrierBuffer[m_NumBarriersToFlush++];
+
+		BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+		BarrierDesc.Transition.pResource = Resource.GetResource();
+		BarrierDesc.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+		BarrierDesc.Transition.StateBefore = OldState;
+		BarrierDesc.Transition.StateAfter = NewState;
+
+		BarrierDesc.Flags = D3D12_RESOURCE_BARRIER_FLAG_BEGIN_ONLY;
+
+		// m_UsageState is left unchanged here; only the pending target state is recorded.
+		Resource.m_TransitioningState = NewState;
+	}
+
+	if (FlushImmediate || m_NumBarriersToFlush == kMaxNumDescriptorTables)
+		FlushResourceBarriers();
+}
+
+// Queues a UAV barrier for Resource and submits the queued barriers when FlushImmediate is set.
+// Throws HRError when the barrier buffer is already full.
+void CommandContext::InsertUAVBarrier(GpuResource &Resource, bool FlushImmediate)
+{
+	if (m_NumBarriersToFlush >= kMaxNumDescriptorTables) {
+		throw HRError("Exceeded arbitrary limit on buffered barriers");
+	}
+	D3D12_RESOURCE_BARRIER &BarrierDesc = m_ResourceBarrierBuffer[m_NumBarriersToFlush++];
+
+	BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
+	BarrierDesc.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+	BarrierDesc.UAV.pResource = Resource.GetResource();
+
+	if (FlushImmediate)
+		FlushResourceBarriers();
+}
+
+// Queues an aliasing barrier from Before to After and submits the queued barriers when
+// FlushImmediate is set. Throws HRError when the barrier buffer is already full.
+void CommandContext::InsertAliasBarrier(GpuResource &Before, GpuResource &After, bool FlushImmediate)
+{
+	if (m_NumBarriersToFlush >= kMaxNumDescriptorTables) {
+		throw HRError("Exceeded arbitrary limit on buffered barriers");
+	}
+	D3D12_RESOURCE_BARRIER &BarrierDesc = m_ResourceBarrierBuffer[m_NumBarriersToFlush++];
+
+	BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING;
+	BarrierDesc.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
+	BarrierDesc.Aliasing.pResourceBefore = Before.GetResource();
+
BarrierDesc.Aliasing.pResourceAfter = After.GetResource();
+
+	if (FlushImmediate)
+		FlushResourceBarriers();
+}
+
+// Submits all buffered barriers with one ResourceBarrier call and resets the counter.
+// Does nothing when no barriers are pending.
+inline void CommandContext::FlushResourceBarriers(void)
+{
+	if (m_NumBarriersToFlush > 0) {
+		m_CommandList->ResourceBarrier(m_NumBarriersToFlush, m_ResourceBarrierBuffer);
+		m_NumBarriersToFlush = 0;
+	}
+}
+
+// Records a timestamp query (EndQuery with D3D12_QUERY_TYPE_TIMESTAMP) at QueryIdx of pQueryHeap.
+void CommandContext::InsertTimeStamp(ID3D12QueryHeap *pQueryHeap, uint32_t QueryIdx)
+{
+	m_CommandList->EndQuery(pQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, QueryIdx);
+}
+
+// Resolves timestamp queries [0, NumQueries) of pQueryHeap into pReadbackHeap at offset 0.
+void CommandContext::ResolveTimeStamps(ID3D12Resource *pReadbackHeap, ID3D12QueryHeap *pQueryHeap, uint32_t NumQueries)
+{
+	m_CommandList->ResolveQueryData(pQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, NumQueries, pReadbackHeap, 0);
+}
+
+// PIX event hook; currently a no-op.
+void CommandContext::PIXBeginEvent(const wchar_t *label) {}
+
+// PIX event hook; currently a no-op.
+void CommandContext::PIXEndEvent(void) {}
+
+// PIX marker hook; currently a no-op.
+void CommandContext::PIXSetMarker(const wchar_t *label) {}
+
+// Caches HeapPtr as the current heap of the given type and rebinds the heaps only if it changed.
+void CommandContext::SetDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE Type, ID3D12DescriptorHeap *HeapPtr)
+{
+	if (m_CurrentDescriptorHeaps[Type] != HeapPtr) {
+		m_CurrentDescriptorHeaps[Type] = HeapPtr;
+		BindDescriptorHeaps();
+	}
+}
+
+// Caches HeapCount (type, heap) pairs and rebinds the heaps once if any cached entry changed.
+void CommandContext::SetDescriptorHeaps(UINT HeapCount, D3D12_DESCRIPTOR_HEAP_TYPE Type[],
+					ID3D12DescriptorHeap *HeapPtrs[])
+{
+	bool AnyChanged = false;
+
+	for (UINT i = 0; i < HeapCount; ++i) {
+		if (m_CurrentDescriptorHeaps[Type[i]] != HeapPtrs[i]) {
+			m_CurrentDescriptorHeaps[Type[i]] = HeapPtrs[i];
+			AnyChanged = true;
+		}
+	}
+
+	if (AnyChanged)
+		BindDescriptorHeaps();
+}
+
+// Sets the pipeline state object, skipping the call when it is already the current one.
+void CommandContext::SetPipelineState(const PSO &PSO)
+{
+	ID3D12PipelineState *PipelineState = PSO.GetPipelineStateObject();
+	if (PipelineState == m_CurPipelineState)
+		return;
+
+	m_CommandList->SetPipelineState(PipelineState);
+	m_CurPipelineState = PipelineState;
+}
+
+// Forwards to m_CommandList->SetPredication.
+void CommandContext::SetPredication(ID3D12Resource *Buffer, UINT64 BufferOffset, D3D12_PREDICATION_OP Op)
+{
+	m_CommandList->SetPredication(Buffer, BufferOffset, Op);
+}
+
+// Binds every non-null entry of m_CurrentDescriptorHeaps to the command list.
+void
CommandContext::BindDescriptorHeaps(void)
+{
+	UINT NonNullHeaps = 0;
+	ID3D12DescriptorHeap *HeapsToBind[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
+	// Compact the cached heaps so that only non-null entries are passed on.
+	for (UINT i = 0; i < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; ++i) {
+		ID3D12DescriptorHeap *HeapIter = m_CurrentDescriptorHeaps[i];
+		if (HeapIter != nullptr)
+			HeapsToBind[NonNullHeaps++] = HeapIter;
+	}
+
+	if (NonNullHeaps > 0)
+		m_CommandList->SetDescriptorHeaps(NonNullHeaps, HeapsToBind);
+}
+
+// Flushes pending barriers, then clears Target's UAV to zero.
+void GraphicsContext::ClearUAV(GpuBuffer &Target)
+{
+	FlushResourceBarriers();
+
+	// After binding a UAV, we can get a GPU handle that is required to clear it as a UAV (because it essentially runs
+	// a shader to set all of the values).
+	D3D12_GPU_DESCRIPTOR_HANDLE GpuVisibleHandle = m_DynamicViewDescriptorHeap.UploadDirect(Target.GetUAV());
+	const UINT ClearColor[4] = {};
+	m_CommandList->ClearUnorderedAccessViewUint(GpuVisibleHandle, Target.GetUAV(), Target.GetResource(), ClearColor,
+			0, nullptr);
+}
+
+// Flushes pending barriers, then clears RenderTargetView to ColorRGBA.
+// A null pRects is passed on with a rectangle count of 0.
+void GraphicsContext::ClearColor(D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView, const FLOAT ColorRGBA[4], UINT NumRects,
+				 const D3D12_RECT *pRects)
+{
+	FlushResourceBarriers();
+	m_CommandList->ClearRenderTargetView(RenderTargetView, ColorRGBA, (pRects == nullptr) ?
0 : NumRects, pRects);
+	// NumRects is forwarded so that every rectangle in pRects is cleared, not only the first one.
+}
+
+// Flushes pending barriers, then clears only the depth plane of Target to its clear depth.
+void GraphicsContext::ClearDepth(DepthBuffer &Target)
+{
+	FlushResourceBarriers();
+	m_CommandList->ClearDepthStencilView(Target.GetDSV(), D3D12_CLEAR_FLAG_DEPTH, Target.GetClearDepth(),
+			Target.GetClearStencil(), 0, nullptr);
+}
+
+// Flushes pending barriers, then clears only the stencil plane of Target to its clear stencil value.
+void GraphicsContext::ClearStencil(DepthBuffer &Target)
+{
+	FlushResourceBarriers();
+	m_CommandList->ClearDepthStencilView(Target.GetDSV(), D3D12_CLEAR_FLAG_STENCIL, Target.GetClearDepth(),
+			Target.GetClearStencil(), 0, nullptr);
+}
+
+// Flushes pending barriers, then clears both the depth and the stencil plane of Target.
+void GraphicsContext::ClearDepthAndStencil(DepthBuffer &Target)
+{
+	FlushResourceBarriers();
+	m_CommandList->ClearDepthStencilView(Target.GetDSV(), D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
+			Target.GetClearDepth(), Target.GetClearStencil(), 0, nullptr);
+}
+
+// Forwards to m_CommandList->BeginQuery.
+void GraphicsContext::BeginQuery(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT HeapIndex)
+{
+	m_CommandList->BeginQuery(QueryHeap, Type, HeapIndex);
+}
+
+// Forwards to m_CommandList->EndQuery.
+void GraphicsContext::EndQuery(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT HeapIndex)
+{
+	m_CommandList->EndQuery(QueryHeap, Type, HeapIndex);
+}
+
+// Forwards to m_CommandList->ResolveQueryData.
+void GraphicsContext::ResolveQueryData(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT StartIndex,
+				       UINT NumQueries, ID3D12Resource *DestinationBuffer,
+				       UINT64 DestinationBufferOffset)
+{
+	m_CommandList->ResolveQueryData(QueryHeap, Type, StartIndex, NumQueries, DestinationBuffer,
+			DestinationBufferOffset);
+}
+
+// Sets the graphics root signature unless it is already current, then lets both dynamic
+// descriptor heaps parse the new signature.
+void GraphicsContext::SetRootSignature(const RootSignature &RootSig)
+{
+	if (RootSig.GetSignature() == m_CurGraphicsRootSignature)
+		return;
+
+	m_CommandList->SetGraphicsRootSignature(m_CurGraphicsRootSignature = RootSig.GetSignature());
+
+	m_DynamicViewDescriptorHeap.ParseGraphicsRootSignature(RootSig);
+	m_DynamicSamplerDescriptorHeap.ParseGraphicsRootSignature(RootSig);
+}
+
+// Binds NumRTVs render targets without a depth-stencil view.
+void GraphicsContext::SetRenderTargets(UINT NumRTVs, const D3D12_CPU_DESCRIPTOR_HANDLE RTVs[])
+{
+	m_CommandList->OMSetRenderTargets(NumRTVs, RTVs, FALSE, nullptr);
+}
+
+// Binds render targets and an optional depth-stencil view; all arguments are passed through
+// unchanged to OMSetRenderTargets.
+void
GraphicsContext::SetRenderTargets(UINT NumRenderTargetDescriptors,
+				       const D3D12_CPU_DESCRIPTOR_HANDLE *pRenderTargetDescriptors,
+				       BOOL RTsSingleHandleToDescriptorRange,
+				       const D3D12_CPU_DESCRIPTOR_HANDLE *pDepthStencilDescriptor)
+{
+	m_CommandList->OMSetRenderTargets(NumRenderTargetDescriptors, pRenderTargetDescriptors,
+			RTsSingleHandleToDescriptorRange, pDepthStencilDescriptor);
+}
+
+// Binds NumRTVs render targets together with the depth-stencil view DSV.
+void GraphicsContext::SetRenderTargets(UINT NumRTVs, const D3D12_CPU_DESCRIPTOR_HANDLE RTVs[],
+				       D3D12_CPU_DESCRIPTOR_HANDLE DSV)
+{
+	m_CommandList->OMSetRenderTargets(NumRTVs, RTVs, FALSE, &DSV);
+}
+
+// Binds a single render target without a depth-stencil view.
+void GraphicsContext::SetRenderTarget(D3D12_CPU_DESCRIPTOR_HANDLE RTV)
+{
+	SetRenderTargets(1, &RTV);
+}
+
+// Unbinds all render targets and the depth-stencil view.
+void GraphicsContext::SetNullRenderTarget()
+{
+	m_CommandList->OMSetRenderTargets(0, nullptr, FALSE, nullptr);
+}
+
+// Binds a single render target together with the depth-stencil view DSV.
+void GraphicsContext::SetRenderTarget(D3D12_CPU_DESCRIPTOR_HANDLE RTV, D3D12_CPU_DESCRIPTOR_HANDLE DSV)
+{
+	SetRenderTargets(1, &RTV, DSV);
+}
+
+// Binds only the depth-stencil view DSV, with no render targets.
+void GraphicsContext::SetDepthStencilTarget(D3D12_CPU_DESCRIPTOR_HANDLE DSV)
+{
+	SetRenderTargets(0, nullptr, DSV);
+}
+
+// Sets a single viewport.
+void GraphicsContext::SetViewport(const D3D12_VIEWPORT &vp)
+{
+	m_CommandList->RSSetViewports(1, &vp);
+}
+
+// Builds a D3D12_VIEWPORT from the given origin, size and depth range and sets it.
+void GraphicsContext::SetViewport(FLOAT x, FLOAT y, FLOAT w, FLOAT h, FLOAT minDepth, FLOAT maxDepth)
+{
+	D3D12_VIEWPORT vp;
+	vp.Width = w;
+	vp.Height = h;
+	vp.MinDepth = minDepth;
+	vp.MaxDepth = maxDepth;
+	vp.TopLeftX = x;
+	vp.TopLeftY = y;
+	m_CommandList->RSSetViewports(1, &vp);
+}
+
+// Sets a single scissor rectangle. Throws HRError when the rectangle is empty or inverted
+// (left >= right or top >= bottom).
+void GraphicsContext::SetScissor(const D3D12_RECT &rect)
+{
+	if (!(rect.left < rect.right && rect.top < rect.bottom)) {
+		// The message names this function so that the failure is attributed to the right call.
+		throw HRError("Invalid scissor rectangle passed to SetScissor.");
+	}
+	m_CommandList->RSSetScissorRects(1, &rect);
+}
+
+// Convenience overload: builds the rectangle from its four edges and forwards to SetScissor.
+void GraphicsContext::SetScissor(UINT left, UINT top, UINT right, UINT bottom)
+{
+	SetScissor(CD3DX12_RECT(left, top, right, bottom));
+}
+
+// Sets one viewport and one scissor rectangle. Throws HRError when the rectangle is empty or
+// inverted (left >= right or top >= bottom).
+void GraphicsContext::SetViewportAndScissor(const D3D12_VIEWPORT &vp, const D3D12_RECT &rect)
+{
+
if (!(rect.left < rect.right && rect.top < rect.bottom)) {
+		throw HRError("Invalid scissor rectangle passed to SetViewportAndScissor.");
+	}
+	m_CommandList->RSSetViewports(1, &vp);
+	m_CommandList->RSSetScissorRects(1, &rect);
+}
+
+// Sets a viewport at (x, y) with size (w, h) and a scissor rectangle covering the same area.
+void GraphicsContext::SetViewportAndScissor(UINT x, UINT y, UINT w, UINT h)
+{
+	SetViewport((float)x, (float)y, (float)w, (float)h);
+	SetScissor(x, y, x + w, y + h);
+}
+
+// Forwards to m_CommandList->OMSetStencilRef.
+void GraphicsContext::SetStencilRef(UINT StencilRef)
+{
+	m_CommandList->OMSetStencilRef(StencilRef);
+}
+
+// Sets the blend factor from BlendFactor.ptr.
+void GraphicsContext::SetBlendFactor(Color BlendFactor)
+{
+	m_CommandList->OMSetBlendFactor(BlendFactor.ptr);
+}
+
+// Forwards to m_CommandList->IASetPrimitiveTopology.
+void GraphicsContext::SetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY Topology)
+{
+	m_CommandList->IASetPrimitiveTopology(Topology);
+}
+
+// Sets NumConstants 32-bit graphics root constants at RootIndex, starting at destination offset 0.
+void GraphicsContext::SetConstantArray(UINT RootIndex, UINT NumConstants, const void *pConstants)
+{
+	m_CommandList->SetGraphicsRoot32BitConstants(RootIndex, NumConstants, pConstants, 0);
+}
+
+// Sets one 32-bit graphics root constant; note that the wrapped call takes (value, offset).
+void GraphicsContext::SetConstant(UINT RootIndex, UINT Offset, UINT Val)
+{
+	m_CommandList->SetGraphicsRoot32BitConstant(RootIndex, Val, Offset);
+}
+
+// Binds the constant buffer at GPU address CBV to graphics root parameter RootIndex.
+void GraphicsContext::SetConstantBuffer(UINT RootIndex, D3D12_GPU_VIRTUAL_ADDRESS CBV)
+{
+	m_CommandList->SetGraphicsRootConstantBufferView(RootIndex, CBV);
+}
+
+// Copies BufferData into a fresh allocation from m_CpuLinearAllocator and binds that allocation as
+// the constant buffer view of graphics root parameter RootIndex.
+// Throws HRError when BufferData is not 16-byte aligned.
+// NOTE(review): unlike CommandContext::WriteBuffer, a null BufferData is not rejected here.
+void GraphicsContext::SetDynamicConstantBufferView(UINT RootIndex, size_t BufferSize, const void *BufferData)
+{
+	if (!Math::IsAligned(BufferData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicConstantBufferView must be 16-byte aligned.");
+	}
+
+	DynAlloc cb = m_CpuLinearAllocator->Allocate(BufferSize);
+	// NOTE(review): the copy length is BufferSize rounded up to 16 (presumably counted in 16-byte
+	// units), while only BufferSize bytes are requested above - confirm Allocate pads accordingly
+	// and that the source is readable up to the rounded size.
+	SIMDMemCopy(cb.DataPtr, BufferData, Math::AlignUp(BufferSize, 16) >> 4);
+	// memcpy(cb.DataPtr, BufferData, BufferSize);
+	m_CommandList->SetGraphicsRootConstantBufferView(RootIndex, cb.GpuAddress);
+}
+
+// Binds SRV (at byte offset Offset) as a root shader resource view of graphics root parameter
+// RootIndex. Throws HRError unless the buffer's tracked state includes a shader-resource state.
+void GraphicsContext::SetBufferSRV(UINT RootIndex, const GpuBuffer &SRV, UINT64 Offset)
+{
+	if ((SRV.m_UsageState &
+	     (D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE |
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)) == 0) {
+		throw HRError("Trying to set a SRV root view on a buffer that is not in a SRV state.");
+	}
+
+	m_CommandList->SetGraphicsRootShaderResourceView(RootIndex, SRV.GetGpuVirtualAddress() + Offset);
+}
+
+// Binds UAV (at byte offset Offset) as a root unordered access view of graphics root parameter
+// RootIndex. Throws HRError unless the buffer's tracked state includes UNORDERED_ACCESS.
+void GraphicsContext::SetBufferUAV(UINT RootIndex, const GpuBuffer &UAV, UINT64 Offset)
+{
+	if ((UAV.m_UsageState & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) == 0) {
+		throw HRError("Trying to set a UAV root view on a buffer that is not in the UAV state.");
+	}
+
+	m_CommandList->SetGraphicsRootUnorderedAccessView(RootIndex, UAV.GetGpuVirtualAddress() + Offset);
+}
+
+// Forwards to m_CommandList->SetGraphicsRootDescriptorTable.
+void GraphicsContext::SetDescriptorTable(UINT RootIndex, D3D12_GPU_DESCRIPTOR_HANDLE FirstHandle)
+{
+	m_CommandList->SetGraphicsRootDescriptorTable(RootIndex, FirstHandle);
+}
+
+// Single-handle convenience wrapper around SetDynamicDescriptors.
+void GraphicsContext::SetDynamicDescriptor(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle)
+{
+	SetDynamicDescriptors(RootIndex, Offset, 1, &Handle);
+}
+
+// Hands Count CPU descriptor handles to the dynamic view descriptor heap for (RootIndex, Offset).
+void GraphicsContext::SetDynamicDescriptors(UINT RootIndex, UINT Offset, UINT Count,
+					    const D3D12_CPU_DESCRIPTOR_HANDLE Handles[])
+{
+	m_DynamicViewDescriptorHeap.SetGraphicsDescriptorHandles(RootIndex, Offset, Count, Handles);
+}
+
+// Single-handle convenience wrapper around SetDynamicSamplers.
+void GraphicsContext::SetDynamicSampler(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle)
+{
+	SetDynamicSamplers(RootIndex, Offset, 1, &Handle);
+}
+
+// Hands Count CPU sampler handles to the dynamic sampler descriptor heap for (RootIndex, Offset).
+void GraphicsContext::SetDynamicSamplers(UINT RootIndex, UINT Offset, UINT Count,
+					 const D3D12_CPU_DESCRIPTOR_HANDLE Handles[])
+{
+	m_DynamicSamplerDescriptorHeap.SetGraphicsDescriptorHandles(RootIndex, Offset, Count, Handles);
+}
+
+// Forwards to m_CommandList->IASetIndexBuffer.
+void GraphicsContext::SetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW &IBView)
+{
+	m_CommandList->IASetIndexBuffer(&IBView);
+}
+
+// Single-view convenience wrapper around SetVertexBuffers.
+void GraphicsContext::SetVertexBuffer(UINT Slot, const D3D12_VERTEX_BUFFER_VIEW &VBView)
+{
+	SetVertexBuffers(Slot, 1, &VBView);
+}
+
+// Binds Count vertex buffer views starting at StartSlot.
+void GraphicsContext::SetVertexBuffers(UINT StartSlot, UINT Count, const D3D12_VERTEX_BUFFER_VIEW VBViews[])
+{
+
m_CommandList->IASetVertexBuffers(StartSlot, Count, VBViews);
+}
+
+// Copies NumVertices * VertexStride bytes (rounded up to a multiple of 16) of VertexData into a
+// fresh allocation from m_CpuLinearAllocator and binds it as the vertex buffer of Slot.
+// Throws HRError when VertexData is not 16-byte aligned.
+void GraphicsContext::SetDynamicVB(UINT Slot, size_t NumVertices, size_t VertexStride, const void *VertexData)
+{
+	if (!Math::IsAligned(VertexData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicVB must be 16-byte aligned.");
+	}
+	size_t BufferSize = Math::AlignUp(NumVertices * VertexStride, 16);
+	DynAlloc vb = m_CpuLinearAllocator->Allocate(BufferSize);
+
+	SIMDMemCopy(vb.DataPtr, VertexData, BufferSize >> 4);
+
+	D3D12_VERTEX_BUFFER_VIEW VBView;
+	VBView.BufferLocation = vb.GpuAddress;
+	// The view size is the rounded-up size, not NumVertices * VertexStride.
+	VBView.SizeInBytes = (UINT)BufferSize;
+	VBView.StrideInBytes = (UINT)VertexStride;
+
+	m_CommandList->IASetVertexBuffers(Slot, 1, &VBView);
+}
+
+// Copies IndexCount 16-bit indices (byte size rounded up to a multiple of 16 for the copy) into a
+// fresh allocation from m_CpuLinearAllocator and binds it as an R16_UINT index buffer.
+// Throws HRError when IndexData is not 16-byte aligned.
+void GraphicsContext::SetDynamicIB(size_t IndexCount, const uint16_t *IndexData)
+{
+	if (!Math::IsAligned(IndexData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicIB must be 16-byte aligned.");
+	}
+	size_t BufferSize = Math::AlignUp(IndexCount * sizeof(uint16_t), 16);
+	DynAlloc ib = m_CpuLinearAllocator->Allocate(BufferSize);
+
+	SIMDMemCopy(ib.DataPtr, IndexData, BufferSize >> 4);
+
+	D3D12_INDEX_BUFFER_VIEW IBView;
+	IBView.BufferLocation = ib.GpuAddress;
+	// Unlike the copy above, the view covers exactly IndexCount indices.
+	IBView.SizeInBytes = (UINT)(IndexCount * sizeof(uint16_t));
+	IBView.Format = DXGI_FORMAT_R16_UINT;
+
+	m_CommandList->IASetIndexBuffer(&IBView);
+}
+
+// Copies BufferData into a fresh allocation from m_CpuLinearAllocator and binds it as the root
+// shader resource view of graphics root parameter RootIndex.
+// Throws HRError when BufferData is not 16-byte aligned.
+// NOTE(review): unlike ComputeContext::SetDynamicSRV, a null BufferData is not rejected here.
+void GraphicsContext::SetDynamicSRV(UINT RootIndex, size_t BufferSize, const void *BufferData)
+{
+	if (!Math::IsAligned(BufferData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicSRV must be 16-byte aligned.");
+	}
+
+	DynAlloc cb = m_CpuLinearAllocator->Allocate(BufferSize);
+	SIMDMemCopy(cb.DataPtr, BufferData, Math::AlignUp(BufferSize, 16) >> 4);
+	m_CommandList->SetGraphicsRootShaderResourceView(RootIndex, cb.GpuAddress);
+}
+
+// Draws VertexCount vertices as a single instance, starting at VertexStartOffset.
+void GraphicsContext::Draw(UINT VertexCount, UINT VertexStartOffset)
+{
+	DrawInstanced(VertexCount, 1, VertexStartOffset, 0);
+}
+
+// Draws IndexCount indices as a single instance.
+void GraphicsContext::DrawIndexed(UINT
IndexCount, UINT StartIndexLocation, INT BaseVertexLocation)
+{
+	DrawIndexedInstanced(IndexCount, 1, StartIndexLocation, BaseVertexLocation, 0);
+}
+
+// Flushes pending barriers, commits the staged view and sampler descriptor tables, then records a
+// non-indexed instanced draw.
+void GraphicsContext::DrawInstanced(UINT VertexCountPerInstance, UINT InstanceCount, UINT StartVertexLocation,
+				    UINT StartInstanceLocation)
+{
+	FlushResourceBarriers();
+	m_DynamicViewDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_DynamicSamplerDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_CommandList->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation);
+}
+
+// Flushes pending barriers, commits the staged view and sampler descriptor tables, then records an
+// indexed instanced draw.
+void GraphicsContext::DrawIndexedInstanced(UINT IndexCountPerInstance, UINT InstanceCount, UINT StartIndexLocation,
+					   INT BaseVertexLocation, UINT StartInstanceLocation)
+{
+	FlushResourceBarriers();
+	m_DynamicViewDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_DynamicSamplerDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_CommandList->DrawIndexedInstanced(IndexCountPerInstance, InstanceCount, StartIndexLocation,
+			BaseVertexLocation, StartInstanceLocation);
+}
+
+// Executes the device's draw-indirect command signature with arguments read from ArgumentBuffer.
+void GraphicsContext::DrawIndirect(GpuBuffer &ArgumentBuffer, uint64_t ArgumentBufferOffset)
+{
+	ExecuteIndirect(m_DeviceInstance->GetDrawIndirectCommandSignature(), ArgumentBuffer, ArgumentBufferOffset);
+}
+
+// Flushes pending barriers, commits the staged graphics descriptor tables, then records
+// ExecuteIndirect. CommandCounterBuffer may be null, in which case no count buffer is passed.
+void GraphicsContext::ExecuteIndirect(CommandSignature &CommandSig, GpuBuffer &ArgumentBuffer,
+				      uint64_t ArgumentStartOffset, uint32_t MaxCommands,
+				      GpuBuffer *CommandCounterBuffer, uint64_t CounterOffset)
+{
+	FlushResourceBarriers();
+	m_DynamicViewDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_DynamicSamplerDescriptorHeap.CommitGraphicsRootDescriptorTables(m_CommandList);
+	m_CommandList->ExecuteIndirect(CommandSig.GetSignature(), MaxCommands, ArgumentBuffer.GetResource(),
+			ArgumentStartOffset,
+			CommandCounterBuffer == nullptr ?
nullptr : CommandCounterBuffer->GetResource(),
+			CounterOffset);
+}
+
+// Flushes pending barriers, then clears Target's UAV to zero.
+void ComputeContext::ClearUAV(GpuBuffer &Target)
+{
+	FlushResourceBarriers();
+
+	// After binding a UAV, we can get a GPU handle that is required to clear it as a UAV (because it essentially runs
+	// a shader to set all of the values).
+	D3D12_GPU_DESCRIPTOR_HANDLE GpuVisibleHandle = m_DynamicViewDescriptorHeap.UploadDirect(Target.GetUAV());
+	const UINT ClearColor[4] = {};
+	m_CommandList->ClearUnorderedAccessViewUint(GpuVisibleHandle, Target.GetUAV(), Target.GetResource(), ClearColor,
+			0, nullptr);
+}
+
+// Sets the compute root signature unless it is already current, then lets both dynamic
+// descriptor heaps parse the new signature.
+void ComputeContext::SetRootSignature(const RootSignature &RootSig)
+{
+	if (RootSig.GetSignature() == m_CurComputeRootSignature)
+		return;
+
+	m_CommandList->SetComputeRootSignature(m_CurComputeRootSignature = RootSig.GetSignature());
+
+	m_DynamicViewDescriptorHeap.ParseComputeRootSignature(RootSig);
+	m_DynamicSamplerDescriptorHeap.ParseComputeRootSignature(RootSig);
+}
+
+// Sets NumConstants 32-bit compute root constants at RootIndex, starting at destination offset 0.
+void ComputeContext::SetConstantArray(UINT RootIndex, UINT NumConstants, const void *pConstants)
+{
+	m_CommandList->SetComputeRoot32BitConstants(RootIndex, NumConstants, pConstants, 0);
+}
+
+// Sets one 32-bit compute root constant; note that the wrapped call takes (value, offset).
+void ComputeContext::SetConstant(UINT RootIndex, UINT Offset, UINT Val)
+{
+	m_CommandList->SetComputeRoot32BitConstant(RootIndex, Val, Offset);
+}
+
+// Binds the constant buffer at GPU address CBV to compute root parameter RootIndex.
+void ComputeContext::SetConstantBuffer(UINT RootIndex, D3D12_GPU_VIRTUAL_ADDRESS CBV)
+{
+	m_CommandList->SetComputeRootConstantBufferView(RootIndex, CBV);
+}
+
+// Copies BufferData into a fresh allocation from m_CpuLinearAllocator and binds that allocation as
+// the constant buffer view of compute root parameter RootIndex.
+// Throws HRError when BufferData is not 16-byte aligned.
+void ComputeContext::SetDynamicConstantBufferView(UINT RootIndex, size_t BufferSize, const void *BufferData)
+{
+	if (!Math::IsAligned(BufferData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicConstantBufferView must be 16-byte aligned.");
+	}
+
+	DynAlloc cb = m_CpuLinearAllocator->Allocate(BufferSize);
+	// NOTE(review): the copy length is BufferSize rounded up to 16 while only BufferSize bytes are
+	// requested above - confirm Allocate pads accordingly.
+	SIMDMemCopy(cb.DataPtr, BufferData, Math::AlignUp(BufferSize, 16) >> 4);
+	//memcpy(cb.DataPtr, BufferData, BufferSize);
+	m_CommandList->SetComputeRootConstantBufferView(RootIndex, cb.GpuAddress);
+}
+
+// Copies BufferData into a fresh allocation from m_CpuLinearAllocator and binds it as the root
+// shader resource view of compute root parameter RootIndex.
+// Does nothing when BufferData is null; throws HRError when it is not 16-byte aligned.
+void ComputeContext::SetDynamicSRV(UINT RootIndex, size_t BufferSize, const void *BufferData)
+{
+	if (BufferData == nullptr) {
+		return;
+	}
+	if (!Math::IsAligned(BufferData, 16)) {
+		throw HRError("BufferData pointer passed to SetDynamicSRV must be 16-byte aligned.");
+	}
+
+	DynAlloc cb = m_CpuLinearAllocator->Allocate(BufferSize);
+	SIMDMemCopy(cb.DataPtr, BufferData, Math::AlignUp(BufferSize, 16) >> 4);
+	m_CommandList->SetComputeRootShaderResourceView(RootIndex, cb.GpuAddress);
+}
+
+// Binds SRV (at byte offset Offset) as a root shader resource view of compute root parameter
+// RootIndex. Throws HRError unless the buffer's tracked state includes NON_PIXEL_SHADER_RESOURCE.
+void ComputeContext::SetBufferSRV(UINT RootIndex, const GpuBuffer &SRV, UINT64 Offset)
+{
+	if ((SRV.m_UsageState & D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE) == 0) {
+		throw HRError(
+			"ComputeContext::SetBufferSRV: SRV resource is not in D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE state.");
+	}
+	m_CommandList->SetComputeRootShaderResourceView(RootIndex, SRV.GetGpuVirtualAddress() + Offset);
+}
+
+// Binds UAV (at byte offset Offset) as a root unordered access view of compute root parameter
+// RootIndex. Throws HRError unless the buffer's tracked state includes UNORDERED_ACCESS.
+void ComputeContext::SetBufferUAV(UINT RootIndex, const GpuBuffer &UAV, UINT64 Offset)
+{
+	if ((UAV.m_UsageState & D3D12_RESOURCE_STATE_UNORDERED_ACCESS) == 0) {
+		throw HRError(
+			"ComputeContext::SetBufferUAV: UAV resource is not in D3D12_RESOURCE_STATE_UNORDERED_ACCESS state.");
+	}
+
+	m_CommandList->SetComputeRootUnorderedAccessView(RootIndex, UAV.GetGpuVirtualAddress() + Offset);
+}
+
+// Forwards to m_CommandList->SetComputeRootDescriptorTable.
+void ComputeContext::SetDescriptorTable(UINT RootIndex, D3D12_GPU_DESCRIPTOR_HANDLE FirstHandle)
+{
+	m_CommandList->SetComputeRootDescriptorTable(RootIndex, FirstHandle);
+}
+
+// Single-handle convenience wrapper around SetDynamicDescriptors.
+void ComputeContext::SetDynamicDescriptor(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle)
+{
+	SetDynamicDescriptors(RootIndex, Offset, 1, &Handle);
+}
+
+// Hands Count CPU descriptor handles to the dynamic view descriptor heap for (RootIndex, Offset).
+void ComputeContext::SetDynamicDescriptors(UINT RootIndex, UINT Offset, UINT Count,
+					   const D3D12_CPU_DESCRIPTOR_HANDLE Handles[])
+{
+	m_DynamicViewDescriptorHeap.SetComputeDescriptorHandles(RootIndex, Offset, Count, Handles);
+}
+
+// Single-handle convenience wrapper around SetDynamicSamplers.
+void ComputeContext::SetDynamicSampler(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle)
+{
+
SetDynamicSamplers(RootIndex, Offset, 1, &Handle);
+}
+
+// Hands Count CPU sampler handles to the dynamic sampler descriptor heap for (RootIndex, Offset).
+void ComputeContext::SetDynamicSamplers(UINT RootIndex, UINT Offset, UINT Count,
+					const D3D12_CPU_DESCRIPTOR_HANDLE Handles[])
+{
+	m_DynamicSamplerDescriptorHeap.SetComputeDescriptorHandles(RootIndex, Offset, Count, Handles);
+}
+
+// Flushes pending barriers, commits the staged compute descriptor tables, then records a dispatch
+// of the given thread-group counts (each narrowed to UINT).
+void ComputeContext::Dispatch(size_t GroupCountX, size_t GroupCountY, size_t GroupCountZ)
+{
+	FlushResourceBarriers();
+	m_DynamicViewDescriptorHeap.CommitComputeRootDescriptorTables(m_CommandList);
+	m_DynamicSamplerDescriptorHeap.CommitComputeRootDescriptorTables(m_CommandList);
+	m_CommandList->Dispatch((UINT)GroupCountX, (UINT)GroupCountY, (UINT)GroupCountZ);
+}
+
+// Dispatches Math::DivideByMultiple(ThreadCountX, GroupSizeX) groups along X and one along Y and Z.
+void ComputeContext::Dispatch1D(size_t ThreadCountX, size_t GroupSizeX)
+{
+	Dispatch(Math::DivideByMultiple(ThreadCountX, GroupSizeX), 1, 1);
+}
+
+// 2D variant of Dispatch1D: group counts are derived per axis for X and Y; Z is 1.
+void ComputeContext::Dispatch2D(size_t ThreadCountX, size_t ThreadCountY, size_t GroupSizeX, size_t GroupSizeY)
+{
+	Dispatch(Math::DivideByMultiple(ThreadCountX, GroupSizeX), Math::DivideByMultiple(ThreadCountY, GroupSizeY), 1);
+}
+
+// 3D variant of Dispatch1D: group counts are derived per axis for X, Y and Z.
+void ComputeContext::Dispatch3D(size_t ThreadCountX, size_t ThreadCountY, size_t ThreadCountZ, size_t GroupSizeX,
+				size_t GroupSizeY, size_t GroupSizeZ)
+{
+	Dispatch(Math::DivideByMultiple(ThreadCountX, GroupSizeX), Math::DivideByMultiple(ThreadCountY, GroupSizeY),
+		 Math::DivideByMultiple(ThreadCountZ, GroupSizeZ));
+}
+
+// Executes the device's dispatch-indirect command signature with arguments read from ArgumentBuffer.
+void ComputeContext::DispatchIndirect(GpuBuffer &ArgumentBuffer, uint64_t ArgumentBufferOffset)
+{
+	ExecuteIndirect(m_DeviceInstance->GetDispatchIndirectCommandSignature(), ArgumentBuffer, ArgumentBufferOffset);
+}
+
+// Flushes pending barriers, commits the staged compute descriptor tables, then records
+// ExecuteIndirect. CommandCounterBuffer may be null, in which case no count buffer is passed.
+void ComputeContext::ExecuteIndirect(CommandSignature &CommandSig, GpuBuffer &ArgumentBuffer,
+				     uint64_t ArgumentStartOffset, uint32_t MaxCommands,
+				     GpuBuffer *CommandCounterBuffer, uint64_t CounterOffset)
+{
+	FlushResourceBarriers();
+	m_DynamicViewDescriptorHeap.CommitComputeRootDescriptorTables(m_CommandList);
+	m_DynamicSamplerDescriptorHeap.CommitComputeRootDescriptorTables(m_CommandList);
+
m_CommandList->ExecuteIndirect(CommandSig.GetSignature(), MaxCommands, ArgumentBuffer.GetResource(),
+			ArgumentStartOffset,
+			CommandCounterBuffer == nullptr ? nullptr : CommandCounterBuffer->GetResource(),
+			CounterOffset);
+}
+
+// Initializes the sampler description with defaults: anisotropic filtering (max anisotropy 16),
+// wrap addressing on all axes, white border color and an unrestricted LOD range. The descriptor
+// handle starts out as "unknown"; no descriptor is created until CreateDescriptor is called.
+SamplerDesc::SamplerDesc(D3D12DeviceInstance *DeviceInstance) : m_DeviceInstance(DeviceInstance)
+{
+	Sampler.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+	Filter = D3D12_FILTER_ANISOTROPIC;
+	AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+	AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+	AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
+	MipLODBias = 0.0f;
+	MaxAnisotropy = 16;
+	ComparisonFunc = D3D12_COMPARISON_FUNC(0);
+	BorderColor[0] = 1.0f;
+	BorderColor[1] = 1.0f;
+	BorderColor[2] = 1.0f;
+	BorderColor[3] = 1.0f;
+	MinLOD = 0.0f;
+	MaxLOD = D3D12_FLOAT32_MAX;
+}
+
+// Resets the descriptor handle to "unknown".
+// NOTE(review): the handle is only reset; nothing here returns an allocated descriptor to the
+// device - confirm that is intended.
+SamplerDesc::~SamplerDesc()
+{
+	if (Sampler.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) {
+		Sampler.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+	}
+}
+
+// Applies the same address mode to the U, V and W axes.
+void SamplerDesc::SetTextureAddressMode(D3D12_TEXTURE_ADDRESS_MODE AddressMode)
+{
+	AddressU = AddressMode;
+	AddressV = AddressMode;
+	AddressW = AddressMode;
+}
+
+// Copies the four components of Border (x, y, z, w) into BorderColor[0..3].
+void SamplerDesc::SetBorderColor(Color Border)
+{
+	BorderColor[0] = Border.x;
+	BorderColor[1] = Border.y;
+	BorderColor[2] = Border.z;
+	BorderColor[3] = Border.w;
+}
+
+// Allocates a sampler descriptor from the device and creates the sampler in it.
+void SamplerDesc::CreateDescriptor(void)
+{
+	Sampler = m_DeviceInstance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+	m_DeviceInstance->GetDevice()->CreateSampler(this, Sampler);
+}
+
+// Creates the sampler in the caller-provided descriptor Handle.
+// Throws HRError when Handle.ptr is 0 or -1.
+void SamplerDesc::CreateDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE Handle)
+{
+	if (Handle.ptr == 0 || Handle.ptr == -1) {
+		throw HRError("Invalid descriptor handle for sampler creation");
+	}
+	m_DeviceInstance->GetDevice()->CreateSampler(this, Handle);
+}
+
+// Snapshot of the hardware-scheduling (HwSch*) capability flags of a D3DKMT_WDDM_2_7_CAPS
+// structure, with a human-readable summary via ToString().
+class HagsStatus {
+public:
+	// Driver support level reported for the feature.
+	enum DriverSupport { ALWAYS_OFF, ALWAYS_ON, EXPERIMENTAL, STABLE, UNKNOWN };
+
+	bool enabled;            // from caps->HwSchEnabled
+	bool enabled_by_default; // from caps->HwSchEnabledByDefault
+	DriverSupport support;   // STABLE or ALWAYS_OFF after construction; refined by SetDriverSupport
+
+	explicit HagsStatus(const D3DKMT_WDDM_2_7_CAPS *caps);
+
+	// Overrides `support` from a DXGK_FEATURE_SUPPORT_* value.
+	void
SetDriverSupport(const UINT DXGKVal);
+
+	std::string ToString() const;
+
+private:
+	const char *DriverSupportToString() const;
+};
+
+// Copies the HwSch* flags out of caps; support starts as STABLE when HwSchSupported is set and
+// ALWAYS_OFF otherwise.
+HagsStatus::HagsStatus(const D3DKMT_WDDM_2_7_CAPS *caps)
+{
+	enabled = caps->HwSchEnabled;
+	enabled_by_default = caps->HwSchEnabledByDefault;
+	support = caps->HwSchSupported ? DriverSupport::STABLE : DriverSupport::ALWAYS_OFF;
+}
+
+// Maps a DXGK_FEATURE_SUPPORT_* value onto DriverSupport; unrecognized values become UNKNOWN.
+void HagsStatus::SetDriverSupport(const UINT DXGKVal)
+{
+	switch (DXGKVal) {
+	case DXGK_FEATURE_SUPPORT_ALWAYS_OFF:
+		support = ALWAYS_OFF;
+		break;
+	case DXGK_FEATURE_SUPPORT_ALWAYS_ON:
+		support = ALWAYS_ON;
+		break;
+	case DXGK_FEATURE_SUPPORT_EXPERIMENTAL:
+		support = EXPERIMENTAL;
+		break;
+	case DXGK_FEATURE_SUPPORT_STABLE:
+		support = STABLE;
+		break;
+	default:
+		support = UNKNOWN;
+	}
+}
+
+// Formats the status as e.g. "Enabled (Default: Yes, Driver status: Supported)".
+std::string HagsStatus::ToString() const
+{
+	std::string status = enabled ? "Enabled" : "Disabled";
+	status += " (Default: ";
+	status += enabled_by_default ? "Yes" : "No";
+	status += ", Driver status: ";
+	status += DriverSupportToString();
+	status += ")";
+
+	return status;
+}
+
+// Returns the display string for the current `support` value.
+const char *HagsStatus::DriverSupportToString() const
+{
+	switch (support) {
+	case ALWAYS_OFF:
+		return "Unsupported";
+	case ALWAYS_ON:
+		return "Always On";
+	case EXPERIMENTAL:
+		return "Experimental";
+	case STABLE:
+		return "Supported";
+	default:
+		return "Unknown";
+	}
+}
+
+// Construction does no work; the instance is set up by Initialize().
+D3D12DeviceInstance::D3D12DeviceInstance() {}
+
+// Set to 1 to enable the D3D12 debug layer and a debug DXGI factory in Initialize().
+#define DEBUG_D3D12 0
+
+// Creates the DXGI factory, the adapter/device for adaptorIndex, and the per-device helpers (page
+// managers, indirect command signatures, command list manager and context manager).
+// Throws HRError when the DXGI factory cannot be created.
+// NOTE(review): template arguments appear to be missing from this text (e.g. `ComPtr pDevice`,
+// `ComPtr dxgiFactory`); restore them from the original source before building.
+void D3D12DeviceInstance::Initialize(int32_t adaptorIndex)
+{
+	ComPtr pDevice;
+	uint32_t useDebugLayers = 0;
+	DWORD dxgiFactoryFlags = 0;
+#if DEBUG_D3D12
+	useDebugLayers = 1;
+	dxgiFactoryFlags = DXGI_CREATE_FACTORY_DEBUG;
+#endif
+	if (useDebugLayers) {
+		EnableDebugLayer();
+	}
+
+	// Obtain the DXGI factory
+	ComPtr dxgiFactory;
+	HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&dxgiFactory));
+	if (FAILED(hr)) {
+		throw HRError("Failed to create DXGI Factory", hr);
+	}
+
+	// Detach() hands the factory reference over to m_DxgiFactory.
+	m_DxgiFactory = dxgiFactory.Detach();
+
CreateD3DAdapterAndDevice(adaptorIndex);
+	if (useDebugLayers) {
+		EnableDebugInofQueue();
+	}
+
+	// We like to do read-modify-write operations on UAVs during post processing. To support that, we
+	// need to either have the hardware do typed UAV loads of R11G11B10_FLOAT or we need to manually
+	// decode an R32_UINT representation of the same buffer. This code determines if we get the hardware
+	// load support.
+
+	CheckFeatureSupports();
+
+	// NOTE(review): the std::make_unique calls below have lost their template arguments in this
+	// text; restore them from the original source before building.
+	// One page manager per allocator type: index 0 is kGpuExclusive, index 1 is kCpuWritable.
+	m_PageManager[0] = std::make_unique(this, kGpuExclusive);
+	m_PageManager[1] = std::make_unique(this, kCpuWritable);
+
+	m_DispatchIndirectCommandSignature = std::make_unique(this, 1);
+	m_DrawIndirectCommandSignature = std::make_unique(this, 1);
+
+	m_CommandManager = std::make_unique();
+	m_ContextManager = std::make_unique(this);
+
+	m_CommandManager->Create(this);
+}
+
+// Currently a no-op.
+void D3D12DeviceInstance::Uninitialize() {}
+
+// Returns the D3D12 device (m_Device).
+ID3D12Device *D3D12DeviceInstance::GetDevice()
+{
+	return m_Device;
+}
+
+// Returns the DXGI adapter (m_Adapter).
+IDXGIAdapter1 *D3D12DeviceInstance::GetAdapter()
+{
+	return m_Adapter;
+}
+
+// Returns the DXGI factory created in Initialize().
+IDXGIFactory6 *D3D12DeviceInstance::GetDxgiFactory()
+{
+	return m_DxgiFactory;
+}
+
+// Returns the command list manager created in Initialize().
+CommandListManager &D3D12DeviceInstance::GetCommandManager()
+{
+	return *m_CommandManager;
+}
+
+// Returns the context manager created in Initialize().
+ContextManager &D3D12DeviceInstance::GetContextManager()
+{
+	return *m_ContextManager;
+}
+
+// Returns the root-signature map (m_RootSignatureHashMap) by reference.
+// NOTE(review): the map's template arguments are missing from this text (`std::map>`).
+std::map> &D3D12DeviceInstance::GetRootSignatureHashMap()
+{
+	return m_RootSignatureHashMap;
+}
+
+// Returns the page manager for AllocatorType, which is used directly as an index into m_PageManager.
+LinearAllocatorPageManager *D3D12DeviceInstance::GetPageManager(LinearAllocatorType AllocatorType)
+{
+	return m_PageManager[AllocatorType].get();
+}
+
+// Returns the graphics PSO map (m_GraphicsPSOHashMap) by reference.
+std::map> &D3D12DeviceInstance::GetGraphicsPSOHashMap()
+{
+	return m_GraphicsPSOHashMap;
+}
+
+// Returns the compute PSO map (m_ComputePSOHashMap) by reference.
+std::map> &D3D12DeviceInstance::GetComputePSOHashMap()
+{
+	return m_ComputePSOHashMap;
+}
+
+// Returns the command signature used by ComputeContext::DispatchIndirect.
+CommandSignature &D3D12DeviceInstance::GetDispatchIndirectCommandSignature()
+{
+	return *m_DispatchIndirectCommandSignature;
+}
+
+// Returns the command signature used by GraphicsContext::DrawIndirect.
+CommandSignature &D3D12DeviceInstance::GetDrawIndirectCommandSignature()
+{
+	return *m_DrawIndirectCommandSignature;
+}
+
+// Returns m_DescriptorAllocator, which AllocateDescriptor indexes by descriptor heap type.
+DescriptorAllocator *D3D12DeviceInstance::GetDescriptorAllocator()
+{
+	return m_DescriptorAllocator;
+}
+
+// Creates a descriptor heap of the given type with kMaxNumDescriptors entries and no flags (so not
+// shader-visible), stores it in m_DescriptorHeapPool and returns the raw pointer.
+// Throws HRError when heap creation fails.
+// NOTE(review): `ComPtr pHeap` has lost its template argument in this text.
+ID3D12DescriptorHeap *D3D12DeviceInstance::RequestCommonHeap(D3D12_DESCRIPTOR_HEAP_TYPE Type)
+{
+	D3D12_DESCRIPTOR_HEAP_DESC Desc;
+	Desc.Type = Type;
+	Desc.NumDescriptors = kMaxNumDescriptors;
+	Desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
+	Desc.NodeMask = 1;
+
+	ComPtr pHeap;
+	HRESULT hr = m_Device->CreateDescriptorHeap(&Desc, IID_PPV_ARGS(&pHeap));
+	if (FAILED(hr)) {
+		throw HRError("create common heap failed", hr);
+	}
+
+	m_DescriptorHeapPool.emplace_back(pHeap);
+	return pHeap.Get();
+}
+
+// Returns a shader-visible descriptor heap of HeapType. Retired heaps whose fence has completed are
+// moved to the available queue first; an available heap is reused when there is one, otherwise a
+// new heap is created and stored in m_DynamicDescriptorHeapPool.
+// Throws HRError when heap creation fails.
+ID3D12DescriptorHeap *D3D12DeviceInstance::RequestDynamicDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE HeapType)
+{
+	// Pool index: 1 for sampler heaps, 0 for every other heap type.
+	uint32_t idx = HeapType == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? 1 : 0;
+
+	while (!m_DynamicRetiredDescriptorHeaps[idx].empty() &&
+	       m_CommandManager->IsFenceComplete(m_DynamicRetiredDescriptorHeaps[idx].front().first)) {
+		m_DynamicAvailableDescriptorHeaps[idx].push(m_DynamicRetiredDescriptorHeaps[idx].front().second);
+		m_DynamicRetiredDescriptorHeaps[idx].pop();
+	}
+
+	if (!m_DynamicAvailableDescriptorHeaps[idx].empty()) {
+		ID3D12DescriptorHeap *HeapPtr = m_DynamicAvailableDescriptorHeaps[idx].front();
+		m_DynamicAvailableDescriptorHeaps[idx].pop();
+		return HeapPtr;
+	} else {
+		D3D12_DESCRIPTOR_HEAP_DESC HeapDesc = {};
+		HeapDesc.Type = HeapType;
+		HeapDesc.NumDescriptors = kMaxNumDescriptors;
+		HeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+		HeapDesc.NodeMask = 1;
+		ComPtr HeapPtr;
+		HRESULT hr = m_Device->CreateDescriptorHeap(&HeapDesc, IID_PPV_ARGS(&HeapPtr));
+		if (FAILED(hr)) {
+			throw HRError("create shader heap failed", hr);
+		}
+
+		m_DynamicDescriptorHeapPool[idx].emplace_back(HeapPtr);
+		return HeapPtr.Get();
+	}
+}
+
+// Queues every heap in UsedHeaps as retired, tagged with FenceValueForReset; they become reusable
+// in RequestDynamicDescriptorHeap once that fence value has completed.
+void D3D12DeviceInstance::DiscardDynamicDescriptorHeaps(D3D12_DESCRIPTOR_HEAP_TYPE HeapType,
+							uint64_t FenceValueForReset,
+							const std::vector &UsedHeaps)
+{
+	uint32_t idx = HeapType ==
D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? 1 : 0;
+	for (auto iter = UsedHeaps.begin(); iter != UsedHeaps.end(); ++iter)
+		m_DynamicRetiredDescriptorHeaps[idx].push(std::make_pair(FenceValueForReset, *iter));
+}
+
+// Allocates Count descriptors from the allocator for heap type Type.
+D3D12_CPU_DESCRIPTOR_HANDLE D3D12DeviceInstance::AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE Type, UINT Count)
+{
+	return m_DescriptorAllocator[Type].Allocate(Count);
+}
+
+// Allocates a DIRECT command context, assigns it ID and returns it as a GraphicsContext.
+// NOTE(review): C-style downcast from CommandContext* - presumably safe only because the context
+// types share layout; confirm against the class declarations.
+GraphicsContext *D3D12DeviceInstance::GetNewGraphicsContext(const std::wstring &ID)
+{
+	CommandContext *NewContext = m_ContextManager->AllocateContext(D3D12_COMMAND_LIST_TYPE_DIRECT);
+	NewContext->SetID(ID);
+	return (GraphicsContext *)NewContext;
+}
+
+// Allocates a command context (COMPUTE when Async is set, DIRECT otherwise), assigns it ID and
+// returns it as a ComputeContext.
+ComputeContext *D3D12DeviceInstance::GetNewComputeContext(const std::wstring &ID, bool Async)
+{
+	CommandContext *NewContext = m_ContextManager->AllocateContext(Async ? D3D12_COMMAND_LIST_TYPE_COMPUTE
+									     : D3D12_COMMAND_LIST_TYPE_DIRECT);
+	NewContext->SetID(ID);
+	return (ComputeContext *)NewContext;
+}
+
+// Copies NumSlices slices of NumRows rows, RowSizeInBytes bytes per row, from pSrc to pDest,
+// honoring the row and slice pitches of each side.
+// NOTE(review): the static_cast template arguments are missing from this text.
+inline void MemcpySubresource(_In_ const D3D12_MEMCPY_DEST *pDest, _In_ const D3D12_SUBRESOURCE_DATA *pSrc,
+			      SIZE_T RowSizeInBytes, UINT NumRows, UINT NumSlices) noexcept
+{
+	for (UINT z = 0; z < NumSlices; ++z) {
+		auto pDestSlice = static_cast(pDest->pData) + pDest->SlicePitch * z;
+		auto pSrcSlice = static_cast(pSrc->pData) + pSrc->SlicePitch * LONG_PTR(z);
+		for (UINT y = 0; y < NumRows; ++y) {
+			memcpy(pDestSlice + pDest->RowPitch * y, pSrcSlice + pSrc->RowPitch * LONG_PTR(y),
+			       RowSizeInBytes);
+		}
+	}
+}
+
+//------------------------------------------------------------------------------------------------
+// Row-by-row memcpy
+// Variant whose source is described by a base pointer plus a D3D12_SUBRESOURCE_INFO
+// (Offset, RowPitch, DepthPitch) instead of a D3D12_SUBRESOURCE_DATA.
+inline void MemcpySubresource(_In_ const D3D12_MEMCPY_DEST *pDest, _In_ const void *pResourceData,
+			      _In_ const D3D12_SUBRESOURCE_INFO *pSrc, SIZE_T RowSizeInBytes, UINT NumRows,
+			      UINT NumSlices) noexcept
+{
+	for (UINT z = 0; z < NumSlices; ++z) {
+		auto pDestSlice = static_cast(pDest->pData) + pDest->SlicePitch * z;
+		auto pSrcSlice =
+			(static_cast(pResourceData) + pSrc->Offset) + pSrc->DepthPitch
* ULONG_PTR(z); + for (UINT y = 0; y < NumRows; ++y) { + memcpy(pDestSlice + pDest->RowPitch * y, pSrcSlice + pSrc->RowPitch * ULONG_PTR(y), + RowSizeInBytes); + } + } +} + +inline UINT64 UpdateSubresources(_In_ ID3D12GraphicsCommandList *pCmdList, _In_ ID3D12Resource *pDestinationResource, + _In_ ID3D12Resource *pIntermediate, + _In_range_(0, D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0, D3D12_REQ_SUBRESOURCES - FirstSubresource) UINT NumSubresources, + UINT64 RequiredSize, + _In_reads_(NumSubresources) const D3D12_PLACED_SUBRESOURCE_FOOTPRINT *pLayouts, + _In_reads_(NumSubresources) const UINT *pNumRows, + _In_reads_(NumSubresources) const UINT64 *pRowSizesInBytes, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA *pSrcData) noexcept +{ + // Minor validation + auto IntermediateDesc = pIntermediate->GetDesc(); + auto DestinationDesc = pDestinationResource->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || + IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset || RequiredSize > SIZE_T(-1) || + (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || NumSubresources != 1))) { + return 0; + } + + BYTE *pData; + HRESULT hr = pIntermediate->Map(0, nullptr, reinterpret_cast(&pData)); + if (FAILED(hr)) { + return 0; + } + + for (UINT i = 0; i < NumSubresources; ++i) { + if (pRowSizesInBytes[i] > SIZE_T(-1)) + return 0; + D3D12_MEMCPY_DEST DestData = {pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, + SIZE_T(pLayouts[i].Footprint.RowPitch) * SIZE_T(pNumRows[i])}; + MemcpySubresource(&DestData, &pSrcData[i], static_cast(pRowSizesInBytes[i]), pNumRows[i], + pLayouts[i].Footprint.Depth); + } + pIntermediate->Unmap(0, nullptr); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + pCmdList->CopyBufferRegion(pDestinationResource, 0, pIntermediate, pLayouts[0].Offset, + pLayouts[0].Footprint.Width); + } else { + for (UINT i = 0; i < NumSubresources; 
++i) { + CD3DX12_TEXTURE_COPY_LOCATION Dst(pDestinationResource, i + FirstSubresource); + CD3DX12_TEXTURE_COPY_LOCATION Src(pIntermediate, pLayouts[i]); + pCmdList->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); + } + } + return RequiredSize; +} + +inline UINT64 UpdateSubresources(_In_ ID3D12GraphicsCommandList *pCmdList, _In_ ID3D12Resource *pDestinationResource, + _In_ ID3D12Resource *pIntermediate, UINT64 IntermediateOffset, + _In_range_(0, D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0, D3D12_REQ_SUBRESOURCES - FirstSubresource) UINT NumSubresources, + _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA *pSrcData) noexcept +{ + UINT64 RequiredSize = 0; + auto MemToAlloc = + static_cast(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(UINT) + sizeof(UINT64)) * + NumSubresources; + if (MemToAlloc > SIZE_MAX) { + return 0; + } + void *pMem = HeapAlloc(GetProcessHeap(), 0, static_cast(MemToAlloc)); + if (pMem == nullptr) { + return 0; + } + auto pLayouts = static_cast(pMem); + auto pRowSizesInBytes = reinterpret_cast(pLayouts + NumSubresources); + auto pNumRows = reinterpret_cast(pRowSizesInBytes + NumSubresources); + + auto Desc = pDestinationResource->GetDesc(); + ID3D12Device *pDevice = nullptr; + pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, pLayouts, pNumRows, + pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + UINT64 Result = UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, + NumSubresources, RequiredSize, pLayouts, pNumRows, pRowSizesInBytes, + pSrcData); + HeapFree(GetProcessHeap(), 0, pMem); + return Result; +} + +void D3D12DeviceInstance::InitializeTexture(GpuResource &Dest, UINT NumSubresources, D3D12_SUBRESOURCE_DATA SubData[]) +{ + UINT64 uploadBufferSize = GetRequiredIntermediateSize(Dest.GetResource(), 0, NumSubresources); + + CommandContext &InitContext = 
*GetNewGraphicsContext(); + + // copy data to the intermediate upload heap and then schedule a copy from the upload heap to the default texture + DynAlloc mem = InitContext.ReserveUploadMemory(uploadBufferSize); + UpdateSubresources(InitContext.m_CommandList, Dest.GetResource(), mem.Buffer.GetResource(), 0, 0, + NumSubresources, SubData); + InitContext.TransitionResource(Dest, D3D12_RESOURCE_STATE_GENERIC_READ); + + // Execute the command list and wait for it to finish so we can release the upload buffer + InitContext.Finish(true); +} + +void D3D12DeviceInstance::InitializeBuffer(GpuBuffer &Dest, const void *BufferData, size_t NumBytes, size_t DestOffset) +{ + CommandContext &InitContext = *GetNewGraphicsContext(); + + DynAlloc mem = InitContext.ReserveUploadMemory(NumBytes); + SIMDMemCopy(mem.DataPtr, BufferData, Math::DivideByMultiple(NumBytes, 16)); + + // copy data to the intermediate upload heap and then schedule a copy from the upload heap to the default texture + InitContext.TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST, true); + InitContext.m_CommandList->CopyBufferRegion(Dest.GetResource(), DestOffset, mem.Buffer.GetResource(), 0, + NumBytes); + InitContext.TransitionResource(Dest, D3D12_RESOURCE_STATE_GENERIC_READ, true); + + // Execute the command list and wait for it to finish so we can release the upload buffer + InitContext.Finish(true); +} + +void D3D12DeviceInstance::InitializeBuffer(GpuBuffer &Dest, const UploadBuffer &Src, size_t SrcOffset, size_t NumBytes, + size_t DestOffset) +{ + CommandContext &InitContext = *GetNewGraphicsContext(); + + size_t MaxBytes = std::min(Dest.GetBufferSize() - DestOffset, Src.GetBufferSize() - SrcOffset); + NumBytes = std::min(MaxBytes, NumBytes); + + // copy data to the intermediate upload heap and then schedule a copy from the upload heap to the default texture + InitContext.TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST, true); + InitContext.m_CommandList->CopyBufferRegion(Dest.GetResource(), 
DestOffset, (ID3D12Resource *)Src.GetResource(), + SrcOffset, NumBytes); + InitContext.TransitionResource(Dest, D3D12_RESOURCE_STATE_GENERIC_READ, true); + + // Execute the command list and wait for it to finish so we can release the upload buffer + InitContext.Finish(true); +} + +void D3D12DeviceInstance::InitializeTextureArraySlice(GpuResource &Dest, UINT SliceIndex, GpuResource &Src) +{ + CommandContext &Context = *GetNewGraphicsContext(); + + Context.TransitionResource(Dest, D3D12_RESOURCE_STATE_COPY_DEST); + Context.FlushResourceBarriers(); + + const D3D12_RESOURCE_DESC &DestDesc = Dest.GetResource()->GetDesc(); + const D3D12_RESOURCE_DESC &SrcDesc = Src.GetResource()->GetDesc(); + + if (!(SliceIndex < DestDesc.DepthOrArraySize && SrcDesc.DepthOrArraySize == 1 && + DestDesc.Width == SrcDesc.Width && DestDesc.Height == SrcDesc.Height && + DestDesc.MipLevels <= SrcDesc.MipLevels)) { + throw HRError("InitializeTextureArraySlice: incompatible source and destination textures"); + } + + UINT SubResourceIndex = SliceIndex * DestDesc.MipLevels; + + for (UINT i = 0; i < DestDesc.MipLevels; ++i) { + D3D12_TEXTURE_COPY_LOCATION destCopyLocation = { + Dest.GetResource(), D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, SubResourceIndex + i}; + + D3D12_TEXTURE_COPY_LOCATION srcCopyLocation = {Src.GetResource(), + D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, i}; + + Context.m_CommandList->CopyTextureRegion(&destCopyLocation, 0, 0, 0, &srcCopyLocation, nullptr); + } + + Context.TransitionResource(Dest, D3D12_RESOURCE_STATE_GENERIC_READ); + Context.Finish(true); +} + +bool D3D12DeviceInstance::IsNV12TextureSupported() const +{ + return m_NV12Supported; +} + +bool D3D12DeviceInstance::IsP010TextureSupported() const +{ + return m_P010Supported; +} + +bool D3D12DeviceInstance::FastClearSupported() const +{ + return m_FastClearSupported; +} + +void D3D12DeviceInstance::EnableDebugLayer() +{ + ComPtr debugInterface; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugInterface)))) { 
+ debugInterface->EnableDebugLayer(); + + uint32_t useGPUBasedValidation = 1; + if (useGPUBasedValidation) { + ComPtr debugInterface1; + if (SUCCEEDED((debugInterface->QueryInterface(IID_PPV_ARGS(&debugInterface1))))) { + debugInterface1->SetEnableGPUBasedValidation(true); + } + } + } else { + } + + ComPtr dxgiInfoQueue; + if (SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&dxgiInfoQueue)))) { + + dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, true); + dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, true); + + DXGI_INFO_QUEUE_MESSAGE_ID hide[] = { + 80 /* IDXGISwapChain::GetContainingOutput: The swapchain's adapter does not control the output on which the swapchain's window resides. */ + , + }; + DXGI_INFO_QUEUE_FILTER filter = {}; + filter.DenyList.NumIDs = _countof(hide); + filter.DenyList.pIDList = hide; + dxgiInfoQueue->AddStorageFilterEntries(DXGI_DEBUG_DXGI, &filter); + } +} + +void D3D12DeviceInstance::EnableDebugInofQueue() +{ + ID3D12InfoQueue *pInfoQueue = nullptr; + if (SUCCEEDED(m_Device->QueryInterface(IID_PPV_ARGS(&pInfoQueue)))) { + // Suppress whole categories of messages + //D3D12_MESSAGE_CATEGORY Categories[] = {}; + + // Suppress messages based on their severity level + D3D12_MESSAGE_SEVERITY Severities[] = {D3D12_MESSAGE_SEVERITY_INFO}; + + // Suppress individual messages by their ID + D3D12_MESSAGE_ID DenyIds[] = { + // This occurs when there are uninitialized descriptors in a descriptor table, even when a + // shader does not access the missing descriptors. I find this is common when switching + // shader permutations and not wanting to change much code to reorder resources. + D3D12_MESSAGE_ID_INVALID_DESCRIPTOR_HANDLE, + + // Triggered when a shader does not export all color components of a render target, such as + // when only writing RGB to an R10G10B10A2 buffer, ignoring alpha. 
+ D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_PS_OUTPUT_RT_OUTPUT_MISMATCH, + + // This occurs when a descriptor table is unbound even when a shader does not access the missing + // descriptors. This is common with a root signature shared between disparate shaders that + // don't all need the same types of resources. + D3D12_MESSAGE_ID_COMMAND_LIST_DESCRIPTOR_TABLE_NOT_SET, + + // RESOURCE_BARRIER_DUPLICATE_SUBRESOURCE_TRANSITIONS + D3D12_MESSAGE_ID_RESOURCE_BARRIER_DUPLICATE_SUBRESOURCE_TRANSITIONS, + + // Suppress errors from calling ResolveQueryData with timestamps that weren't requested on a given frame. + D3D12_MESSAGE_ID_RESOLVE_QUERY_INVALID_QUERY_STATE, + + // Ignoring InitialState D3D12_RESOURCE_STATE_COPY_DEST. Buffers are effectively created in state D3D12_RESOURCE_STATE_COMMON. + D3D12_MESSAGE_ID_CREATERESOURCE_STATE_IGNORED, + }; + + D3D12_INFO_QUEUE_FILTER NewFilter = {}; + //NewFilter.DenyList.NumCategories = _countof(Categories); + //NewFilter.DenyList.pCategoryList = Categories; + NewFilter.DenyList.NumSeverities = _countof(Severities); + NewFilter.DenyList.pSeverityList = Severities; + NewFilter.DenyList.NumIDs = _countof(DenyIds); + NewFilter.DenyList.pIDList = DenyIds; + + pInfoQueue->PushStorageFilter(&NewFilter); + pInfoQueue->Release(); + } +} + +void D3D12DeviceInstance::CheckFeatureSupports() +{ + D3D12_FEATURE_DATA_D3D12_OPTIONS FeatureData = {}; + if (SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &FeatureData, sizeof(FeatureData)))) { + if (FeatureData.TypedUAVLoadAdditionalFormats) { + // HDR + D3D12_FEATURE_DATA_FORMAT_SUPPORT Support = { + DXGI_FORMAT_R11G11B10_FLOAT, D3D12_FORMAT_SUPPORT1_NONE, D3D12_FORMAT_SUPPORT2_NONE}; + + if (SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &Support, + sizeof(Support))) && + (Support.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD) != 0) { + m_TypedUAVLoadSupport_R11G11B10_FLOAT = true; + } + + // SRGB_LINEAR + Support.Format = 
DXGI_FORMAT_R16G16B16A16_FLOAT; + + if (SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &Support, + sizeof(Support))) && + (Support.Support2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD) != 0) { + m_TypedUAVLoadSupport_R16G16B16A16_FLOAT = true; + } + + // DXGI_FORMAT_NV12 + Support.Format = DXGI_FORMAT_NV12; + if (SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &Support, + sizeof(Support))) && + (Support.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D) != 0) { + m_NV12Supported = true; + } + + // DXGI_FORMAT_P010 + Support.Format = DXGI_FORMAT_P010; + if (SUCCEEDED(m_Device->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &Support, + sizeof(Support))) && + (Support.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE2D) != 0) { + m_P010Supported = true; + } + } + } +} + +// Opens the adapter identified by desc->AdapterLuid through D3DKMT and returns its HagsStatus built from the WDDM 2.7 caps. +// The result is empty when the adapter cannot be opened or the WDDM 2.7 caps query fails. +std::optional<HagsStatus> D3D12DeviceInstance::GetAdapterHagsStatus(const DXGI_ADAPTER_DESC *desc) +{ + std::optional<HagsStatus> ret; + D3DKMT_OPENADAPTERFROMLUID d3dkmt_openluid{}; + d3dkmt_openluid.AdapterLuid = desc->AdapterLuid; + + NTSTATUS res = D3DKMTOpenAdapterFromLuid(&d3dkmt_openluid); + if (FAILED(res)) { + blog(LOG_DEBUG, "Failed opening D3DKMT adapter: %x", res); + return ret; + } + + D3DKMT_WDDM_2_7_CAPS caps = {}; + D3DKMT_QUERYADAPTERINFO args = {}; + args.hAdapter = d3dkmt_openluid.hAdapter; + args.Type = KMTQAITYPE_WDDM_2_7_CAPS; + args.pPrivateDriverData = &caps; + args.PrivateDriverDataSize = sizeof(caps); + res = D3DKMTQueryAdapterInfo(&args); + + /* If this still fails we're likely on Windows 10 pre-2004 + * where HAGS is not supported anyway. */ + if (SUCCEEDED(res)) { + HagsStatus status(&caps); + + /* Starting with Windows 10 21H2 we can query more detailed + * support information (e.g. experimental status). + * This is optional and failure doesn't matter. 
*/ + D3DKMT_WDDM_2_9_CAPS ext_caps = {}; + args.hAdapter = d3dkmt_openluid.hAdapter; + args.Type = KMTQAITYPE_WDDM_2_9_CAPS; + args.pPrivateDriverData = &ext_caps; + args.PrivateDriverDataSize = sizeof(ext_caps); + res = D3DKMTQueryAdapterInfo(&args); + + if (SUCCEEDED(res)) + status.SetDriverSupport(ext_caps.HwSchSupportState); + + ret = status; + } else { + blog(LOG_WARNING, "Failed querying WDDM 2.7 caps: %x", res); + } + + D3DKMT_CLOSEADAPTER d3dkmt_close = {d3dkmt_openluid.hAdapter}; + res = D3DKMTCloseAdapter(&d3dkmt_close); + if (FAILED(res)) { + blog(LOG_DEBUG, "Failed closing D3DKMT adapter %x: %x", d3dkmt_openluid.hAdapter, res); + } + + return ret; +} + +void D3D12DeviceInstance::EnumD3DAdapters(bool (*callback)(void *, const char *, uint32_t), void *param) +{ + ComPtr factory; + ComPtr adapter; + HRESULT hr; + UINT i; + + hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) + throw HRError("Failed to create DXGIFactory", hr); + + for (i = 0; factory->EnumAdapters1(i, adapter.Assign()) == S_OK; ++i) { + DXGI_ADAPTER_DESC desc; + char name[512] = ""; + + hr = adapter->GetDesc(&desc); + if (FAILED(hr)) + continue; + + /* ignore Microsoft's 'basic' renderer' */ + if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) + continue; + + os_wcs_to_utf8(desc.Description, 0, name, sizeof(name)); + + if (!callback(param, name, i)) + break; + } +} + +bool D3D12DeviceInstance::GetMonitorTarget(const MONITORINFOEX &info, DISPLAYCONFIG_TARGET_DEVICE_NAME &target) +{ + bool found = false; + + UINT32 numPath, numMode; + if (GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &numPath, &numMode) == ERROR_SUCCESS) { + std::vector paths(numPath); + std::vector modes(numMode); + if (QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS, &numPath, paths.data(), &numMode, modes.data(), + nullptr) == ERROR_SUCCESS) { + paths.resize(numPath); + for (size_t i = 0; i < numPath; ++i) { + const DISPLAYCONFIG_PATH_INFO &path = paths[i]; + + DISPLAYCONFIG_SOURCE_DEVICE_NAME + source; + 
source.header.type = DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME; + source.header.size = sizeof(source); + source.header.adapterId = path.sourceInfo.adapterId; + source.header.id = path.sourceInfo.id; + if (DisplayConfigGetDeviceInfo(&source.header) == ERROR_SUCCESS && + wcscmp(info.szDevice, source.viewGdiDeviceName) == 0) { + target.header.type = DISPLAYCONFIG_DEVICE_INFO_GET_TARGET_NAME; + target.header.size = sizeof(target); + target.header.adapterId = path.sourceInfo.adapterId; + target.header.id = path.targetInfo.id; + found = DisplayConfigGetDeviceInfo(&target.header) == ERROR_SUCCESS; + break; + } + } + } + } + + return found; +} + +bool D3D12DeviceInstance::GetOutputDesc1(IDXGIOutput *const output, DXGI_OUTPUT_DESC1 *desc1) +{ + ComPtr output6; + HRESULT hr = output->QueryInterface(IID_PPV_ARGS(output6.Assign())); + bool success = SUCCEEDED(hr); + if (success) { + hr = output6->GetDesc1(desc1); + success = SUCCEEDED(hr); + if (!success) { + blog(LOG_WARNING, "IDXGIOutput6::GetDesc1 failed: 0x%08lX", hr); + } + } + + return success; +} + +// Note: Since an hmon can represent multiple monitors while in clone, this function as written will return +// the value for the internal monitor if one exists, and otherwise the highest clone-path priority. 
+HRESULT D3D12DeviceInstance::GetPathInfo(_In_ PCWSTR pszDeviceName, _Out_ DISPLAYCONFIG_PATH_INFO *pPathInfo) +{ + HRESULT hr = S_OK; + UINT32 NumPathArrayElements = 0; + UINT32 NumModeInfoArrayElements = 0; + DISPLAYCONFIG_PATH_INFO *PathInfoArray = nullptr; + DISPLAYCONFIG_MODE_INFO *ModeInfoArray = nullptr; + + do { + // In case this isn't the first time through the loop, delete the buffers allocated + delete[] PathInfoArray; + PathInfoArray = nullptr; + + delete[] ModeInfoArray; + ModeInfoArray = nullptr; + + hr = HRESULT_FROM_WIN32(GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, &NumPathArrayElements, + &NumModeInfoArrayElements)); + if (FAILED(hr)) { + break; + } + + PathInfoArray = new (std::nothrow) DISPLAYCONFIG_PATH_INFO[NumPathArrayElements]; + if (PathInfoArray == nullptr) { + hr = E_OUTOFMEMORY; + break; + } + + ModeInfoArray = new (std::nothrow) DISPLAYCONFIG_MODE_INFO[NumModeInfoArrayElements]; + if (ModeInfoArray == nullptr) { + hr = E_OUTOFMEMORY; + break; + } + + hr = HRESULT_FROM_WIN32(QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS, &NumPathArrayElements, PathInfoArray, + &NumModeInfoArrayElements, ModeInfoArray, nullptr)); + } while (hr == HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER)); + + INT DesiredPathIdx = -1; + + if (SUCCEEDED(hr)) { + // Loop through all sources until the one which matches the 'monitor' is found. + for (UINT PathIdx = 0; PathIdx < NumPathArrayElements; ++PathIdx) { + DISPLAYCONFIG_SOURCE_DEVICE_NAME SourceName = {}; + SourceName.header.type = DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME; + SourceName.header.size = sizeof(SourceName); + SourceName.header.adapterId = PathInfoArray[PathIdx].sourceInfo.adapterId; + SourceName.header.id = PathInfoArray[PathIdx].sourceInfo.id; + + hr = HRESULT_FROM_WIN32(DisplayConfigGetDeviceInfo(&SourceName.header)); + if (SUCCEEDED(hr)) { + if (wcscmp(pszDeviceName, SourceName.viewGdiDeviceName) == 0) { + // Found the source which matches this hmonitor. 
The paths are given in path-priority order + // so the first found is the most desired, unless we later find an internal. + if (DesiredPathIdx == -1 || + IsInternalVideoOutput(PathInfoArray[PathIdx].targetInfo.outputTechnology)) { + DesiredPathIdx = PathIdx; + } + } + } + } + } + + if (DesiredPathIdx != -1) { + *pPathInfo = PathInfoArray[DesiredPathIdx]; + } else { + hr = E_INVALIDARG; + } + + delete[] PathInfoArray; + PathInfoArray = nullptr; + + delete[] ModeInfoArray; + ModeInfoArray = nullptr; + + return hr; +} + +// Overloaded function accepts an HMONITOR and converts to DeviceName +HRESULT D3D12DeviceInstance::GetPathInfo(HMONITOR hMonitor, _Out_ DISPLAYCONFIG_PATH_INFO *pPathInfo) +{ + HRESULT hr = S_OK; + + // Get the name of the 'monitor' being requested + MONITORINFOEXW ViewInfo; + RtlZeroMemory(&ViewInfo, sizeof(ViewInfo)); + ViewInfo.cbSize = sizeof(ViewInfo); + if (!GetMonitorInfoW(hMonitor, &ViewInfo)) { + // Error condition, likely invalid monitor handle, could log error + hr = HRESULT_FROM_WIN32(GetLastError()); + } + + if (SUCCEEDED(hr)) { + hr = GetPathInfo(ViewInfo.szDevice, pPathInfo); + } + + return hr; +} + +ULONG D3D12DeviceInstance::GetSdrMaxNits(HMONITOR monitor) +{ + ULONG nits = 80; + + DISPLAYCONFIG_PATH_INFO info; + if (SUCCEEDED(GetPathInfo(monitor, &info))) { + const DISPLAYCONFIG_PATH_TARGET_INFO &targetInfo = info.targetInfo; + + DISPLAYCONFIG_SDR_WHITE_LEVEL level; + DISPLAYCONFIG_DEVICE_INFO_HEADER &header = level.header; + header.type = DISPLAYCONFIG_DEVICE_INFO_GET_SDR_WHITE_LEVEL; + header.size = sizeof(level); + header.adapterId = targetInfo.adapterId; + header.id = targetInfo.id; + if (DisplayConfigGetDeviceInfo(&header) == ERROR_SUCCESS) + nits = (level.SDRWhiteLevel * 80) / 1000; + } + + return nits; +} + +MonitorColorInfo D3D12DeviceInstance::GetMonitorColorInfo(HMONITOR hMonitor) +{ + std::vector> monitor_to_hdr; + ComPtr factory1; + CreateDXGIFactory2(0, IID_PPV_ARGS(&factory1)); + + ComPtr adapter; + ComPtr output; + 
ComPtr output6; + for (UINT adapterIndex = 0; SUCCEEDED(factory1->EnumAdapters(adapterIndex, &adapter)); ++adapterIndex) { + for (UINT outputIndex = 0; SUCCEEDED(adapter->EnumOutputs(outputIndex, &output)); ++outputIndex) { + DXGI_OUTPUT_DESC1 desc1; + if (SUCCEEDED(output->QueryInterface(&output6)) && SUCCEEDED(output6->GetDesc1(&desc1)) && + (desc1.Monitor == hMonitor)) { + const bool hdr = desc1.ColorSpace == DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020; + const UINT bits = desc1.BitsPerColor; + const ULONG nits = GetSdrMaxNits(desc1.Monitor); + return monitor_to_hdr.emplace_back(hMonitor, MonitorColorInfo(hdr, bits, nits)).second; + } + } + } + + return MonitorColorInfo(false, 8, 80); +} + +void D3D12DeviceInstance::PopulateMonitorIds(HMONITOR handle, char *id, char *alt_id, size_t capacity) +{ + MONITORINFOEXA mi; + mi.cbSize = sizeof(mi); + if (GetMonitorInfoA(handle, (LPMONITORINFO)&mi)) { + strcpy_s(alt_id, capacity, mi.szDevice); + DISPLAY_DEVICEA device; + device.cb = sizeof(device); + if (EnumDisplayDevicesA(mi.szDevice, 0, &device, EDD_GET_DEVICE_INTERFACE_NAME)) { + strcpy_s(id, capacity, device.DeviceID); + } + } +} + +void D3D12DeviceInstance::LogAdapterMonitors(IDXGIAdapter1 *adapter) +{ + UINT i; + ComPtr output; + + for (i = 0; adapter->EnumOutputs(i, &output) == S_OK; ++i) { + DXGI_OUTPUT_DESC desc; + if (FAILED(output->GetDesc(&desc))) + continue; + + unsigned refresh = 0; + + bool target_found = false; + DISPLAYCONFIG_TARGET_DEVICE_NAME target; + + constexpr size_t id_capacity = 128; + char id[id_capacity]{}; + char alt_id[id_capacity]{}; + PopulateMonitorIds(desc.Monitor, id, alt_id, id_capacity); + + MONITORINFOEX info; + info.cbSize = sizeof(info); + if (GetMonitorInfo(desc.Monitor, &info)) { + target_found = GetMonitorTarget(info, target); + + DEVMODE mode; + mode.dmSize = sizeof(mode); + mode.dmDriverExtra = 0; + if (EnumDisplaySettings(info.szDevice, ENUM_CURRENT_SETTINGS, &mode)) { + refresh = mode.dmDisplayFrequency; + } + } + + if 
(!target_found) { + target.monitorFriendlyDeviceName[0] = 0; + } + + UINT bits_per_color = 8; + DXGI_COLOR_SPACE_TYPE type = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + FLOAT primaries[4][2]{}; + double gamut_size = 0.; + FLOAT min_luminance = 0.f; + FLOAT max_luminance = 0.f; + FLOAT max_full_frame_luminance = 0.f; + DXGI_OUTPUT_DESC1 desc1; + if (GetOutputDesc1(output, &desc1)) { + bits_per_color = desc1.BitsPerColor; + type = desc1.ColorSpace; + primaries[0][0] = desc1.RedPrimary[0]; + primaries[0][1] = desc1.RedPrimary[1]; + primaries[1][0] = desc1.GreenPrimary[0]; + primaries[1][1] = desc1.GreenPrimary[1]; + primaries[2][0] = desc1.BluePrimary[0]; + primaries[2][1] = desc1.BluePrimary[1]; + primaries[3][0] = desc1.WhitePoint[0]; + primaries[3][1] = desc1.WhitePoint[1]; + gamut_size = DoubleTriangleArea(desc1.RedPrimary[0], desc1.RedPrimary[1], desc1.GreenPrimary[0], + desc1.GreenPrimary[1], desc1.BluePrimary[0], + desc1.BluePrimary[1]); + min_luminance = desc1.MinLuminance; + max_luminance = desc1.MaxLuminance; + max_full_frame_luminance = desc1.MaxFullFrameLuminance; + } + + const char *space = "Unknown"; + switch (type) { + case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709: + space = "RGB_FULL_G22_NONE_P709"; + break; + case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: + space = "RGB_FULL_G2084_NONE_P2020"; + break; + default: + blog(LOG_WARNING, "Unexpected DXGI_COLOR_SPACE_TYPE: %u", (unsigned)type); + } + + // These are always identical, but you still have to supply both, thanks Microsoft! 
+ UINT dpiX, dpiY; + unsigned scaling = 100; + if (GetDpiForMonitor(desc.Monitor, MDT_EFFECTIVE_DPI, &dpiX, &dpiY) == S_OK) { + scaling = (unsigned)(dpiX * 100.0f / 96.0f); + } else { + dpiX = 0; + } + + const RECT &rect = desc.DesktopCoordinates; + const ULONG sdr_white_nits = GetSdrMaxNits(desc.Monitor); + + char *friendly_name; + os_wcs_to_utf8_ptr(target.monitorFriendlyDeviceName, 0, &friendly_name); + + blog(LOG_INFO, + "\t output %u:\n" + "\t name=%s\n" + "\t pos={%d, %d}\n" + "\t size={%d, %d}\n" + "\t attached=%s\n" + "\t refresh=%u\n" + "\t bits_per_color=%u\n" + "\t space=%s\n" + "\t primaries=[r=(%f, %f), g=(%f, %f), b=(%f, %f), wp=(%f, %f)]\n" + "\t relative_gamut_area=[709=%f, P3=%f, 2020=%f]\n" + "\t sdr_white_nits=%lu\n" + "\t nit_range=[min=%f, max=%f, max_full_frame=%f]\n" + "\t dpi=%u (%u%%)\n" + "\t id=%s\n" + "\t alt_id=%s", + i, friendly_name, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, + desc.AttachedToDesktop ? "true" : "false", refresh, bits_per_color, space, primaries[0][0], + primaries[0][1], primaries[1][0], primaries[1][1], primaries[2][0], primaries[2][1], + primaries[3][0], primaries[3][1], gamut_size / DoubleTriangleArea(.64, .33, .3, .6, .15, .06), + gamut_size / DoubleTriangleArea(.68, .32, .265, .69, .15, .060), + gamut_size / DoubleTriangleArea(.708, .292, .17, .797, .131, .046), sdr_white_nits, min_luminance, + max_luminance, max_full_frame_luminance, dpiX, scaling, id, alt_id); + bfree(friendly_name); + } +} + +void D3D12DeviceInstance::LogD3DAdapters() +{ + ComPtr factory; + ComPtr adapter; + HRESULT hr; + UINT i; + + blog(LOG_INFO, "Available Video Adapters: "); + + hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) + throw HRError("Failed to create DXGIFactory", hr); + + for (i = 0; factory->EnumAdapters1(i, adapter.Assign()) == S_OK; ++i) { + DXGI_ADAPTER_DESC desc; + char name[512] = ""; + + hr = adapter->GetDesc(&desc); + if (FAILED(hr)) + continue; + + /* ignore Microsoft's 'basic' 
renderer' */ + if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) + continue; + + os_wcs_to_utf8(desc.Description, 0, name, sizeof(name)); + blog(LOG_INFO, "\tAdapter %u: %s", i, name); + blog(LOG_INFO, "\t Dedicated VRAM: %" PRIu64 " (%.01f GiB)", desc.DedicatedVideoMemory, + to_GiB(desc.DedicatedVideoMemory)); + blog(LOG_INFO, "\t Shared VRAM: %" PRIu64 " (%.01f GiB)", desc.SharedSystemMemory, + to_GiB(desc.SharedSystemMemory)); + blog(LOG_INFO, "\t PCI ID: %x:%.4x", desc.VendorId, desc.DeviceId); + + if (auto hags_support = GetAdapterHagsStatus(&desc)) { + blog(LOG_INFO, "\t HAGS Status: %s", hags_support->ToString().c_str()); + } else { + blog(LOG_WARNING, "\t HAGS Status: Unknown"); + } + + /* driver version */ + LARGE_INTEGER umd; + hr = adapter->CheckInterfaceSupport(__uuidof(IDXGIDevice), &umd); + if (SUCCEEDED(hr)) { + const uint64_t version = umd.QuadPart; + const uint16_t aa = (version >> 48) & 0xffff; + const uint16_t bb = (version >> 32) & 0xffff; + const uint16_t ccccc = (version >> 16) & 0xffff; + const uint16_t ddddd = version & 0xffff; + blog(LOG_INFO, "\t Driver Version: %" PRIu16 ".%" PRIu16 ".%" PRIu16 ".%" PRIu16, aa, bb, + ccccc, ddddd); + } else { + blog(LOG_INFO, "\t Driver Version: Unknown (0x%X)", (unsigned)hr); + } + + LogAdapterMonitors(adapter); + } +} + +// Returns true if this is an integrated display panel e.g. the screen attached to tablets or laptops. 
+bool D3D12DeviceInstance::IsInternalVideoOutput(const DISPLAYCONFIG_VIDEO_OUTPUT_TECHNOLOGY VideoOutputTechnologyType) +{ + switch (VideoOutputTechnologyType) { + case DISPLAYCONFIG_OUTPUT_TECHNOLOGY_INTERNAL: + case DISPLAYCONFIG_OUTPUT_TECHNOLOGY_DISPLAYPORT_EMBEDDED: + case DISPLAYCONFIG_OUTPUT_TECHNOLOGY_UDI_EMBEDDED: + return TRUE; + + default: + return FALSE; + } +} + +// Collects the hardware adapters (enumerated in high-performance preference order) on which a feature-level 12.0 device can be created, +// stores the one at `index` in m_Adapter, creates m_Device on it and sets m_FastClearSupported from the vendor and driver version. +// Throws HRError when no adapter qualifies, `index` is out of range, or device creation fails. +void D3D12DeviceInstance::CreateD3DAdapterAndDevice(uint32_t index) +{ + SIZE_T MaxSize = 0; + std::vector<ComPtr<IDXGIAdapter1>> adapters; + ComPtr<IDXGIAdapter1> pAdapter; + ComPtr<ID3D12Device> pDevice; + for (uint32_t Idx = 0; + DXGI_ERROR_NOT_FOUND != + m_DxgiFactory->EnumAdapterByGpuPreference(Idx, DXGI_GPU_PREFERENCE::DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + IID_PPV_ARGS(&pAdapter)); + ++Idx) { + DXGI_ADAPTER_DESC1 desc; + HRESULT hr = pAdapter->GetDesc1(&desc); + if (FAILED(hr)) + continue; + // Is a software adapter? + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + continue; + + /* ignore Microsoft's 'basic' renderer' */ + if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) + continue; + + // Can create a D3D12 device? + if (FAILED(D3D12CreateDevice(pAdapter.Get(), D3D_FEATURE_LEVEL_12_0, IID_PPV_ARGS(&pDevice)))) + continue; + + // Does support DXR if required? + /* if (RequireDXRSupport && !IsDirectXRaytracingSupported(pDevice.Get())) + continue;*/ + + // By default, search for the adapter with the most memory because that's usually the dGPU. + // NOTE(review): this check also drops every adapter with less dedicated memory than an earlier one, so `index` addresses this filtered list rather than the EnumAdapters1 order reported by EnumD3DAdapters - confirm this is intended. 
+ if (desc.DedicatedVideoMemory < MaxSize) + continue; + + MaxSize = desc.DedicatedVideoMemory; + + // NOTE(review): m_Device is released here without being reset; confirm it is always null at this point. + if (m_Device != nullptr) + m_Device->Release(); + + adapters.push_back(pAdapter); + } + + if (adapters.size() == 0) { + throw HRError("No compatible D3D12 adapters found"); + } + + if (index >= adapters.size()) { + throw HRError("Requested adapter index out of range"); + } + + m_Adapter = adapters[index]; + // Create the device on the adapter that was just selected. pAdapter is only the enumeration cursor: after the loop it holds whatever the final, failed EnumAdapterByGpuPreference call left in it, not the requested adapter. + if (FAILED(D3D12CreateDevice(adapters[index].Get(), D3D_FEATURE_LEVEL_12_0, IID_PPV_ARGS(&m_Device)))) { + throw HRError("D3D12CreateDevice failed"); + } + + std::wstring adapterName; + // Zero-initialized so that VendorId is well defined below even if GetDesc fails. + DXGI_ADAPTER_DESC desc = {}; + adapterName = (m_Adapter->GetDesc(&desc) == S_OK) ? desc.Description : L""; + + BPtr<char> adapterNameUTF8; + os_wcs_to_utf8_ptr(adapterName.c_str(), 0, &adapterNameUTF8); + blog(LOG_INFO, "Loading up D3D12 on adapter %s (%" PRIu32 ")", adapterNameUTF8.Get(), index); + + LARGE_INTEGER umd; + uint64_t driverVersion = 0; + HRESULT hr = m_Adapter->CheckInterfaceSupport(__uuidof(IDXGIDevice), &umd); + if (SUCCEEDED(hr)) + driverVersion = umd.QuadPart; + + /* Always true for non-NVIDIA GPUs */ + if (desc.VendorId != 0x10de) + m_FastClearSupported = true; + else { + /* Check for NVIDIA driver version >= 31.0.15.2737. + * The version is four 16-bit fields packed most-significant first, so a single integer + * comparison orders versions correctly; comparing each field on its own would reject + * newer drivers such as 32.0.0.0. */ + constexpr uint64_t minFastClearVersion = (31ull << 48) | (0ull << 32) | (15ull << 16) | 2737ull; + m_FastClearSupported = driverVersion >= minFastClearVersion; + } +} + +const wchar_t *D3D12DeviceInstance::GPUVendorToString(uint32_t vendorID) +{ + switch (vendorID) { + case vendorID_Nvidia: + return L"Nvidia"; + case vendorID_AMD: + return L"AMD"; + case vendorID_Intel: + return L"Intel"; + default: + return L"Unknown"; + break; + } +} + +uint32_t D3D12DeviceInstance::GetVendorIdFromDevice(ID3D12Device *pDevice) +{ + LUID luid = pDevice->GetAdapterLuid(); + + ComPtr dxgiFactory; + HRESULT hr = CreateDXGIFactory2(0, 
IID_PPV_ARGS(&dxgiFactory)); + if (FAILED(hr)) { + throw HRError("CreateDXGIFactory2 failed", hr); + } + + ComPtr pAdapter; + if (SUCCEEDED(dxgiFactory->EnumAdapterByLuid(luid, IID_PPV_ARGS(&pAdapter)))) { + DXGI_ADAPTER_DESC1 desc; + if (SUCCEEDED(pAdapter->GetDesc1(&desc))) { + return desc.VendorId; + } + } + + return 0; +} + +bool D3D12DeviceInstance::IsDeviceNvidia(ID3D12Device *pDevice) +{ + return GetVendorIdFromDevice(pDevice) == vendorID_Nvidia; +} + +bool D3D12DeviceInstance::IsDeviceAMD(ID3D12Device *pDevice) +{ + return GetVendorIdFromDevice(pDevice) == vendorID_AMD; +} + +bool D3D12DeviceInstance::IsDeviceIntel(ID3D12Device *pDevice) +{ + return GetVendorIdFromDevice(pDevice) == vendorID_Intel; +} + +// Check adapter support for DirectX Raytracing. +bool D3D12DeviceInstance::IsDirectXRaytracingSupported(ID3D12Device *testDevice) +{ + D3D12_FEATURE_DATA_D3D12_OPTIONS5 featureSupport = {}; + + if (FAILED(testDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &featureSupport, + sizeof(featureSupport)))) + return false; + + return featureSupport.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED; +} + +} // namespace D3D12Graphics diff --git a/libobs-d3d12/d3d12-command-context.hpp b/libobs-d3d12/d3d12-command-context.hpp new file mode 100644 index 00000000000000..839abc074a0bfd --- /dev/null +++ b/libobs-d3d12/d3d12-command-context.hpp @@ -0,0 +1,1548 @@ +#pragma once + +// Code from the DirectX-Graphics-Samples and DirectXTK12 repositories +// See: https://github.com/microsoft/DirectX-Graphics-Samples.git +// See: https://github.com/microsoft/DirectXTK12.git + +// This is a standalone D3D12 public module; do not include libobs-related header files. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#define PERF_GRAPH_ERROR uint32_t(0xFFFFFFFF) +#define D3D12_GPU_VIRTUAL_ADDRESS_NULL ((D3D12_GPU_VIRTUAL_ADDRESS)0) +#define D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN ((D3D12_GPU_VIRTUAL_ADDRESS)-1) + +#define DEFAULT_ALIGN 256 + +static const uint32_t kMaxNumDescriptors = 256; +static const uint32_t kMaxNumDescriptorTables = 16; + +typedef uint32_t GraphHandle; + +#define VALID_COMPUTE_QUEUE_RESOURCE_STATES \ + ( D3D12_RESOURCE_STATE_UNORDERED_ACCESS \ + | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE \ + | D3D12_RESOURCE_STATE_COPY_DEST \ + | D3D12_RESOURCE_STATE_COPY_SOURCE ) + +namespace D3D12Graphics { + +class RootParameter; +class RootSignature; +class IndirectParameter; +class CommandSignature; +class DescriptorAllocator; +class DescriptorHandle; +class DescriptorHandleCache; +class DynamicDescriptorHeap; +class ContextManager; +class GpuResource; +class CommandContext; +class GraphicsContext; +class ComputeContext; +class Texture; +class D3D12DeviceInstance; +class HagsStatus; + +enum LinearAllocatorType { + kInvalidAllocator = -1, + kGpuExclusive = 0, // DEFAULT GPU-writeable (via UAV) + kCpuWritable = 1, // UPLOAD CPU-writeable (but write combined) + kNumAllocatorTypes +}; + +enum { + kGpuAllocatorPageSize = 0x10000, // 64K + kCpuAllocatorPageSize = 0x200000 // 2MB +}; + +static size_t BitsPerPixel(_In_ DXGI_FORMAT fmt) +{ + switch (fmt) { + case DXGI_FORMAT_R32G32B32A32_TYPELESS: + case DXGI_FORMAT_R32G32B32A32_FLOAT: + case DXGI_FORMAT_R32G32B32A32_UINT: + case DXGI_FORMAT_R32G32B32A32_SINT: + return 128; + + case DXGI_FORMAT_R32G32B32_TYPELESS: + case DXGI_FORMAT_R32G32B32_FLOAT: + case DXGI_FORMAT_R32G32B32_UINT: + case DXGI_FORMAT_R32G32B32_SINT: + return 96; + + case DXGI_FORMAT_R16G16B16A16_TYPELESS: + case DXGI_FORMAT_R16G16B16A16_FLOAT: + case DXGI_FORMAT_R16G16B16A16_UNORM: + case 
DXGI_FORMAT_R16G16B16A16_UINT: + case DXGI_FORMAT_R16G16B16A16_SNORM: + case DXGI_FORMAT_R16G16B16A16_SINT: + case DXGI_FORMAT_R32G32_TYPELESS: + case DXGI_FORMAT_R32G32_FLOAT: + case DXGI_FORMAT_R32G32_UINT: + case DXGI_FORMAT_R32G32_SINT: + case DXGI_FORMAT_R32G8X24_TYPELESS: + case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: + case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: + case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: + case DXGI_FORMAT_Y416: + case DXGI_FORMAT_Y210: + case DXGI_FORMAT_Y216: + return 64; + + case DXGI_FORMAT_R10G10B10A2_TYPELESS: + case DXGI_FORMAT_R10G10B10A2_UNORM: + case DXGI_FORMAT_R10G10B10A2_UINT: + case DXGI_FORMAT_R11G11B10_FLOAT: + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + case DXGI_FORMAT_R8G8B8A8_UNORM: + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + case DXGI_FORMAT_R8G8B8A8_UINT: + case DXGI_FORMAT_R8G8B8A8_SNORM: + case DXGI_FORMAT_R8G8B8A8_SINT: + case DXGI_FORMAT_R16G16_TYPELESS: + case DXGI_FORMAT_R16G16_FLOAT: + case DXGI_FORMAT_R16G16_UNORM: + case DXGI_FORMAT_R16G16_UINT: + case DXGI_FORMAT_R16G16_SNORM: + case DXGI_FORMAT_R16G16_SINT: + case DXGI_FORMAT_R32_TYPELESS: + case DXGI_FORMAT_D32_FLOAT: + case DXGI_FORMAT_R32_FLOAT: + case DXGI_FORMAT_R32_UINT: + case DXGI_FORMAT_R32_SINT: + case DXGI_FORMAT_R24G8_TYPELESS: + case DXGI_FORMAT_D24_UNORM_S8_UINT: + case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: + case DXGI_FORMAT_X24_TYPELESS_G8_UINT: + case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: + case DXGI_FORMAT_R8G8_B8G8_UNORM: + case DXGI_FORMAT_G8R8_G8B8_UNORM: + case DXGI_FORMAT_B8G8R8A8_UNORM: + case DXGI_FORMAT_B8G8R8X8_UNORM: + case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM: + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: + case DXGI_FORMAT_AYUV: + case DXGI_FORMAT_Y410: + case DXGI_FORMAT_YUY2: + return 32; + + case DXGI_FORMAT_P010: + case DXGI_FORMAT_P016: + return 24; + + case DXGI_FORMAT_R8G8_TYPELESS: + case DXGI_FORMAT_R8G8_UNORM: + case 
DXGI_FORMAT_R8G8_UINT: + case DXGI_FORMAT_R8G8_SNORM: + case DXGI_FORMAT_R8G8_SINT: + case DXGI_FORMAT_R16_TYPELESS: + case DXGI_FORMAT_R16_FLOAT: + case DXGI_FORMAT_D16_UNORM: + case DXGI_FORMAT_R16_UNORM: + case DXGI_FORMAT_R16_UINT: + case DXGI_FORMAT_R16_SNORM: + case DXGI_FORMAT_R16_SINT: + case DXGI_FORMAT_B5G6R5_UNORM: + case DXGI_FORMAT_B5G5R5A1_UNORM: + case DXGI_FORMAT_A8P8: + case DXGI_FORMAT_B4G4R4A4_UNORM: + return 16; + + case DXGI_FORMAT_NV12: + case DXGI_FORMAT_420_OPAQUE: + case DXGI_FORMAT_NV11: + return 12; + + case DXGI_FORMAT_R8_TYPELESS: + case DXGI_FORMAT_R8_UNORM: + case DXGI_FORMAT_R8_UINT: + case DXGI_FORMAT_R8_SNORM: + case DXGI_FORMAT_R8_SINT: + case DXGI_FORMAT_A8_UNORM: + case DXGI_FORMAT_AI44: + case DXGI_FORMAT_IA44: + case DXGI_FORMAT_P8: + return 8; + + case DXGI_FORMAT_R1_UNORM: + return 1; + + case DXGI_FORMAT_BC1_TYPELESS: + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC4_TYPELESS: + case DXGI_FORMAT_BC4_UNORM: + case DXGI_FORMAT_BC4_SNORM: + return 4; + + case DXGI_FORMAT_BC2_TYPELESS: + case DXGI_FORMAT_BC2_UNORM: + case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_TYPELESS: + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC3_UNORM_SRGB: + case DXGI_FORMAT_BC5_TYPELESS: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC5_SNORM: + case DXGI_FORMAT_BC6H_TYPELESS: + case DXGI_FORMAT_BC6H_UF16: + case DXGI_FORMAT_BC6H_SF16: + case DXGI_FORMAT_BC7_TYPELESS: + case DXGI_FORMAT_BC7_UNORM: + case DXGI_FORMAT_BC7_UNORM_SRGB: + return 8; + + default: + return 0; + } +} + +static UINT BytesPerPixel(DXGI_FORMAT Format) +{ + return (UINT)BitsPerPixel(Format) / 8; +}; + +inline UINT64 GetRequiredIntermediateSize(_In_ ID3D12Resource *pDestinationResource, + _In_range_(0, D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, + _In_range_(0, D3D12_REQ_SUBRESOURCES - FirstSubresource) + UINT NumSubresources) noexcept +{ + auto Desc = pDestinationResource->GetDesc(); + UINT64 RequiredSize = 0; + + 
ID3D12Device *pDevice = nullptr; + pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); + pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, 0, nullptr, nullptr, nullptr, + &RequiredSize); + pDevice->Release(); + + return RequiredSize; +} + +struct CD3DX12_RANGE : public D3D12_RANGE { + CD3DX12_RANGE() = default; + explicit CD3DX12_RANGE(const D3D12_RANGE &o) noexcept : D3D12_RANGE(o) {} + CD3DX12_RANGE(SIZE_T begin, SIZE_T end) noexcept + { + Begin = begin; + End = end; + } +}; + +struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES { + CD3DX12_HEAP_PROPERTIES() = default; + explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) noexcept : D3D12_HEAP_PROPERTIES(o) {} + CD3DX12_HEAP_PROPERTIES(D3D12_CPU_PAGE_PROPERTY cpuPageProperty, D3D12_MEMORY_POOL memoryPoolPreference, + UINT creationNodeMask = 1, UINT nodeMask = 1) noexcept + { + Type = D3D12_HEAP_TYPE_CUSTOM; + CPUPageProperty = cpuPageProperty; + MemoryPoolPreference = memoryPoolPreference; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + explicit CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE type, UINT creationNodeMask = 1, UINT nodeMask = 1) noexcept + { + Type = type; + CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + CreationNodeMask = creationNodeMask; + VisibleNodeMask = nodeMask; + } + bool IsCPUAccessible() const noexcept + { + return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || + (Type == D3D12_HEAP_TYPE_CUSTOM && (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || + CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); + } +}; + +inline bool operator==(const D3D12_HEAP_PROPERTIES &l, const D3D12_HEAP_PROPERTIES &r) noexcept +{ + return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && + l.MemoryPoolPreference == r.MemoryPoolPreference && l.CreationNodeMask == r.CreationNodeMask && + l.VisibleNodeMask == 
r.VisibleNodeMask;
+}
+
+inline bool operator!=(const D3D12_HEAP_PROPERTIES &l, const D3D12_HEAP_PROPERTIES &r) noexcept
+{
+	return !(l == r);
+}
+
+struct CD3DX12_SHADER_BYTECODE : public D3D12_SHADER_BYTECODE {
+	CD3DX12_SHADER_BYTECODE() = default;
+	explicit CD3DX12_SHADER_BYTECODE(const D3D12_SHADER_BYTECODE &o) noexcept : D3D12_SHADER_BYTECODE(o) {}
+	CD3DX12_SHADER_BYTECODE(_In_ ID3DBlob *pShaderBlob) noexcept
+	{
+		pShaderBytecode = pShaderBlob->GetBufferPointer();
+		BytecodeLength = pShaderBlob->GetBufferSize();
+	}
+	CD3DX12_SHADER_BYTECODE(const void *_pShaderBytecode, SIZE_T bytecodeLength) noexcept
+	{
+		pShaderBytecode = _pShaderBytecode;
+		BytecodeLength = bytecodeLength;
+	}
+};
+
+struct CD3DX12_TEXTURE_COPY_LOCATION : public D3D12_TEXTURE_COPY_LOCATION {
+	CD3DX12_TEXTURE_COPY_LOCATION() = default;
+	explicit CD3DX12_TEXTURE_COPY_LOCATION(const D3D12_TEXTURE_COPY_LOCATION &o) noexcept
+		: D3D12_TEXTURE_COPY_LOCATION(o)
+	{
+	}
+	CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource *pRes) noexcept
+	{
+		pResource = pRes;
+		Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+		PlacedFootprint = {};
+	}
+	CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource *pRes,
+				      D3D12_PLACED_SUBRESOURCE_FOOTPRINT const &Footprint) noexcept
+	{
+		pResource = pRes;
+		Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
+		PlacedFootprint = Footprint;
+	}
+	CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource *pRes, UINT Sub) noexcept
+	{
+		pResource = pRes;
+		Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
+		PlacedFootprint = {};
+		SubresourceIndex = Sub;
+	}
+};
+
+struct CD3DX12_RECT : public D3D12_RECT {
+	CD3DX12_RECT() = default;
+	explicit CD3DX12_RECT(const D3D12_RECT &o) noexcept : D3D12_RECT(o) {}
+	explicit CD3DX12_RECT(LONG Left, LONG Top, LONG Right, LONG Bottom) noexcept
+	{
+		left = Left;
+		top = Top;
+		right = Right;
+		bottom = Bottom;
+	}
+};
+
+// Builder for a single D3D12_ROOT_PARAMETER.
+//
+// A parameter starts out with an invalid type (0xFFFFFFFF) and is given its real type by one of
+// the InitAs*() calls. When it is a descriptor table, the range array is allocated with new[]
+// and owned by this object; Clear() and the destructor release it.
+class RootParameter {
+public:
+	RootParameter() : m_RootParam{} { m_RootParam.ParameterType = (D3D12_ROOT_PARAMETER_TYPE)0xFFFFFFFF; }
+
+	// The range array is owned through a raw pointer, so a member-wise copy would make two
+	// objects delete[] the same allocation. Copying is therefore disabled.
+	RootParameter(const RootParameter &) = delete;
+	RootParameter &operator=(const RootParameter &) = delete;
+
+	~RootParameter() { Clear(); }
+
+	// Releases the descriptor range array (if this is a descriptor table) and marks the
+	// parameter as uninitialised again.
+	void Clear()
+	{
+		if (m_RootParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) {
+			delete[] m_RootParam.DescriptorTable.pDescriptorRanges;
+			m_RootParam.DescriptorTable.pDescriptorRanges = nullptr;
+			m_RootParam.DescriptorTable.NumDescriptorRanges = 0;
+		}
+
+		m_RootParam.ParameterType = (D3D12_ROOT_PARAMETER_TYPE)0xFFFFFFFF;
+	}
+
+	// Every InitAs*() starts with Clear() so that re-initialising a parameter that currently
+	// is a descriptor table does not leak its range array.
+
+	// Root constants: NumDwords 32-bit values bound at shader register Register / space Space.
+	void InitAsConstants(UINT Register, UINT NumDwords,
+			     D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL, UINT Space = 0)
+	{
+		Clear();
+		m_RootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
+		m_RootParam.ShaderVisibility = Visibility;
+		m_RootParam.Constants.Num32BitValues = NumDwords;
+		m_RootParam.Constants.ShaderRegister = Register;
+		m_RootParam.Constants.RegisterSpace = Space;
+	}
+
+	// Root CBV descriptor at Register / Space.
+	void InitAsConstantBuffer(UINT Register, D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL,
+				  UINT Space = 0)
+	{
+		Clear();
+		m_RootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
+		m_RootParam.ShaderVisibility = Visibility;
+		m_RootParam.Descriptor.ShaderRegister = Register;
+		m_RootParam.Descriptor.RegisterSpace = Space;
+	}
+
+	// Root SRV descriptor at Register / Space.
+	void InitAsBufferSRV(UINT Register, D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL,
+			     UINT Space = 0)
+	{
+		Clear();
+		m_RootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
+		m_RootParam.ShaderVisibility = Visibility;
+		m_RootParam.Descriptor.ShaderRegister = Register;
+		m_RootParam.Descriptor.RegisterSpace = Space;
+	}
+
+	// Root UAV descriptor at Register / Space.
+	void InitAsBufferUAV(UINT Register, D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL,
+			     UINT Space = 0)
+	{
+		Clear();
+		m_RootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
+		m_RootParam.ShaderVisibility = Visibility;
+		m_RootParam.Descriptor.ShaderRegister = Register;
+		m_RootParam.Descriptor.RegisterSpace = Space;
+	}
+
+	// Convenience: a descriptor table holding exactly one range.
+	void InitAsDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE Type, UINT Register, UINT Count,
+				   D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL, UINT Space = 0)
+	{
+		InitAsDescriptorTable(1, Visibility);
+		SetTableRange(0, Type, Register, Count, Space);
+	}
+
+	// Descriptor table with RangeCount ranges; each range must then be filled in with
+	// SetTableRange().
+	void InitAsDescriptorTable(UINT RangeCount, D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL)
+	{
+		Clear();
+		m_RootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+		m_RootParam.ShaderVisibility = Visibility;
+		m_RootParam.DescriptorTable.NumDescriptorRanges = RangeCount;
+		m_RootParam.DescriptorTable.pDescriptorRanges = new D3D12_DESCRIPTOR_RANGE[RangeCount];
+	}
+
+	// Fills in range RangeIndex of a table created by InitAsDescriptorTable().
+	// RangeIndex is not bounds-checked; it must be below the table's RangeCount.
+	void SetTableRange(UINT RangeIndex, D3D12_DESCRIPTOR_RANGE_TYPE Type, UINT Register, UINT Count, UINT Space = 0)
+	{
+		// The D3D12 struct exposes the array as pointer-to-const; we allocated it, so writing is fine.
+		D3D12_DESCRIPTOR_RANGE *range = const_cast<D3D12_DESCRIPTOR_RANGE *>(
+			m_RootParam.DescriptorTable.pDescriptorRanges + RangeIndex);
+		range->RangeType = Type;
+		range->NumDescriptors = Count;
+		range->BaseShaderRegister = Register;
+		range->RegisterSpace = Space;
+		range->OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+	}
+
+	const D3D12_ROOT_PARAMETER &operator()(void) const { return m_RootParam; }
+
+protected:
+	D3D12_ROOT_PARAMETER m_RootParam;
+};
+
+// Maximum 64 DWORDS divided up amongst all root parameters.
+// Root constants = 1 DWORD * NumConstants
+// Root descriptor (CBV, SRV, or UAV) = 2 DWORDs each
+// Descriptor table pointer = 1 DWORD
+// Static samplers = 0 DWORDS (compiled into shader)
+//
+// Holds the root parameters and static samplers that describe a root signature. Parameters are
+// reached through operator[]; samplers are added with InitStaticSampler(); Finalize() and
+// GetSignature() are defined out of line.
+class RootSignature {
+public:
+	RootSignature(D3D12DeviceInstance *DeviceInstance, UINT NumRootParams = 0, UINT NumStaticSamplers = 0)
+		: m_DeviceInstance(DeviceInstance),
+		  m_Finalized(FALSE),
+		  m_NumParameters(NumRootParams)
+	{
+		Reset(NumRootParams, NumStaticSamplers);
+	}
+
+	~RootSignature() {}
+
+	// Replaces the parameter and static-sampler arrays with freshly allocated ones of the
+	// requested sizes (or none for a size of 0) and restarts the initialised-sampler count.
+	// m_Finalized is left as it was.
+	void Reset(UINT NumRootParams, UINT NumStaticSamplers = 0)
+	{
+		if (NumRootParams > 0)
+			m_ParamArray.reset(new RootParameter[NumRootParams]);
+		else
+			m_ParamArray = nullptr;
+		m_NumParameters = NumRootParams;
+
+		if (NumStaticSamplers > 0)
+			m_SamplerArray.reset(new D3D12_STATIC_SAMPLER_DESC[NumStaticSamplers]);
+		else
+			m_SamplerArray = nullptr;
+		m_NumSamplers = NumStaticSamplers;
+		m_NumInitializedStaticSamplers = 0;
+	}
+
+	// Unchecked access to root parameter EntryIndex; the index must be below m_NumParameters.
+	RootParameter &operator[](size_t EntryIndex) { return m_ParamArray[EntryIndex]; }
+
+	const RootParameter &operator[](size_t EntryIndex) const { return m_ParamArray[EntryIndex]; }
+
+	void InitStaticSampler(UINT Register, const D3D12_SAMPLER_DESC &NonStaticSamplerDesc,
+			       D3D12_SHADER_VISIBILITY Visibility = D3D12_SHADER_VISIBILITY_ALL);
+
+	void Finalize(const std::wstring &name, D3D12_ROOT_SIGNATURE_FLAGS Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE);
+
+	ID3D12RootSignature *GetSignature() const;
+
+	// All members carry default initialisers so that an object that has not been finalised yet
+	// never exposes indeterminate values (previously m_Signature, the bitmaps and the table
+	// sizes were left uninitialised by the constructor).
+	D3D12DeviceInstance *m_DeviceInstance = nullptr;
+	BOOL m_Finalized = FALSE;
+	UINT m_NumParameters = 0;
+	UINT m_NumSamplers = 0;
+	UINT m_NumInitializedStaticSamplers = 0;
+	uint32_t m_DescriptorTableBitMap = 0; // One bit is set for root parameters that are non-sampler descriptor tables
+	uint32_t m_SamplerTableBitMap = 0; // One bit is set for root parameters that are sampler descriptor tables
+	uint32_t m_DescriptorTableSize
+		[kMaxNumDescriptorTables] = {}; // Non-sampler descriptor tables need to know their descriptor count
+	std::unique_ptr<RootParameter[]> m_ParamArray;
+	std::unique_ptr<D3D12_STATIC_SAMPLER_DESC[]> m_SamplerArray;
+	// NOTE(review): raw pointer that the destructor does not release - who owns the signature
+	// is not visible in this header; confirm against Finalize().
+	ID3D12RootSignature *m_Signature = nullptr;
+};
+
+class IndirectParameter {
+public:
+	IndirectParameter() { m_IndirectParam.Type = (D3D12_INDIRECT_ARGUMENT_TYPE)0xFFFFFFFF; }
+
+	void Draw(void) { m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; }
+
+	void DrawIndexed(void) { m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; }
+
+	void Dispatch(void) { m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; }
+
+	void VertexBufferView(UINT Slot)
+	{
+		m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
+		m_IndirectParam.VertexBuffer.Slot = Slot;
+	}
+
+	void IndexBufferView(void) { m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW; }
+
+	void Constant(UINT RootParameterIndex, UINT DestOffsetIn32BitValues, UINT Num32BitValuesToSet)
+	{
+		m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT;
+		m_IndirectParam.Constant.RootParameterIndex = RootParameterIndex;
+		m_IndirectParam.Constant.DestOffsetIn32BitValues = DestOffsetIn32BitValues;
+		m_IndirectParam.Constant.Num32BitValuesToSet = Num32BitValuesToSet;
+	}
+
+	void ConstantBufferView(UINT RootParameterIndex)
+	{
+		m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW;
+		m_IndirectParam.ConstantBufferView.RootParameterIndex = RootParameterIndex;
+	}
+
+	void ShaderResourceView(UINT RootParameterIndex)
+	{
+		m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW;
+		m_IndirectParam.ShaderResourceView.RootParameterIndex = RootParameterIndex;
+	}
+
+	void UnorderedAccessView(UINT RootParameterIndex)
+	{
+		m_IndirectParam.Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW;
+		m_IndirectParam.UnorderedAccessView.RootParameterIndex = RootParameterIndex;
+	}
+
+	const D3D12_INDIRECT_ARGUMENT_DESC &GetDesc(void) const { return m_IndirectParam; }
+
+protected:
+	D3D12_INDIRECT_ARGUMENT_DESC m_IndirectParam;
+};
+
+class CommandSignature {
+public:
+	CommandSignature(D3D12DeviceInstance *DeviceInstance, UINT
NumParams = 0) + : m_DeviceInstance(DeviceInstance), + m_Finalized(FALSE), + m_NumParameters(NumParams) + { + Reset(NumParams); + } + + void Destroy(void) + { + m_Signature = nullptr; + m_ParamArray = nullptr; + } + + void Reset(UINT NumParams) + { + if (NumParams > 0) + m_ParamArray.reset(new IndirectParameter[NumParams]); + else + m_ParamArray = nullptr; + + m_NumParameters = NumParams; + } + + IndirectParameter &operator[](size_t EntryIndex) { return m_ParamArray.get()[EntryIndex]; } + + const IndirectParameter &operator[](size_t EntryIndex) const { return m_ParamArray.get()[EntryIndex]; } + + ID3D12CommandSignature *GetSignature() const; + + void Finalize(const RootSignature *RootSignature = nullptr); + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + BOOL m_Finalized; + UINT m_NumParameters; + std::unique_ptr m_ParamArray; + ComPtr m_Signature; +}; + +typedef struct DescriptorHandleNode { + UINT64 index = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; + void *next = (void *)D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; +} DescriptorHandleNode; + +typedef struct D3D12_CPU_DESCRIPTOR_HANDLE_NODE { + UINT64 index; + D3D12_CPU_DESCRIPTOR_HANDLE handle = {D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN}; +} D3D12_CPU_DESCRIPTOR_HANDLE_NODE; + +class DescriptorAllocator { +public: + DescriptorAllocator(D3D12DeviceInstance *DeviceInstance, D3D12_DESCRIPTOR_HEAP_TYPE Type); + D3D12_CPU_DESCRIPTOR_HANDLE Allocate(uint32_t Count); + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + D3D12_DESCRIPTOR_HEAP_TYPE m_Type; + ID3D12DescriptorHeap *m_CurrentHeap = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE m_CurrentHandle; + uint32_t m_DescriptorSize = 0; + uint32_t m_RemainingFreeHandles; + + // TODO + UINT64 GetAvailableIndex(); + void FreeIndex(UINT64 index); + DescriptorHandleNode *m_DescriptorPoolHead; + DescriptorHandleNode m_DescriptorPoolNodes[kMaxNumDescriptors]; +}; + +class DescriptorHandle { +public: + DescriptorHandle(); + DescriptorHandle(D3D12_CPU_DESCRIPTOR_HANDLE 
CpuHandle, D3D12_GPU_DESCRIPTOR_HANDLE GpuHandle); + + DescriptorHandle operator+(INT OffsetScaledByDescriptorSize) const + { + DescriptorHandle ret = *this; + ret += OffsetScaledByDescriptorSize; + return ret; + } + + void operator+=(INT OffsetScaledByDescriptorSize) + { + if (m_CpuHandle.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_CpuHandle.ptr += OffsetScaledByDescriptorSize; + if (m_GpuHandle.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) + m_GpuHandle.ptr += OffsetScaledByDescriptorSize; + } + + const D3D12_CPU_DESCRIPTOR_HANDLE *operator&() const { return &m_CpuHandle; } + operator D3D12_CPU_DESCRIPTOR_HANDLE() const { return m_CpuHandle; } + operator D3D12_GPU_DESCRIPTOR_HANDLE() const { return m_GpuHandle; } + + size_t GetCpuPtr() const { return m_CpuHandle.ptr; } + uint64_t GetGpuPtr() const { return m_GpuHandle.ptr; } + bool IsNull() const { return m_CpuHandle.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; } + bool IsShaderVisible() const { return m_GpuHandle.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN; } + +private: + D3D12_CPU_DESCRIPTOR_HANDLE m_CpuHandle; + D3D12_GPU_DESCRIPTOR_HANDLE m_GpuHandle; +}; + +struct DescriptorTableCache { + DescriptorTableCache() : AssignedHandlesBitMap(0), TableStart(nullptr), TableSize(0) {} + uint32_t AssignedHandlesBitMap = 0; + D3D12_CPU_DESCRIPTOR_HANDLE *TableStart = nullptr; + uint32_t TableSize = 0; +}; + +class DescriptorHandleCache { +public: + DescriptorHandleCache(D3D12DeviceInstance *DeviceInstance); + void ClearCache(); + + uint32_t ComputeStagedSize(); + void CopyAndBindStaleTables( + D3D12_DESCRIPTOR_HEAP_TYPE Type, uint32_t DescriptorSize, DescriptorHandle DestHandleStart, + ID3D12GraphicsCommandList *CmdList, + void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*SetFunc)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE)); + + void UnbindAllValid(); + void StageDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + void ParseRootSignature(D3D12_DESCRIPTOR_HEAP_TYPE Type, 
const RootSignature &RootSig); + + D3D12DeviceInstance *m_DeviceInstance = nullptr; + DescriptorTableCache m_RootDescriptorTable[kMaxNumDescriptorTables]; + D3D12_CPU_DESCRIPTOR_HANDLE m_HandleCache[kMaxNumDescriptors]; + uint32_t m_RootDescriptorTablesBitMap; + uint32_t m_StaleRootParamsBitMap; + uint32_t m_MaxCachedDescriptors; +}; + +class DynamicDescriptorHeap { +public: + DynamicDescriptorHeap(D3D12DeviceInstance *DeviceInstance, CommandContext &OwningContext, + D3D12_DESCRIPTOR_HEAP_TYPE HeapType); + ~DynamicDescriptorHeap(); + + void CleanupUsedHeaps(uint64_t fenceValue); + + // Copy multiple handles into the cache area reserved for the specified root parameter. + void SetGraphicsDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + + void SetComputeDescriptorHandles(UINT RootIndex, UINT Offset, UINT NumHandles, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + + // Bypass the cache and upload directly to the shader-visible heap + D3D12_GPU_DESCRIPTOR_HANDLE UploadDirect(D3D12_CPU_DESCRIPTOR_HANDLE Handles); + + // Deduce cache layout needed to support the descriptor tables needed by the root signature. + void ParseGraphicsRootSignature(const RootSignature &RootSig); + + void ParseComputeRootSignature(const RootSignature &RootSig); + + // Upload any new descriptors in the cache to the shader-visible heap. 
+ void CommitGraphicsRootDescriptorTables(ID3D12GraphicsCommandList *CmdList); + + void CommitComputeRootDescriptorTables(ID3D12GraphicsCommandList *CmdList); + + bool HasSpace(uint32_t Count); + void RetireCurrentHeap(void); + void RetireUsedHeaps(uint64_t fenceValue); + ID3D12DescriptorHeap *GetHeapPointer(); + + DescriptorHandle Allocate(UINT Count); + + void CopyAndBindStagedTables( + DescriptorHandleCache &HandleCache, ID3D12GraphicsCommandList *CmdList, + void (STDMETHODCALLTYPE ID3D12GraphicsCommandList::*SetFunc)(UINT, D3D12_GPU_DESCRIPTOR_HANDLE)); + + // Mark all descriptors in the cache as stale and in need of re-uploading. + void UnbindAllValid(void); + + D3D12DeviceInstance *m_DeviceInstance = nullptr; + CommandContext &m_OwningContext; + ID3D12DescriptorHeap *m_CurrentHeapPtr; + const D3D12_DESCRIPTOR_HEAP_TYPE m_DescriptorType; + uint32_t m_DescriptorSize; + uint32_t m_CurrentOffset; + DescriptorHandle m_FirstDescriptor; + std::vector m_RetiredHeaps; + + std::unique_ptr m_GraphicsHandleCache; + std::unique_ptr m_ComputeHandleCache; +}; + +class ContextManager { +public: + ContextManager(D3D12DeviceInstance *DeviceInstance); + + CommandContext *AllocateContext(D3D12_COMMAND_LIST_TYPE Type); + void FreeContext(CommandContext *); + void DestroyAllContexts(); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + std::vector> m_ContextPool[4]; + std::queue m_AvailableContexts[4]; +}; + +struct NonCopyable { + NonCopyable() = default; + NonCopyable(const NonCopyable &) = delete; + NonCopyable &operator=(const NonCopyable &) = delete; +}; + +class GpuResource { +public: + GpuResource() + : m_GpuVirtualAddress(D3D12_GPU_VIRTUAL_ADDRESS_NULL), + m_UsageState(D3D12_RESOURCE_STATE_COMMON), + m_TransitioningState((D3D12_RESOURCE_STATES)-1) + { + } + + GpuResource(ID3D12Resource *pResource, D3D12_RESOURCE_STATES CurrentState) + : m_GpuVirtualAddress(D3D12_GPU_VIRTUAL_ADDRESS_NULL), + m_pResource(pResource), + m_UsageState(CurrentState), + 
m_TransitioningState((D3D12_RESOURCE_STATES)-1) + { + } + + ~GpuResource() { Destroy(); } + + virtual void Destroy() + { + m_pResource = nullptr; + m_GpuVirtualAddress = D3D12_GPU_VIRTUAL_ADDRESS_NULL; + ++m_VersionID; + } + + ID3D12Resource *operator->() { return m_pResource.Get(); } + const ID3D12Resource *operator->() const { return m_pResource.Get(); } + + ID3D12Resource *GetResource() { return m_pResource.Get(); } + const ID3D12Resource *GetResource() const { return m_pResource.Get(); } + + ID3D12Resource **GetAddressOf() { return &m_pResource; } + + D3D12_GPU_VIRTUAL_ADDRESS GetGpuVirtualAddress() const { return m_GpuVirtualAddress; } + + uint32_t GetVersionID() const { return m_VersionID; } + + ComPtr m_pResource; + D3D12_RESOURCE_STATES m_UsageState; + D3D12_RESOURCE_STATES m_TransitioningState; + D3D12_GPU_VIRTUAL_ADDRESS m_GpuVirtualAddress; + + // Used to identify when a resource changes so descriptors can be copied etc. + uint32_t m_VersionID = 0; +}; + +class UploadBuffer : public GpuResource { +public: + UploadBuffer(D3D12DeviceInstance *DeviceInstance); + virtual ~UploadBuffer(); + + void Create(const std::wstring &name, size_t BufferSize); + + void *Map(void); + void Unmap(size_t begin = 0, size_t end = -1); + + size_t GetBufferSize() const; + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + size_t m_BufferSize; +}; + +class GpuBuffer : public GpuResource { +public: + GpuBuffer(D3D12DeviceInstance *DeviceInstance); + virtual ~GpuBuffer(); + + virtual void Destroy() override; + + // Create a buffer. If initial data is provided, it will be copied into the buffer using the default command context. + void Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize, + const void *initialData = nullptr); + + void Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize, const UploadBuffer &srcData, + uint32_t srcOffset = 0); + + // Sub-Allocate a buffer out of a pre-allocated heap. 
If initial data is provided, it will be copied into the buffer using the default command context. + void CreatePlaced(const std::wstring &name, ID3D12Heap *pBackingHeap, uint32_t HeapOffset, uint32_t NumElements, + uint32_t ElementSize, const void *initialData = nullptr); + + const D3D12_CPU_DESCRIPTOR_HANDLE &GetUAV(void) const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetSRV(void) const; + + D3D12_GPU_VIRTUAL_ADDRESS RootConstantBufferView(void) const; + + D3D12_CPU_DESCRIPTOR_HANDLE CreateConstantBufferView(uint32_t Offset, uint32_t Size) const; + + D3D12_VERTEX_BUFFER_VIEW VertexBufferView(size_t Offset, uint32_t Size, uint32_t Stride) const; + D3D12_VERTEX_BUFFER_VIEW VertexBufferView(size_t BaseVertexIndex = 0) const; + D3D12_INDEX_BUFFER_VIEW IndexBufferView(size_t Offset, uint32_t Size, bool b32Bit = false) const; + D3D12_INDEX_BUFFER_VIEW IndexBufferView(size_t StartIndex = 0) const; + + size_t GetBufferSize() const; + uint32_t GetElementCount() const; + uint32_t GetElementSize() const; + + D3D12_RESOURCE_DESC DescribeBuffer(void); + virtual void CreateDerivedViews(void) = 0; + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + D3D12_CPU_DESCRIPTOR_HANDLE m_UAV; + D3D12_CPU_DESCRIPTOR_HANDLE m_SRV; + + size_t m_BufferSize; + uint32_t m_ElementCount; + uint32_t m_ElementSize; + D3D12_RESOURCE_FLAGS m_ResourceFlags; +}; + +class ByteAddressBuffer : public GpuBuffer { +public: + ByteAddressBuffer(D3D12DeviceInstance *DeviceInstance); + virtual void CreateDerivedViews(void) override; +}; + +class ReadbackBuffer : public GpuBuffer { +public: + ReadbackBuffer(D3D12DeviceInstance *DeviceInstance); + virtual ~ReadbackBuffer(); + + void Create(const std::wstring &name, uint32_t NumElements, uint32_t ElementSize); + + void *Map(void); + void Unmap(void); + +protected: + void CreateDerivedViews(void); +}; + +class PixelBuffer : public GpuResource { +public: + PixelBuffer(); + + uint32_t GetWidth(void) const; + uint32_t GetHeight(void) const; + uint32_t 
GetDepth(void) const; + const DXGI_FORMAT &GetFormat(void) const; + + void SetBankRotation(uint32_t RotationAmount); + + D3D12_RESOURCE_DESC DescribeTex2D(uint32_t Width, uint32_t Height, uint32_t DepthOrArraySize, uint32_t NumMips, + DXGI_FORMAT Format, UINT Flags); + + void AssociateWithResource(ID3D12Device *Device, const std::wstring &Name, ID3D12Resource *Resource, + D3D12_RESOURCE_STATES CurrentState); + + void CreateTextureResource(ID3D12Device *Device, const std::wstring &Name, + const D3D12_RESOURCE_DESC &ResourceDesc, D3D12_CLEAR_VALUE ClearValue, + D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN); + + DXGI_FORMAT GetBaseFormat(); + DXGI_FORMAT GetUAVFormat(); + DXGI_FORMAT GetDSVFormat(); + DXGI_FORMAT GetDepthFormat(); + DXGI_FORMAT GetStencilFormat(); + static size_t BytesPerPixel(DXGI_FORMAT Format); + +protected: + uint32_t m_Width; + uint32_t m_Height; + uint32_t m_ArraySize; + DXGI_FORMAT m_Format; + uint32_t m_BankRotation; +}; + +struct Color { + union { + struct { + float x, y, z, w; + }; + float ptr[4]; + }; +}; + +class DepthBuffer : public PixelBuffer { +public: + DepthBuffer(D3D12DeviceInstance *DeviceInstance, float ClearDepth = 0.0f, uint8_t ClearStencil = 0); + virtual ~DepthBuffer(); + // Create a depth buffer. If an address is supplied, memory will not be allocated. + // The vmem address allows you to alias buffers (which can be especially useful for + // reusing ESRAM across a frame.) 
+ void Create(const std::wstring &Name, uint32_t Width, uint32_t Height, DXGI_FORMAT Format, + D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN); + + void Create(const std::wstring &Name, uint32_t Width, uint32_t Height, uint32_t NumSamples, DXGI_FORMAT Format, + D3D12_GPU_VIRTUAL_ADDRESS VidMemPtr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN); + virtual void Destroy() override; + + // Get pre-created CPU-visible descriptor handles + const D3D12_CPU_DESCRIPTOR_HANDLE &GetDSV() const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetDSV_DepthReadOnly() const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetDSV_StencilReadOnly() const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetDSV_ReadOnly() const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetDepthSRV() const; + const D3D12_CPU_DESCRIPTOR_HANDLE &GetStencilSRV() const; + + float GetClearDepth() const; + uint8_t GetClearStencil() const; + + void CreateDerivedViews(ID3D12Device *Device); + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + float m_ClearDepth; + uint8_t m_ClearStencil; + D3D12_CPU_DESCRIPTOR_HANDLE m_hDSV[4]; + D3D12_CPU_DESCRIPTOR_HANDLE m_hDepthSRV; + D3D12_CPU_DESCRIPTOR_HANDLE m_hStencilSRV; +}; + +// Various types of allocations may contain NULL pointers. Check before dereferencing if you are unsure. +struct DynAlloc { + DynAlloc(GpuResource &BaseResource, size_t ThisOffset, size_t ThisSize) + : Buffer(BaseResource), + Offset(ThisOffset), + Size(ThisSize) + { + } + + GpuResource &Buffer; // The D3D buffer associated with this memory. 
+ size_t Offset; // Offset from start of buffer resource + size_t Size; // Reserved size of this allocation + void *DataPtr; // The CPU-writeable address + D3D12_GPU_VIRTUAL_ADDRESS GpuAddress; // The GPU-visible address +}; + +class LinearAllocationPage : public GpuResource { +public: + LinearAllocationPage(ID3D12Resource *pResource, D3D12_RESOURCE_STATES Usage) : GpuResource() + { + m_pResource = pResource; + m_UsageState = Usage; + m_GpuVirtualAddress = m_pResource->GetGPUVirtualAddress(); + m_pResource->Map(0, nullptr, &m_CpuVirtualAddress); + } + + ~LinearAllocationPage() { Unmap(); } + + void Map(void) + { + if (m_CpuVirtualAddress == nullptr) { + m_pResource->Map(0, nullptr, &m_CpuVirtualAddress); + } + } + + void Unmap(void) + { + if (m_CpuVirtualAddress != nullptr) { + m_pResource->Unmap(0, nullptr); + m_CpuVirtualAddress = nullptr; + } + } + + void *m_CpuVirtualAddress; + D3D12_GPU_VIRTUAL_ADDRESS m_GpuVirtualAddress; +}; + +class LinearAllocatorPageManager { +public: + LinearAllocatorPageManager(D3D12DeviceInstance *DeviceInstance, LinearAllocatorType Type); + LinearAllocationPage *RequestPage(void); + LinearAllocationPage *CreateNewPage(size_t PageSize = 0); + + // Discarded pages will get recycled. This is for fixed size pages. + void DiscardPages(uint64_t FenceID, const std::vector &Pages); + + // Freed pages will be destroyed once their fence has passed. This is for single-use, + // "large" pages. 
+ void FreeLargePages(uint64_t FenceID, const std::vector &Pages); + + void Destroy(void); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + LinearAllocatorType m_AllocationType; + std::vector> m_PagePool; + std::queue> m_RetiredPages; + std::queue> m_DeletionQueue; + std::queue m_AvailablePages; +}; + +class LinearAllocator { +public: + LinearAllocator(D3D12DeviceInstance *DeviceInstance, LinearAllocatorType Type); + DynAlloc Allocate(size_t SizeInBytes, size_t Alignment = DEFAULT_ALIGN); + void CleanupUsedPages(uint64_t FenceID); + DynAlloc AllocateLargePage(size_t SizeInBytes); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + LinearAllocatorType m_AllocationType; + size_t m_PageSize; + size_t m_CurOffset; + LinearAllocationPage *m_CurPage; + std::vector m_RetiredPages; + std::vector m_LargePageList; +}; + +class PSO { +public: + PSO(D3D12DeviceInstance *DeviceInstance, const wchar_t *Name) + : m_DeviceInstance(DeviceInstance), + m_Name(Name), + m_RootSignature(nullptr), + m_PSO(nullptr) + { + } + + void SetRootSignature(const RootSignature &BindMappings) { m_RootSignature = &BindMappings; } + + const RootSignature &GetRootSignature(void) const { return *m_RootSignature; } + + ID3D12PipelineState *GetPipelineStateObject(void) const { return m_PSO; } + +protected: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + const wchar_t *m_Name; + const RootSignature *m_RootSignature; + ID3D12PipelineState *m_PSO; +}; + +class GraphicsPSO : public PSO { +public: + // Start with empty state + GraphicsPSO(D3D12DeviceInstance *DeviceInstance, const wchar_t *Name = L"Unnamed Graphics PSO"); + + void SetBlendState(const D3D12_BLEND_DESC &BlendDesc); + void SetRasterizerState(const D3D12_RASTERIZER_DESC &RasterizerDesc); + void SetDepthStencilState(const D3D12_DEPTH_STENCIL_DESC &DepthStencilDesc); + void SetSampleMask(UINT SampleMask); + void SetPrimitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE TopologyType); + void 
// Compute flavour of PSO: carries a D3D12_COMPUTE_PIPELINE_STATE_DESC that the
// setters below fill in.
class ComputePSO : public PSO {
public:
	// Name defaults to a generic label when the caller does not supply one.
	ComputePSO(D3D12DeviceInstance *DeviceInstance, const wchar_t *Name = L"Unnamed Compute PSO");

	// Two ways of supplying the compute shader: a raw pointer + size pair, or
	// an already-filled D3D12_SHADER_BYTECODE.
	void SetComputeShader(const void *Binary, size_t Size);
	void SetComputeShader(const D3D12_SHADER_BYTECODE &Binary);

	// NOTE(review): presumably builds the ID3D12PipelineState from m_PSODesc;
	// the definition is not part of this header -- confirm in the .cpp.
	void Finalize();

private:
	D3D12_COMPUTE_PIPELINE_STATE_DESC m_PSODesc;
};
*RequestAllocator(uint64_t CompletedFenceValue); + void DiscardAllocator(uint64_t FenceValue, ID3D12CommandAllocator *Allocator); + + inline size_t Size(); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + const D3D12_COMMAND_LIST_TYPE m_cCommandListType; + std::vector m_AllocatorPool; + std::queue> m_ReadyAllocators; +}; + +class CommandQueue { +public: + CommandQueue(D3D12_COMMAND_LIST_TYPE Type); + ~CommandQueue(); + + void Create(D3D12DeviceInstance *DeviceInstance); + void Shutdown(); + + inline bool IsReady(); + + uint64_t IncrementFence(void); + bool IsFenceComplete(uint64_t FenceValue); + void StallForFence(uint64_t FenceValue); + void StallForProducer(CommandQueue &Producer); + void WaitForFence(uint64_t FenceValue); + void WaitForIdle(void); + + ID3D12CommandQueue *GetCommandQueue(); + + uint64_t GetNextFenceValue(); + + uint64_t ExecuteCommandList(ID3D12CommandList *List); + ID3D12CommandAllocator *RequestAllocator(void); + void DiscardAllocator(uint64_t FenceValueForReset, ID3D12CommandAllocator *Allocator); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + ID3D12CommandQueue *m_CommandQueue; + + const D3D12_COMMAND_LIST_TYPE m_Type; + + std::unique_ptr m_AllocatorPool; + + // Lifetime of these objects is managed by the descriptor cache + ID3D12Fence *m_pFence; + uint64_t m_NextFenceValue; + uint64_t m_LastCompletedFenceValue; + HANDLE m_FenceEventHandle; +}; + +class CommandListManager { +public: + CommandListManager(); + ~CommandListManager(); + + void Create(D3D12DeviceInstance *DeviceInstance); + void Shutdown(); + + CommandQueue &GetGraphicsQueue(void); + CommandQueue &GetComputeQueue(void); + CommandQueue &GetCopyQueue(void); + + CommandQueue &GetQueue(D3D12_COMMAND_LIST_TYPE Type = D3D12_COMMAND_LIST_TYPE_DIRECT); + + ID3D12CommandQueue *GetCommandQueue(); + + void CreateNewCommandList(D3D12_COMMAND_LIST_TYPE Type, ID3D12GraphicsCommandList **List, + ID3D12CommandAllocator **Allocator); + + bool 
IsFenceComplete(uint64_t FenceValue); + + // The CPU will wait for a fence to reach a specified value + void WaitForFence(uint64_t FenceValue); + + // The CPU will wait for all command queues to empty (so that the GPU is idle) + void IdleGPU(void); + +private: + D3D12DeviceInstance *m_DeviceInstance = nullptr; + + std::unique_ptr m_GraphicsQueue; + std::unique_ptr m_ComputeQueue; + std::unique_ptr m_CopyQueue; +}; + +class CommandContext : NonCopyable { +public: + CommandContext(D3D12DeviceInstance *DeviceInstance, D3D12_COMMAND_LIST_TYPE Type); + + ~CommandContext(void); + + void Reset(void); + // Flush existing commands to the GPU but keep the context alive + uint64_t Flush(bool WaitForCompletion = false); + + // Flush existing commands and release the current context + uint64_t Finish(bool WaitForCompletion = false); + + // Prepare to render by reserving a command list and command allocator + void Initialize(void); + + ID3D12GraphicsCommandList *GetCommandList(); + + void CopyBuffer(GpuResource &Dest, GpuResource &Src); + void CopyBufferRegion(GpuResource &Dest, size_t DestOffset, GpuResource &Src, size_t SrcOffset, + size_t NumBytes); + void CopySubresource(GpuResource &Dest, UINT DestSubIndex, GpuResource &Src, UINT SrcSubIndex); + void CopyTextureRegion(GpuResource &Dest, UINT x, UINT y, UINT z, GpuResource &Source, RECT &rect); + void UpdateTexture(GpuResource &Dest, UploadBuffer &buffer); + + // Creates a readback buffer of sufficient size, copies the texture into it, + // and returns row pitch in bytes. 
+ uint32_t ReadbackTexture(ReadbackBuffer &DstBuffer, GpuResource &SrcBuffer); + + DynAlloc ReserveUploadMemory(size_t SizeInBytes); + + void WriteBuffer(GpuResource &Dest, size_t DestOffset, const void *Data, size_t NumBytes); + + void TransitionResource(GpuResource &Resource, D3D12_RESOURCE_STATES NewState, bool FlushImmediate = false); + void BeginResourceTransition(GpuResource &Resource, D3D12_RESOURCE_STATES NewState, + bool FlushImmediate = false); + void InsertUAVBarrier(GpuResource &Resource, bool FlushImmediate = false); + void InsertAliasBarrier(GpuResource &Before, GpuResource &After, bool FlushImmediate = false); + inline void FlushResourceBarriers(void); + + void InsertTimeStamp(ID3D12QueryHeap *pQueryHeap, uint32_t QueryIdx); + void ResolveTimeStamps(ID3D12Resource *pReadbackHeap, ID3D12QueryHeap *pQueryHeap, uint32_t NumQueries); + void PIXBeginEvent(const wchar_t *label); + void PIXEndEvent(void); + void PIXSetMarker(const wchar_t *label); + + void SetDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE Type, ID3D12DescriptorHeap *HeapPtr); + void SetDescriptorHeaps(UINT HeapCount, D3D12_DESCRIPTOR_HEAP_TYPE Type[], ID3D12DescriptorHeap *HeapPtrs[]); + void SetPipelineState(const PSO &PSO); + + void SetPredication(ID3D12Resource *Buffer, UINT64 BufferOffset, D3D12_PREDICATION_OP Op); + + void BindDescriptorHeaps(void); + + D3D12DeviceInstance *m_DeviceInstance = nullptr; + ID3D12GraphicsCommandList *m_CommandList; + ID3D12CommandAllocator *m_CurrentAllocator; + + ID3D12RootSignature *m_CurGraphicsRootSignature; + ID3D12RootSignature *m_CurComputeRootSignature; + ID3D12PipelineState *m_CurPipelineState; + + DynamicDescriptorHeap m_DynamicViewDescriptorHeap; // HEAP_TYPE_CBV_SRV_UAV + DynamicDescriptorHeap m_DynamicSamplerDescriptorHeap; // HEAP_TYPE_SAMPLER + + D3D12_RESOURCE_BARRIER m_ResourceBarrierBuffer[kMaxNumDescriptorTables]; + UINT m_NumBarriersToFlush; + + ID3D12DescriptorHeap *m_CurrentDescriptorHeaps[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + + 
std::unique_ptr m_CpuLinearAllocator; + std::unique_ptr m_GpuLinearAllocator; + + std::wstring m_ID; + void SetID(const std::wstring &ID) { m_ID = ID; } + + D3D12_COMMAND_LIST_TYPE m_Type; +}; + +class GraphicsCopyContext {}; + +class GraphicsContext : public CommandContext { +public: + void ClearUAV(GpuBuffer &Target); + + void ClearColor(D3D12_CPU_DESCRIPTOR_HANDLE RenderTargetView, const FLOAT ColorRGBA[4], UINT NumRects = 0, + const D3D12_RECT *pRects = nullptr); + + void ClearDepth(DepthBuffer &Target); + void ClearStencil(DepthBuffer &Target); + void ClearDepthAndStencil(DepthBuffer &Target); + + void BeginQuery(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT HeapIndex); + void EndQuery(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT HeapIndex); + void ResolveQueryData(ID3D12QueryHeap *QueryHeap, D3D12_QUERY_TYPE Type, UINT StartIndex, UINT NumQueries, + ID3D12Resource *DestinationBuffer, UINT64 DestinationBufferOffset); + + void SetRootSignature(const RootSignature &RootSig); + + void SetRenderTargets(UINT NumRTVs, const D3D12_CPU_DESCRIPTOR_HANDLE RTVs[]); + void SetRenderTargets(UINT NumRenderTargetDescriptors, + const D3D12_CPU_DESCRIPTOR_HANDLE *pRenderTargetDescriptors, + BOOL RTsSingleHandleToDescriptorRange, + const D3D12_CPU_DESCRIPTOR_HANDLE *pDepthStencilDescriptor); + void SetRenderTargets(UINT NumRTVs, const D3D12_CPU_DESCRIPTOR_HANDLE RTVs[], D3D12_CPU_DESCRIPTOR_HANDLE DSV); + void SetRenderTarget(D3D12_CPU_DESCRIPTOR_HANDLE RTV); + void SetNullRenderTarget(); + void SetRenderTarget(D3D12_CPU_DESCRIPTOR_HANDLE RTV, D3D12_CPU_DESCRIPTOR_HANDLE DSV); + void SetDepthStencilTarget(D3D12_CPU_DESCRIPTOR_HANDLE DSV); + + void SetViewport(const D3D12_VIEWPORT &vp); + void SetViewport(FLOAT x, FLOAT y, FLOAT w, FLOAT h, FLOAT minDepth = 0.0f, FLOAT maxDepth = 1.0f); + void SetScissor(const D3D12_RECT &rect); + void SetScissor(UINT left, UINT top, UINT right, UINT bottom); + void SetViewportAndScissor(const D3D12_VIEWPORT &vp, const 
D3D12_RECT &rect); + void SetViewportAndScissor(UINT x, UINT y, UINT w, UINT h); + void SetStencilRef(UINT StencilRef); + void SetBlendFactor(Color BlendFactor); + void SetPrimitiveTopology(D3D12_PRIMITIVE_TOPOLOGY Topology); + + void SetConstantArray(UINT RootIndex, UINT NumConstants, const void *pConstants); + void SetConstant(UINT RootIndex, UINT Offset, UINT Val); + void SetConstantBuffer(UINT RootIndex, D3D12_GPU_VIRTUAL_ADDRESS CBV); + void SetDynamicConstantBufferView(UINT RootIndex, size_t BufferSize, const void *BufferData); + void SetBufferSRV(UINT RootIndex, const GpuBuffer &SRV, UINT64 Offset = 0); + void SetBufferUAV(UINT RootIndex, const GpuBuffer &UAV, UINT64 Offset = 0); + void SetDescriptorTable(UINT RootIndex, D3D12_GPU_DESCRIPTOR_HANDLE FirstHandle); + + void SetDynamicDescriptor(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle); + void SetDynamicDescriptors(UINT RootIndex, UINT Offset, UINT Count, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + void SetDynamicSampler(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle); + void SetDynamicSamplers(UINT RootIndex, UINT Offset, UINT Count, const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + + void SetIndexBuffer(const D3D12_INDEX_BUFFER_VIEW &IBView); + void SetVertexBuffer(UINT Slot, const D3D12_VERTEX_BUFFER_VIEW &VBView); + void SetVertexBuffers(UINT StartSlot, UINT Count, const D3D12_VERTEX_BUFFER_VIEW VBViews[]); + void SetDynamicVB(UINT Slot, size_t NumVertices, size_t VertexStride, const void *VBData); + void SetDynamicIB(size_t IndexCount, const uint16_t *IBData); + void SetDynamicSRV(UINT RootIndex, size_t BufferSize, const void *BufferData); + + void Draw(UINT VertexCount, UINT VertexStartOffset = 0); + void DrawIndexed(UINT IndexCount, UINT StartIndexLocation = 0, INT BaseVertexLocation = 0); + void DrawInstanced(UINT VertexCountPerInstance, UINT InstanceCount, UINT StartVertexLocation = 0, + UINT StartInstanceLocation = 0); + void DrawIndexedInstanced(UINT 
IndexCountPerInstance, UINT InstanceCount, UINT StartIndexLocation, + INT BaseVertexLocation, UINT StartInstanceLocation); + void DrawIndirect(GpuBuffer &ArgumentBuffer, uint64_t ArgumentBufferOffset = 0); + void ExecuteIndirect(CommandSignature &CommandSig, GpuBuffer &ArgumentBuffer, uint64_t ArgumentStartOffset = 0, + uint32_t MaxCommands = 1, GpuBuffer *CommandCounterBuffer = nullptr, + uint64_t CounterOffset = 0); +}; + +class ComputeContext : public CommandContext { +public: + void ClearUAV(GpuBuffer &Target); + + void SetRootSignature(const RootSignature &RootSig); + + void SetConstantArray(UINT RootIndex, UINT NumConstants, const void *pConstants); + void SetConstant(UINT RootIndex, UINT Offset, UINT Val); + void SetConstantBuffer(UINT RootIndex, D3D12_GPU_VIRTUAL_ADDRESS CBV); + void SetDynamicConstantBufferView(UINT RootIndex, size_t BufferSize, const void *BufferData); + void SetDynamicSRV(UINT RootIndex, size_t BufferSize, const void *BufferData); + void SetBufferSRV(UINT RootIndex, const GpuBuffer &SRV, UINT64 Offset = 0); + void SetBufferUAV(UINT RootIndex, const GpuBuffer &UAV, UINT64 Offset = 0); + void SetDescriptorTable(UINT RootIndex, D3D12_GPU_DESCRIPTOR_HANDLE FirstHandle); + + void SetDynamicDescriptor(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle); + void SetDynamicDescriptors(UINT RootIndex, UINT Offset, UINT Count, + const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + void SetDynamicSampler(UINT RootIndex, UINT Offset, D3D12_CPU_DESCRIPTOR_HANDLE Handle); + void SetDynamicSamplers(UINT RootIndex, UINT Offset, UINT Count, const D3D12_CPU_DESCRIPTOR_HANDLE Handles[]); + + void Dispatch(size_t GroupCountX = 1, size_t GroupCountY = 1, size_t GroupCountZ = 1); + void Dispatch1D(size_t ThreadCountX, size_t GroupSizeX = 64); + void Dispatch2D(size_t ThreadCountX, size_t ThreadCountY, size_t GroupSizeX = 8, size_t GroupSizeY = 8); + void Dispatch3D(size_t ThreadCountX, size_t ThreadCountY, size_t ThreadCountZ, size_t GroupSizeX, + 
// Converts a byte count to gibibytes (1 GiB = 2^30 bytes).
// The cast target type was missing from the original expression, which did not
// compile; the division is done in double so fractional GiB values survive.
static inline double to_GiB(size_t bytes)
{
	return static_cast<double>(bytes) / (1 << 30);
}
m_DescriptorAllocator[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES] = { + {this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, + {this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV}, + {this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV}}; + D3D12_CPU_DESCRIPTOR_HANDLE AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE Type, UINT Count = 1); + + ID3D12DescriptorHeap *RequestCommonHeap(D3D12_DESCRIPTOR_HEAP_TYPE Type); + + ID3D12DescriptorHeap *RequestDynamicDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE HeapType); + void DiscardDynamicDescriptorHeaps(D3D12_DESCRIPTOR_HEAP_TYPE HeapType, uint64_t FenceValueForReset, + const std::vector &UsedHeaps); + + GraphicsContext *GetNewGraphicsContext(const std::wstring &ID = L""); + ComputeContext *GetNewComputeContext(const std::wstring &ID = L"", bool Async = false); + + void InitializeTexture(GpuResource &Dest, UINT NumSubresources, D3D12_SUBRESOURCE_DATA SubData[]); + void InitializeBuffer(GpuBuffer &Dest, const void *Data, size_t NumBytes, size_t DestOffset = 0); + void InitializeBuffer(GpuBuffer &Dest, const UploadBuffer &Src, size_t SrcOffset, size_t NumBytes = -1, + size_t DestOffset = 0); + void InitializeTextureArraySlice(GpuResource &Dest, UINT SliceIndex, GpuResource &Src); + bool IsNV12TextureSupported() const; + bool IsP010TextureSupported() const; + bool FastClearSupported() const; + + static std::optional GetAdapterHagsStatus(const DXGI_ADAPTER_DESC *desc); + static void EnumD3DAdapters(bool (*callback)(void *, const char *, uint32_t), void *param); + static bool GetMonitorTarget(const MONITORINFOEX &info, DISPLAYCONFIG_TARGET_DEVICE_NAME &target); + static bool GetOutputDesc1(IDXGIOutput *const output, DXGI_OUTPUT_DESC1 *desc1); + static HRESULT GetPathInfo(_In_ PCWSTR pszDeviceName, _Out_ DISPLAYCONFIG_PATH_INFO *pPathInfo); + static HRESULT GetPathInfo(HMONITOR hMonitor, _Out_ DISPLAYCONFIG_PATH_INFO *pPathInfo); + static ULONG GetSdrMaxNits(HMONITOR monitor); + static MonitorColorInfo 
GetMonitorColorInfo(HMONITOR hMonitor); + static void PopulateMonitorIds(HMONITOR handle, char *id, char *alt_id, size_t capacity); + static void LogAdapterMonitors(IDXGIAdapter1 *adapter); + static void LogD3DAdapters(); + static bool IsInternalVideoOutput(const DISPLAYCONFIG_VIDEO_OUTPUT_TECHNOLOGY VideoOutputTechnologyType); + +private: + void EnableDebugLayer(); + void EnableDebugInofQueue(); + void CheckFeatureSupports(); + void CreateD3DAdapterAndDevice(uint32_t index); + + const wchar_t *GPUVendorToString(uint32_t vendorID); + uint32_t GetVendorIdFromDevice(ID3D12Device *pDevice); + bool IsDeviceNvidia(ID3D12Device *pDevice); + bool IsDeviceAMD(ID3D12Device *pDevice); + bool IsDeviceIntel(ID3D12Device *pDevice); + // Check adapter support for DirectX Raytracing. + bool IsDirectXRaytracingSupported(ID3D12Device *testDevice); + +private: + std::unique_ptr m_DispatchIndirectCommandSignature; + std::unique_ptr m_DrawIndirectCommandSignature; + + std::vector> m_DescriptorHeapPool; + + ComPtr m_Device = nullptr; + ComPtr m_Adapter = nullptr; + ComPtr m_DxgiFactory = nullptr; + + std::unique_ptr m_CommandManager; + std::unique_ptr m_ContextManager; + + std::map> m_RootSignatureHashMap; + + std::vector> m_DynamicDescriptorHeapPool[2]; + std::queue> m_DynamicRetiredDescriptorHeaps[2]; + std::queue m_DynamicAvailableDescriptorHeaps[2]; + + std::unique_ptr m_PageManager[2]; + + std::map> m_GraphicsPSOHashMap; + std::map> m_ComputePSOHashMap; + + bool m_TypedUAVLoadSupport_R11G11B10_FLOAT = false; + bool m_TypedUAVLoadSupport_R16G16B16A16_FLOAT = false; + bool m_NV12Supported = false; + bool m_P010Supported = false; + bool m_FastClearSupported = false; +}; + +} // namespace D3D12Graphics diff --git a/libobs-d3d12/d3d12-duplicator.cpp b/libobs-d3d12/d3d12-duplicator.cpp new file mode 100644 index 00000000000000..ebdd27061862be --- /dev/null +++ b/libobs-d3d12/d3d12-duplicator.cpp @@ -0,0 +1,380 @@ +#include "d3d12-subsystem.hpp" +#include + +static inline bool 
get_monitor(IDXGIAdapter1 *adaptor, int monitor_idx, IDXGIOutput **dxgiOutput) +{ + HRESULT hr; + + hr = adaptor->EnumOutputs(monitor_idx, dxgiOutput); + if (FAILED(hr)) { + if (hr == DXGI_ERROR_NOT_FOUND) + return false; + + throw HRError("Failed to get output", hr); + } + + return true; +} + +void gs_duplicator::Start() +{ + ComPtr output5; + ComPtr output1; + ComPtr output; + HRESULT hr; + + hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) + throw HRError("Failed to create DXGIFactory", hr); + + hr = factory->EnumAdapters1(idx, &adapter); + if (FAILED(hr)) + throw HRError("Failed to enumerate DXGIAdapter", hr); + + hr = D3D11CreateDevice(adapter.Get(), D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0, D3D11_SDK_VERSION, + device11.Assign(), NULL, context11.Assign()); + if (FAILED(hr)) + throw HRError("Failed to create D3D11 Device", hr); + + if (!get_monitor(adapter, idx, output.Assign())) + throw "Invalid monitor index"; + + hr = output->QueryInterface(IID_PPV_ARGS(output5.Assign())); + hdr = false; + sdr_white_nits = 80.f; + if (SUCCEEDED(hr)) { + constexpr DXGI_FORMAT supportedFormats[]{ + DXGI_FORMAT_R16G16B16A16_FLOAT, + DXGI_FORMAT_B8G8R8A8_UNORM, + }; + hr = output5->DuplicateOutput1(device11, 0, _countof(supportedFormats), supportedFormats, + duplicator.Assign()); + if (FAILED(hr)) + throw HRError("Failed to DuplicateOutput1", hr); + DXGI_OUTPUT_DESC desc; + if (SUCCEEDED(output->GetDesc(&desc))) { + gs_monitor_color_info info = device->GetMonitorColorInfo(desc.Monitor); + hdr = info.hdr; + sdr_white_nits = (float)info.sdr_white_nits; + } + } else { + hr = output->QueryInterface(IID_PPV_ARGS(output1.Assign())); + if (FAILED(hr)) + throw HRError("Failed to query IDXGIOutput1", hr); + + hr = output1->DuplicateOutput(device11, duplicator.Assign()); + if (FAILED(hr)) + throw HRError("Failed to DuplicateOutput", hr); + } +} + +gs_duplicator::gs_duplicator(gs_device_t *device_, int monitor_idx) + : gs_obj(device_, gs_type::gs_duplicator), + 
texture(nullptr), + idx(monitor_idx), + refs(1), + updated(false) +{ + Start(); +} + +gs_duplicator::~gs_duplicator() +{ + delete texture; + if (texSharedHandle != NULL) { + texSharedHandle = NULL; + } +} + +extern "C" { + +EXPORT bool device_get_duplicator_monitor_info(gs_device_t *device, int monitor_idx, struct gs_monitor_info *info) +{ + DXGI_OUTPUT_DESC desc; + + try { + ComPtr output; + HRESULT hr; + ComPtr factory; + ComPtr adapter; + hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) + throw HRError("Failed to create DXGIFactory", hr); + + hr = factory->EnumAdapters1(monitor_idx, &adapter); + if (FAILED(hr)) + throw HRError("Failed to enumerate DXGIAdapter", hr); + if (!get_monitor(adapter, monitor_idx, output.Assign())) + return false; + + hr = output->GetDesc(&desc); + if (FAILED(hr)) + throw HRError("GetDesc failed", hr); + + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_get_duplicator_monitor_info: " + "%s (%08lX)", + error.str, error.hr); + return false; + } + + switch (desc.Rotation) { + case DXGI_MODE_ROTATION_UNSPECIFIED: + case DXGI_MODE_ROTATION_IDENTITY: + info->rotation_degrees = 0; + break; + + case DXGI_MODE_ROTATION_ROTATE90: + info->rotation_degrees = 90; + break; + + case DXGI_MODE_ROTATION_ROTATE180: + info->rotation_degrees = 180; + break; + + case DXGI_MODE_ROTATION_ROTATE270: + info->rotation_degrees = 270; + break; + } + + info->x = desc.DesktopCoordinates.left; + info->y = desc.DesktopCoordinates.top; + info->cx = desc.DesktopCoordinates.right - info->x; + info->cy = desc.DesktopCoordinates.bottom - info->y; + + return true; +} + +EXPORT int device_duplicator_get_monitor_index(gs_device_t *device, void *monitor) +{ + const HMONITOR handle = (HMONITOR)monitor; + + int index = -1; + + UINT output = 0; + while (index == -1) { + IDXGIOutput *pOutput; + const HRESULT hr = device->d3d12Instance->GetAdapter()->EnumOutputs(output, &pOutput); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + + if (SUCCEEDED(hr)) { + 
DXGI_OUTPUT_DESC desc; + if (SUCCEEDED(pOutput->GetDesc(&desc))) { + if (desc.Monitor == handle) + index = output; + } else { + blog(LOG_ERROR, + "device_duplicator_get_monitor_index: " + "Failed to get desc (%08lX)", + hr); + } + + pOutput->Release(); + } else if (hr == DXGI_ERROR_NOT_FOUND) { + blog(LOG_ERROR, + "device_duplicator_get_monitor_index: " + "Failed to get output (%08lX)", + hr); + } + + ++output; + } + + return index; +} + +static std::unordered_map instances; + +void reset_duplicators(void) +{ + for (std::pair &pair : instances) { + pair.second->updated = false; + } +} + +EXPORT gs_duplicator_t *device_duplicator_create(gs_device_t *device, int monitor_idx) +{ + gs_duplicator *duplicator = nullptr; + + const auto it = instances.find(monitor_idx); + if (it != instances.end()) { + duplicator = it->second; + duplicator->refs++; + return duplicator; + } + + try { + duplicator = new gs_duplicator(device, monitor_idx); + instances[monitor_idx] = duplicator; + + } catch (const char *error) { + blog(LOG_DEBUG, "device_duplicator_create: %s", error); + return nullptr; + + } catch (const HRError &error) { + blog(LOG_DEBUG, "device_duplicator_create: %s (%08lX)", error.str, error.hr); + return nullptr; + } + + return duplicator; +} + +EXPORT void gs_duplicator_destroy(gs_duplicator_t *duplicator) +{ + if (--duplicator->refs == 0) { + instances.erase(duplicator->idx); + delete duplicator; + } +} + +static HANDLE GetSharedHandle(IDXGIResource *dxgi_res) +{ + HANDLE handle; + HRESULT hr; + + hr = dxgi_res->GetSharedHandle(&handle); + if (FAILED(hr)) { + blog(LOG_WARNING, + "GetSharedHandle: Failed to " + "get shared handle: %08lX", + hr); + return nullptr; + } else { + return handle; + } +} + +static void CreateSharedTexture(gs_duplicator_t *d, D3D11_TEXTURE2D_DESC &desc, gs_color_format general_format) +{ + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.Format = 
ConvertGSTextureFormatResource(general_format); + HRESULT hr = d->device11->CreateTexture2D(&desc, NULL, d->texShared.Assign()); + + if (FAILED(hr)) { + blog(LOG_WARNING, + "CreateSharedTexture: Failed to " + "create shared texture: %08lX", + hr); + return; + } + ComPtr dxgi_res; + + d->texShared->SetEvictionPriority(DXGI_RESOURCE_PRIORITY_MAXIMUM); + + hr = d->texShared->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgi_res); + if (FAILED(hr)) { + blog(LOG_WARNING, + "InitTexture: Failed to query " + "interface: %08lX", + hr); + } else { + d->texSharedHandle = GetSharedHandle(dxgi_res); + + hr = d->texShared->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **)&d->km); + if (FAILED(hr)) { + __debugbreak(); + blog(LOG_ERROR, "device_present (D3D12): IDXGISwapChain::Present failed %08lX", hr); + } + } +} + +static inline void copy_texture(gs_duplicator_t *d, ID3D11Texture2D *tex) +{ + D3D11_TEXTURE2D_DESC desc; + tex->GetDesc(&desc); + const gs_color_format format = ConvertDXGITextureFormat(desc.Format); + const gs_color_format general_format = gs_generalize_format(format); + + if (!d->texture || (gs_texture_get_width(d->texture) != desc.Width) || + (gs_texture_get_height(d->texture) != desc.Height) || (d->texture->format != general_format)) { + + gs_texture_destroy(d->texture); + d->texShared = nullptr; + if (d->km) { + d->km->ReleaseSync(0); + d->km.Release(); + d->km = nullptr; + } + + if (d->texSharedHandle) { + d->texSharedHandle = 0; + } + + CreateSharedTexture(d, desc, general_format); + d->texture = gs_texture_open_nt_shared((uint32_t)(uintptr_t)(d->texSharedHandle)); + + d->color_space = + d->hdr ? GS_CS_709_SCRGB + : ((desc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT) ? 
GS_CS_SRGB_16F : GS_CS_SRGB); + } + + if (d->texShared) { + d->km->AcquireSync(0, INFINITE); + d->context11->CopyResource(d->texShared, tex); + d->km->ReleaseSync(0); + } +} + +EXPORT bool gs_duplicator_update_frame(gs_duplicator_t *d) +{ + DXGI_OUTDUPL_FRAME_INFO info; + ComPtr tex; + ComPtr res; + HRESULT hr; + + if (!d->duplicator) { + return false; + } + if (d->updated) { + return true; + } + + hr = d->duplicator->AcquireNextFrame(0, &info, res.Assign()); + if (hr == DXGI_ERROR_ACCESS_LOST) { + return false; + + } else if (hr == DXGI_ERROR_WAIT_TIMEOUT) { + return true; + + } else if (FAILED(hr)) { + blog(LOG_ERROR, + "gs_duplicator_update_frame: Failed to update " + "frame (%08lX)", + hr); + return true; + } + + hr = res->QueryInterface(__uuidof(ID3D11Texture2D), (void **)tex.Assign()); + if (FAILED(hr)) { + blog(LOG_ERROR, + "gs_duplicator_update_frame: Failed to query " + "ID3D11Texture2D (%08lX)", + hr); + d->duplicator->ReleaseFrame(); + return true; + } + + copy_texture(d, tex); + d->duplicator->ReleaseFrame(); + d->updated = true; + return true; +} + +EXPORT gs_texture_t *gs_duplicator_get_texture(gs_duplicator_t *duplicator) +{ + return duplicator->texture; +} + +EXPORT enum gs_color_space gs_duplicator_get_color_space(gs_duplicator_t *duplicator) +{ + return duplicator->color_space; +} + +EXPORT float gs_duplicator_get_sdr_white_level(gs_duplicator_t *duplicator) +{ + return duplicator->sdr_white_nits; +} +} diff --git a/libobs-d3d12/d3d12-indexbuffer.cpp b/libobs-d3d12/d3d12-indexbuffer.cpp new file mode 100644 index 00000000000000..4fa20137e7410c --- /dev/null +++ b/libobs-d3d12/d3d12-indexbuffer.cpp @@ -0,0 +1,45 @@ +#include "d3d12-subsystem.hpp" + +void gs_index_buffer::InitBuffer() +{ + indexBuffer = new D3D12Graphics::ByteAddressBuffer(device->d3d12Instance); + indexBuffer->Create(L"Index Buffer", (uint32_t)num, (uint32_t)indexSize, indices.data); + + if (indexBuffer->GetResource() == nullptr) { + throw HRError("Failed to create buffer", -1); + 
} +} + +gs_index_buffer::gs_index_buffer(gs_device_t *device, enum gs_index_type type, void *indices, size_t num, + uint32_t flags) + : gs_obj(device, gs_type::gs_index_buffer), + dynamic((flags & GS_DYNAMIC) != 0), + type(type), + num(num), + indices(indices) +{ + switch (type) { + case GS_UNSIGNED_SHORT: + indexSize = 2; + break; + case GS_UNSIGNED_LONG: + indexSize = 4; + break; + } + + InitBuffer(); +} + +void gs_index_buffer::Release() +{ + device->d3d12Instance->GetCommandManager().IdleGPU(); + if (indexBuffer) { + delete indexBuffer; + indexBuffer = nullptr; + } +} + +gs_index_buffer::~gs_index_buffer() +{ + Release(); +} diff --git a/libobs-d3d12/d3d12-samplerstate.cpp b/libobs-d3d12/d3d12-samplerstate.cpp new file mode 100644 index 00000000000000..d96f560aebb0e8 --- /dev/null +++ b/libobs-d3d12/d3d12-samplerstate.cpp @@ -0,0 +1,70 @@ +#include +#include + +#include "d3d12-subsystem.hpp" + +static inline D3D12_TEXTURE_ADDRESS_MODE ConvertGSAddressMode(gs_address_mode mode) +{ + switch (mode) { + case GS_ADDRESS_WRAP: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case GS_ADDRESS_CLAMP: + return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + case GS_ADDRESS_MIRROR: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case GS_ADDRESS_BORDER: + return D3D12_TEXTURE_ADDRESS_MODE_BORDER; + case GS_ADDRESS_MIRRORONCE: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE; + } + + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; +} + +static inline D3D12_FILTER ConvertGSFilter(gs_sample_filter filter) +{ + switch (filter) { + case GS_FILTER_POINT: + return D3D12_FILTER_MIN_MAG_MIP_POINT; + case GS_FILTER_LINEAR: + return D3D12_FILTER_MIN_MAG_MIP_LINEAR; + case GS_FILTER_MIN_MAG_POINT_MIP_LINEAR: + return D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; + case GS_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT: + return D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT; + case GS_FILTER_MIN_POINT_MAG_MIP_LINEAR: + return D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR; + case GS_FILTER_MIN_LINEAR_MAG_MIP_POINT: + return 
D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + case GS_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR: + return D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; + case GS_FILTER_MIN_MAG_LINEAR_MIP_POINT: + return D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + case GS_FILTER_ANISOTROPIC: + return D3D12_FILTER_ANISOTROPIC; + } + + return D3D12_FILTER_MIN_MAG_MIP_POINT; +} + +gs_sampler_state::gs_sampler_state(gs_device_t *device, const gs_sampler_info *info) + : gs_obj(device, gs_type::gs_sampler_state), + info(*info), + sampleDesc(device->d3d12Instance) +{ + vec4 v4; + vec4_from_rgba(&v4, info->border_color); + + sampleDesc.Filter = ConvertGSFilter(info->filter); + + D3D12Graphics::Color borderColor = {v4.x, v4.y, v4.z, v4.w}; + sampleDesc.SetBorderColor(borderColor); + sampleDesc.SetTextureAddressMode(D3D12_TEXTURE_ADDRESS_MODE_CLAMP); + + sampleDesc.AddressU = ConvertGSAddressMode(info->address_u); + sampleDesc.AddressV = ConvertGSAddressMode(info->address_v); + sampleDesc.AddressW = ConvertGSAddressMode(info->address_w); + sampleDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + sampleDesc.MaxAnisotropy = 0; + sampleDesc.CreateDescriptor(); +} diff --git a/libobs-d3d12/d3d12-shader.cpp b/libobs-d3d12/d3d12-shader.cpp new file mode 100644 index 00000000000000..54f5a64bfabcc6 --- /dev/null +++ b/libobs-d3d12/d3d12-shader.cpp @@ -0,0 +1,394 @@ +#include "d3d12-subsystem.hpp" +#include "d3d12-shaderprocessor.hpp" +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +void gs_vertex_shader::GetBuffersExpected(const std::vector &inputs) +{ + for (size_t i = 0; i < inputs.size(); i++) { + const D3D12_INPUT_ELEMENT_DESC &input = inputs[i]; + if (strcmp(input.SemanticName, "NORMAL") == 0) + hasNormals = true; + else if (strcmp(input.SemanticName, "TANGENT") == 0) + hasTangents = true; + else if (strcmp(input.SemanticName, "COLOR") == 0) + hasColors = true; + else if (strcmp(input.SemanticName, "TEXCOORD") == 0) + nTexUnits++; + } +} + 
+gs_vertex_shader::gs_vertex_shader(gs_device_t *device, const char *file, const char *shaderString) + : gs_shader(device, gs_type::gs_vertex_shader, GS_SHADER_VERTEX), + hasNormals(false), + hasColors(false), + hasTangents(false), + nTexUnits(0) +{ + ShaderProcessor processor(device); + ComPtr shaderBlob; + std::string outputString; + + processor.Process(shaderString, file); + processor.BuildString(outputString); + processor.BuildParams(params); + processor.BuildInputLayout(layoutData); + GetBuffersExpected(layoutData); + BuildConstantBuffer(); + + actuallyShaderString = outputString; + Compile(outputString.c_str(), file, "vs_4_0", shaderBlob.Assign()); + + data.resize(shaderBlob->GetBufferSize()); + memcpy(&data[0], shaderBlob->GetBufferPointer(), data.size()); + + viewProj = gs_shader_get_param_by_name(this, "ViewProj"); + world = gs_shader_get_param_by_name(this, "World"); +} + +gs_pixel_shader::gs_pixel_shader(gs_device_t *device, const char *file, const char *shaderString) + : gs_shader(device, gs_type::gs_pixel_shader, GS_SHADER_PIXEL) +{ + ShaderProcessor processor(device); + ComPtr shaderBlob; + std::string outputString; + + processor.Process(shaderString, file); + processor.BuildString(outputString); + processor.BuildParams(params); + processor.BuildSamplers(samplers); + + samplerCount = samplers.size(); + + BuildConstantBuffer(); + actuallyShaderString = outputString; + Compile(outputString.c_str(), file, "ps_4_0", shaderBlob.Assign()); + + data.resize(shaderBlob->GetBufferSize()); + memcpy(&data[0], shaderBlob->GetBufferPointer(), data.size()); +} + +/* + * Shader compilers will pack constants in to single registers when possible. 
+ * For example: + * + * uniform float3 test1; + * uniform float test2; + * + * will inhabit a single constant register (c0.xyz for 'test1', and c0.w for + * 'test2') + * + * However, if two constants cannot inhabit the same register, the second one + * must begin at a new register, for example: + * + * uniform float2 test1; + * uniform float3 test2; + * + * 'test1' will inhabit register constant c0.xy. However, because there's no + * room for 'test2, it must use a new register constant entirely (c1.xyz). + * + * So if we want to calculate the position of the constants in the constant + * buffer, we must take this in to account. + */ + +void gs_shader::BuildConstantBuffer() +{ + int32_t textures = 0; + for (size_t i = 0; i < params.size(); i++) { + gs_shader_param ¶m = params[i]; + size_t size = 0; + + switch (param.type) { + case GS_SHADER_PARAM_BOOL: + case GS_SHADER_PARAM_INT: + case GS_SHADER_PARAM_FLOAT: + size = sizeof(float); + break; + case GS_SHADER_PARAM_INT2: + case GS_SHADER_PARAM_VEC2: + size = sizeof(vec2); + break; + case GS_SHADER_PARAM_INT3: + case GS_SHADER_PARAM_VEC3: + size = sizeof(float) * 3; + break; + case GS_SHADER_PARAM_INT4: + case GS_SHADER_PARAM_VEC4: + size = sizeof(vec4); + break; + case GS_SHADER_PARAM_MATRIX4X4: + size = sizeof(float) * 4 * 4; + break; + case GS_SHADER_PARAM_TEXTURE: + ++textures; + continue; + case GS_SHADER_PARAM_STRING: + case GS_SHADER_PARAM_UNKNOWN: + continue; + } + + if (param.arrayCount) + size *= param.arrayCount; + + if (size && (constantSize & 15) != 0) { + size_t alignMax = (constantSize + 15) & ~15; + if ((size + constantSize) > alignMax) { + constantSize = alignMax; + } + } + + param.pos = constantSize; + constantSize += size; + } + + if (constantSize > 0) { + hasDynamicUniformConstantBuffer = true; /* align */ + } + + textureCount = textures; + + for (size_t i = 0; i < params.size(); i++) + gs_shader_set_default(¶ms[i]); +} + +static uint64_t fnv1a_hash(const char *str, size_t len) +{ + const uint64_t 
FNV_OFFSET = 14695981039346656037ULL; + const uint64_t FNV_PRIME = 1099511628211ULL; + uint64_t hash = FNV_OFFSET; + for (size_t i = 0; i < len; i++) { + hash ^= (uint64_t)str[i]; + hash *= FNV_PRIME; + } + return hash; +} + +void gs_shader::Compile(const char *shaderString, const char *file, const char *target, ID3D10Blob **shader) +{ + ComPtr errorsBlob; + HRESULT hr; + + bool is_cached = false; + + if (!shaderString) + throw "No shader string specified"; + + size_t shaderStrLen = strlen(shaderString); + + if (!is_cached) { + hr = D3DCompile(shaderString, shaderStrLen, file, NULL, NULL, "main", target, + D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION, 0, shader, errorsBlob.Assign()); + if (FAILED(hr)) { + if (errorsBlob != NULL && errorsBlob->GetBufferSize()) + throw ShaderError(errorsBlob, hr); + else + throw HRError("Failed to compile shader", hr); + } + } + +#ifdef DISASSEMBLE_SHADERS + ComPtr asmBlob; + + hr = D3DDisassemble((*shader)->GetBufferPointer(), (*shader)->GetBufferSize(), 0, nullptr, &asmBlob); + + if (SUCCEEDED(hr) && !!asmBlob && asmBlob->GetBufferSize()) { + blog(LOG_INFO, "============================================="); + blog(LOG_INFO, "Disassembly output for shader '%s':\n%s", file, asmBlob->GetBufferPointer()); + } +#endif +} + +inline void gs_shader::UpdateParam(std::vector &constData, gs_shader_param ¶m, bool &upload) +{ + if (param.type != GS_SHADER_PARAM_TEXTURE) { + if (!param.curValue.size()) + throw "Not all shader parameters were set"; + + /* padding in case the constant needs to start at a new + * register */ + if (param.pos > constData.size()) { + uint8_t zero = 0; + + constData.insert(constData.end(), param.pos - constData.size(), zero); + } + + constData.insert(constData.end(), param.curValue.begin(), param.curValue.end()); + + if (param.changed) { + upload = true; + param.changed = false; + } + + } else if (param.curValue.size() == sizeof(struct gs_shader_texture)) { + struct gs_shader_texture shader_tex; + memcpy(&shader_tex, 
param.curValue.data(), sizeof(shader_tex)); + if (shader_tex.srgb) + device_load_texture_srgb(device, shader_tex.tex, param.textureID); + else + device_load_texture(device, shader_tex.tex, param.textureID); + + if (param.nextSampler) { + device->context->SetDynamicSampler(samplerRootParameterIndex, 0, + param.nextSampler->sampleDesc.Sampler); + param.nextSampler = nullptr; + } + } +} + +void gs_shader::UploadParams() +{ + std::vector constData; + bool upload = true; + + constData.reserve(constantSize); + + for (size_t i = 0; i < params.size(); i++) + UpdateParam(constData, params[i], upload); + + if (constData.size() != constantSize) + throw "Invalid constant data size given to shader"; + + if (upload && dynamicUniformConstantBufferRootParameterIndex != -1) { + device->context->SetDynamicConstantBufferView(dynamicUniformConstantBufferRootParameterIndex, + constData.size(), constData.data()); + } +} + +void gs_shader_destroy(gs_shader_t *shader) +{ + delete shader; +} + +int gs_shader_get_num_params(const gs_shader_t *shader) +{ + return (int)shader->params.size(); +} + +gs_sparam_t *gs_shader_get_param_by_idx(gs_shader_t *shader, uint32_t param) +{ + return &shader->params[param]; +} + +gs_sparam_t *gs_shader_get_param_by_name(gs_shader_t *shader, const char *name) +{ + for (size_t i = 0; i < shader->params.size(); i++) { + gs_shader_param ¶m = shader->params[i]; + if (strcmp(param.name.c_str(), name) == 0) + return ¶m; + } + + return NULL; +} + +gs_sparam_t *gs_shader_get_viewproj_matrix(const gs_shader_t *shader) +{ + if (shader->type != GS_SHADER_VERTEX) + return NULL; + + return static_cast(shader)->viewProj; +} + +gs_sparam_t *gs_shader_get_world_matrix(const gs_shader_t *shader) +{ + if (shader->type != GS_SHADER_VERTEX) + return NULL; + + return static_cast(shader)->world; +} + +void gs_shader_get_param_info(const gs_sparam_t *param, struct gs_shader_param_info *info) +{ + if (!param) + return; + + info->name = param->name.c_str(); + info->type = 
param->type; +} + +static inline void shader_setval_inline(gs_shader_param *param, const void *data, size_t size) +{ + assert(param); + if (!param) + return; + + bool size_changed = param->curValue.size() != size; + if (size_changed) + param->curValue.resize(size); + + if (size_changed || memcmp(param->curValue.data(), data, size) != 0) { + memcpy(param->curValue.data(), data, size); + param->changed = true; + } +} + +void gs_shader_set_bool(gs_sparam_t *param, bool val) +{ + int b_val = (int)val; + shader_setval_inline(param, &b_val, sizeof(int)); +} + +void gs_shader_set_float(gs_sparam_t *param, float val) +{ + shader_setval_inline(param, &val, sizeof(float)); +} + +void gs_shader_set_int(gs_sparam_t *param, int val) +{ + shader_setval_inline(param, &val, sizeof(int)); +} + +void gs_shader_set_matrix3(gs_sparam_t *param, const struct matrix3 *val) +{ + struct matrix4 mat; + matrix4_from_matrix3(&mat, val); + shader_setval_inline(param, &mat, sizeof(matrix4)); +} + +void gs_shader_set_matrix4(gs_sparam_t *param, const struct matrix4 *val) +{ + shader_setval_inline(param, val, sizeof(matrix4)); +} + +void gs_shader_set_vec2(gs_sparam_t *param, const struct vec2 *val) +{ + shader_setval_inline(param, val, sizeof(vec2)); +} + +void gs_shader_set_vec3(gs_sparam_t *param, const struct vec3 *val) +{ + shader_setval_inline(param, val, sizeof(float) * 3); +} + +void gs_shader_set_vec4(gs_sparam_t *param, const struct vec4 *val) +{ + shader_setval_inline(param, val, sizeof(vec4)); +} + +void gs_shader_set_texture(gs_sparam_t *param, gs_texture_t *val) +{ + shader_setval_inline(param, &val, sizeof(gs_texture_t *)); +} + +void gs_shader_set_val(gs_sparam_t *param, const void *val, size_t size) +{ + shader_setval_inline(param, val, size); +} + +void gs_shader_set_default(gs_sparam_t *param) +{ + if (param->defaultValue.size()) + shader_setval_inline(param, param->defaultValue.data(), param->defaultValue.size()); +} + +void gs_shader_set_next_sampler(gs_sparam_t *param, 
gs_samplerstate_t *sampler) +{ + param->nextSampler = sampler; +} diff --git a/libobs-d3d12/d3d12-shaderprocessor.cpp b/libobs-d3d12/d3d12-shaderprocessor.cpp new file mode 100644 index 00000000000000..68cbaa2037b399 --- /dev/null +++ b/libobs-d3d12/d3d12-shaderprocessor.cpp @@ -0,0 +1,219 @@ +#include "d3d12-subsystem.hpp" +#include "d3d12-shaderprocessor.hpp" + +#include + +static const char *semanticInputNames[] = {"POSITION", "NORMAL", "COLOR", "TANGENT", "TEXCOORD", "VERTEXID"}; +static const char *semanticOutputNames[] = {"SV_Position", "NORMAL", "COLOR", "TANGENT", "TEXCOORD", "VERTEXID"}; + +static const char *ConvertSemanticName(const char *name) +{ + const size_t num = sizeof(semanticInputNames) / sizeof(const char *); + for (size_t i = 0; i < num; i++) { + if (strcmp(name, semanticInputNames[i]) == 0) + return semanticOutputNames[i]; + } + + throw "Unknown Semantic Name"; +} + +static void GetSemanticInfo(shader_var *var, const char *&name, uint32_t &index) +{ + const char *mapping = var->mapping; + const char *indexStr = mapping; + + while (*indexStr && !isdigit(*indexStr)) + indexStr++; + index = (*indexStr) ? 
strtol(indexStr, NULL, 10) : 0; + + std::string nameStr; + nameStr.assign(mapping, indexStr - mapping); + name = ConvertSemanticName(nameStr.c_str()); +} + +static void AddInputLayoutVar(shader_var *var, std::vector &layout) +{ + D3D12_INPUT_ELEMENT_DESC ied; + const char *semanticName; + uint32_t semanticIndex; + + GetSemanticInfo(var, semanticName, semanticIndex); + + memset(&ied, 0, sizeof(ied)); + ied.SemanticName = semanticName; + ied.SemanticIndex = semanticIndex; + ied.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + + if (strcmp(var->mapping, "COLOR") == 0) { + ied.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + + } else if (strcmp(var->mapping, "POSITION") == 0 || strcmp(var->mapping, "NORMAL") == 0 || + strcmp(var->mapping, "TANGENT") == 0) { + ied.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + + } else if (astrcmp_n(var->mapping, "TEXCOORD", 8) == 0) { + /* type is always a 'float' type */ + switch (var->type[5]) { + case 0: + ied.Format = DXGI_FORMAT_R32_FLOAT; + break; + case '2': + ied.Format = DXGI_FORMAT_R32G32_FLOAT; + break; + case '3': + case '4': + ied.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + } + } + + layout.push_back(ied); +} + +static inline bool SetSlot(std::vector &layout, const char *name, uint32_t index, + uint32_t &slotIdx) +{ + for (size_t i = 0; i < layout.size(); i++) { + D3D12_INPUT_ELEMENT_DESC &input = layout[i]; + if (input.SemanticIndex == index && strcmpi(input.SemanticName, name) == 0) { + layout[i].InputSlot = slotIdx++; + return true; + } + } + + return false; +} + +static void BuildInputLayoutFromVars(shader_parser *parser, darray *vars, std::vector &layout) +{ + shader_var *array = (shader_var *)vars->array; + + for (size_t i = 0; i < vars->num; i++) { + shader_var *var = array + i; + + if (var->mapping) { + if (strcmp(var->mapping, "VERTEXID") != 0) + AddInputLayoutVar(var, layout); + } else { + shader_struct *st = shader_parser_getstruct(parser, var->type); + if (st) + BuildInputLayoutFromVars(parser, 
&st->vars.da, layout); + } + } + + /* + * Sets the input slot value for each semantic, however we do it in + * a specific order so that it will always match the vertex buffer's + * sub-buffer order (points-> normals-> colors-> tangents-> uvcoords) + */ + uint32_t slot = 0; + SetSlot(layout, "SV_Position", 0, slot); + SetSlot(layout, "NORMAL", 0, slot); + SetSlot(layout, "COLOR", 0, slot); + SetSlot(layout, "TANGENT", 0, slot); + + uint32_t index = 0; + while (SetSlot(layout, "TEXCOORD", index++, slot)) + ; +} + +void ShaderProcessor::BuildInputLayout(std::vector &layout) +{ + shader_func *func = shader_parser_getfunc(&parser, "main"); + if (!func) + throw "Failed to find 'main' shader function"; + + BuildInputLayoutFromVars(&parser, &func->params.da, layout); +} + +gs_shader_param::gs_shader_param(shader_var &var, uint32_t &texCounter) + : name(var.name), + type(get_shader_param_type(var.type)), + textureID(texCounter), + arrayCount(var.array_count), + changed(false) +{ + defaultValue.resize(var.default_val.num); + memcpy(defaultValue.data(), var.default_val.array, var.default_val.num); + + if (type == GS_SHADER_PARAM_TEXTURE) + texCounter++; + else + textureID = 0; +} + +static inline void AddParam(shader_var &var, std::vector ¶ms, uint32_t &texCounter) +{ + if (var.var_type != SHADER_VAR_UNIFORM || strcmp(var.type, "sampler") == 0) + return; + + params.push_back(gs_shader_param(var, texCounter)); +} + +void ShaderProcessor::BuildParams(std::vector ¶ms) +{ + uint32_t texCounter = 0; + + for (size_t i = 0; i < parser.params.num; i++) + AddParam(parser.params.array[i], params, texCounter); +} + +static inline void AddSampler(gs_device_t *device, shader_sampler &sampler, + std::vector> &samplers) +{ + gs_sampler_info si; + shader_sampler_convert(&sampler, &si); + samplers.emplace_back(new ShaderSampler(sampler.name, device, &si)); +} + +void ShaderProcessor::BuildSamplers(std::vector> &samplers) +{ + for (size_t i = 0; i < parser.samplers.num; i++) + 
AddSampler(device, parser.samplers.array[i], samplers); +} + +void ShaderProcessor::BuildString(std::string &outputString) +{ + std::stringstream output; + output << "static const bool obs_glsl_compile = false;\n\n"; + + cf_token *token = cf_preprocessor_get_tokens(&parser.cfp.pp); + while (token->type != CFTOKEN_NONE) { + /* cheaply just replace specific tokens */ + if (strref_cmp(&token->str, "POSITION") == 0) + output << "SV_Position"; + else if (strref_cmp(&token->str, "TARGET") == 0) + output << "SV_Target"; + else if (strref_cmp(&token->str, "texture2d") == 0) + output << "Texture2D"; + else if (strref_cmp(&token->str, "texture3d") == 0) + output << "Texture3D"; + else if (strref_cmp(&token->str, "texture_cube") == 0) + output << "TextureCube"; + else if (strref_cmp(&token->str, "texture_rect") == 0) + throw "texture_rect is not supported in D3D"; + else if (strref_cmp(&token->str, "sampler_state") == 0) + output << "SamplerState"; + else if (strref_cmp(&token->str, "VERTEXID") == 0) + output << "SV_VertexID"; + else + output.write(token->str.array, token->str.len); + + token++; + } + + outputString = move(output.str()); +} + +void ShaderProcessor::Process(const char *shader_string, const char *file) +{ + bool success = shader_parse(&parser, shader_string, file); + char *str = shader_parser_geterrors(&parser); + if (str) { + blog(LOG_WARNING, "Shader parser errors/warnings:\n%s\n", str); + bfree(str); + } + + if (!success) + throw "Failed to parse shader"; +} diff --git a/libobs-d3d12/d3d12-shaderprocessor.hpp b/libobs-d3d12/d3d12-shaderprocessor.hpp new file mode 100644 index 00000000000000..431cbbb14cfd34 --- /dev/null +++ b/libobs-d3d12/d3d12-shaderprocessor.hpp @@ -0,0 +1,21 @@ +#pragma once + +#include + +struct ShaderParser : shader_parser { + inline ShaderParser() { shader_parser_init(this); } + inline ~ShaderParser() { shader_parser_free(this); } +}; + +struct ShaderProcessor { + gs_device_t *device; + ShaderParser parser; + + void 
BuildInputLayout(std::vector &inputs); + void BuildParams(std::vector ¶ms); + void BuildSamplers(std::vector> &samplers); + void BuildString(std::string &outputString); + void Process(const char *shader_string, const char *file); + + inline ShaderProcessor(gs_device_t *device) : device(device) {} +}; diff --git a/libobs-d3d12/d3d12-stagesurf.cpp b/libobs-d3d12/d3d12-stagesurf.cpp new file mode 100644 index 00000000000000..54057935cf196c --- /dev/null +++ b/libobs-d3d12/d3d12-stagesurf.cpp @@ -0,0 +1,75 @@ +#include "d3d12-subsystem.hpp" + +gs_stage_surface::gs_stage_surface(gs_device_t *device, uint32_t width, uint32_t height, gs_color_format colorFormat) + : gs_obj(device, gs_type::gs_stage_surface), + D3D12Graphics::ReadbackBuffer(device->d3d12Instance), + width(width), + height(height), + format(colorFormat), + dxgiFormat(ConvertGSTextureFormatView(colorFormat)) +{ + Create(L"surface", (UINT)GetTotalBytes() / D3D12Graphics::BytesPerPixel(dxgiFormat), + D3D12Graphics::BytesPerPixel(dxgiFormat)); +} + +gs_stage_surface::gs_stage_surface(gs_device_t *device, uint32_t width, uint32_t height, bool p010) + : gs_obj(device, gs_type::gs_stage_surface), + D3D12Graphics::ReadbackBuffer(device->d3d12Instance), + width(width), + height(height), + format(GS_UNKNOWN), + dxgiFormat(p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12) +{ + Create(L"surface", + (UINT)GetTotalBytes() / D3D12Graphics::BytesPerPixel(p010 ? 
DXGI_FORMAT_P010 : DXGI_FORMAT_NV12), + D3D12Graphics::BytesPerPixel(dxgiFormat)); +} + +UINT gs_stage_surface::GetLineSize() +{ + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedTextureDesc; + UINT NumRows; + UINT64 RowLength; + UINT64 TotalBytes = 0; + auto textureDesc = GetTextureDesc(); + device->d3d12Instance->GetDevice()->GetCopyableFootprints(&textureDesc, 0, 1, 0, &placedTextureDesc, &NumRows, + &RowLength, &TotalBytes); + return placedTextureDesc.Footprint.RowPitch; +} + +D3D12_RESOURCE_DESC gs_stage_surface::GetTextureDesc() +{ + D3D12_RESOURCE_DESC texDesc = {}; + + texDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + texDesc.Width = width; + texDesc.Height = height; + texDesc.DepthOrArraySize = 1; + + texDesc.MipLevels = 1; + + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + texDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + texDesc.Format = dxgiFormat; + if (texDesc.Format == DXGI_FORMAT_NV12 || texDesc.Format == DXGI_FORMAT_P010) { + texDesc.Width = (texDesc.Width + 1) & ~1; + texDesc.Height = (texDesc.Height + 1) & ~1; + } + + return texDesc; +} + +UINT64 gs_stage_surface::GetTotalBytes() +{ + D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedTextureDesc; + UINT NumRows; + UINT64 RowLength; + UINT64 TotalBytes = 0; + auto textureDesc = GetTextureDesc(); + device->d3d12Instance->GetDevice()->GetCopyableFootprints(&textureDesc, 0, 1, 0, &placedTextureDesc, &NumRows, + &RowLength, &TotalBytes); + return TotalBytes; +} diff --git a/libobs-d3d12/d3d12-subsystem.cpp b/libobs-d3d12/d3d12-subsystem.cpp new file mode 100644 index 00000000000000..bc9839115a4760 --- /dev/null +++ b/libobs-d3d12/d3d12-subsystem.cpp @@ -0,0 +1,2326 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "d3d12-subsystem.hpp" +#include +#include + +struct UnsupportedHWError : HRError { + inline UnsupportedHWError(const char *str, HRESULT hr) : HRError(str, hr) {} +}; + 
+static inline void LogD3D12ErrorDetails(HRError error, gs_device_t *device) +{ + if (error.hr == DXGI_ERROR_DEVICE_REMOVED) { + HRESULT DeviceRemovedReason = device->d3d12Instance->GetDevice()->GetDeviceRemovedReason(); + blog(LOG_ERROR, " Device Removed Reason: %08lX", DeviceRemovedReason); + } +} + +gs_obj::gs_obj(gs_device_t *device_, gs_type type) : device(device_), obj_type(type) +{ + prev_next = &device->first_obj; + next = device->first_obj; + device->first_obj = this; + if (next) + next->prev_next = &next; +} + +gs_obj::~gs_obj() +{ + if (prev_next) + *prev_next = next; + if (next) + next->prev_next = prev_next; +} + +static enum gs_color_space get_next_space(gs_device_t *device, HWND hwnd, DXGI_SWAP_EFFECT effect) +{ + enum gs_color_space next_space = GS_CS_SRGB; + if (effect == DXGI_SWAP_EFFECT_FLIP_DISCARD) { + const HMONITOR hMonitor = MonitorFromWindow(hwnd, MONITOR_DEFAULTTONEAREST); + if (hMonitor) { + const auto info = D3D12Graphics::D3D12DeviceInstance::GetMonitorColorInfo(hMonitor); + if (info.m_HDR) + next_space = GS_CS_709_SCRGB; + else if (info.m_BitsPerColor > 8) + next_space = GS_CS_SRGB_16F; + } + } + + return next_space; +} + +static enum gs_color_format get_swap_format_from_space(gs_color_space space, gs_color_format sdr_format) +{ + gs_color_format format = sdr_format; + switch (space) { + case GS_CS_SRGB_16F: + case GS_CS_709_SCRGB: + format = GS_RGBA16F; + } + + return format; +} + +static inline enum gs_color_space make_swap_desc(gs_device *device, DXGI_SWAP_CHAIN_DESC1 &desc, + const gs_init_data *data, + DXGI_SWAP_EFFECT effect = DXGI_SWAP_EFFECT_FLIP_DISCARD, + UINT flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH) +{ + const HWND hwnd = (HWND)data->window.hwnd; + const enum gs_color_space space = get_next_space(device, hwnd, effect); + const gs_color_format format = get_swap_format_from_space(space, data->format); + + memset(&desc, 0, sizeof(desc)); + + desc.Width = data->cx; + desc.Height = data->cy; + desc.Format = 
ConvertGSTextureFormatView(format); + desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + desc.BufferCount = data->num_backbuffers; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Scaling = DXGI_SCALING_NONE; + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; + + return space; +} + +void gs_swap_chain::Release() +{ + device->d3d12Instance->GetCommandManager().IdleGPU(); +} + +void gs_swap_chain::Resize(uint32_t cx, uint32_t cy, gs_color_format format) +{ + RECT clientRect; + HRESULT hr = 0; + + initData.cx = cx; + initData.cy = cy; + + if (cx == 0 || cy == 0) { + GetClientRect(hwnd, &clientRect); + if (cx == 0) + cx = clientRect.right; + if (cy == 0) + cy = clientRect.bottom; + } + const DXGI_FORMAT dxgi_format = ConvertGSTextureFormatView(format); + + for (uint32_t i = 0; i < initData.num_backbuffers; ++i) { + target[i].Release(); + target[i].Destroy(); + } + hr = swap->ResizeBuffers(initData.num_backbuffers, cx, cy, dxgi_format, swapDesc.Flags); + if (FAILED(hr)) { + HRESULT reason = device->d3d12Instance->GetDevice()->GetDeviceRemovedReason(); + __debugbreak(); + throw HRError("Failed to resize swap buffers", hr); + } + + if (bEnableHDROutput) { + hr = swap->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020); + if (FAILED(hr)) { + __debugbreak(); + throw HRError("Failed to set color space", hr); + } + } + + for (uint32_t i = 0; i < initData.num_backbuffers; ++i) { + target[i].CreateTargetFromSwapChain(device, swap, i, format, initData); + } + + if (initData.zsformat != GS_ZS_NONE) { + zs = std::make_unique(device, initData.cx, initData.cy, initData.zsformat); + } + currentBackBufferIndex = swap->GetCurrentBackBufferIndex(); +} + +gs_swap_chain::gs_swap_chain(gs_device *device, const gs_init_data *data) + : gs_obj(device, gs_type::gs_swap_chain), + hwnd((HWND)data->window.hwnd), + initData(*data), + space(GS_CS_SRGB) +{ + 
initData.num_backbuffers = initData.num_backbuffers > 3 ? initData.num_backbuffers : 3; + space = make_swap_desc(device, swapDesc, &initData); + + ComPtr swap1; + DXGI_SWAP_CHAIN_FULLSCREEN_DESC fsSwapChainDesc = {}; + fsSwapChainDesc.Windowed = TRUE; + + HRESULT hr = device->d3d12Instance->GetDxgiFactory()->CreateSwapChainForHwnd( + device->d3d12Instance->GetCommandManager().GetCommandQueue(), hwnd, &swapDesc, &fsSwapChainDesc, + nullptr, &swap1); + if (FAILED(hr)) { + HRESULT reason = device->d3d12Instance->GetDevice()->GetDeviceRemovedReason(); + throw HRError("Failed to create swap chain", hr); + } + + swap = ComQIPtr(swap1); + if (!swap) + throw HRError("Failed to create swap chain3", hr); + + /* Ignore Alt+Enter */ + device->d3d12Instance->GetDxgiFactory()->MakeWindowAssociation(hwnd, DXGI_MWA_NO_ALT_ENTER); + + const gs_color_format format = + get_swap_format_from_space(get_next_space(device, hwnd, swapDesc.SwapEffect), initData.format); + + for (uint32_t i = 0; i < initData.num_backbuffers; ++i) { + target[i].CreateTargetFromSwapChain(device, swap, i, format, initData); + } + + if (initData.zsformat != GS_ZS_NONE) { + zs = std::make_unique(device, initData.cx, initData.cy, initData.zsformat); + } + + currentBackBufferIndex = swap->GetCurrentBackBufferIndex(); + + // HDR Enable + bool enableHDROutput = CheckHDRSupport(); + if (enableHDROutput && space == GS_CS_709_SCRGB && + SUCCEEDED(swap->SetColorSpace1(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020))) { + bEnableHDROutput = true; + } +} + +bool gs_swap_chain::CheckHDRSupport() +{ + ComPtr output; + ComPtr output6; + DXGI_OUTPUT_DESC1 outputDesc; + HRESULT hr = swap->GetContainingOutput(&output); + if (FAILED(hr)) { + return false; + } + + output6 = ComQIPtr(output); + if (!output6) { + return false; + } + + hr = output6->GetDesc1(&outputDesc); + if (FAILED(hr)) { + return false; + } + + if (outputDesc.ColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020) { + return true; + } + + UINT colorSpaceSupport = 0; 
+ swap->CheckColorSpaceSupport(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, &colorSpaceSupport); + + if (colorSpaceSupport & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT) { + return true; + } + + return false; +} + +gs_swap_chain::~gs_swap_chain() +{ + Release(); +} + +static bool FastClearSupported(UINT vendorId, uint64_t version) +{ + /* Always true for non-NVIDIA GPUs */ + if (vendorId != 0x10de) + return true; + + const uint16_t aa = (version >> 48) & 0xffff; + const uint16_t bb = (version >> 32) & 0xffff; + const uint16_t ccccc = (version >> 16) & 0xffff; + const uint16_t ddddd = version & 0xffff; + + /* Check for NVIDIA driver version >= 31.0.15.2737 */ + return aa >= 31 && bb >= 0 && ccccc >= 15 && ddddd >= 2737; +} + +void gs_device::InitDevice(uint32_t adapterIdx) +{ + d3d12Instance = new D3D12Graphics::D3D12DeviceInstance(); + d3d12Instance->Initialize(adapterIdx); + auto device11 = d3d12Instance->GetDevice(); + + fastClearSupported = d3d12Instance->FastClearSupported(); + nv12Supported = d3d12Instance->IsNV12TextureSupported(); + p010Supported = d3d12Instance->IsP010TextureSupported(); + + blog(LOG_INFO, "D3D12 loaded successfully"); +} + +static inline void ConvertStencilSide(D3D12_DEPTH_STENCILOP_DESC &desc, const StencilSide &side) +{ + desc.StencilFunc = ConvertGSDepthTest(side.test); + desc.StencilFailOp = ConvertGSStencilOp(side.fail); + desc.StencilDepthFailOp = ConvertGSStencilOp(side.zfail); + desc.StencilPassOp = ConvertGSStencilOp(side.zpass); +} + +D3D12_DEPTH_STENCIL_DESC gs_device::ConvertZStencilState(const ZStencilState &zs) +{ + D3D12_DEPTH_STENCIL_DESC desc; + memset(&desc, 0, sizeof(desc)); + + desc.DepthEnable = zs.depthEnabled; + desc.DepthFunc = ConvertGSDepthTest(zs.depthFunc); + desc.DepthWriteMask = zs.depthWriteEnabled ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + desc.StencilEnable = zs.stencilEnabled; + desc.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + desc.StencilWriteMask = zs.stencilWriteEnabled ? 
D3D12_DEFAULT_STENCIL_WRITE_MASK : 0; + ConvertStencilSide(desc.FrontFace, zs.stencilFront); + ConvertStencilSide(desc.BackFace, zs.stencilBack); + return desc; +} + +D3D12_RASTERIZER_DESC gs_device::ConvertRasterState(const RasterState &rs) +{ + D3D12_RASTERIZER_DESC desc; + memset(&desc, 0, sizeof(desc)); + + /* use CCW to convert to a right-handed coordinate system */ + desc.FrontCounterClockwise = true; + desc.FillMode = D3D12_FILL_MODE_SOLID; + desc.CullMode = ConvertGSCullMode(rs.cullMode); + return desc; +} + +D3D12_BLEND_DESC gs_device::ConvertBlendState(const BlendState &bs) +{ + D3D12_BLEND_DESC desc; + memset(&desc, 0, sizeof(desc)); + + for (int i = 0; i < 8; i++) { + desc.RenderTarget[i].LogicOpEnable = FALSE; + desc.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + desc.RenderTarget[i].BlendEnable = bs.blendEnabled; + desc.RenderTarget[i].BlendOp = ConvertGSBlendOpType(bs.op); + desc.RenderTarget[i].BlendOpAlpha = ConvertGSBlendOpType(bs.op); + desc.RenderTarget[i].SrcBlend = ConvertGSBlendType(bs.srcFactorC); + desc.RenderTarget[i].DestBlend = ConvertGSBlendType(bs.destFactorC); + desc.RenderTarget[i].SrcBlendAlpha = ConvertGSBlendType(bs.srcFactorA); + desc.RenderTarget[i].DestBlendAlpha = ConvertGSBlendType(bs.destFactorA); + desc.RenderTarget[i].RenderTargetWriteMask = (bs.redEnabled ? D3D12_COLOR_WRITE_ENABLE_RED : 0) | + (bs.greenEnabled ? D3D12_COLOR_WRITE_ENABLE_GREEN : 0) | + (bs.blueEnabled ? D3D12_COLOR_WRITE_ENABLE_BLUE : 0) | + (bs.alphaEnabled ? 
D3D12_COLOR_WRITE_ENABLE_ALPHA : 0); + } + + return desc; +} + +void gs_device::LoadVertexBufferData() +{ + if (curVertexBuffer == lastVertexBuffer && curVertexShader == lastVertexShader) + return; + + D3D12Graphics::GpuBuffer *buffers[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; + + memset(buffers, 0, sizeof(buffers)); + + uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT] = {0}; + uint32_t offsets[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT] = {0}; + UINT numBuffers = 0; + + if (curVertexBuffer && curVertexShader) { + numBuffers = curVertexBuffer->MakeBufferList(curVertexShader, buffers, strides); + } else { + numBuffers = curVertexShader ? curVertexShader->NumBuffersExpected() : 0; + } + + curVertexBufferViews.resize(numBuffers); + + for (uint32_t i = 0; i < numBuffers && curVertexBuffer; ++i) { + curVertexBufferViews[i] = buffers[i]->VertexBufferView(); + } + + lastVertexBuffer = curVertexBuffer; + lastVertexShader = curVertexShader; +} + +void gs_device::LoadRootSignature(std::unique_ptr &rootSignature) +{ + int32_t numParameters = 0; + if (curVertexShader->hasDynamicUniformConstantBuffer) { + numParameters += 1; + } + + if (curPixelShader->samplerCount > 0) { + numParameters += 1; + } + + if (curPixelShader->textureCount > 0) { + numParameters += 1; + } + + if (curPixelShader->hasDynamicUniformConstantBuffer) { + numParameters += 1; + } + + rootSignature->Reset(numParameters, 0); + int32_t parameterIndex = 0; + + D3D12Graphics::RootSignature &curRootSignatureTemp = *rootSignature; + if (curVertexShader->hasDynamicUniformConstantBuffer) { + curRootSignatureTemp[parameterIndex].InitAsConstantBuffer(0, D3D12_SHADER_VISIBILITY_VERTEX); + curVertexShader->dynamicUniformConstantBufferRootParameterIndex = parameterIndex; + parameterIndex += 1; + } + + if (curPixelShader->samplerCount > 0) { + curRootSignatureTemp[parameterIndex].InitAsDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, + (UINT)(curPixelShader->samplerCount), + 
D3D12_SHADER_VISIBILITY_PIXEL); + curPixelShader->samplerRootParameterIndex = parameterIndex; + parameterIndex += 1; + } + + if (curPixelShader->textureCount > 0) { + curRootSignatureTemp[parameterIndex].InitAsDescriptorRange(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, + (UINT)(curPixelShader->textureCount), + D3D12_SHADER_VISIBILITY_PIXEL); + curPixelShader->textureRootParameterIndex = parameterIndex; + parameterIndex += 1; + } + + if (curPixelShader->hasDynamicUniformConstantBuffer) { + curRootSignatureTemp[parameterIndex].InitAsConstantBuffer(0, D3D12_SHADER_VISIBILITY_PIXEL); + curPixelShader->dynamicUniformConstantBufferRootParameterIndex = parameterIndex; + parameterIndex += 1; + } + + rootSignature->Finalize(L"", D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS); +} + +void gs_device::LoadCurrentGraphicsPSO(std::unique_ptr &PipelineState, + std::unique_ptr &rootSignature) +{ + DXGI_FORMAT zsForamt = curZStencilBuffer ? curZStencilBuffer->GetDSVFormat() : DXGI_FORMAT_UNKNOWN; + + PipelineState->SetRootSignature(*rootSignature); + PipelineState->SetRasterizerState(ConvertRasterState(curRasterState)); + PipelineState->SetBlendState(ConvertBlendState(curBlendState)); + PipelineState->SetDepthStencilState(ConvertZStencilState(curZstencilState)); + PipelineState->SetSampleMask(0xFFFFFFFF); + PipelineState->SetInputLayout((UINT)(curVertexShader->layoutData.size()), curVertexShader->layoutData.data()); + PipelineState->SetPrimitiveTopologyType(ConvertD3D12Topology(curToplogy)); + + PipelineState->SetVertexShader(curVertexShader->data.data(), curVertexShader->data.size()); + PipelineState->SetPixelShader(curPixelShader->data.data(), curPixelShader->data.size()); + + DXGI_FORMAT rtvFormat = curFramebufferSrgb ? 
curRenderTarget->dxgiFormatViewLinear
+						   : curRenderTarget->dxgiFormatView;
+	PipelineState->SetRenderTargetFormats(1, &rtvFormat, zsForamt);
+	PipelineState->Finalize();
+}
+
+void gs_device::UpdateViewProjMatrix()
+{
+	gs_matrix_get(&curViewMatrix);
+
+	/* negate Z col of the view matrix for right-handed coordinate system */
+	curViewMatrix.x.z = -curViewMatrix.x.z;
+	curViewMatrix.y.z = -curViewMatrix.y.z;
+	curViewMatrix.z.z = -curViewMatrix.z.z;
+	curViewMatrix.t.z = -curViewMatrix.t.z;
+
+	matrix4_mul(&curViewProjMatrix, &curViewMatrix, &curProjMatrix);
+	matrix4_transpose(&curViewProjMatrix, &curViewProjMatrix);
+
+	if (curVertexShader->viewProj)
+		gs_shader_set_matrix4(curVertexShader->viewProj, &curViewProjMatrix);
+}
+
+/* Re-binds the current render target (and optional depth/stencil view) on
+ * the command context when the framebuffer state was invalidated.
+ *
+ * The RTV is the linear (sRGB) view when curFramebufferSrgb is set, else the
+ * regular view, indexed by curRenderSide. When no render target is set there
+ * is nothing to transition or bind, so the call logs and returns with the
+ * invalidate flag still set; previously this path dereferenced a null
+ * resource and passed an uninitialised descriptor handle. */
+void gs_device::FlushOutputViews()
+{
+	if (!curFramebufferInvalidate)
+		return;
+
+	if (!curRenderTarget) {
+		blog(LOG_ERROR, "device_draw (D3D12): texture is not a render target");
+		return;
+	}
+
+	const int side = curRenderSide;
+	D3D12Graphics::GpuResource *rtv = curRenderTarget;
+	D3D12_CPU_DESCRIPTOR_HANDLE rtvDescriptor = curFramebufferSrgb ? curRenderTarget->renderTargetLinearRTV[side]
+								       : curRenderTarget->renderTargetRTV[side];
+
+	const D3D12_CPU_DESCRIPTOR_HANDLE *dsv = nullptr;
+	if (curZStencilBuffer) {
+		dsv = &curZStencilBuffer->GetDSV();
+	}
+
+	context->TransitionResource(*rtv, D3D12_RESOURCE_STATE_RENDER_TARGET);
+	context->SetRenderTargets(1, &rtvDescriptor, false, dsv);
+
+	curFramebufferInvalidate = false;
+}
+
+gs_monitor_color_info gs_device::GetMonitorColorInfo(HMONITOR hMonitor)
+{
+	D3D12Graphics::MonitorColorInfo info = D3D12Graphics::D3D12DeviceInstance::GetMonitorColorInfo(hMonitor);
+	return gs_monitor_color_info(info.m_HDR, info.m_BitsPerColor, info.m_SDRWhiteNits);
+}
+
+gs_device::gs_device(uint32_t adapterIdx)
+{
+	matrix4_identity(&curProjMatrix);
+	matrix4_identity(&curViewMatrix);
+	matrix4_identity(&curViewProjMatrix);
+
+	memset(&viewport, 0, sizeof(viewport));
+
+	for (size_t i = 0; i < GS_MAX_TEXTURES; i++) {
+		curTextures[i] = NULL;
+		curSamplers[i] = NULL;
+	}
+	InitDevice(adapterIdx);
+	device_set_render_target(this, NULL, NULL);
+}
+
+gs_device::~gs_device() {}
+
+const char *device_get_name(void)
+{
+	return "Direct3D 12";
+}
+
+int device_get_type(void)
+{
+	return GS_DEVICE_DIRECT3D_12;
+}
+
+const char *device_preprocessor_name(void)
+{
+	return "_D3D12";
+}
+
+bool device_enum_adapters(gs_device_t *device, bool (*callback)(void *param, const char *name, uint32_t id),
+			  void *param)
+{
+	UNUSED_PARAMETER(device);
+
+	try {
+		D3D12Graphics::D3D12DeviceInstance::EnumD3DAdapters(callback, param);
+		return true;
+
+	} catch (const HRError &error) {
+		blog(LOG_WARNING, "Failed enumerating devices: %s (%08lX)", error.str, error.hr);
+		return false;
+	}
+}
+
+int device_create(gs_device_t **p_device, uint32_t adapter)
+{
+	gs_device *device = NULL;
+	int errorcode = GS_SUCCESS;
+
+	try {
+		blog(LOG_INFO, "---------------------------------");
+		blog(LOG_INFO, "Initializing D3D12...");
+		D3D12Graphics::D3D12DeviceInstance::LogD3DAdapters();
+		device = new gs_device(adapter);
+
+	} catch (const UnsupportedHWError &error) {
+		blog(LOG_ERROR, "device_create (D3D12): %s (%08lX)", error.str, error.hr);
+		errorcode = GS_ERROR_NOT_SUPPORTED;
+
+	} catch (const HRError &error) {
+		blog(LOG_ERROR, "device_create (D3D12): %s (%08lX)", error.str, error.hr);
+		errorcode = GS_ERROR_FAIL;
+	}
+
+	*p_device = device;
+	return errorcode;
+}
+
+void device_destroy(gs_device_t *device)
+{
+	delete device;
+}
+
+void device_enter_context(gs_device_t *device)
+{
+	/* does nothing */
+	UNUSED_PARAMETER(device);
+}
+
+void device_leave_context(gs_device_t *device)
+{
+	/* does nothing */
+	UNUSED_PARAMETER(device);
+}
+
+void *device_get_device_obj(gs_device_t *device)
+{
+	return (void *)device->d3d12Instance->GetDevice();
+}
+
+gs_swapchain_t *device_swapchain_create(gs_device_t *device, const struct gs_init_data *data)
+{
+	gs_swap_chain *swap = NULL;
+
+	try {
+		swap = new gs_swap_chain(device, data);
+	} catch (const HRError &error) {
+		blog(LOG_ERROR, "device_swapchain_create (D3D12): %s (%08lX)", error.str, error.hr);
+		LogD3D12ErrorDetails(error, device);
+	}
+
+	return swap;
+}
+
+/* Resizes the current swap chain to cx/cy for the given color space and
+ * re-targets rendering at its current back buffer. The context is flushed
+ * and its render target cleared before the resize. HRError failures are
+ * logged, not propagated. */
+static void device_resize_internal(gs_device_t *device, uint32_t cx, uint32_t cy, gs_color_space space)
+{
+	try {
+		const gs_color_format format = get_swap_format_from_space(space, device->curSwapChain->initData.format);
+		device->context->Flush(true);
+		device->context->SetNullRenderTarget();
+		device->curSwapChain->Resize(cx, cy, format);
+		device->curRenderTarget = &device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex];
+		device->curSwapChain->space = space;
+		device->curFramebufferInvalidate = true;
+	} catch (const HRError &error) {
+		/* tag corrected: this is the D3D12 backend, not D3D11 */
+		blog(LOG_ERROR, "device_resize_internal (D3D12): %s (%08lX)", error.str, error.hr);
+		LogD3D12ErrorDetails(error, device);
+	}
+}
+
+void device_resize(gs_device_t *device, uint32_t cx, uint32_t cy)
+{ + if (!device->curSwapChain) { + blog(LOG_WARNING, "device_resize (D3D12): No active swap"); + return; + } + + const enum gs_color_space next_space = + get_next_space(device, device->curSwapChain->hwnd, device->curSwapChain->swapDesc.SwapEffect); + device_resize_internal(device, cx, cy, next_space); +} + +enum gs_color_space device_get_color_space(gs_device_t *device) +{ + return device->curColorSpace; +} + +void device_update_color_space(gs_device_t *device) +{ + if (device->curSwapChain) { + const enum gs_color_space next_space = + get_next_space(device, device->curSwapChain->hwnd, device->curSwapChain->swapDesc.SwapEffect); + if (device->curSwapChain->space != next_space) + device_resize_internal(device, 0, 0, next_space); + } else { + blog(LOG_WARNING, "device_update_color_space (D3D12): No active swap"); + } +} + +void device_get_size(const gs_device_t *device, uint32_t *cx, uint32_t *cy) +{ + if (device->curSwapChain) { + *cx = device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex].width; + *cy = device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex].height; + } else { + blog(LOG_ERROR, "device_get_size (D3D12): no active swap"); + *cx = 0; + *cy = 0; + } +} + +uint32_t device_get_width(const gs_device_t *device) +{ + if (device->curSwapChain) { + return device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex].width; + } else { + blog(LOG_ERROR, "device_get_size (D3D12): no active swap"); + return 0; + } +} + +uint32_t device_get_height(const gs_device_t *device) +{ + if (device->curSwapChain) { + return device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex].height; + } else { + blog(LOG_ERROR, "device_get_size (D3D12): no active swap"); + return 0; + } +} + +gs_texture_t *device_texture_create(gs_device_t *device, uint32_t width, uint32_t height, + enum gs_color_format color_format, uint32_t levels, const uint8_t **data, + uint32_t flags) +{ + gs_texture *texture = NULL; + try { 
+ texture = new gs_texture_2d(device, width, height, color_format, levels, data, flags, GS_TEXTURE_2D, + false); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_texture_create (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "device_texture_create (D3D12): %s", error); + } + + return texture; +} + +gs_texture_t *device_cubetexture_create(gs_device_t *device, uint32_t size, enum gs_color_format color_format, + uint32_t levels, const uint8_t **data, uint32_t flags) +{ + gs_texture *texture = NULL; + try { + texture = new gs_texture_2d(device, size, size, color_format, levels, data, flags, GS_TEXTURE_CUBE, + false); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_cubetexture_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "device_cubetexture_create (D3D12): %s", error); + } + + return texture; +} + +gs_texture_t *device_voltexture_create(gs_device_t *device, uint32_t width, uint32_t height, uint32_t depth, + enum gs_color_format color_format, uint32_t levels, const uint8_t *const *data, + uint32_t flags) +{ + gs_texture *texture = NULL; + try { + texture = new gs_texture_3d(device, width, height, depth, color_format, levels, data, flags); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_voltexture_create (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "device_voltexture_create (D3D12): %s", error); + } + + return texture; +} + +gs_zstencil_t *device_zstencil_create(gs_device_t *device, uint32_t width, uint32_t height, + enum gs_zstencil_format format) +{ + gs_zstencil_buffer *zstencil = NULL; + try { + zstencil = new gs_zstencil_buffer(device, width, height, format); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_zstencil_create (D3D12): %s (%08lX)", 
error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return zstencil; +} +gs_stagesurf_t *device_stagesurface_create(gs_device_t *device, uint32_t width, uint32_t height, + enum gs_color_format color_format) +{ + gs_stage_surface *surf = NULL; + try { + surf = new gs_stage_surface(device, width, height, color_format); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_stagesurface_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return surf; +} + +gs_samplerstate_t *device_samplerstate_create(gs_device_t *device, const struct gs_sampler_info *info) +{ + gs_sampler_state *ss = NULL; + try { + ss = new gs_sampler_state(device, info); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_samplerstate_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return ss; +} + +gs_shader_t *device_vertexshader_create(gs_device_t *device, const char *shader_string, const char *file, + char **error_string) +{ + gs_vertex_shader *shader = NULL; + try { + shader = new gs_vertex_shader(device, file, shader_string); + + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_vertexshader_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + + } catch (const ShaderError &error) { + const char *buf = (const char *)error.errors->GetBufferPointer(); + if (error_string) + *error_string = bstrdup(buf); + blog(LOG_ERROR, + "device_vertexshader_create (D3D12): " + "Compile warnings/errors for %s:\n%s", + file, buf); + + } catch (const char *error) { + blog(LOG_ERROR, "device_vertexshader_create (D3D12): %s", error); + } + + return shader; +} + +gs_shader_t *device_pixelshader_create(gs_device_t *device, const char *shader_string, const char *file, + char **error_string) +{ + gs_pixel_shader *shader = NULL; + try { + shader = new gs_pixel_shader(device, file, shader_string); + + } catch (const 
HRError &error) { + blog(LOG_ERROR, + "device_pixelshader_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + + } catch (const ShaderError &error) { + const char *buf = (const char *)error.errors->GetBufferPointer(); + if (error_string) + *error_string = bstrdup(buf); + blog(LOG_ERROR, + "device_pixelshader_create (D3D12): " + "Compiler warnings/errors for %s:\n%s", + file, buf); + + } catch (const char *error) { + blog(LOG_ERROR, "device_pixelshader_create (D3D12): %s", error); + } + + return shader; +} + +gs_vertbuffer_t *device_vertexbuffer_create(gs_device_t *device, struct gs_vb_data *data, uint32_t flags) +{ + gs_vertex_buffer *buffer = NULL; + try { + buffer = new gs_vertex_buffer(device, data, flags); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_vertexbuffer_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "device_vertexbuffer_create (D3D12): %s", error); + } + + return buffer; +} + +gs_indexbuffer_t *device_indexbuffer_create(gs_device_t *device, enum gs_index_type type, void *indices, size_t num, + uint32_t flags) +{ + gs_index_buffer *buffer = NULL; + try { + buffer = new gs_index_buffer(device, type, indices, num, flags); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_indexbuffer_create (D3D11): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return buffer; +} + +gs_timer_t *device_timer_create(gs_device_t *device) +{ + gs_timer *timer = NULL; + try { + timer = new gs_timer(device); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_timer_create (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return timer; +} + +gs_timer_range_t *device_timer_range_create(gs_device_t *device) +{ + gs_timer_range *range = NULL; + try { + range = new gs_timer_range(device); + } catch (const HRError &error) { + 
blog(LOG_ERROR, "device_timer_range_create (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return range; +} + +enum gs_texture_type device_get_texture_type(const gs_texture_t *texture) +{ + return texture->type; +} + +void device_load_vertexbuffer(gs_device_t *device, gs_vertbuffer_t *vertbuffer) +{ + if (device->curVertexBuffer == vertbuffer) + return; + + device->curVertexBuffer = vertbuffer; +} + +void device_load_indexbuffer(gs_device_t *device, gs_indexbuffer_t *indexbuffer) +{ + if (device->curIndexBuffer == indexbuffer) + return; + + device->curIndexBuffer = indexbuffer; +} + +static void device_load_texture_internal(gs_device_t *device, gs_texture_t *tex, int unit, + const D3D12_CPU_DESCRIPTOR_HANDLE *handle) +{ + if (device->curTextures[unit] == tex) + return; + + device->curTextures[unit] = tex; + device->context->SetDynamicDescriptor(device->curPixelShader->textureRootParameterIndex, unit, *handle); +} + +void device_load_texture(gs_device_t *device, gs_texture_t *tex, int unit) +{ + if (device->curTextures[unit] == tex) + return; + + const D3D12_CPU_DESCRIPTOR_HANDLE *handle = nullptr; + if (tex) { + handle = &tex->shaderSRV; + device->context->TransitionResource(*tex, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } else { + handle = nullptr; + } + device_load_texture_internal(device, tex, unit, handle); +} + +void device_load_texture_srgb(gs_device_t *device, gs_texture_t *tex, int unit) +{ + if (device->curTextures[unit] == tex) + return; + const D3D12_CPU_DESCRIPTOR_HANDLE *handle = nullptr; + if (tex) { + handle = &tex->shaderLinearSRV; + device->context->TransitionResource(*tex, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } else { + handle = nullptr; + } + device_load_texture_internal(device, tex, unit, handle); +} + +void device_load_samplerstate(gs_device_t *device, gs_samplerstate_t *samplerstate, int unit) +{ + const D3D12_CPU_DESCRIPTOR_HANDLE *handle = nullptr; + if (device->curSamplers[unit] == 
samplerstate) + return; + + if (samplerstate) { + handle = &samplerstate->sampleDesc.Sampler; + } + + device->curSamplers[unit] = samplerstate; + device->context->SetDynamicSampler(device->curPixelShader->samplerRootParameterIndex, unit, *handle); +} + +void device_load_vertexshader(gs_device_t *device, gs_shader_t *vertshader) +{ + if (device->curVertexShader == vertshader) + return; + + gs_vertex_shader *vs = static_cast(vertshader); + + if (vertshader) { + if (vertshader->type != GS_SHADER_VERTEX) { + blog(LOG_ERROR, "device_load_vertexshader (D3D12): " + "Specified shader is not a vertex " + "shader"); + return; + } + } + + device->curVertexShader = vs; +} + +void device_load_pixelshader(gs_device_t *device, gs_shader_t *pixelshader) +{ + gs_samplerstate_t *states[GS_MAX_TEXTURES] = {0}; + + if (device->curPixelShader == pixelshader) + return; + + gs_pixel_shader *ps = static_cast(pixelshader); + + if (pixelshader) { + if (pixelshader->type != GS_SHADER_PIXEL) { + blog(LOG_ERROR, "device_load_pixelshader (D3D11): " + "Specified shader is not a pixel " + "shader"); + return; + } + + } else { + memset(states, 0, sizeof(states)); + } + + memset(device->curTextures, 0, sizeof(device->curTextures)); + + device->curPixelShader = ps; +} + +void device_load_default_samplerstate(gs_device_t *device, bool b_3d, int unit) +{ + /* TODO */ + UNUSED_PARAMETER(device); + UNUSED_PARAMETER(b_3d); + UNUSED_PARAMETER(unit); +} + +gs_shader_t *device_get_vertex_shader(const gs_device_t *device) +{ + return device->curVertexShader; +} + +gs_shader_t *device_get_pixel_shader(const gs_device_t *device) +{ + return device->curPixelShader; +} + +gs_texture_t *device_get_render_target(const gs_device_t *device) +{ + return device->curRenderTarget; +} + +gs_zstencil_t *device_get_zstencil_target(const gs_device_t *device) +{ + return device->curZStencilBuffer; +} + +static void device_set_render_target_internal(gs_device_t *device, gs_texture_t *tex, gs_zstencil_t *zstencil, + enum 
gs_color_space space)
+{
+	if (device->curSwapChain) {
+		if (!tex)
+			tex = &device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex];
+		if (!zstencil)
+			zstencil = device->curSwapChain->zs.get();
+	}
+
+	if (device->curRenderTarget == tex && device->curZStencilBuffer == zstencil) {
+		device->curColorSpace = space;
+	}
+
+	if (tex && tex->type != GS_TEXTURE_2D) {
+		blog(LOG_ERROR, "device_set_render_target_internal (D3D12): texture is not a 2D texture");
+		return;
+	}
+
+	gs_texture_2d *const tex2d = static_cast<gs_texture_2d *>(tex);
+	if (device->curRenderTarget != tex2d || device->curRenderSide != 0 || device->curZStencilBuffer != zstencil) {
+		device->curRenderTarget = tex2d;
+		device->curZStencilBuffer = zstencil;
+		device->curRenderSide = 0;
+		device->curColorSpace = space;
+		device->curFramebufferInvalidate = true;
+	}
+}
+
+void device_set_render_target(gs_device_t *device, gs_texture_t *tex, gs_zstencil_t *zstencil)
+{
+	device_set_render_target_internal(device, tex, zstencil, GS_CS_SRGB);
+}
+
+void device_set_render_target_with_color_space(gs_device_t *device, gs_texture_t *tex, gs_zstencil_t *zstencil,
+					       enum gs_color_space space)
+{
+	device_set_render_target_internal(device, tex, zstencil, space);
+}
+
+/* Selects face `side` of cube texture `tex` (plus `zstencil`) as the render
+ * target. With an active swap chain, a NULL `tex`/`zstencil` falls back to
+ * the swap chain's back buffer / depth buffer. A non-NULL texture that is
+ * not a cube texture is rejected with an error log. A NULL texture with no
+ * swap chain clears the target, matching device_set_render_target_internal;
+ * it used to be dereferenced. */
+void device_set_cube_render_target(gs_device_t *device, gs_texture_t *tex, int side, gs_zstencil_t *zstencil)
+{
+	if (device->curSwapChain) {
+		if (!tex) {
+			tex = &device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex];
+			side = 0;
+		}
+
+		if (!zstencil)
+			zstencil = device->curSwapChain->zs.get();
+	}
+
+	if (device->curRenderTarget == tex && device->curRenderSide == side && device->curZStencilBuffer == zstencil)
+		return;
+
+	if (tex && tex->type != GS_TEXTURE_CUBE) {
+		/* tag corrected: this is the D3D12 backend, not D3D11 */
+		blog(LOG_ERROR, "device_set_cube_render_target (D3D12): "
+				"texture is not a cube texture");
+		return;
+	}
+
+	gs_texture_2d *const tex2d = static_cast<gs_texture_2d *>(tex);
+	if (device->curRenderTarget != tex2d || device->curRenderSide != side ||
+	    device->curZStencilBuffer != zstencil) {
+		device->curRenderTarget = tex2d;
+		device->curZStencilBuffer = zstencil;
+		device->curRenderSide = side;
+		device->curColorSpace = GS_CS_SRGB;
+		device->curFramebufferInvalidate = true;
+	}
+}
+
+void device_enable_framebuffer_srgb(gs_device_t *device, bool enable)
+{
+	if (device->curFramebufferSrgb != enable) {
+		device->curFramebufferSrgb = enable;
+		device->curFramebufferInvalidate = true;
+	}
+}
+
+bool device_framebuffer_srgb_enabled(gs_device_t *device)
+{
+	return device->curFramebufferSrgb;
+}
+
+static DXGI_FORMAT get_copy_compare_format(gs_color_format format)
+{
+	switch (format) {
+	case GS_RGBA_UNORM:
+		return DXGI_FORMAT_R8G8B8A8_TYPELESS;
+	case GS_BGRX_UNORM:
+		return DXGI_FORMAT_B8G8R8X8_TYPELESS;
+	case GS_BGRA_UNORM:
+		return DXGI_FORMAT_B8G8R8A8_TYPELESS;
+	default:
+		return ConvertGSTextureFormatResource(format);
+	}
+}
+
+/* Copies a rectangle of `src` into `dst` at (dst_x, dst_y).
+ *
+ * src_w / src_h of 0 mean "to the right / bottom edge of the source". Both
+ * textures must be 2D with matching copy formats, and the destination must
+ * be able to hold the region; violations are logged and nothing is copied.
+ *
+ * Fixes over the previous version:
+ *  - a whole-texture copy issued CopyBuffer and then a second, redundant
+ *    CopyTextureRegion; exactly one copy is recorded now, and the
+ *    whole-resource path is only taken when both textures have the same
+ *    dimensions;
+ *  - the implicit region end used width - 1 / height - 1 while the explicit
+ *    one used src_x + src_w, so "copy to edge" dropped the last column and
+ *    row. The end is now left/top plus the copy extent in both cases. */
+void device_copy_texture_region(gs_device_t *device, gs_texture_t *dst, uint32_t dst_x, uint32_t dst_y,
+				gs_texture_t *src, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h)
+{
+	try {
+		gs_texture_2d *src2d = static_cast<gs_texture_2d *>(src);
+		gs_texture_2d *dst2d = static_cast<gs_texture_2d *>(dst);
+
+		if (!src)
+			throw "Source texture is NULL";
+		if (!dst)
+			throw "Destination texture is NULL";
+		if (src->type != GS_TEXTURE_2D || dst->type != GS_TEXTURE_2D)
+			throw "Source and destination textures must be a 2D "
+			      "textures";
+		if (get_copy_compare_format(dst->format) != get_copy_compare_format(src->format))
+			throw "Source and destination formats do not match";
+
+		const uint32_t copyWidth = src_w ? src_w : (src2d->width - src_x);
+		const uint32_t copyHeight = src_h ? src_h : (src2d->height - src_y);
+
+		const uint32_t dstWidth = dst2d->width - dst_x;
+		const uint32_t dstHeight = dst2d->height - dst_y;
+
+		if (dstWidth < copyWidth || dstHeight < copyHeight)
+			throw "Destination texture region is not big "
+			      "enough to hold the source region";
+
+		const bool wholeTexture = dst_x == 0 && dst_y == 0 && src_x == 0 && src_y == 0 && src_w == 0 &&
+					  src_h == 0 && dst2d->width == src2d->width &&
+					  dst2d->height == src2d->height;
+		if (wholeTexture) {
+			device->context->CopyBuffer(*dst2d, *src2d);
+			return;
+		}
+
+		RECT RectRegion;
+		RectRegion.left = src_x;
+		RectRegion.top = src_y;
+		RectRegion.right = src_x + copyWidth;
+		RectRegion.bottom = src_y + copyHeight;
+		device->context->CopyTextureRegion(*dst, dst_x, dst_y, 0, *src, RectRegion);
+
+	} catch (const char *error) {
+		blog(LOG_ERROR, "device_copy_texture (D3D12): %s", error);
+	}
+}
+
+void device_copy_texture(gs_device_t *device, gs_texture_t *dst, gs_texture_t *src)
+{
+	device_copy_texture_region(device, dst, 0, 0, src, 0, 0, 0, 0);
+}
+
+void device_stage_texture(gs_device_t *device, gs_stagesurf_t *dst, gs_texture_t *src)
+{
+	try {
+		gs_texture_2d *src2d = static_cast<gs_texture_2d *>(src);
+
+		if (!src)
+			throw "Source texture is NULL";
+		if (src->type != GS_TEXTURE_2D)
+			throw "Source texture must be a 2D texture";
+		if (!dst)
+			throw "Destination surface is NULL";
+		if (dst->format != GS_UNKNOWN && dst->format != src->format)
+			throw "Source and destination formats do not match";
+		if (dst->width != src2d->width || dst->height != src2d->height)
+			throw "Source and destination must have the same "
+			      "dimensions";
+
+		device->context->ReadbackTexture(*dst, *src);
+	} catch (const char *error) {
+		blog(LOG_ERROR, "device_copy_texture (D3D12): %s", error);
+	}
+}
+
+extern "C" void reset_duplicators(void);
+void device_begin_frame(gs_device_t *device)
+{
+	
reset_duplicators();
+	if (!device->context) {
+		device->context = device->d3d12Instance->GetNewGraphicsContext();
+	}
+}
+
+void device_end_frame(gs_device_t *device)
+{
+	device->context->Finish();
+	device->context = device->d3d12Instance->GetNewGraphicsContext();
+}
+
+void device_begin_scene(gs_device_t *device)
+{
+	UNUSED_PARAMETER(device);
+}
+
+/* Records one draw with the currently loaded shaders, buffers and state.
+ *
+ * Per call this flushes the output views, updates effect parameters and the
+ * view-projection matrix, builds a root signature and pipeline state from
+ * the current state, stages the pixel shader's samplers, uploads shader
+ * parameters and issues an indexed or non-indexed instanced draw.
+ * num_verts == 0 means "draw the whole index/vertex buffer".
+ * Precondition failures (const char *) and HRError are logged and abort the
+ * draw. */
+void device_draw(gs_device_t *device, enum gs_draw_mode draw_mode, uint32_t start_vert, uint32_t num_verts)
+{
+	try {
+		if (!device->curVertexShader)
+			throw "No vertex shader specified";
+
+		if (!device->curPixelShader)
+			throw "No pixel shader specified";
+
+		if (!device->curVertexBuffer && (num_verts == 0))
+			throw "No vertex buffer specified";
+
+		if (!device->curSwapChain && !device->curRenderTarget)
+			throw "No render target or swap chain to render to";
+
+		device->FlushOutputViews();
+		gs_effect_t *effect = gs_get_effect();
+		if (effect)
+			gs_effect_update_params(effect);
+
+		device->UpdateViewProjMatrix();
+
+		device->curToplogy = ConvertGSTopology(draw_mode);
+
+		device->LoadVertexBufferData();
+
+		device->context->SetVertexBuffers(0, (UINT)device->curVertexBufferViews.size(),
+						  device->curVertexBufferViews.data());
+
+		/* NOTE(review): the template arguments of the two unique_ptr /
+		 * make_unique pairs below look stripped in this copy of the
+		 * patch - confirm against the original source. */
+		std::unique_ptr rootSig =
+			std::make_unique(device->d3d12Instance);
+		device->LoadRootSignature(rootSig);
+
+		std::unique_ptr pso =
+			std::make_unique(device->d3d12Instance);
+		device->LoadCurrentGraphicsPSO(pso, rootSig);
+
+		device->context->SetPipelineState(*(pso));
+		device->context->SetRootSignature(*(rootSig));
+
+		gs_samplerstate_t *states[GS_MAX_TEXTURES] = {0};
+		device->curPixelShader->GetSamplerStates(states);
+
+		for (int32_t i = 0; i < GS_MAX_TEXTURES; ++i) {
+			/* Slots the shader does not use stay null: nothing to
+			 * stage and nothing was overridden. The old second loop
+			 * dereferenced states[i] for such slots. */
+			if (states[i] == nullptr)
+				continue;
+
+			device->context->SetDynamicSampler(device->curPixelShader->samplerRootParameterIndex, i,
+							   states[i]->sampleDesc.Sampler);
+
+			/* The shader's sampler replaced whatever was loaded for
+			 * this unit, so drop a cached entry that differs. */
+			if (device->curSamplers[i] &&
+			    device->curSamplers[i]->sampleDesc.Sampler.ptr != states[i]->sampleDesc.Sampler.ptr) {
+				device->curSamplers[i] = nullptr;
+			}
+		}
+
+		device->curVertexShader->UploadParams();
+		device->curPixelShader->UploadParams();
+
+		D3D12Graphics::Color blendFactor = {1.0f, 1.0f, 1.0f, 1.0f};
+
+		device->context->SetBlendFactor(blendFactor);
+
+		device->context->SetStencilRef(0);
+		D3D12_PRIMITIVE_TOPOLOGY newToplogy = ConvertGSTopology(draw_mode);
+		device->context->SetPrimitiveTopology(newToplogy);
+	} catch (const char *error) {
+		blog(LOG_ERROR, "device_draw (D3D12): %s", error);
+		return;
+
+	} catch (const HRError &error) {
+		blog(LOG_ERROR, "device_draw (D3D12): %s (%08lX)", error.str, error.hr);
+		LogD3D12ErrorDetails(error, device);
+		return;
+	}
+
+	if (device->curIndexBuffer) {
+		if (num_verts == 0)
+			num_verts = (uint32_t)device->curIndexBuffer->num;
+		device->context->DrawIndexedInstanced(num_verts, 1, start_vert, 0, 0);
+	} else {
+		if (num_verts == 0)
+			num_verts = (uint32_t)device->curVertexBuffer->numVerts;
+		device->context->DrawInstanced(num_verts, 1, start_vert, 0);
+	}
+}
+
+void device_end_scene(gs_device_t *device) {}
+
+void device_load_swapchain(gs_device_t *device, gs_swapchain_t *swapchain)
+{
+	gs_texture_t *target = device->curRenderTarget;
+	gs_zstencil_t *zs = device->curZStencilBuffer;
+	bool is_cube = device->curRenderTarget ? (device->curRenderTarget->type == GS_TEXTURE_CUBE) : false;
+
+	if (device->curSwapChain) {
+		if (target == &device->curSwapChain->target[device->curSwapChain->currentBackBufferIndex])
+			target = NULL;
+		if (zs == device->curSwapChain->zs.get())
+			zs = NULL;
+	}
+
+	device->curSwapChain = swapchain;
+
+	if (is_cube) {
+		device_set_cube_render_target(device, target, device->curRenderSide, zs);
+	} else {
+		const enum gs_color_space space = swapchain ?
swapchain->space : GS_CS_SRGB;
+		device_set_render_target_internal(device, target, zs, space);
+	}
+}
+
+/* Clears the current render target and/or depth-stencil buffer according to
+ * clear_flags (GS_CLEAR_COLOR / GS_CLEAR_DEPTH / GS_CLEAR_STENCIL).
+ *
+ * The color clear covers the full render target using the sRGB or regular
+ * RTV for the current render side.
+ * NOTE(review): `depth` and `stencil` are not forwarded - the context's
+ * ClearDepthAndStencil() is called with only the buffer, and it is used even
+ * when just one of the two flags is set. Confirm this is intended. */
+void device_clear(gs_device_t *device, uint32_t clear_flags, const struct vec4 *color, float depth, uint8_t stencil)
+{
+	UNUSED_PARAMETER(depth);
+	UNUSED_PARAMETER(stencil);
+
+	if (clear_flags & GS_CLEAR_COLOR) {
+		gs_texture_2d *const tex = device->curRenderTarget;
+		if (tex) {
+			const int side = device->curRenderSide;
+			D3D12_CPU_DESCRIPTOR_HANDLE rtv = device->curFramebufferSrgb ? tex->renderTargetLinearRTV[side]
+										     : tex->renderTargetRTV[side];
+			device->context->TransitionResource(*tex, D3D12_RESOURCE_STATE_RENDER_TARGET);
+			D3D12_RECT Rect;
+			Rect.left = 0;
+			Rect.top = 0;
+			Rect.right = tex->width;
+			Rect.bottom = tex->height;
+			device->context->ClearColor(rtv, color->ptr, 1, &Rect);
+		}
+	}
+
+	const bool clearDepth = (clear_flags & GS_CLEAR_DEPTH) != 0;
+	const bool clearStencil = (clear_flags & GS_CLEAR_STENCIL) != 0;
+
+	if (device->curZStencilBuffer && (clearDepth || clearStencil)) {
+		D3D12Graphics::DepthBuffer &depthBuffer = (D3D12Graphics::DepthBuffer &)(*device->curZStencilBuffer);
+		device->context->ClearDepthAndStencil(depthBuffer);
+	}
+}
+
+bool device_is_present_ready(gs_device_t *device)
+{
+	gs_swap_chain *const curSwapChain = device->curSwapChain;
+	bool ready = curSwapChain != nullptr;
+	if (ready) {
+		device->context->TransitionResource(*device->curRenderTarget, D3D12_RESOURCE_STATE_RENDER_TARGET);
+	} else {
+		blog(LOG_WARNING, "device_is_present_ready (D3D12): No active swap");
+	}
+
+	return ready;
+}
+
+/* Presents the current swap chain: transitions its back buffer to the
+ * PRESENT state, submits the recorded commands, obtains a fresh context,
+ * presents, then re-targets rendering at the new back buffer.
+ *
+ * Fixes over the previous version:
+ *  - the PRESENT transition is applied to the swap chain's current back
+ *    buffer, not to whatever render target is bound (which may be another
+ *    texture or NULL);
+ *  - the leftover __debugbreak() in the failure path is removed;
+ *  - the failure log prints the Present() HRESULT the message refers to,
+ *    with the device-removed reason alongside it. */
+void device_present(gs_device_t *device)
+{
+	gs_swap_chain *const curSwapChain = device->curSwapChain;
+	if (!curSwapChain) {
+		blog(LOG_WARNING, "device_present (D3D12): No active swap");
+		return;
+	}
+
+	device->curFramebufferInvalidate = true;
+	device->context->TransitionResource(curSwapChain->target[curSwapChain->currentBackBufferIndex],
+					    D3D12_RESOURCE_STATE_PRESENT);
+	device->context->Finish();
+	device->context = device->d3d12Instance->GetNewGraphicsContext();
+
+	const HRESULT hr = curSwapChain->swap->Present(0, 0);
+	if (FAILED(hr)) {
+		const HRESULT removeReason = device->d3d12Instance->GetDevice()->GetDeviceRemovedReason();
+		blog(LOG_ERROR,
+		     "device_present (D3D12): IDXGISwapChain::Present failed %08lX "
+		     "(device removed reason: %08lX)",
+		     hr, removeReason);
+	}
+
+	curSwapChain->currentBackBufferIndex = curSwapChain->swap->GetCurrentBackBufferIndex();
+	device->curRenderTarget = &curSwapChain->target[curSwapChain->currentBackBufferIndex];
+}
+
+void device_flush(gs_device_t *device)
+{
+	if (device->context) {
+		device->context->Flush();
+	}
+}
+
+void device_set_cull_mode(gs_device_t *device, enum gs_cull_mode mode)
+{
+	if (mode == device->curRasterState.cullMode)
+		return;
+
+	device->curRasterState.cullMode = mode;
+}
+
+enum gs_cull_mode device_get_cull_mode(const gs_device_t *device)
+{
+	return device->curRasterState.cullMode;
+}
+
+void device_enable_blending(gs_device_t *device, bool enable)
+{
+	if (enable == device->curBlendState.blendEnabled)
+		return;
+
+	device->curBlendState.blendEnabled = enable;
+}
+
+void device_enable_depth_test(gs_device_t *device, bool enable)
+{
+	if (enable == device->curZstencilState.depthEnabled)
+		return;
+
+	device->curZstencilState.depthEnabled = enable;
+}
+
+void device_enable_stencil_test(gs_device_t *device, bool enable)
+{
+	if (enable == device->curZstencilState.stencilEnabled)
+		return;
+
+	device->curZstencilState.stencilEnabled = enable;
+}
+
+void device_enable_stencil_write(gs_device_t *device, bool enable)
+{
+	if (enable == device->curZstencilState.stencilWriteEnabled)
+		return;
+
+	device->curZstencilState.stencilWriteEnabled = enable;
+}
+
+void device_enable_color(gs_device_t *device, bool red, bool green, bool blue, bool alpha)
+{
+	if (device->curBlendState.redEnabled == red && device->curBlendState.greenEnabled == green &&
+	    device->curBlendState.blueEnabled == blue && device->curBlendState.alphaEnabled == alpha)
+		return;
+
+	
device->curBlendState.redEnabled = red; + device->curBlendState.greenEnabled = green; + device->curBlendState.blueEnabled = blue; + device->curBlendState.alphaEnabled = alpha; +} +/* Stores src/dest as both the color and the alpha blend factors in the cached blend state; returns early when all four already match. */ +void device_blend_function(gs_device_t *device, enum gs_blend_type src, enum gs_blend_type dest) +{ + if (device->curBlendState.srcFactorC == src && device->curBlendState.destFactorC == dest && + device->curBlendState.srcFactorA == src && device->curBlendState.destFactorA == dest) + return; + + device->curBlendState.srcFactorC = src; + device->curBlendState.destFactorC = dest; + device->curBlendState.srcFactorA = src; + device->curBlendState.destFactorA = dest; +} +/* Stores independent color (src_c/dest_c) and alpha (src_a/dest_a) blend factors in the cached blend state. */ +void device_blend_function_separate(gs_device_t *device, enum gs_blend_type src_c, enum gs_blend_type dest_c, + enum gs_blend_type src_a, enum gs_blend_type dest_a) +{ + if (device->curBlendState.srcFactorC == src_c && device->curBlendState.destFactorC == dest_c && + device->curBlendState.srcFactorA == src_a && device->curBlendState.destFactorA == dest_a) + return; + + device->curBlendState.srcFactorC = src_c; + device->curBlendState.destFactorC = dest_c; + device->curBlendState.srcFactorA = src_a; + device->curBlendState.destFactorA = dest_a; +} +/* Records the blend operation in the cached blend state. */ +void device_blend_op(gs_device_t *device, enum gs_blend_op_type op) +{ + if (device->curBlendState.op == op) + return; + + device->curBlendState.op = op; +} +/* Records the depth comparison in the cached depth-stencil state. */ +void device_depth_function(gs_device_t *device, enum gs_depth_test test) +{ + if (device->curZstencilState.depthFunc == test) + return; + + device->curZstencilState.depthFunc = test; +} +/* Stores the stencil comparison for one face; the device parameter is not read here. */ +static inline void update_stencilside_test(gs_device_t *device, StencilSide &side, gs_depth_test test) +{ + if (side.test == test) + return; + + side.test = test; +} +/* Applies the stencil comparison to the front and/or back face, treating side as a bit mask of GS_STENCIL_FRONT and GS_STENCIL_BACK. */ +void device_stencil_function(gs_device_t *device, enum gs_stencil_side side, enum gs_depth_test test) +{ + int sideVal = (int)side; + + if (sideVal & GS_STENCIL_FRONT) + update_stencilside_test(device, device->curZstencilState.stencilFront, test); + if (sideVal & 
GS_STENCIL_BACK) + update_stencilside_test(device, device->curZstencilState.stencilBack, test); +} +/* Stores the fail/zfail/zpass stencil operations for one face; the device parameter is not read here. */ +static inline void update_stencilside_op(gs_device_t *device, StencilSide &side, enum gs_stencil_op_type fail, + enum gs_stencil_op_type zfail, enum gs_stencil_op_type zpass) +{ + if (side.fail == fail && side.zfail == zfail && side.zpass == zpass) + return; + + side.fail = fail; + side.zfail = zfail; + side.zpass = zpass; +} +/* Applies the stencil operations to the front and/or back face selected by the side bit mask. */ +void device_stencil_op(gs_device_t *device, enum gs_stencil_side side, enum gs_stencil_op_type fail, + enum gs_stencil_op_type zfail, enum gs_stencil_op_type zpass) +{ + int sideVal = (int)side; + + if (sideVal & GS_STENCIL_FRONT) + update_stencilside_op(device, device->curZstencilState.stencilFront, fail, zfail, zpass); + if (sideVal & GS_STENCIL_BACK) + update_stencilside_op(device, device->curZstencilState.stencilBack, fail, zfail, zpass); +} +/* Sets viewport and scissor on the graphics context and caches the viewport rectangle on the device. */ +void device_set_viewport(gs_device_t *device, int x, int y, int width, int height) +{ + device->context->SetViewportAndScissor(x, y, width, height); + + device->viewport.x = x; + device->viewport.y = y; + device->viewport.cx = width; + device->viewport.cy = height; +} +/* Copies the cached viewport rectangle into rect. */ +void device_get_viewport(const gs_device_t *device, struct gs_rect *rect) +{ + memcpy(rect, &device->viewport, sizeof(gs_rect)); +} +/* Sets the scissor rectangle. A NULL rect means "no scissoring": D3D12 has no scissor enable flag, so the scissor is reset to the cached viewport instead of leaving the previously set rectangle active. */ +void device_set_scissor_rect(gs_device_t *device, const struct gs_rect *rect) +{ + const struct gs_rect *const src = (rect != NULL) ? rect : &device->viewport; + D3D12_RECT d3drect; + d3drect.left = src->x; + d3drect.top = src->y; + d3drect.right = src->x + src->cx; + d3drect.bottom = src->y + src->cy; + device->context->SetScissor(d3drect); +} +/* Writes an orthographic projection into the device's current projection matrix. */ +void device_ortho(gs_device_t *device, float left, float right, float top, float bottom, float zNear, float zFar) +{ + matrix4 *dst = &device->curProjMatrix; + + float rml = right - left; + float bmt = bottom - top; + float fmn = zFar - zNear; + + vec4_zero(&dst->x); + vec4_zero(&dst->y); + vec4_zero(&dst->z); + vec4_zero(&dst->t); + + dst->x.x = 2.0f / rml; + dst->t.x = (left + right) / -rml; + + 
dst->y.y = 2.0f / -bmt; + dst->t.y = (bottom + top) / bmt; + + dst->z.z = 1.0f / fmn; + dst->t.z = zNear / -fmn; + + dst->t.w = 1.0f; +} +/* Writes a perspective (frustum) projection into the device's current projection matrix. */ +void device_frustum(gs_device_t *device, float left, float right, float top, float bottom, float zNear, float zFar) +{ + matrix4 *dst = &device->curProjMatrix; + + float rml = right - left; + float bmt = bottom - top; + float fmn = zFar - zNear; + float nearx2 = 2.0f * zNear; + + vec4_zero(&dst->x); + vec4_zero(&dst->y); + vec4_zero(&dst->z); + vec4_zero(&dst->t); + + dst->x.x = nearx2 / rml; + dst->z.x = (left + right) / -rml; + + dst->y.y = nearx2 / -bmt; + dst->z.y = (bottom + top) / bmt; + + dst->z.z = zFar / fmn; + dst->t.z = (zNear * zFar) / -fmn; + + dst->z.w = 1.0f; +} +/* Pushes a copy of the current projection matrix onto the projection stack. */ +void device_projection_push(gs_device_t *device) +{ + mat4float mat; + memcpy(&mat, &device->curProjMatrix, sizeof(matrix4)); + device->projStack.push_back(mat); +} +/* Restores the projection matrix from the top of the stack; does nothing when the stack is empty. */ +void device_projection_pop(gs_device_t *device) +{ + if (device->projStack.empty()) + return; + + const mat4float &mat = device->projStack.back(); + memcpy(&device->curProjMatrix, &mat, sizeof(matrix4)); + device->projStack.pop_back(); +} +/* Unloads the swap chain from the device if it is the current one, then deletes it. */ +void gs_swapchain_destroy(gs_swapchain_t *swapchain) +{ + if (swapchain->device->curSwapChain == swapchain) + device_load_swapchain(swapchain->device, nullptr); + + delete swapchain; +} + +void gs_texture_destroy(gs_texture_t *tex) +{ + if (tex == nullptr) { + return; + } + delete tex; +} +/* The three getters below return 0 / GS_UNKNOWN for anything that is not a 2D texture. The static_cast target type, lost from this text, is restored as const gs_texture_2d *. */ +uint32_t gs_texture_get_width(const gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) + return 0; + + return static_cast<const gs_texture_2d *>(tex)->width; +} + +uint32_t gs_texture_get_height(const gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) + return 0; + + return static_cast<const gs_texture_2d *>(tex)->height; +} + +enum gs_color_format gs_texture_get_color_format(const gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) + return GS_UNKNOWN; + + return static_cast<const gs_texture_2d *>(tex)->format; +} + +bool gs_texture_map(gs_texture_t *tex, uint8_t **ptr, uint32_t *linesize) +{ + if (tex->type != 
GS_TEXTURE_2D) { + return false; + } + + gs_texture_2d *texture = (gs_texture_2d *)(tex); + + if (!texture->isDynamic) { + return false; + } + + D3D12Graphics::UploadBuffer *upload = texture->uploadBuffer.get(); + /* A dynamic texture without an upload buffer cannot be mapped; fail instead of dereferencing null. */ + if (!upload) { + return false; + } + *ptr = (uint8_t *)upload->Map(); + + *linesize = texture->GetLineSize(); + + return !!(*ptr); +} +/* Unmaps the upload buffer of a dynamic 2D texture, copies it into the texture, moves the texture to the pixel-shader-resource state and flushes the context. Non-2D, non-dynamic or buffer-less textures are ignored. */ +void gs_texture_unmap(gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) { + return; + } + + gs_texture_2d *texture = (gs_texture_2d *)(tex); + + if (!texture->isDynamic) { + return; + } + + D3D12Graphics::UploadBuffer *upload = texture->uploadBuffer.get(); + if (!upload) { + return; + } + upload->Unmap(); + texture->device->context->UpdateTexture(*texture, *upload); + texture->device->context->TransitionResource(*texture, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + texture->device->context->Flush(); +} +/* Returns the underlying resource of a 2D texture, or nullptr for other texture types. */ +void *gs_texture_get_obj(gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) + return nullptr; + + gs_texture_2d *tex2d = static_cast<gs_texture_2d *>(tex); + return tex2d->GetResource(); +} + +void gs_cubetexture_destroy(gs_texture_t *cubetex) +{ + delete cubetex; +} +/* Cube textures are stored as gs_texture_2d; the getters return 0 / GS_UNKNOWN for any other type. */ +uint32_t gs_cubetexture_get_size(const gs_texture_t *cubetex) +{ + if (cubetex->type != GS_TEXTURE_CUBE) + return 0; + + const gs_texture_2d *tex = static_cast<const gs_texture_2d *>(cubetex); + return tex->width; +} + +enum gs_color_format gs_cubetexture_get_color_format(const gs_texture_t *cubetex) +{ + if (cubetex->type != GS_TEXTURE_CUBE) + return GS_UNKNOWN; + + const gs_texture_2d *tex = static_cast<const gs_texture_2d *>(cubetex); + return tex->format; +} + +void gs_voltexture_destroy(gs_texture_t *voltex) +{ + delete voltex; +} + +uint32_t gs_voltexture_get_width(const gs_texture_t *voltex) +{ + /* TODO */ + UNUSED_PARAMETER(voltex); + return 0; +} + +uint32_t gs_voltexture_get_height(const gs_texture_t *voltex) +{ + /* TODO */ + UNUSED_PARAMETER(voltex); + return 0; +} + +uint32_t gs_voltexture_get_depth(const gs_texture_t *voltex) +{ + /* TODO */ + UNUSED_PARAMETER(voltex); + return 0; +} + +enum gs_color_format gs_voltexture_get_color_format(const 
gs_texture_t *voltex) +{ + /* TODO */ + UNUSED_PARAMETER(voltex); + return GS_UNKNOWN; +} + +void gs_stagesurface_destroy(gs_stagesurf_t *stagesurf) +{ + delete stagesurf; +} + +uint32_t gs_stagesurface_get_width(const gs_stagesurf_t *stagesurf) +{ + return stagesurf->width; +} + +uint32_t gs_stagesurface_get_height(const gs_stagesurf_t *stagesurf) +{ + return stagesurf->height; +} + +enum gs_color_format gs_stagesurface_get_color_format(const gs_stagesurf_t *stagesurf) +{ + return stagesurf->format; +} +/* Maps the staging surface, stores the mapped pointer in *data and the line size in *linesize, and returns true when the pointer is non-null. */ +bool gs_stagesurface_map(gs_stagesurf_t *stagesurf, uint8_t **data, uint32_t *linesize) +{ + *data = (uint8_t *)stagesurf->Map(); + *linesize = stagesurf->GetLineSize(); + return !!(*data); +} + +void gs_stagesurface_unmap(gs_stagesurf_t *stagesurf) +{ + stagesurf->Unmap(); +} + +void gs_zstencil_destroy(gs_zstencil_t *zstencil) +{ + delete zstencil; +} +/* Clears every device sampler slot that still points at this sampler state before deleting it; a null sampler state is ignored. */ +void gs_samplerstate_destroy(gs_samplerstate_t *samplerstate) +{ + if (!samplerstate) + return; + + if (samplerstate->device) + for (int i = 0; i < GS_MAX_TEXTURES; i++) + if (samplerstate->device->curSamplers[i] == samplerstate) + samplerstate->device->curSamplers[i] = nullptr; + + delete samplerstate; +} + +void gs_vertexbuffer_destroy(gs_vertbuffer_t *vertbuffer) +{ + delete vertbuffer; +} +/* Uploads the arrays of data into the matching GPU buffers of a dynamic vertex buffer; the number of UV sets flushed is the smaller of data->num_tex and the buffer's uvBuffers count. */ +static inline void gs_vertexbuffer_flush_internal(gs_vertbuffer_t *vertbuffer, const gs_vb_data *data) +{ + size_t num_tex = data->num_tex < vertbuffer->uvBuffers.size() ? 
data->num_tex : vertbuffer->uvBuffers.size(); + + if (!vertbuffer->dynamic) { + blog(LOG_ERROR, "gs_vertexbuffer_flush: vertex buffer is " + "not dynamic"); + return; + } + + if (data->points) + vertbuffer->FlushBuffer(vertbuffer->vertexBuffer, data->points, sizeof(vec3)); + + if (vertbuffer->normalBuffer && data->normals) + vertbuffer->FlushBuffer(vertbuffer->normalBuffer, data->normals, sizeof(vec3)); + + if (vertbuffer->tangentBuffer && data->tangents) + vertbuffer->FlushBuffer(vertbuffer->tangentBuffer, data->tangents, sizeof(vec3)); + + if (vertbuffer->colorBuffer && data->colors) + vertbuffer->FlushBuffer(vertbuffer->colorBuffer, data->colors, sizeof(uint32_t)); + + for (size_t i = 0; i < num_tex; i++) { + gs_tvertarray &tv = data->tvarray[i]; + vertbuffer->FlushBuffer(vertbuffer->uvBuffers[i], tv.array, tv.width * sizeof(float)); + } +} +/* Flushes the vertex buffer from its own stored vertex data. */ +void gs_vertexbuffer_flush(gs_vertbuffer_t *vertbuffer) +{ + gs_vertexbuffer_flush_internal(vertbuffer, vertbuffer->vbd.data); +} +/* Flushes the vertex buffer from caller-supplied vertex data. */ +void gs_vertexbuffer_flush_direct(gs_vertbuffer_t *vertbuffer, const gs_vb_data *data) +{ + gs_vertexbuffer_flush_internal(vertbuffer, data); +} + +struct gs_vb_data *gs_vertexbuffer_get_data(const gs_vertbuffer_t *vertbuffer) +{ + return vertbuffer->vbd.data; +} + +void gs_indexbuffer_destroy(gs_indexbuffer_t *indexbuffer) +{ + delete indexbuffer; +} +/* Writes num * indexSize bytes from data to offset 0 of the index buffer; silently does nothing for non-dynamic buffers. */ +static inline void gs_indexbuffer_flush_internal(gs_indexbuffer_t *indexbuffer, const void *data) +{ + if (!indexbuffer->dynamic) + return; + + indexbuffer->device->context->WriteBuffer(*indexbuffer->indexBuffer, 0, data, + indexbuffer->num * indexbuffer->indexSize); +} + +void gs_indexbuffer_flush(gs_indexbuffer_t *indexbuffer) +{ + gs_indexbuffer_flush_internal(indexbuffer, indexbuffer->indices.data); +} + +void gs_indexbuffer_flush_direct(gs_indexbuffer_t *indexbuffer, const void *data) +{ + gs_indexbuffer_flush_internal(indexbuffer, data); +} + +void *gs_indexbuffer_get_data(const gs_indexbuffer_t *indexbuffer) +{ + return 
indexbuffer->indices.data; +} + +size_t gs_indexbuffer_get_num_indices(const gs_indexbuffer_t *indexbuffer) +{ + return indexbuffer->num; +} + +enum gs_index_type gs_indexbuffer_get_type(const gs_indexbuffer_t *indexbuffer) +{ + return indexbuffer->type; +} + +void gs_timer_destroy(gs_timer_t *timer) +{ + delete timer; +} +/* The timer functions below are stubs in this backend: begin/end do nothing and the get_data functions always return false without writing their outputs. The disabled bodies are kept as comments. */ +void gs_timer_begin(gs_timer_t *timer) +{ + // timer->device->context->End(timer->query_begin); +} + +void gs_timer_end(gs_timer_t *timer) +{ + // timer->device->context->End(timer->query_end); +} + +bool gs_timer_get_data(gs_timer_t *timer, uint64_t *ticks) +{ + /*uint64_t begin, end; + HRESULT hr_begin, hr_end; + do { + hr_begin = timer->device->context->GetData(timer->query_begin, &begin, sizeof(begin), 0); + } while (hr_begin == S_FALSE); + do { + hr_end = timer->device->context->GetData(timer->query_end, &end, sizeof(end), 0); + } while (hr_end == S_FALSE); + + const bool succeeded = SUCCEEDED(hr_begin) && SUCCEEDED(hr_end); + if (succeeded) + *ticks = end - begin; + + return succeeded;*/ + return false; +} + +void gs_timer_range_destroy(gs_timer_range_t *range) +{ + delete range; +} + +void gs_timer_range_begin(gs_timer_range_t *range) +{ + // range->device->context->Begin(range->query_disjoint); +} + +void gs_timer_range_end(gs_timer_range_t *range) +{ + // range->device->context->End(range->query_disjoint); +} + +bool gs_timer_range_get_data(gs_timer_range_t *range, bool *disjoint, uint64_t *frequency) +{ + /*D3D11_QUERY_DATA_TIMESTAMP_DISJOINT timestamp_disjoint; + HRESULT hr; + do { + hr = range->device->context->GetData(range->query_disjoint, &timestamp_disjoint, + sizeof(timestamp_disjoint), 0); + } while (hr == S_FALSE); + + const bool succeeded = SUCCEEDED(hr); + if (succeeded) { + *disjoint = timestamp_disjoint.Disjoint; + *frequency = timestamp_disjoint.Frequency; + } + + return succeeded;*/ + return false; +} + +gs_timer::gs_timer(gs_device_t *device) : gs_obj(device, gs_type::gs_timer) +{ + //Rebuild(device->device); +} + 
+gs_timer_range::gs_timer_range(gs_device_t *device) : gs_obj(device, gs_type::gs_timer_range) +{ + //Rebuild(device->device); +} + +extern "C" EXPORT bool device_gdi_texture_available(void) +{ + return true; +} + +extern "C" EXPORT bool device_shared_texture_available(void) +{ + return true; +} + +extern "C" EXPORT bool device_nv12_available(gs_device_t *device) +{ + return device->nv12Supported; +} + +extern "C" EXPORT bool device_p010_available(gs_device_t *device) +{ + return device->p010Supported; +} + +extern "C" EXPORT bool device_is_monitor_hdr(gs_device_t *device, void *monitor) +{ + const HMONITOR hMonitor = static_cast(monitor); + return device->GetMonitorColorInfo(hMonitor).hdr; +} + +extern "C" EXPORT void device_debug_marker_begin(gs_device_t *, const char *markername, const float color[4]) {} + +extern "C" EXPORT void device_debug_marker_end(gs_device_t *) {} + +extern "C" EXPORT gs_texture_t *device_texture_create_gdi(gs_device_t *device, uint32_t width, uint32_t height) +{ + gs_texture *texture = nullptr; + try { + texture = new gs_texture_2d(device, width, height, GS_BGRA_UNORM, 1, nullptr, GS_RENDER_TARGET, + GS_TEXTURE_2D, true); + } catch (const HRError &error) { + blog(LOG_ERROR, "device_texture_create_gdi (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "device_texture_create_gdi (D3D12): %s", error); + } + + return texture; +} + +static inline bool TextureGDICompatible(gs_texture_2d *tex2d, const char *func) +{ + if (!tex2d->isGDICompatible) { + blog(LOG_ERROR, "%s (D3D11): Texture is not GDI compatible", func); + return false; + } + + return true; +} + +extern "C" EXPORT void *gs_texture_get_dc(gs_texture_t *tex) +{ + HDC hDC = nullptr; + + if (tex->type != GS_TEXTURE_2D) + return nullptr; + + gs_texture_2d *tex2d = static_cast(tex); + if (!TextureGDICompatible(tex2d, "gs_texture_get_dc")) + return nullptr; + + if (!tex2d->gdiSurface) + return nullptr; + + 
tex2d->gdiSurface->GetDC(true, &hDC); + return hDC; +} + +extern "C" EXPORT void gs_texture_release_dc(gs_texture_t *tex) +{ + if (tex->type != GS_TEXTURE_2D) + return; + + gs_texture_2d *tex2d = static_cast(tex); + if (!TextureGDICompatible(tex2d, "gs_texture_release_dc")) + return; + + tex2d->gdiSurface->ReleaseDC(nullptr); +} + +extern "C" EXPORT gs_texture_t *device_texture_open_shared(gs_device_t *device, uint32_t handle) +{ + gs_texture *texture = nullptr; + try { + texture = new gs_texture_2d(device, handle); + } catch (const HRError &error) { + blog(LOG_ERROR, "gs_texture_open_shared (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "gs_texture_open_shared (D3D12): %s", error); + } + + return texture; +} + +extern "C" EXPORT gs_texture_t *device_texture_open_nt_shared(gs_device_t *device, uint32_t handle) +{ + gs_texture *texture = nullptr; + try { + texture = new gs_texture_2d(device, handle, true); + } catch (const HRError &error) { + blog(LOG_ERROR, "gs_texture_open_nt_shared (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "gs_texture_open_nt_shared (D3D12): %s", error); + } + + return texture; +} + +extern "C" EXPORT uint32_t device_texture_get_shared_handle(gs_texture_t *tex) +{ + gs_texture_2d *tex2d = reinterpret_cast(tex); + if (tex->type != GS_TEXTURE_2D) + return GS_INVALID_HANDLE; + + return tex2d->isShared ? 
(uint32_t)tex2d->sharedHandle : GS_INVALID_HANDLE; +} + +extern "C" EXPORT gs_texture_t *device_texture_wrap_obj(gs_device_t *device, void *obj) +{ + gs_texture *texture = nullptr; + try { + texture = new gs_texture_2d(device, (ID3D12Resource *)obj); + } catch (const HRError &error) { + blog(LOG_ERROR, "gs_texture_wrap_obj (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } catch (const char *error) { + blog(LOG_ERROR, "gs_texture_wrap_obj (D3D12): %s", error); + } + + return texture; +} + +int device_texture_acquire_sync(gs_texture_t *tex, uint64_t key, uint32_t ms) +{ + gs_texture_2d *tex2d = reinterpret_cast(tex); + if (tex->type != GS_TEXTURE_2D) + return -1; + + if (tex2d->acquired) { + return 0; + } + + tex2d->acquired = true; + return 0; +} + +extern "C" EXPORT int device_texture_release_sync(gs_texture_t *tex, uint64_t key) +{ + gs_texture_2d *tex2d = reinterpret_cast(tex); + if (tex->type != GS_TEXTURE_2D) + return -1; + + if (!tex2d->acquired) + return 0; + + tex2d->acquired = false; + return 0; +} + +extern "C" EXPORT bool device_texture_create_nv12(gs_device_t *device, gs_texture_t **p_tex_y, gs_texture_t **p_tex_uv, + uint32_t width, uint32_t height, uint32_t flags) +{ + if (!device->nv12Supported) + return false; + + *p_tex_y = nullptr; + *p_tex_uv = nullptr; + + gs_texture_2d *tex_y; + gs_texture_2d *tex_uv; + + try { + tex_y = new gs_texture_2d(device, width, height, GS_R8, 1, nullptr, flags, GS_TEXTURE_2D, false, true); + tex_uv = new gs_texture_2d(device, tex_y->m_pResource, flags); + + } catch (const HRError &error) { + blog(LOG_ERROR, "gs_texture_create_nv12 (D3D11): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + return false; + + } catch (const char *error) { + blog(LOG_ERROR, "gs_texture_create_nv12 (D3D11): %s", error); + return false; + } + + tex_y->pairedTexture = tex_uv; + tex_uv->pairedTexture = tex_y; + + *p_tex_y = tex_y; + *p_tex_uv = tex_uv; + return true; +} + +extern 
"C" EXPORT bool device_texture_create_p010(gs_device_t *device, gs_texture_t **p_tex_y, gs_texture_t **p_tex_uv, + uint32_t width, uint32_t height, uint32_t flags) +{ + if (!device->p010Supported) + return false; + + *p_tex_y = nullptr; + *p_tex_uv = nullptr; + + gs_texture_2d *tex_y; + gs_texture_2d *tex_uv; + + try { + tex_y = new gs_texture_2d(device, width, height, GS_R16, 1, nullptr, flags, GS_TEXTURE_2D, false, true); + tex_uv = new gs_texture_2d(device, tex_y->m_pResource, flags); + + } catch (const HRError &error) { + blog(LOG_ERROR, "gs_texture_create_p010 (D3D12): %s (%08lX)", error.str, error.hr); + LogD3D12ErrorDetails(error, device); + return false; + + } catch (const char *error) { + blog(LOG_ERROR, "gs_texture_create_p010 (D3D12): %s", error); + return false; + } + + tex_y->pairedTexture = tex_uv; + tex_uv->pairedTexture = tex_y; + + *p_tex_y = tex_y; + *p_tex_uv = tex_uv; + return true; +} + +extern "C" EXPORT gs_stagesurf_t *device_stagesurface_create_nv12(gs_device_t *device, uint32_t width, uint32_t height) +{ + gs_stage_surface *surf = NULL; + try { + surf = new gs_stage_surface(device, width, height, false); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_stagesurface_create (D3D12): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return surf; +} + +extern "C" EXPORT gs_stagesurf_t *device_stagesurface_create_p010(gs_device_t *device, uint32_t width, uint32_t height) +{ + gs_stage_surface *surf = NULL; + try { + surf = new gs_stage_surface(device, width, height, true); + } catch (const HRError &error) { + blog(LOG_ERROR, + "device_stagesurface_create (D3D11): %s " + "(%08lX)", + error.str, error.hr); + LogD3D12ErrorDetails(error, device); + } + + return surf; +} + +extern "C" EXPORT void device_register_loss_callbacks(gs_device_t *device, const gs_device_loss *callbacks) +{ + device->loss_callbacks.emplace_back(*callbacks); +} + +extern "C" EXPORT void 
device_unregister_loss_callbacks(gs_device_t *device, void *data) +{ + for (auto iter = device->loss_callbacks.begin(); iter != device->loss_callbacks.end(); ++iter) { + if (iter->data == data) { + device->loss_callbacks.erase(iter); + break; + } + } +} +/* Counts the DXGI adapters reported by the factory, skipping only Microsoft's basic render driver, which is identified by vendor 0x1414 together with device 0x8c. The previous test (vendor != 0x1414 && device != 0x8c) was not the negation of that pair and also dropped any adapter matching just one of the two ids. */ +uint32_t gs_get_adapter_count(void) +{ + uint32_t count = 0; + + ComPtr<IDXGIFactory1> factory; + HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (SUCCEEDED(hr)) { + ComPtr<IDXGIAdapter1> adapter; + for (UINT i = 0; factory->EnumAdapters1(i, adapter.Assign()) == S_OK; ++i) { + DXGI_ADAPTER_DESC desc; + if (SUCCEEDED(adapter->GetDesc(&desc))) { + /* ignore Microsoft's 'basic' renderer' */ + const bool isBasicRenderer = desc.VendorId == 0x1414 && desc.DeviceId == 0x8c; + if (!isBasicRenderer) { + ++count; + } + } + } + } + + return count; +} + +extern "C" EXPORT bool device_can_adapter_fast_clear(gs_device_t *device) +{ + return device->fastClearSupported; +} diff --git a/libobs-d3d12/d3d12-subsystem.hpp b/libobs-d3d12/d3d12-subsystem.hpp new file mode 100644 index 00000000000000..3d755fa246e490 --- /dev/null +++ b/libobs-d3d12/d3d12-subsystem.hpp @@ -0,0 +1,972 @@ +#pragma once + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +struct shader_var; +struct shader_sampler; +struct gs_vertex_shader; +struct gs_pixel_shader; +/* Packs the Windows major and minor version numbers as (major << 8) | minor. */ +static inline uint32_t GetWinVer() +{ + struct win_version_info ver; + get_win_ver(&ver); + + return (ver.major << 8) | ver.minor; +} +/* Maps a gs_color_format to the DXGI format used for the texture resource; GS_RGBA, GS_BGRX and GS_BGRA map to TYPELESS formats. */ +static inline DXGI_FORMAT ConvertGSTextureFormatResource(gs_color_format format) +{ + switch (format) { + case GS_UNKNOWN: + return DXGI_FORMAT_UNKNOWN; + case GS_A8: + return DXGI_FORMAT_A8_UNORM; + case GS_R8: + return DXGI_FORMAT_R8_UNORM; + case GS_RGBA: + return DXGI_FORMAT_R8G8B8A8_TYPELESS; + case GS_BGRX: + return DXGI_FORMAT_B8G8R8X8_TYPELESS; + case GS_BGRA: + return DXGI_FORMAT_B8G8R8A8_TYPELESS; + case GS_R10G10B10A2: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case GS_RGBA16: + return DXGI_FORMAT_R16G16B16A16_UNORM; + case GS_R16: + return 
DXGI_FORMAT_R16_UNORM; + case GS_RGBA16F: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case GS_RGBA32F: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case GS_RG16F: + return DXGI_FORMAT_R16G16_FLOAT; + case GS_RG32F: + return DXGI_FORMAT_R32G32_FLOAT; + case GS_R16F: + return DXGI_FORMAT_R16_FLOAT; + case GS_R32F: + return DXGI_FORMAT_R32_FLOAT; + case GS_DXT1: + return DXGI_FORMAT_BC1_UNORM; + case GS_DXT3: + return DXGI_FORMAT_BC2_UNORM; + case GS_DXT5: + return DXGI_FORMAT_BC3_UNORM; + case GS_R8G8: + return DXGI_FORMAT_R8G8_UNORM; + case GS_RGBA_UNORM: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case GS_BGRX_UNORM: + return DXGI_FORMAT_B8G8R8X8_UNORM; + case GS_BGRA_UNORM: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case GS_RG16: + return DXGI_FORMAT_R16G16_UNORM; + } + + return DXGI_FORMAT_UNKNOWN; +} +/* View format for a texture: the UNORM variant for GS_RGBA, GS_BGRX and GS_BGRA; every other format uses its resource format. */ +static inline DXGI_FORMAT ConvertGSTextureFormatView(gs_color_format format) +{ + switch (format) { + case GS_RGBA: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case GS_BGRX: + return DXGI_FORMAT_B8G8R8X8_UNORM; + case GS_BGRA: + return DXGI_FORMAT_B8G8R8A8_UNORM; + default: + return ConvertGSTextureFormatResource(format); + } +} +/* "Linear" view format for a texture: the UNORM_SRGB variant for GS_RGBA, GS_BGRX and GS_BGRA; every other format uses its resource format. */ +static inline DXGI_FORMAT ConvertGSTextureFormatViewLinear(gs_color_format format) +{ + switch (format) { + case GS_RGBA: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case GS_BGRX: + return DXGI_FORMAT_B8G8R8X8_UNORM_SRGB; + case GS_BGRA: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; + default: + return ConvertGSTextureFormatResource(format); + } +} +/* Inverse of ConvertGSTextureFormatResource: maps a DXGI resource format back to a gs_color_format, GS_UNKNOWN when there is no match. */ +static inline gs_color_format ConvertDXGITextureFormat(DXGI_FORMAT format) +{ + switch (format) { + case DXGI_FORMAT_A8_UNORM: + return GS_A8; + case DXGI_FORMAT_R8_UNORM: + return GS_R8; + case DXGI_FORMAT_R8G8_UNORM: + return GS_R8G8; + case DXGI_FORMAT_R8G8B8A8_TYPELESS: + return GS_RGBA; + case DXGI_FORMAT_B8G8R8X8_TYPELESS: + return GS_BGRX; + case DXGI_FORMAT_B8G8R8A8_TYPELESS: + return GS_BGRA; + case DXGI_FORMAT_R10G10B10A2_UNORM: + return GS_R10G10B10A2; + case DXGI_FORMAT_R16G16B16A16_UNORM: + 
return GS_RGBA16; + case DXGI_FORMAT_R16_UNORM: + return GS_R16; + case DXGI_FORMAT_R16G16B16A16_FLOAT: + return GS_RGBA16F; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + return GS_RGBA32F; + case DXGI_FORMAT_R16G16_FLOAT: + return GS_RG16F; + case DXGI_FORMAT_R32G32_FLOAT: + return GS_RG32F; + case DXGI_FORMAT_R16_FLOAT: + return GS_R16F; + case DXGI_FORMAT_R32_FLOAT: + return GS_R32F; + case DXGI_FORMAT_BC1_UNORM: + return GS_DXT1; + case DXGI_FORMAT_BC2_UNORM: + return GS_DXT3; + case DXGI_FORMAT_BC3_UNORM: + return GS_DXT5; + case DXGI_FORMAT_R8G8B8A8_UNORM: + return GS_RGBA_UNORM; + case DXGI_FORMAT_B8G8R8X8_UNORM: + return GS_BGRX_UNORM; + case DXGI_FORMAT_B8G8R8A8_UNORM: + return GS_BGRA_UNORM; + case DXGI_FORMAT_R16G16_UNORM: + return GS_RG16; + } + + return GS_UNKNOWN; +} +/* Maps a gs_zstencil_format to its DXGI depth-stencil format; DXGI_FORMAT_UNKNOWN for GS_ZS_NONE or unmatched values. */ +static inline DXGI_FORMAT ConvertGSZStencilFormat(gs_zstencil_format format) +{ + switch (format) { + case GS_ZS_NONE: + return DXGI_FORMAT_UNKNOWN; + case GS_Z16: + return DXGI_FORMAT_D16_UNORM; + case GS_Z24_S8: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + case GS_Z32F: + return DXGI_FORMAT_D32_FLOAT; + case GS_Z32F_S8X24: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + } + + return DXGI_FORMAT_UNKNOWN; +} +/* Maps a gs_depth_test to the D3D12 comparison function; unmatched values fall back to NEVER. GS_LEQUAL maps to LESS_EQUAL: it previously returned EQUAL, which made "less or equal" tests behave like GS_EQUAL. */ +static inline D3D12_COMPARISON_FUNC ConvertGSDepthTest(gs_depth_test test) +{ + switch (test) { + case GS_NEVER: + return D3D12_COMPARISON_FUNC_NEVER; + case GS_LESS: + return D3D12_COMPARISON_FUNC_LESS; + case GS_LEQUAL: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case GS_EQUAL: + return D3D12_COMPARISON_FUNC_EQUAL; + case GS_GEQUAL: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case GS_GREATER: + return D3D12_COMPARISON_FUNC_GREATER; + case GS_NOTEQUAL: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case GS_ALWAYS: + return D3D12_COMPARISON_FUNC_ALWAYS; + } + + return D3D12_COMPARISON_FUNC_NEVER; +} +/* Maps a gs_stencil_op_type to the D3D12 stencil operation; unmatched values fall back to KEEP. */ +static inline D3D12_STENCIL_OP ConvertGSStencilOp(gs_stencil_op_type op) +{ + switch (op) { + case GS_KEEP: + return D3D12_STENCIL_OP_KEEP; + case GS_ZERO: + return D3D12_STENCIL_OP_ZERO; + 
case GS_REPLACE: + return D3D12_STENCIL_OP_REPLACE; + case GS_INCR: + return D3D12_STENCIL_OP_INCR; + case GS_DECR: + return D3D12_STENCIL_OP_DECR; + case GS_INVERT: + return D3D12_STENCIL_OP_INVERT; + } + + return D3D12_STENCIL_OP_KEEP; +} +/* Maps a gs_blend_type to the D3D12 blend factor; unmatched values fall back to D3D12_BLEND_ONE. */ +static inline D3D12_BLEND ConvertGSBlendType(gs_blend_type type) +{ + switch (type) { + case GS_BLEND_ZERO: + return D3D12_BLEND_ZERO; + case GS_BLEND_ONE: + return D3D12_BLEND_ONE; + case GS_BLEND_SRCCOLOR: + return D3D12_BLEND_SRC_COLOR; + case GS_BLEND_INVSRCCOLOR: + return D3D12_BLEND_INV_SRC_COLOR; + case GS_BLEND_SRCALPHA: + return D3D12_BLEND_SRC_ALPHA; + case GS_BLEND_INVSRCALPHA: + return D3D12_BLEND_INV_SRC_ALPHA; + case GS_BLEND_DSTCOLOR: + return D3D12_BLEND_DEST_COLOR; + case GS_BLEND_INVDSTCOLOR: + return D3D12_BLEND_INV_DEST_COLOR; + case GS_BLEND_DSTALPHA: + return D3D12_BLEND_DEST_ALPHA; + case GS_BLEND_INVDSTALPHA: + return D3D12_BLEND_INV_DEST_ALPHA; + case GS_BLEND_SRCALPHASAT: + return D3D12_BLEND_SRC_ALPHA_SAT; + } + + return D3D12_BLEND_ONE; +} +/* Maps a gs_blend_op_type to the D3D12 blend operation; unmatched values fall back to ADD. */ +static inline D3D12_BLEND_OP ConvertGSBlendOpType(gs_blend_op_type type) +{ + switch (type) { + case GS_BLEND_OP_ADD: + return D3D12_BLEND_OP_ADD; + case GS_BLEND_OP_SUBTRACT: + return D3D12_BLEND_OP_SUBTRACT; + case GS_BLEND_OP_REVERSE_SUBTRACT: + return D3D12_BLEND_OP_REV_SUBTRACT; + case GS_BLEND_OP_MIN: + return D3D12_BLEND_OP_MIN; + case GS_BLEND_OP_MAX: + return D3D12_BLEND_OP_MAX; + } + + return D3D12_BLEND_OP_ADD; +} +/* Maps a gs_cull_mode to the D3D12 cull mode; unmatched values fall back to BACK. */ +static inline D3D12_CULL_MODE ConvertGSCullMode(gs_cull_mode mode) +{ + switch (mode) { + case GS_BACK: + return D3D12_CULL_MODE_BACK; + case GS_FRONT: + return D3D12_CULL_MODE_FRONT; + case GS_NEITHER: + return D3D12_CULL_MODE_NONE; + } + + return D3D12_CULL_MODE_BACK; +} +/* Maps a gs_draw_mode to the primitive topology; unmatched values fall back to POINTLIST. */ +static inline D3D12_PRIMITIVE_TOPOLOGY ConvertGSTopology(gs_draw_mode mode) +{ + switch (mode) { + case GS_POINTS: + return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + case GS_LINES: + return D3D_PRIMITIVE_TOPOLOGY_LINELIST; + case GS_LINESTRIP: + return 
D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + case GS_TRIS: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case GS_TRISTRIP: + return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + } + + return D3D_PRIMITIVE_TOPOLOGY_POINTLIST; +} +/* Reduces a primitive topology to its topology type (point, line or triangle); unmatched values fall back to TRIANGLE. */ +static inline D3D12_PRIMITIVE_TOPOLOGY_TYPE ConvertD3D12Topology(D3D12_PRIMITIVE_TOPOLOGY tology) +{ + switch (tology) { + case D3D_PRIMITIVE_TOPOLOGY_POINTLIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + case D3D_PRIMITIVE_TOPOLOGY_LINELIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case D3D_PRIMITIVE_TOPOLOGY_LINESTRIP: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + case D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + case D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + } + + return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; +} +/* Holds a gs_vb_data pointer and calls gs_vbdata_destroy on it in the destructor. NOTE(review): the type is implicitly copyable, so a copy would destroy the same data twice; confirm it is never copied. */ +struct VBDataPtr { + gs_vb_data *data; + + inline VBDataPtr(gs_vb_data *data) : data(data) {} + inline ~VBDataPtr() { gs_vbdata_destroy(data); } +}; +/* Tag identifying the concrete kind of a gs_obj. */ +enum class gs_type { + gs_vertex_buffer, + gs_index_buffer, + gs_texture_2d, + gs_zstencil_buffer, + gs_stage_surface, + gs_sampler_state, + gs_vertex_shader, + gs_pixel_shader, + gs_duplicator, + gs_swap_chain, + gs_timer, + gs_timer_range, + gs_texture_3d, +}; +/* Common base of the backend's graphics objects: owning device, type tag and next/prev_next links. NOTE(review): the links presumably chain the object into a per-device list; the constructor and destructor bodies are not visible here. */ +struct gs_obj { + gs_device_t *device = nullptr; + gs_type obj_type = gs_type::gs_vertex_buffer; + gs_obj *next = nullptr; + gs_obj **prev_next = nullptr; + + inline gs_obj() : device(nullptr), next(nullptr), prev_next(nullptr) {} + + gs_obj(gs_device_t *device, gs_type type); + virtual ~gs_obj(); +}; +struct gs_timer : gs_obj { + //ComPtr query_begin; + //ComPtr query_end; + + void Rebuild(ID3D12Device *dev) {} + + inline void Release() + { + //query_begin.Release(); + //query_end.Release(); + } + + gs_timer(gs_device_t *device); +}; + +struct gs_timer_range : gs_obj { + //ComPtr query_disjoint; + + void Rebuild(ID3D12Device *dev) {} + + inline void Release() + { + //query_disjoint.Release(); + } + + 
gs_timer_range(gs_device_t *device); +}; +/* Base of all texture objects: a gs_obj that is also a D3D12 GPU resource and carries the texture type, mip level count, color format and two shader resource view handles. levels and format now have default member initializers because the (device, obj_type, type) constructor does not set them and previously left them indeterminate. */ +struct gs_texture : gs_obj, public D3D12Graphics::GpuResource { + gs_texture_type type; + uint32_t levels = 0; + gs_color_format format = GS_UNKNOWN; + + D3D12_CPU_DESCRIPTOR_HANDLE shaderSRV = {D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN}; + D3D12_CPU_DESCRIPTOR_HANDLE shaderLinearSRV = {D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN}; + + void Rebuild(ID3D12Device *dev); + + inline gs_texture(gs_texture_type type, uint32_t levels, gs_color_format format) + : type(type), + levels(levels), + format(format) + { + } + + inline gs_texture(gs_device *device, gs_type obj_type, gs_texture_type type) + : gs_obj(device, obj_type), + type(type) + { + } + + inline gs_texture(gs_device *device, gs_type obj_type, gs_texture_type type, uint32_t levels, + gs_color_format format) + : gs_obj(device, obj_type), + type(type), + levels(levels), + format(format) + { + } +}; +/* 2D (and cube) texture. The element types of uploadBuffer and gdiSurface, lost from this text, are restored from their uses in the .cpp: uploadBuffer.get() is assigned to a D3D12Graphics::UploadBuffer pointer, and gdiSurface is used through GetDC/ReleaseDC, which belong to IDXGISurface1. */ +struct gs_texture_2d : gs_texture { + D3D12_RESOURCE_DESC texDesc = {}; + std::unique_ptr<D3D12Graphics::UploadBuffer> uploadBuffer; + + D3D12_CPU_DESCRIPTOR_HANDLE renderTargetRTV[6] = { + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN}; + + D3D12_CPU_DESCRIPTOR_HANDLE renderTargetLinearRTV[6] = { + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, + D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN, D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN}; + + ComPtr<IDXGISurface1> gdiSurface; + + uint32_t width = 0, height = 0; + uint32_t flags = 0; + + DXGI_FORMAT dxgiFormatResource = DXGI_FORMAT_UNKNOWN; + DXGI_FORMAT dxgiFormatView = DXGI_FORMAT_UNKNOWN; + DXGI_FORMAT dxgiFormatViewLinear = DXGI_FORMAT_UNKNOWN; + bool isRenderTarget = false; + bool isGDICompatible = false; + bool isDynamic = false; + bool isShared = false; + bool genMipmaps = false; + HANDLE sharedHandle = NULL; + + gs_texture_2d *pairedTexture = nullptr; + bool 
twoPlane = false;
+	bool chroma = false; // set to true by the constructor that wraps the chroma plane of an NV12/P010 resource
+	bool acquired = false;
+
+	std::vector> data; // CPU-side copy of the initial pixel data, filled by BackupTexture()
+	std::vector srd; // per-subresource upload descriptors built by InitSRD()
+
+	void InitSRD(std::vector &srd);
+	void InitResourceView(int32_t PlaneSliceCount = 0); // the argument is written to SRVDesc.Texture2D.PlaneSlice
+	void InitRenderTargets(int32_t PlaneSliceCount = 0); // the argument is written to RTVDesc.Texture2D.PlaneSlice
+	void InitUAV();
+	UINT GetLineSize();
+	void CreateTargetFromSwapChain(gs_device_t *device, IDXGISwapChain *swap, int32_t bufferIndex,
+				       enum gs_color_format resFormat, const gs_init_data &initData);
+
+	void InitTexture(const uint8_t *const *data);
+
+	void BackupTexture(const uint8_t *const *data);
+	void GetSharedHandle(IDXGIResource *dxgi_res);
+
+	void RebuildSharedTextureFallback();
+	void Rebuild(ID3D12Device *dev);
+	void RebuildPaired_Y(ID3D12Device *dev);
+	void RebuildPaired_UV(ID3D12Device *dev);
+
+	inline void Release() {}
+
+	inline gs_texture_2d() : gs_texture(GS_TEXTURE_2D, 0, GS_UNKNOWN) {} // device-less default; used for the gs_swap_chain::target array elements
+
+	gs_texture_2d(gs_device_t *device, uint32_t width, uint32_t height, gs_color_format colorFormat,
+		      uint32_t levels, const uint8_t *const *data, uint32_t flags, gs_texture_type type,
+		      bool gdiCompatible, bool twoPlane = false);
+
+	gs_texture_2d(gs_device_t *device, ID3D12Resource *nv12, uint32_t flags);
+	gs_texture_2d(gs_device_t *device, uint32_t handle, bool ntHandle = false);
+	gs_texture_2d(gs_device_t *device, ID3D12Resource *obj);
+	virtual ~gs_texture_2d();
+	virtual void Destroy() override;
+};
+
+struct gs_texture_3d : gs_texture { // volume texture; InitTexture/InitResourceView are empty stubs in d3d12-texture3d.cpp
+	uint32_t width = 0, height = 0, depth = 0;
+	uint32_t flags = 0;
+	DXGI_FORMAT dxgiFormatResource = DXGI_FORMAT_UNKNOWN;
+	DXGI_FORMAT dxgiFormatView = DXGI_FORMAT_UNKNOWN;
+	DXGI_FORMAT dxgiFormatViewLinear = DXGI_FORMAT_UNKNOWN;
+	bool isDynamic = false;
+	bool isShared = false;
+	bool genMipmaps = false;
+	uint32_t sharedHandle = GS_INVALID_HANDLE; // stored as a 32-bit value here, unlike gs_texture_2d which keeps a HANDLE
+
+	bool chroma = false;
+	bool acquired = false;
+
+	std::vector> data;
+	std::vector srd;
+
+	void InitSRD(std::vector &srd);
+	void InitTexture(const uint8_t *const *data);
+	void InitResourceView();
+	void
BackupTexture(const uint8_t *const *data);
+	void GetSharedHandle(IDXGIResource *dxgi_res);
+
+	void RebuildSharedTextureFallback();
+	void Rebuild(ID3D12Device *dev);
+	void RebuildNV12_Y(ID3D12Device *dev);
+	void RebuildNV12_UV(ID3D12Device *dev);
+
+	inline void Release() {}
+
+	inline gs_texture_3d() : gs_texture(GS_TEXTURE_3D, 0, GS_UNKNOWN) {}
+
+	gs_texture_3d(gs_device_t *device, uint32_t width, uint32_t height, uint32_t depth, gs_color_format colorFormat,
+		      uint32_t levels, const uint8_t *const *data, uint32_t flags);
+
+	gs_texture_3d(gs_device_t *device, uint32_t handle);
+};
+
+struct gs_zstencil_buffer : gs_obj, public D3D12Graphics::DepthBuffer { // depth/stencil target backed by a DepthBuffer
+	gs_zstencil_format format;
+
+	void inline Clear() { Destroy(); } // Clear and Release both simply forward to Destroy()
+
+	inline void Release() { Destroy(); }
+
+	gs_zstencil_buffer(gs_device_t *device, uint32_t width, uint32_t height, gs_zstencil_format format);
+
+	void Rebuild(ID3D12Device *dev);
+};
+
+struct gs_stage_surface : gs_obj, public D3D12Graphics::ReadbackBuffer { // staging surface backed by a ReadbackBuffer
+	uint32_t width, height;
+	gs_color_format format;
+	DXGI_FORMAT dxgiFormat;
+
+	inline void Release() { Destroy(); }
+	void Rebuild(ID3D12Device *dev);
+	UINT GetLineSize();
+	D3D12_RESOURCE_DESC GetTextureDesc();
+	UINT64 GetTotalBytes();
+
+	gs_stage_surface(gs_device_t *device, uint32_t width, uint32_t height, gs_color_format colorFormat);
+	gs_stage_surface(gs_device_t *device, uint32_t width, uint32_t height, bool p010);
+};
+
+struct gs_sampler_state : gs_obj { // keeps the libobs sampler info next to its D3D12 sampler description
+	gs_sampler_info info;
+	D3D12Graphics::SamplerDesc sampleDesc;
+	gs_sampler_state(gs_device_t *device, const gs_sampler_info *info);
+};
+
+struct gs_shader_param { // one shader parameter with its current and default value buffers
+	std::string name;
+	gs_shader_param_type type;
+
+	uint32_t textureID;
+	struct gs_sampler_state *nextSampler = nullptr;
+
+	int arrayCount;
+
+	size_t pos;
+
+	std::vector curValue;
+	std::vector defaultValue;
+	bool changed;
+
+	gs_shader_param(shader_var &var, uint32_t &texCounter);
+};
+
+struct ShaderError { // error blob plus the HRESULT it came with
+	ComPtr errors;
+	HRESULT hr;
+
+
inline ShaderError(const ComPtr &errors, HRESULT hr) : errors(errors), hr(hr) {}
+};
+
+struct gs_shader : gs_obj { // state shared by vertex and pixel shaders
+	gs_shader_type type;
+	std::vector params;
+
+	size_t samplerCount = 0;
+	size_t textureCount = 0;
+	bool hasDynamicUniformConstantBuffer = false; // const buffer
+	size_t constantSize;
+
+	int32_t textureRootParameterIndex = -1; // the three root-parameter indices default to -1
+	int32_t samplerRootParameterIndex = -1;
+	int32_t dynamicUniformConstantBufferRootParameterIndex = -1;
+
+	std::vector data;
+	std::string actuallyShaderString;
+
+	inline void UpdateParam(std::vector &constData, gs_shader_param ¶m, bool &upload);
+	void UploadParams();
+
+	void BuildConstantBuffer();
+	void Compile(const char *shaderStr, const char *file, const char *target, ID3D10Blob **shader);
+
+	inline gs_shader(gs_device_t *device, gs_type obj_type, gs_shader_type type)
+		: gs_obj(device, obj_type),
+		  type(type),
+		  constantSize(0)
+	{
+	}
+
+	virtual ~gs_shader() {}
+};
+
+struct ShaderSampler { // a named sampler state owned by value
+	std::string name;
+	gs_sampler_state sampler;
+	inline ShaderSampler(const char *name, gs_device_t *device, gs_sampler_info *info)
+		: name(name),
+		  sampler(device, info)
+	{
+	}
+};
+
+struct gs_vertex_shader : gs_shader {
+	gs_shader_param *world, *viewProj;
+
+	std::vector layoutData;
+
+	bool hasNormals;
+	bool hasColors;
+	bool hasTangents;
+	uint32_t nTexUnits;
+
+	void Rebuild(ID3D12Device *dev) {}
+
+	inline void Release() {}
+
+	inline uint32_t NumBuffersExpected() const // one position buffer + nTexUnits UV buffers + one per enabled optional attribute
+	{
+		uint32_t count = nTexUnits + 1;
+		if (hasNormals)
+			count++;
+		if (hasColors)
+			count++;
+		if (hasTangents)
+			count++;
+
+		return count;
+	}
+
+	void GetBuffersExpected(const std::vector &inputs);
+
+	gs_vertex_shader(gs_device_t *device, const char *file, const char *shaderString);
+};
+
+struct gs_duplicator : gs_obj { // NOTE(review): members are named device11/context11, presumably D3D11 objects whose texture is shared with D3D12 - confirm
+	ComPtr factory;
+	ComPtr adapter;
+	ComPtr device11;
+	ComPtr context11;
+	ComPtr duplicator;
+	gs_texture_t *texture;
+
+	ComPtr km;
+	ComPtr texShared;
+	HANDLE texSharedHandle = NULL;
+
+	bool hdr = false;
+	enum
gs_color_space color_space = GS_CS_SRGB;
+	float sdr_white_nits = 80.f;
+	int idx;
+	long refs;
+	bool updated;
+
+	void Start();
+
+	inline void Release() { duplicator.Release(); }
+
+	gs_duplicator(gs_device_t *device, int monitor_idx);
+	~gs_duplicator();
+};
+
+struct gs_pixel_shader : gs_shader {
+	std::vector> samplers;
+
+	void Rebuild(ID3D12Device *dev);
+
+	inline void Release() {}
+
+	inline void GetSamplerStates(gs_samplerstate_t **descriptor) // fills exactly GS_MAX_TEXTURES entries; slots past samplers.size() become NULL
+	{
+		size_t i;
+		for (i = 0; i < samplers.size(); i++) // NOTE: not bounded by GS_MAX_TEXTURES; assumes samplers.size() never exceeds it
+			descriptor[i] = &samplers[i]->sampler;
+		for (; i < GS_MAX_TEXTURES; i++)
+			descriptor[i] = NULL;
+	}
+
+	gs_pixel_shader(gs_device_t *device, const char *file, const char *shaderString);
+};
+
+struct gs_swap_chain : gs_obj { // window swap chain with one gs_texture_2d wrapper per back-buffer slot
+	HWND hwnd;
+	gs_init_data initData;
+	DXGI_SWAP_CHAIN_DESC1 swapDesc = {};
+	gs_color_space space;
+
+	gs_texture_2d target[GS_MAX_TEXTURES];
+	std::unique_ptr zs;
+	ComPtr swap;
+	int32_t currentBackBufferIndex = 0;
+	bool bEnableHDROutput = false;
+
+	bool CheckHDRSupport();
+	void Resize(uint32_t cx, uint32_t cy, gs_color_format format);
+
+	void Release();
+
+	gs_swap_chain(gs_device *device, const gs_init_data *data);
+	virtual ~gs_swap_chain();
+};
+struct gs_vertex_buffer : gs_obj { // owns one GpuBuffer per vertex attribute through raw pointers (freed in Release())
+	D3D12Graphics::GpuBuffer *vertexBuffer = nullptr;
+	D3D12Graphics::GpuBuffer *normalBuffer = nullptr;
+	D3D12Graphics::GpuBuffer *colorBuffer = nullptr;
+	D3D12Graphics::GpuBuffer *tangentBuffer = nullptr;
+	std::vector uvBuffers;
+
+	bool dynamic;
+	VBDataPtr vbd;
+	size_t numVerts;
+	std::vector uvSizes; // per-UV-buffer element size in bytes, parallel to uvBuffers
+
+	void FlushBuffer(D3D12Graphics::GpuBuffer *buffer, void *array, size_t elementSize);
+
+	UINT MakeBufferList(gs_vertex_shader *shader, D3D12Graphics::GpuBuffer **buffers, uint32_t *strides);
+
+	void InitBuffer(const size_t elementSize, const size_t numVerts, void *array,
+			D3D12Graphics::GpuBuffer **buffer);
+
+	void BuildBuffers();
+
+	inline void Release(); // NOTE(review): declared inline here but defined out of line in d3d12-vertexbuffer.cpp - confirm other translation units never call it
+
+	void Rebuild();
+	~gs_vertex_buffer();
+
+	gs_vertex_buffer(gs_device_t
*device, struct gs_vb_data *data, uint32_t flags);
+};
+
+/* exception-safe RAII wrapper for index buffer data (NOTE: not copy-safe) */
+struct DataPtr {
+	void *data;
+
+	inline DataPtr(void *data) : data(data) {}
+	inline ~DataPtr() { bfree(data); }
+};
+
+struct gs_index_buffer : gs_obj {
+	D3D12Graphics::GpuBuffer *indexBuffer;
+	bool dynamic;
+	gs_index_type type;
+	size_t indexSize;
+	size_t num;
+	DataPtr indices;
+
+	D3D11_BUFFER_DESC bd = {}; // NOTE(review): D3D11 types in the D3D12 backend - looks like a leftover from the D3D11 port, confirm
+	D3D11_SUBRESOURCE_DATA srd = {};
+
+	void InitBuffer();
+
+	void Rebuild(ID3D11Device *dev);
+
+	void Release();
+
+	gs_index_buffer(gs_device_t *device, enum gs_index_type type, void *indices, size_t num, uint32_t flags);
+	~gs_index_buffer();
+};
+
+struct BlendState { // value type describing the blend configuration (stored as gs_device::curBlendState)
+	bool blendEnabled;
+	gs_blend_type srcFactorC;
+	gs_blend_type destFactorC;
+	gs_blend_type srcFactorA;
+	gs_blend_type destFactorA;
+	gs_blend_op_type op;
+
+	bool redEnabled;
+	bool greenEnabled;
+	bool blueEnabled;
+	bool alphaEnabled;
+
+	inline bool operator==(const BlendState &other) const // compares every field, including destFactorA (previously omitted)
+	{
+		return blendEnabled == other.blendEnabled && srcFactorC == other.srcFactorC &&
+		       destFactorC == other.destFactorC && srcFactorA == other.srcFactorA &&
+		       destFactorA == other.destFactorA && op == other.op && redEnabled == other.redEnabled &&
+		       greenEnabled == other.greenEnabled && blueEnabled == other.blueEnabled && alphaEnabled == other.alphaEnabled;
+	}
+
+	inline BlendState() // defaults: blending on, SRCALPHA/INVSRCALPHA colour, ONE/INVSRCALPHA alpha, ADD, all channels written
+		: blendEnabled(true),
+		  srcFactorC(GS_BLEND_SRCALPHA),
+		  destFactorC(GS_BLEND_INVSRCALPHA),
+		  srcFactorA(GS_BLEND_ONE),
+		  destFactorA(GS_BLEND_INVSRCALPHA),
+		  op(GS_BLEND_OP_ADD),
+		  redEnabled(true),
+		  greenEnabled(true),
+		  blueEnabled(true),
+		  alphaEnabled(true)
+	{
+	}
+
+	inline BlendState(const BlendState &state) { memcpy(this, &state, sizeof(BlendState)); }
+};
+
+struct StencilSide {
+	gs_depth_test test;
+	gs_stencil_op_type fail;
+	gs_stencil_op_type zfail;
+	gs_stencil_op_type zpass;
+
+	inline bool operator==(const StencilSide &other) const
+	{
+		return test == other.test && fail == other.fail &&
zfail == other.zfail && zpass == other.zpass;
+	}
+
+	inline bool operator!=(const StencilSide &other) const { return !(*this == other); }
+
+	inline StencilSide() : test(GS_ALWAYS), fail(GS_KEEP), zfail(GS_KEEP), zpass(GS_KEEP) {}
+};
+
+struct ZStencilState { // value type describing the depth/stencil configuration
+	bool depthEnabled;
+	bool depthWriteEnabled;
+	gs_depth_test depthFunc;
+
+	bool stencilEnabled;
+	bool stencilWriteEnabled;
+	StencilSide stencilFront;
+	StencilSide stencilBack;
+
+	inline ZStencilState() // defaults: depth test and write on with GS_LESS, stencil test off
+		: depthEnabled(true),
+		  depthWriteEnabled(true),
+		  depthFunc(GS_LESS),
+		  stencilEnabled(false),
+		  stencilWriteEnabled(true)
+	{
+	}
+
+	inline bool operator==(const ZStencilState &other) const // field-by-field comparison of all seven members
+	{
+		return depthEnabled == other.depthEnabled && depthWriteEnabled == other.depthWriteEnabled &&
+		       depthFunc == other.depthFunc && stencilEnabled == other.stencilEnabled &&
+		       stencilWriteEnabled == other.stencilWriteEnabled && stencilFront == other.stencilFront &&
+		       stencilBack == other.stencilBack;
+	}
+
+	inline bool operator!=(const ZStencilState &other) const { return !(*this == other); }
+
+	inline ZStencilState(const ZStencilState &state) { memcpy(this, &state, sizeof(ZStencilState)); } // byte-wise copy, padding included
+};
+
+struct RasterState { // value type describing the rasterizer configuration (cull mode only)
+	gs_cull_mode cullMode;
+
+	inline bool operator==(const RasterState &other) const { return cullMode == other.cullMode; }
+
+	inline bool operator!=(const RasterState &other) const { return !(*this == other); }
+
+	inline RasterState() : cullMode(GS_BACK) {}
+
+	inline RasterState(const RasterState &state) { memcpy(this, &state, sizeof(RasterState)); }
+};
+
+struct mat4float { // plain 16-float matrix
+	float mat[16];
+};
+
+struct gs_monitor_color_info {
+	bool hdr;
+	UINT bits_per_color;
+	ULONG sdr_white_nits;
+
+	gs_monitor_color_info(bool hdr, int bits_per_color, ULONG sdr_white_nits) // int argument is converted to the UINT member
+		: hdr(hdr),
+		  bits_per_color(bits_per_color),
+		  sdr_white_nits(sdr_white_nits)
+	{
+	}
+};
+
+struct gs_device { // per-device render state for the D3D12 backend
+	D3D12Graphics::D3D12DeviceInstance *d3d12Instance = nullptr;
+
+	uint32_t adpIdx = 0;
+	bool nv12Supported =
false;
+	bool p010Supported = false;
+	bool fastClearSupported = false;
+
+	gs_texture_2d *curRenderTarget = nullptr;
+	gs_zstencil_buffer *curZStencilBuffer = nullptr;
+	int curRenderSide = 0;
+	enum gs_color_space curColorSpace = GS_CS_SRGB;
+	bool curFramebufferSrgb = false;
+	bool curFramebufferInvalidate = false;
+
+	gs_texture *curTextures[GS_MAX_TEXTURES]; // NOTE: no default initializer here, unlike the neighbouring pointers
+	gs_sampler_state *curSamplers[GS_MAX_TEXTURES]; // NOTE: no default initializer here either
+	gs_vertex_buffer *curVertexBuffer = nullptr;
+	gs_index_buffer *curIndexBuffer = nullptr;
+	gs_vertex_shader *curVertexShader = nullptr;
+	gs_pixel_shader *curPixelShader = nullptr;
+	gs_swap_chain *curSwapChain = nullptr;
+	std::vector curVertexBufferViews;
+
+	gs_vertex_buffer *lastVertexBuffer = nullptr;
+	gs_vertex_shader *lastVertexShader = nullptr;
+
+	bool zstencilStateChanged = true; // all three dirty flags start out true
+	bool rasterStateChanged = true;
+	bool blendStateChanged = true;
+	ZStencilState curZstencilState;
+	RasterState curRasterState;
+	BlendState curBlendState;
+
+	gs_rect viewport;
+
+	std::vector projStack;
+
+	matrix4 curProjMatrix;
+	matrix4 curViewMatrix;
+	matrix4 curViewProjMatrix;
+
+	std::vector loss_callbacks;
+	gs_obj *first_obj = nullptr;
+	std::vector> monitor_to_hdr;
+
+	void InitDevice(uint32_t adapterIdx);
+
+	D3D12_DEPTH_STENCIL_DESC ConvertZStencilState(const ZStencilState &zs); // translate the value-type states above into D3D12 descriptors
+	D3D12_RASTERIZER_DESC ConvertRasterState(const RasterState &rs);
+	D3D12_BLEND_DESC ConvertBlendState(const BlendState &bs);
+
+	void LoadVertexBufferData();
+
+	void LoadRootSignature(std::unique_ptr &rootSignature);
+	void LoadCurrentGraphicsPSO(std::unique_ptr &PipelineState,
+				    std::unique_ptr &rootSignature);
+
+	D3D12_PRIMITIVE_TOPOLOGY curToplogy;
+
+	void UpdateViewProjMatrix();
+
+	void FlushOutputViews();
+
+	void RebuildDevice();
+
+	D3D12Graphics::GraphicsContext *context = nullptr;
+
+	gs_monitor_color_info GetMonitorColorInfo(HMONITOR hMonitor);
+
+	gs_device(uint32_t adapterIdx);
+	~gs_device();
+};
+
+extern "C" EXPORT int
device_texture_acquire_sync(gs_texture_t *tex, uint64_t key, uint32_t ms);
diff --git a/libobs-d3d12/d3d12-texture2d.cpp b/libobs-d3d12/d3d12-texture2d.cpp
new file mode 100644
index 00000000000000..d5e770a18d10dd
--- /dev/null
+++ b/libobs-d3d12/d3d12-texture2d.cpp
@@ -0,0 +1,412 @@
+#include
+#include "d3d12-subsystem.hpp"
+
+void gs_texture_2d::InitSRD(std::vector &srd) // appends one D3D12_SUBRESOURCE_DATA per (face, mip level) pointing into this->data
+{
+	uint32_t rowSizeBytes = width * gs_get_format_bpp(format); // still in bits here; divided by 8 further down
+	uint32_t texSizeBytes = height * rowSizeBytes / 8;
+	size_t textures = type == GS_TEXTURE_2D ? 1 : 6;
+	uint32_t actual_levels = levels;
+	size_t curTex = 0;
+
+	if (!actual_levels) // levels == 0 means "full mip chain"
+		actual_levels = gs_get_total_levels(width, height, 1);
+
+	rowSizeBytes /= 8;
+
+	for (size_t i = 0; i < textures; i++) {
+		uint32_t newRowSize = rowSizeBytes;
+		uint32_t newTexSize = texSizeBytes;
+
+		for (uint32_t j = 0; j < actual_levels; j++) {
+			D3D12_SUBRESOURCE_DATA newSRD;
+			newSRD.pData = data[curTex++].data(); // NOTE(review): BackupTexture sizes data as levels * faces; with levels == 0 this index looks out of range - confirm
+			newSRD.RowPitch = newRowSize;
+			newSRD.SlicePitch = newTexSize;
+			srd.push_back(newSRD);
+
+			newRowSize /= 2; // next mip: half the row pitch, a quarter of the slice size
+			newTexSize /= 4;
+		}
+	}
+}
+
+void gs_texture_2d::BackupTexture(const uint8_t *const *data)
+{
+	uint32_t textures = type == GS_TEXTURE_CUBE ?
6 : 1;
+	uint32_t bbp = gs_get_format_bpp(format);
+
+	this->data.resize(levels * textures);
+
+	for (uint32_t t = 0; t < textures; t++) {
+		uint32_t w = width;
+		uint32_t h = height;
+
+		for (uint32_t lv = 0; lv < levels; lv++) {
+			uint32_t i = levels * t + lv; // flat index: all mips of face 0, then face 1, ...
+			if (!data[i]) // a null pointer ends the mip chain supplied for this face
+				break;
+
+			uint32_t texSize = bbp * w * h / 8;
+
+			std::vector &subData = this->data[i];
+			subData.resize(texSize);
+			memcpy(&subData[0], data[i], texSize);
+
+			if (w > 1) // halve each dimension per level, clamped at 1
+				w /= 2;
+			if (h > 1)
+				h /= 2;
+		}
+	}
+}
+
+void gs_texture_2d::GetSharedHandle(IDXGIResource *dxgi_res) // stores the handle in sharedHandle; on failure only logs a warning
+{
+	HANDLE handle;
+	HRESULT hr;
+
+	hr = dxgi_res->GetSharedHandle(&handle);
+	if (FAILED(hr)) {
+		blog(LOG_WARNING,
+		     "GetSharedHandle: Failed to "
+		     "get shared handle: %08lX",
+		     hr);
+	} else {
+		sharedHandle = handle;
+	}
+}
+
+void gs_texture_2d::InitTexture(const uint8_t *const *data) // (re)creates the committed resource described by the member fields
+{
+	Destroy(); // drop any previous resource and descriptors first
+
+	texDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
+	texDesc.Width = width;
+	texDesc.Height = height;
+	texDesc.DepthOrArraySize = type == GS_TEXTURE_CUBE ? 6 : 1;
+
+	texDesc.MipLevels = genMipmaps ? 0 : levels;
+
+	texDesc.Format = twoPlane ? ((format == GS_R16) ?
DXGI_FORMAT_P010 : DXGI_FORMAT_NV12) : dxgiFormatResource;
+
+	texDesc.SampleDesc.Count = 1;
+	texDesc.SampleDesc.Quality = 0;
+	texDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
+
+	D3D12_HEAP_PROPERTIES HeapProps;
+	HeapProps.Type = D3D12_HEAP_TYPE_DEFAULT;
+	HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
+	HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
+	HeapProps.CreationNodeMask = 1;
+	HeapProps.VisibleNodeMask = 1;
+
+	m_UsageState = D3D12_RESOURCE_STATE_COPY_DEST; // default initial state; render targets start in COMMON instead (below)
+	texDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+	if (isShared) {
+		texDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS;
+	}
+
+	if (isRenderTarget) {
+		m_UsageState = D3D12_RESOURCE_STATE_COMMON;
+		texDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+		texDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
+	}
+
+	if (data) { // keep a CPU copy of the pixels and describe it per subresource
+		BackupTexture(data);
+		InitSRD(srd);
+	}
+
+	if (texDesc.Format == DXGI_FORMAT_NV12 || texDesc.Format == DXGI_FORMAT_P010) { // round both dimensions up to an even value for these formats
+		texDesc.Width = (texDesc.Width + 1) & ~1;
+		texDesc.Height = (texDesc.Height + 1) & ~1;
+	}
+
+	HRESULT hr = device->d3d12Instance->GetDevice()->CreateCommittedResource(
+		&HeapProps, isShared ? D3D12_HEAP_FLAG_SHARED : D3D12_HEAP_FLAG_NONE, &texDesc, m_UsageState, nullptr,
+		IID_PPV_ARGS(&m_pResource));
+	if (FAILED(hr)) {
+		auto removeReason = device->d3d12Instance->GetDevice()->GetDeviceRemovedReason(); // S_OK when the device was not removed
+		throw HRError("Failed to create 2D texture resource", FAILED(removeReason) ? removeReason : hr); // never report S_OK for a failed creation
+	}
+
+	if (data) {
+		device->d3d12Instance->InitializeTexture(*this, (UINT)srd.size(), srd.data());
+	}
+
+	if (isDynamic) { // dynamic textures get an upload buffer sized for the subresources that were described
+		uploadBuffer = std::make_unique(device->d3d12Instance);
+		uploadBuffer->Create(L"Texture2D Upload Buffer",
+				     D3D12Graphics::GetRequiredIntermediateSize(GetResource(), 0,
+										data ?
(UINT)srd.size() : 1));
+	}
+
+	if (isShared) {
+		hr = device->d3d12Instance->GetDevice()->CreateSharedHandle(m_pResource.Get(), nullptr, GENERIC_ALL,
+									    nullptr, &sharedHandle);
+		if (FAILED(hr)) {
+			throw HRError("Create Shared Handle Failed", hr);
+		}
+
+		acquired = true;
+	}
+}
+
+void gs_texture_2d::InitResourceView(int32_t PlaneSliceCount) // creates shaderSRV (view format) and shaderLinearSRV (linear view format)
+{
+	D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {};
+	SRVDesc.Format = dxgiFormatView;
+	SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
+	if (type == GS_TEXTURE_CUBE) {
+		SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
+		SRVDesc.TextureCube.MipLevels = genMipmaps || !levels ? -1 : levels; // -1 selects all mip levels
+		SRVDesc.TextureCube.MostDetailedMip = 0;
+	} else {
+		SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
+		SRVDesc.Texture2D.MipLevels = genMipmaps || !levels ? -1 : levels;
+		SRVDesc.Texture2D.MostDetailedMip = 0;
+		SRVDesc.Texture2D.PlaneSlice = PlaneSliceCount;
+	}
+
+	if (shaderSRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) // allocate a descriptor only once; later calls reuse the slot
+		shaderSRV = device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+
+	ID3D12Resource *Resource = m_pResource.Get();
+	device->d3d12Instance->GetDevice()->CreateShaderResourceView(Resource, &SRVDesc, shaderSRV);
+
+	SRVDesc.Format = dxgiFormatViewLinear;
+
+	if (shaderLinearSRV.ptr == D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) {
+		shaderLinearSRV = device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+	}
+
+	device->d3d12Instance->GetDevice()->CreateShaderResourceView(Resource, &SRVDesc, shaderLinearSRV);
+}
+
+void gs_texture_2d::InitRenderTargets(int32_t PlaneSliceCount) // creates one view-format and one linear RTV per face (a single pair for 2D textures)
+{
+	D3D12_RENDER_TARGET_VIEW_DESC RTVDesc = {};
+	if (type == GS_TEXTURE_2D) {
+		RTVDesc.Format = dxgiFormatView;
+		RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
+		RTVDesc.Texture2D.MipSlice = 0;
+		RTVDesc.Texture2D.PlaneSlice = PlaneSliceCount;
+
+		renderTargetRTV[0] = device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+		device->d3d12Instance->GetDevice()->CreateRenderTargetView(GetResource(), &RTVDesc, renderTargetRTV[0]);
+
+		RTVDesc.Format = dxgiFormatViewLinear;
+		renderTargetLinearRTV[0] = device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+		device->d3d12Instance->GetDevice()->CreateRenderTargetView(GetResource(), &RTVDesc,
+									   renderTargetLinearRTV[0]);
+	} else {
+		RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; // each face is addressed as a one-slice array view
+		RTVDesc.Texture2DArray.MipSlice = 0;
+		RTVDesc.Texture2DArray.ArraySize = 1;
+
+		for (UINT i = 0; i < 6; i++) {
+			RTVDesc.Format = dxgiFormatView; // reset every iteration: the linear format is written further down
+			RTVDesc.Texture2DArray.FirstArraySlice = i;
+			renderTargetRTV[i] = device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); // slot i, not slot 0, so every face keeps its own view
+			device->d3d12Instance->GetDevice()->CreateRenderTargetView(GetResource(), &RTVDesc,
+										   renderTargetRTV[i]);
+
+			RTVDesc.Format = dxgiFormatViewLinear;
+			renderTargetLinearRTV[i] =
+				device->d3d12Instance->AllocateDescriptor(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+			device->d3d12Instance->GetDevice()->CreateRenderTargetView(GetResource(), &RTVDesc,
+										   renderTargetLinearRTV[i]);
+		}
+	}
+}
+
+void gs_texture_2d::InitUAV() {} // intentionally empty for now
+
+UINT gs_texture_2d::GetLineSize() // row pitch of subresource 0 as reported by GetCopyableFootprints
+{
+	D3D12_PLACED_SUBRESOURCE_FOOTPRINT placedTextureDesc;
+
+	auto textureDesc = m_pResource->GetDesc();
+	UINT NumRows;
+	UINT64 RowLength;
+	UINT64 TotalBytes;
+	device->d3d12Instance->GetDevice()->GetCopyableFootprints(&textureDesc, 0, 1, 0, &placedTextureDesc, &NumRows,
+								  &RowLength, &TotalBytes);
+	return placedTextureDesc.Footprint.RowPitch;
+}
+
+void gs_texture_2d::CreateTargetFromSwapChain(gs_device_t *device_, IDXGISwapChain *swap, int32_t bufferIndex,
+					      enum gs_color_format resFormat, const gs_init_data &initData)
+{
+	device = device_;
+	isRenderTarget = true;
+	format = initData.format;
+	dxgiFormatResource = ConvertGSTextureFormatResource(resFormat);
+	dxgiFormatView = ConvertGSTextureFormatView(resFormat);
+	dxgiFormatViewLinear = ConvertGSTextureFormatViewLinear(resFormat);
+
+
width = initData.cx;
+	height = initData.cy;
+
+	HRESULT hr = swap->GetBuffer(bufferIndex, IID_PPV_ARGS(&m_pResource)); // wrap the swap chain's back buffer rather than creating a resource
+	if (FAILED(hr)) {
+		throw HRError("Failed to get swap chain buffer", hr);
+	}
+
+	std::wstring bufferName = std::to_wstring(bufferIndex) + L" - Swap Chain Back Buffer";
+	m_pResource->SetName(bufferName.c_str());
+	InitRenderTargets();
+}
+
+#define SHARED_FLAGS (GS_SHARED_TEX | GS_SHARED_KM_TEX)
+
+gs_texture_2d::gs_texture_2d(gs_device_t *device, uint32_t width, uint32_t height, gs_color_format colorFormat, // general-purpose constructor: creates the resource and its views
+			     uint32_t levels, const uint8_t *const *data, uint32_t flags_, gs_texture_type type,
+			     bool gdiCompatible, bool twoPlane_)
+	: gs_texture(device, gs_type::gs_texture_2d, type, levels, colorFormat),
+	  width(width),
+	  height(height),
+	  flags(flags_),
+	  dxgiFormatResource(ConvertGSTextureFormatResource(format)),
+	  dxgiFormatView(ConvertGSTextureFormatView(format)),
+	  dxgiFormatViewLinear(ConvertGSTextureFormatViewLinear(format)),
+	  isRenderTarget((flags_ & GS_RENDER_TARGET) != 0),
+	  isGDICompatible(gdiCompatible),
+	  isDynamic((flags_ & GS_DYNAMIC) != 0),
+	  isShared((flags_ & SHARED_FLAGS) != 0),
+	  genMipmaps((flags_ & GS_BUILD_MIPMAPS) != 0),
+	  sharedHandle(NULL),
+	  twoPlane(twoPlane_)
+{
+	InitTexture(data);
+	InitResourceView();
+
+	if (isRenderTarget) {
+		InitRenderTargets();
+		InitUAV();
+	}
+}
+
+gs_texture_2d::gs_texture_2d(gs_device_t *device, ID3D12Resource *nv12tex, uint32_t flags_) // wraps plane slice 1 (chroma) of an existing NV12/P010 resource
+	: gs_texture(device, gs_type::gs_texture_2d, GS_TEXTURE_2D),
+	  isRenderTarget((flags_ & GS_RENDER_TARGET) != 0),
+	  isDynamic((flags_ & GS_DYNAMIC) != 0),
+	  isShared((flags_ & SHARED_FLAGS) != 0),
+	  genMipmaps((flags_ & GS_BUILD_MIPMAPS) != 0),
+	  twoPlane(true)
+{
+	m_pResource = nv12tex;
+	m_UsageState = D3D12_RESOURCE_STATE_COMMON;
+	texDesc = nv12tex->GetDesc();
+
+	const bool p010 = texDesc.Format == DXGI_FORMAT_P010;
+	const DXGI_FORMAT dxgi_format = p010 ?
DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM;
+
+	this->type = GS_TEXTURE_2D;
+	this->format = p010 ? GS_RG16 : GS_R8G8;
+	this->flags = flags_;
+	this->levels = 1;
+	this->device = device;
+	this->chroma = true;
+	this->width = (uint32_t)texDesc.Width / 2; // the chroma view is half the resource size in each dimension
+	this->height = (uint32_t)texDesc.Height / 2;
+	this->dxgiFormatResource = dxgi_format;
+	this->dxgiFormatView = dxgi_format;
+	this->dxgiFormatViewLinear = dxgi_format;
+
+	InitResourceView(1); // plane slice 1
+	if (isRenderTarget) {
+		InitRenderTargets(1);
+		InitUAV();
+	}
+}
+
+gs_texture_2d::gs_texture_2d(gs_device_t *device, uint32_t handle, bool ntHandle) // opens a texture shared through a 32-bit handle value
+	: gs_texture(device, gs_type::gs_texture_2d, GS_TEXTURE_2D),
+	  isShared(true),
+	  sharedHandle((HANDLE)handle)
+{
+	(void)ntHandle; // currently unused
+
+	HRESULT hr = device->d3d12Instance->GetDevice()->OpenSharedHandle((HANDLE)(uintptr_t)handle,
+									  IID_PPV_ARGS(&m_pResource));
+	if (FAILED(hr))
+		throw HRError("Failed to open shared 2D texture", hr);
+
+	texDesc = m_pResource->GetDesc();
+
+	const gs_color_format format = ConvertDXGITextureFormat(texDesc.Format); // local shadows the member; the member is assigned from it below
+
+	this->type = GS_TEXTURE_2D;
+	this->format = format;
+	this->levels = 1;
+	this->device = device;
+
+	this->width = (uint32_t)texDesc.Width;
+	this->height = (uint32_t)texDesc.Height;
+	this->dxgiFormatResource = ConvertGSTextureFormatResource(format);
+	this->dxgiFormatView = ConvertGSTextureFormatView(format);
+	this->dxgiFormatViewLinear = ConvertGSTextureFormatViewLinear(format);
+
+	InitResourceView();
+	if (isRenderTarget) { // NOTE(review): isRenderTarget is never set in this constructor, so this branch looks unreachable - confirm
+		InitRenderTargets(1);
+		InitUAV();
+	}
+}
+
+gs_texture_2d::gs_texture_2d(gs_device_t *device, ID3D12Resource *obj) // wraps an existing resource as a single-level 2D texture
+	: gs_texture(device, gs_type::gs_texture_2d, GS_TEXTURE_2D)
+{
+	m_pResource = obj;
+
+	texDesc = m_pResource->GetDesc();
+
+	const gs_color_format format = ConvertDXGITextureFormat(texDesc.Format);
+
+	this->type = GS_TEXTURE_2D;
+	this->format = format;
+	this->levels = 1;
+	this->device = device;
+
+	this->width = (uint32_t)texDesc.Width;
+	this->height =
(uint32_t)texDesc.Height;
+	this->dxgiFormatResource = ConvertGSTextureFormatResource(format);
+	this->dxgiFormatView = ConvertGSTextureFormatView(format);
+	this->dxgiFormatViewLinear = ConvertGSTextureFormatViewLinear(format);
+
+	InitResourceView();
+	if (isRenderTarget) { // NOTE(review): isRenderTarget keeps its default (false) in this constructor - confirm the branch is intended
+		InitRenderTargets(1);
+		InitUAV();
+	}
+}
+
+gs_texture_2d::~gs_texture_2d()
+{
+	Destroy();
+}
+
+void gs_texture_2d::Destroy() // waits for the GPU, then drops the upload buffer, descriptor handles and the resource
+{
+	if (device) { // default-constructed textures have no device
+		device->d3d12Instance->GetCommandManager().IdleGPU();
+	}
+
+	uploadBuffer.reset();
+	if (shaderSRV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) { // NOTE(review): the handle is only reset to the sentinel here; no free call is visible - confirm the allocator reclaims it
+		shaderSRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+	}
+
+	if (shaderLinearSRV.ptr != D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN) {
+		shaderLinearSRV.ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+	}
+
+	for (size_t i = 0; i < 6; ++i) {
+		renderTargetRTV[i].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+		renderTargetLinearRTV[i].ptr = D3D12_GPU_VIRTUAL_ADDRESS_UNKNOWN;
+	}
+	GpuResource::Destroy();
+}
diff --git a/libobs-d3d12/d3d12-texture3d.cpp b/libobs-d3d12/d3d12-texture3d.cpp
new file mode 100644
index 00000000000000..8d5974851a37f7
--- /dev/null
+++ b/libobs-d3d12/d3d12-texture3d.cpp
@@ -0,0 +1,102 @@
+#include
+#include "d3d12-subsystem.hpp"
+
+void gs_texture_3d::InitSRD(std::vector &srd) // appends one D3D12_SUBRESOURCE_DATA per mip level pointing into this->data
+{
+	uint32_t rowSizeBits = width * gs_get_format_bpp(format);
+	uint32_t sliceSizeBytes = height * rowSizeBits / 8;
+	uint32_t actual_levels = levels;
+
+	if (!actual_levels) // levels == 0 means "full mip chain"
+		actual_levels = gs_get_total_levels(width, height, depth);
+
+	uint32_t newRowSize = rowSizeBits / 8;
+	uint32_t newSlizeSize = sliceSizeBytes;
+
+	for (uint32_t level = 0; level < actual_levels; ++level) {
+		D3D12_SUBRESOURCE_DATA newSRD;
+		newSRD.pData = data[level].data();
+		newSRD.RowPitch = newRowSize;
+		newSRD.SlicePitch = newSlizeSize;
+		srd.push_back(newSRD);
+
+		newRowSize /= 2; // next mip: half the row pitch, a quarter of the slice size
+		newSlizeSize /= 4;
+	}
+}
+
+void gs_texture_3d::BackupTexture(const uint8_t *const *data) // copies the caller's per-level pixel data into this->data
+{
+	this->data.resize(levels);
+
+	uint32_t w = width;
+	uint32_t h =
height;
+	uint32_t d = depth;
+	const uint32_t bbp = gs_get_format_bpp(format);
+
+	for (uint32_t i = 0; i < levels; i++) {
+		if (!data[i]) // a null pointer ends the supplied mip chain
+			break;
+
+		const uint32_t texSize = bbp * w * h * d / 8;
+		this->data[i].resize(texSize);
+
+		std::vector &subData = this->data[i];
+		memcpy(&subData[0], data[i], texSize);
+
+		if (w > 1) // halve each dimension per level, clamped at 1
+			w /= 2;
+		if (h > 1)
+			h /= 2;
+		if (d > 1)
+			d /= 2;
+	}
+}
+
+void gs_texture_3d::GetSharedHandle(IDXGIResource *dxgi_res) // stores the handle truncated to 32 bits; on failure only logs a warning
+{
+	HANDLE handle;
+	HRESULT hr;
+
+	hr = dxgi_res->GetSharedHandle(&handle);
+	if (FAILED(hr)) {
+		blog(LOG_WARNING,
+		     "GetSharedHandle: Failed to "
+		     "get shared handle: %08lX",
+		     hr);
+	} else {
+		sharedHandle = (uint32_t)(uintptr_t)handle;
+	}
+}
+
+void gs_texture_3d::InitTexture(const uint8_t *const *data) {} // empty stub: no GPU resource is created for 3D textures here
+
+void gs_texture_3d::InitResourceView() {} // empty stub
+
+#define SHARED_FLAGS (GS_SHARED_TEX | GS_SHARED_KM_TEX)
+
+gs_texture_3d::gs_texture_3d(gs_device_t *device, uint32_t width, uint32_t height, uint32_t depth,
+			     gs_color_format colorFormat, uint32_t levels, const uint8_t *const *data, uint32_t flags_)
+	: gs_texture(device, gs_type::gs_texture_3d, GS_TEXTURE_3D, levels, colorFormat),
+	  width(width),
+	  height(height),
+	  depth(depth),
+	  flags(flags_),
+	  dxgiFormatResource(ConvertGSTextureFormatResource(format)),
+	  dxgiFormatView(ConvertGSTextureFormatView(format)),
+	  dxgiFormatViewLinear(ConvertGSTextureFormatViewLinear(format)),
+	  isDynamic((flags_ & GS_DYNAMIC) != 0),
+	  isShared((flags_ & SHARED_FLAGS) != 0),
+	  genMipmaps((flags_ & GS_BUILD_MIPMAPS) != 0),
+	  sharedHandle(GS_INVALID_HANDLE)
+{
+	InitTexture(data); // both calls are currently no-ops (see the stubs above)
+	InitResourceView();
+}
+
+gs_texture_3d::gs_texture_3d(gs_device_t *device, uint32_t handle) // only records the handle; nothing is opened
+	: gs_texture(device, gs_type::gs_texture_3d, GS_TEXTURE_3D),
+	  isShared(true),
+	  sharedHandle(handle)
+{
+}
diff --git a/libobs-d3d12/d3d12-vertexbuffer.cpp b/libobs-d3d12/d3d12-vertexbuffer.cpp
new file mode 100644
index 00000000000000..33ae6fc6af709e
--- /dev/null
+++
b/libobs-d3d12/d3d12-vertexbuffer.cpp
@@ -0,0 +1,137 @@
+#include
+#include
+#include "d3d12-subsystem.hpp"
+
+static inline void PushBuffer(UINT *refNumBuffers, D3D12Graphics::GpuBuffer **buffers, uint32_t *strides, // appends buffer/stride at *refNumBuffers, or logs when buffer is null
+			      D3D12Graphics::GpuBuffer *buffer, size_t elementSize, const char *name)
+{
+	const UINT numBuffers = *refNumBuffers;
+	if (buffer) {
+		buffers[numBuffers] = buffer;
+		strides[numBuffers] = (uint32_t)elementSize;
+		*refNumBuffers = numBuffers + 1;
+	} else {
+		blog(LOG_ERROR, "This vertex shader requires a %s buffer", name);
+	}
+}
+
+void gs_vertex_buffer::FlushBuffer(D3D12Graphics::GpuBuffer *buffer, void *array, size_t elementSize) // writes elementSize * vbd.data->num bytes from array into buffer at offset 0
+{
+	device->context->WriteBuffer(*buffer, 0, array, elementSize * vbd.data->num);
+}
+
+UINT gs_vertex_buffer::MakeBufferList(gs_vertex_shader *shader, D3D12Graphics::GpuBuffer **buffers, uint32_t *strides) // returns the number of entries written
+{
+	UINT numBuffers = 0;
+	PushBuffer(&numBuffers, buffers, strides, vertexBuffer, sizeof(vec3), "point"); // order: points, normals, colors, tangents, then UV sets
+
+	if (shader->hasNormals)
+		PushBuffer(&numBuffers, buffers, strides, normalBuffer, sizeof(vec3), "normal");
+	if (shader->hasColors)
+		PushBuffer(&numBuffers, buffers, strides, colorBuffer, sizeof(uint32_t), "color");
+	if (shader->hasTangents)
+		PushBuffer(&numBuffers, buffers, strides, tangentBuffer, sizeof(vec3), "tangent");
+	if (shader->nTexUnits <= uvBuffers.size()) { // UV buffers are added only when this vertex buffer has at least as many as the shader uses
+		for (size_t i = 0; i < shader->nTexUnits; i++) {
+			buffers[numBuffers] = uvBuffers[i];
+			strides[numBuffers] = (uint32_t)uvSizes[i];
+			++numBuffers;
+		}
+	} else {
+		blog(LOG_ERROR,
+		     "This vertex shader requires at least %u "
+		     "texture buffers.",
+		     (uint32_t)shader->nTexUnits);
+	}
+
+	return numBuffers;
+}
+
+void gs_vertex_buffer::InitBuffer(const size_t elementSize, const size_t numVerts, void *array, // allocates a ByteAddressBuffer filled from array and returns it through *buffer
+				  D3D12Graphics::GpuBuffer **buffer)
+{
+	D3D12Graphics::GpuBuffer *byteBuffer = new D3D12Graphics::ByteAddressBuffer(device->d3d12Instance);
+	byteBuffer->Create(L"Vertex Buffer", (uint32_t)numVerts, (uint32_t)elementSize, array);
+
+	*buffer = byteBuffer; // ownership passes to the caller before the validity check below
+	if (byteBuffer->GetResource() == nullptr) { // NOTE(review): when this throws from the constructor path, the buffers allocated so far are not deleted - confirm
+		throw HRError("Failed to create buffer", -1);
+	}
+}
+
+void gs_vertex_buffer::BuildBuffers() // creates one GPU buffer per attribute array present in vbd.data
+{
+	InitBuffer(sizeof(vec3), vbd.data->num, vbd.data->points, &vertexBuffer);
+
+	if (vbd.data->normals)
+		InitBuffer(sizeof(vec3), vbd.data->num, vbd.data->normals, &normalBuffer);
+
+	if (vbd.data->tangents)
+		InitBuffer(sizeof(vec3), vbd.data->num, vbd.data->tangents, &tangentBuffer);
+
+	if (vbd.data->colors)
+		InitBuffer(sizeof(uint32_t), vbd.data->num, vbd.data->colors, &colorBuffer);
+
+	for (size_t i = 0; i < vbd.data->num_tex; i++) {
+		struct gs_tvertarray *tverts = vbd.data->tvarray + i;
+
+		if (tverts->width != 2 && tverts->width != 4) // only 2- and 4-float texture coordinates are accepted
+			throw "Invalid texture vertex size specified";
+		if (!tverts->array)
+			throw "No texture vertices specified";
+
+		D3D12Graphics::GpuBuffer *buffer;
+		InitBuffer(tverts->width * sizeof(float), vbd.data->num, tverts->array, &buffer);
+
+		uvBuffers.push_back(buffer);
+		uvSizes.push_back(tverts->width * sizeof(float));
+	}
+}
+
+gs_vertex_buffer::~gs_vertex_buffer()
+{
+	Release();
+}
+
+void gs_vertex_buffer::Release() // waits for the GPU to go idle, then deletes every owned buffer
+{
+	device->d3d12Instance->GetCommandManager().IdleGPU();
+	if (vertexBuffer) {
+		delete vertexBuffer;
+		vertexBuffer = nullptr;
+	}
+	if (normalBuffer) {
+		delete normalBuffer;
+		normalBuffer = nullptr;
+	}
+	if (colorBuffer) {
+		delete colorBuffer;
+		colorBuffer = nullptr;
+	}
+	if (tangentBuffer) {
+		delete tangentBuffer;
+		tangentBuffer = nullptr;
+	}
+
+	for (auto buf : uvBuffers) {
+		if (buf) {
+			delete buf;
+		}
+	}
+
+	uvBuffers.clear(); // uvSizes is left untouched here
+}
+
+gs_vertex_buffer::gs_vertex_buffer(gs_device_t *device, struct gs_vb_data *data, uint32_t flags) // throws a string literal when there are no vertices or no points
+	: gs_obj(device, gs_type::gs_vertex_buffer),
+	  dynamic((flags & GS_DYNAMIC) != 0),
+	  vbd(data),
+	  numVerts(data->num)
+{
+	if (!data->num)
+		throw "Cannot initialize vertex buffer with 0 vertices";
+	if (!data->points)
+		throw "No points specified for vertex buffer";
+
+	BuildBuffers();
+}
diff --git
a/libobs-d3d12/d3d12-zstencilbuffer.cpp b/libobs-d3d12/d3d12-zstencilbuffer.cpp new file mode 100644 index 00000000000000..b976b69b099631 --- /dev/null +++ b/libobs-d3d12/d3d12-zstencilbuffer.cpp @@ -0,0 +1,9 @@ +#include "d3d12-subsystem.hpp" + +gs_zstencil_buffer::gs_zstencil_buffer(gs_device_t *device, uint32_t width, uint32_t height, gs_zstencil_format format) + : gs_obj(device, gs_type::gs_zstencil_buffer), + DepthBuffer(device->d3d12Instance), + format(format) +{ + Create(L"zstencil buffer", width, height, ConvertGSZStencilFormat(format)); +} diff --git a/libobs-metal/metal-unimplemented.swift b/libobs-metal/metal-unimplemented.swift index 35154fc857118b..4e47d08e54d874 100644 --- a/libobs-metal/metal-unimplemented.swift +++ b/libobs-metal/metal-unimplemented.swift @@ -95,3 +95,8 @@ public func device_set_cube_render_target( ) { return } + +@_cdecl("device_end_frame") +public func device_end_frame(device: UnsafeRawPointer) { + return +} diff --git a/libobs-opengl/gl-subsystem.c b/libobs-opengl/gl-subsystem.c index 30d961cf3700d5..a0e6cdd60a0d4d 100644 --- a/libobs-opengl/gl-subsystem.c +++ b/libobs-opengl/gl-subsystem.c @@ -1015,6 +1015,12 @@ void device_begin_frame(gs_device_t *device) UNUSED_PARAMETER(device); } +void device_end_frame(gs_device_t *device) +{ + /* does nothing */ + UNUSED_PARAMETER(device); +} + void device_begin_scene(gs_device_t *device) { clear_textures(device); diff --git a/libobs-winrt/winrt-capture.cpp b/libobs-winrt/winrt-capture.cpp index a31414f47c9942..e075f68a7574de 100644 --- a/libobs-winrt/winrt-capture.cpp +++ b/libobs-winrt/winrt-capture.cpp @@ -99,6 +99,15 @@ struct winrt_capture { bool capture_cursor; BOOL cursor_visible; + // D3D12 WGC not support, use D3D11 + int32_t deviceType = GS_DEVICE_DIRECT3D_11; + ComPtr factory; + ComPtr adapter; + ComPtr device11; + ComPtr km; + ComPtr texShared; + HANDLE texSharedHandle = NULL; + gs_texture_t *texture; bool texture_written; winrt::Windows::Graphics::Capture::GraphicsCaptureItem 
item{nullptr}; @@ -123,6 +132,59 @@ struct winrt_capture { active = FALSE; } + // D3D11 and D3D12 shared resource + HANDLE GetSharedHandle(IDXGIResource *dxgi_res) + { + HANDLE handle; + HRESULT hr; + + hr = dxgi_res->GetSharedHandle(&handle); + if (FAILED(hr)) { + blog(LOG_WARNING, + "GetSharedHandle: Failed to " + "get shared handle: %08lX", + hr); + return nullptr; + } else { + return handle; + } + } + + gs_texture_t *CreateSharedTexture(D3D11_TEXTURE2D_DESC &desc) + { + desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + HRESULT hr = device11->CreateTexture2D(&desc, NULL, texShared.Assign()); + + if (FAILED(hr)) { + blog(LOG_WARNING, + "CreateSharedTexture: Failed to " + "create shared texture: %08lX", + hr); + return nullptr; + } + ComPtr dxgi_res; + + texShared->SetEvictionPriority(DXGI_RESOURCE_PRIORITY_MAXIMUM); + + hr = texShared->QueryInterface(__uuidof(IDXGIResource), (void **)&dxgi_res); + if (FAILED(hr)) { + blog(LOG_WARNING, + "InitTexture: Failed to query " + "interface: %08lX", + hr); + } else { + texSharedHandle = GetSharedHandle(dxgi_res); + + hr = texShared->QueryInterface(__uuidof(IDXGIKeyedMutex), (void **)&km); + if (FAILED(hr)) { + return nullptr; + } + } + uint64_t shared_handle = reinterpret_cast(texSharedHandle); + return gs_texture_open_nt_shared((uint32_t)shared_handle); + } + void on_frame_arrived(winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const &sender, winrt::Windows::Foundation::IInspectable const &) { @@ -159,17 +221,36 @@ struct winrt_capture { if (!texture) { const gs_color_format color_format = desc.Format == DXGI_FORMAT_R16G16B16A16_FLOAT ? 
GS_RGBA16F : GS_BGRA; - texture = gs_texture_create(texture_width, texture_height, color_format, 1, - NULL, 0); + + if (deviceType == GS_DEVICE_DIRECT3D_11) { + texture = gs_texture_create(texture_width, texture_height, color_format, + 1, NULL, 0); + } else { + texture = CreateSharedTexture(desc); + } } - if (client_area) { - context->CopySubresourceRegion((ID3D11Texture2D *)gs_texture_get_obj(texture), - 0, 0, 0, 0, frame_surface.get(), 0, &client_box); + ID3D11Texture2D *dst = deviceType == GS_DEVICE_DIRECT3D_11 + ? (ID3D11Texture2D *)gs_texture_get_obj(texture) + : texShared.Get(); + if (deviceType == GS_DEVICE_DIRECT3D_11) { + if (client_area) { + context->CopySubresourceRegion(dst, 0, 0, 0, 0, frame_surface.get(), 0, + &client_box); + } else { + /* if they gave an SRV, we could avoid this copy */ + context->CopyResource(dst, frame_surface.get()); + } } else { - /* if they gave an SRV, we could avoid this copy */ - context->CopyResource((ID3D11Texture2D *)gs_texture_get_obj(texture), - frame_surface.get()); + km->AcquireSync(0, INFINITE); + if (client_area) { + context->CopySubresourceRegion(dst, 0, 0, 0, 0, frame_surface.get(), 0, + &client_box); + } else { + /* if they gave an SRV, we could avoid this copy */ + context->CopyResource(dst, frame_surface.get()); + } + km->ReleaseSync(0); } texture_written = true; @@ -281,6 +362,10 @@ static void winrt_capture_device_loss_rebuild(void *device_void, void *data) { winrt_capture *capture = static_cast(data); + if (capture->deviceType == GS_DEVICE_DIRECT3D_12) { + return; + } + auto activation_factory = winrt::get_activation_factory(); auto interop_factory = activation_factory.as(); @@ -337,10 +422,36 @@ static void winrt_capture_device_loss_rebuild(void *device_void, void *data) static struct winrt_capture *winrt_capture_init_internal(BOOL cursor, HWND window, BOOL client_area, BOOL force_sdr, HMONITOR monitor) try { - ID3D11Device *const d3d_device = (ID3D11Device *)gs_get_device_obj(); ComPtr dxgi_device; - 
HRESULT hr = d3d_device->QueryInterface(&dxgi_device); + ComPtr factory; + ComPtr adapter; + ComPtr device11; + ComPtr context11; + + if (gs_get_device_type() == GS_DEVICE_DIRECT3D_11) { + device11 = (ID3D11Device *)gs_get_device_obj(); + } else { + HRESULT hr; + + hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) { + return nullptr; + } + + hr = factory->EnumAdapters1(0, &adapter); + if (FAILED(hr)) { + return nullptr; + } + + hr = D3D11CreateDevice(adapter.Get(), D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0, D3D11_SDK_VERSION, + device11.Assign(), NULL, context11.Assign()); + if (FAILED(hr)) { + return nullptr; + } + } + + HRESULT hr = device11->QueryInterface(&dxgi_device); if (FAILED(hr)) { blog(LOG_ERROR, "Failed to get DXGI device"); return nullptr; @@ -390,9 +501,15 @@ try { capture->format = format; capture->capture_cursor = cursor && cursor_toggle_supported; capture->cursor_visible = cursor; + + capture->deviceType = gs_get_device_type(); + capture->factory = factory; + capture->adapter = adapter; + capture->device11 = device11; + capture->item = item; capture->device = device; - d3d_device->GetImmediateContext(&capture->context); + device11->GetImmediateContext(&capture->context); capture->frame_pool = frame_pool; capture->session = session; capture->last_size = size; diff --git a/libobs/graphics/device-exports.h b/libobs/graphics/device-exports.h index 782b623345dd09..a84433776b82db 100644 --- a/libobs/graphics/device-exports.h +++ b/libobs/graphics/device-exports.h @@ -92,6 +92,7 @@ EXPORT void device_copy_texture_region(gs_device_t *device, gs_texture_t *dst, u uint32_t src_h); EXPORT void device_stage_texture(gs_device_t *device, gs_stagesurf_t *dst, gs_texture_t *src); EXPORT void device_begin_frame(gs_device_t *device); +EXPORT void device_end_frame(gs_device_t *device); EXPORT void device_begin_scene(gs_device_t *device); EXPORT void device_draw(gs_device_t *device, enum gs_draw_mode draw_mode, uint32_t start_vert, uint32_t 
num_verts); EXPORT void device_end_scene(gs_device_t *device); diff --git a/libobs/graphics/graphics-imports.c b/libobs/graphics/graphics-imports.c index 6a4a246ef91f0b..a9f71a70d54466 100644 --- a/libobs/graphics/graphics-imports.c +++ b/libobs/graphics/graphics-imports.c @@ -94,6 +94,7 @@ bool load_graphics_imports(struct gs_exports *exports, void *module, const char GRAPHICS_IMPORT(device_copy_texture); GRAPHICS_IMPORT(device_stage_texture); GRAPHICS_IMPORT(device_begin_frame); + GRAPHICS_IMPORT(device_end_frame); GRAPHICS_IMPORT(device_begin_scene); GRAPHICS_IMPORT(device_draw); GRAPHICS_IMPORT(device_load_swapchain); diff --git a/libobs/graphics/graphics-internal.h b/libobs/graphics/graphics-internal.h index 88423ad27cfa42..ce9a116b9cdf72 100644 --- a/libobs/graphics/graphics-internal.h +++ b/libobs/graphics/graphics-internal.h @@ -96,6 +96,7 @@ struct gs_exports { uint32_t src_h); void (*device_stage_texture)(gs_device_t *device, gs_stagesurf_t *dst, gs_texture_t *src); void (*device_begin_frame)(gs_device_t *device); + void (*device_end_frame)(gs_device_t *device); void (*device_begin_scene)(gs_device_t *device); void (*device_draw)(gs_device_t *device, enum gs_draw_mode draw_mode, uint32_t start_vert, uint32_t num_verts); void (*device_end_scene)(gs_device_t *device); diff --git a/libobs/graphics/graphics.c b/libobs/graphics/graphics.c index 00abe2ea43bb12..0c95739e25600c 100644 --- a/libobs/graphics/graphics.c +++ b/libobs/graphics/graphics.c @@ -1919,6 +1919,16 @@ void gs_begin_frame(void) graphics->exports.device_begin_frame(graphics->device); } +void gs_end_frame(void) +{ + graphics_t *graphics = thread_graphics; + + if (!gs_valid("gs_begin_frame")) + return; + + graphics->exports.device_end_frame(graphics->device); +} + void gs_begin_scene(void) { graphics_t *graphics = thread_graphics; diff --git a/libobs/graphics/graphics.h b/libobs/graphics/graphics.h index 2b6c515119188c..98b21161609810 100644 --- a/libobs/graphics/graphics.h +++ 
b/libobs/graphics/graphics.h @@ -501,6 +501,7 @@ struct gs_init_data { #define GS_DEVICE_OPENGL 1 #define GS_DEVICE_DIRECT3D_11 2 #define GS_DEVICE_METAL 3 +#define GS_DEVICE_DIRECT3D_12 4 EXPORT const char *gs_get_device_name(void); EXPORT const char *gs_get_driver_version(void); @@ -680,6 +681,7 @@ EXPORT void gs_copy_texture_region(gs_texture_t *dst, uint32_t dst_x, uint32_t d EXPORT void gs_stage_texture(gs_stagesurf_t *dst, gs_texture_t *src); EXPORT void gs_begin_frame(void); +EXPORT void gs_end_frame(void); EXPORT void gs_begin_scene(void); EXPORT void gs_draw(enum gs_draw_mode draw_mode, uint32_t start_vert, uint32_t num_verts); EXPORT void gs_end_scene(void); diff --git a/libobs/obs-video-gpu-encode.c b/libobs/obs-video-gpu-encode.c index 97756219413b7c..1529cdacad8980 100644 --- a/libobs/obs-video-gpu-encode.c +++ b/libobs/obs-video-gpu-encode.c @@ -183,6 +183,10 @@ static void *gpu_encode_thread(void *data) ept->fer = fer_ts; } + if (!success) { + os_event_signal(video->gpu_encode_inactive); + } + send_off_encoder_packet(encoder, success, received, &pkt); lock_key = next_key; diff --git a/libobs/obs-video.c b/libobs/obs-video.c index 55a63ef5651656..89bc6a161620e2 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -1053,6 +1053,8 @@ static void uninit_winrt_state(struct winrt_state *winrt) static const char *tick_sources_name = "tick_sources"; static const char *render_displays_name = "render_displays"; static const char *output_frame_name = "output_frame"; +static const char *gs_begin_frame_name = "gs_begin_frame"; +static const char *gs_end_frame_name = "gs_end_frame"; static inline void update_active_state(struct obs_core_video_mix *video) { const bool raw_was_active = video->raw_was_active; @@ -1107,7 +1109,9 @@ bool obs_graphics_thread_loop(struct obs_graphics_context *context) source_profiler_frame_begin(); gs_enter_context(obs->video.graphics); + profile_start(gs_begin_frame_name); gs_begin_frame(); + profile_end(gs_begin_frame_name); 
gs_leave_context(); profile_start(tick_sources_name); @@ -1127,6 +1131,12 @@ bool obs_graphics_thread_loop(struct obs_graphics_context *context) output_frames(); profile_end(output_frame_name); + gs_enter_context(obs->video.graphics); + profile_start(gs_end_frame_name); + gs_end_frame(); + profile_end(gs_end_frame_name); + gs_leave_context(); + profile_start(render_displays_name); render_displays(); profile_end(render_displays_name); diff --git a/libobs/util/windows/HRError.hpp b/libobs/util/windows/HRError.hpp index 0f343799a76285..a9b7e2a5bd9b89 100644 --- a/libobs/util/windows/HRError.hpp +++ b/libobs/util/windows/HRError.hpp @@ -20,5 +20,17 @@ struct HRError { const char *str; HRESULT hr; - inline HRError(const char *str, HRESULT hr) : str(str), hr(hr) {} + inline HRError(const char *str, HRESULT hr) : str(str), hr(hr) + { +#if 0 + __debugbreak(); +#endif + } + inline HRError(const char *str) : str(str) + { + +#if 0 + __debugbreak(); +#endif + } }; diff --git a/plugins/obs-nvenc/CMakeLists.txt b/plugins/obs-nvenc/CMakeLists.txt index 93053c1edb9e13..4951690ab94564 100644 --- a/plugins/obs-nvenc/CMakeLists.txt +++ b/plugins/obs-nvenc/CMakeLists.txt @@ -30,6 +30,7 @@ target_sources( PRIVATE $<$:nvenc-opengl.c> $<$:nvenc-d3d11.c> + $<$:nvenc-d3d12.c> cuda-helpers.c cuda-helpers.h nvenc-compat.c diff --git a/plugins/obs-nvenc/nvenc-d3d11.c b/plugins/obs-nvenc/nvenc-d3d11.c index cb5532ab7df867..6c1a222ac58249 100644 --- a/plugins/obs-nvenc/nvenc-d3d11.c +++ b/plugins/obs-nvenc/nvenc-d3d11.c @@ -67,7 +67,7 @@ bool d3d11_init(struct nvenc_data *enc, obs_data_t *settings) return false; } - enc->device = device; + enc->device11 = device; enc->context = context; return true; } @@ -83,8 +83,8 @@ void d3d11_free(struct nvenc_data *enc) if (enc->context) { enc->context->lpVtbl->Release(enc->context); } - if (enc->device) { - enc->device->lpVtbl->Release(enc->device); + if (enc->device11) { + enc->device11->lpVtbl->Release(enc->device11); } } @@ -104,7 +104,7 @@ static bool 
d3d11_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex) desc.SampleDesc.Count = 1; desc.BindFlags = D3D11_BIND_RENDER_TARGET; - ID3D11Device *const device = enc->device; + ID3D11Device *const device = enc->device11; ID3D11Texture2D *tex; HRESULT hr = device->lpVtbl->CreateTexture2D(device, &desc, NULL, &tex); if (FAILED(hr)) { @@ -156,7 +156,8 @@ static void d3d11_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex) nv.nvEncUnmapInputResource(enc->session, nvtex->mapped_res); } nv.nvEncUnregisterResource(enc->session, nvtex->res); - nvtex->tex->lpVtbl->Release(nvtex->tex); + ID3D11Texture2D *tex11 = (ID3D11Texture2D *)(nvtex->tex); + tex11->lpVtbl->Release(tex11); } } @@ -172,7 +173,7 @@ void d3d11_free_textures(struct nvenc_data *enc) static ID3D11Texture2D *get_tex_from_handle(struct nvenc_data *enc, uint32_t handle, IDXGIKeyedMutex **km_out) { - ID3D11Device *device = enc->device; + ID3D11Device *device = enc->device11; IDXGIKeyedMutex *km; ID3D11Texture2D *input_tex; HRESULT hr; diff --git a/plugins/obs-nvenc/nvenc-d3d12.c b/plugins/obs-nvenc/nvenc-d3d12.c new file mode 100644 index 00000000000000..ace32170a8e0db --- /dev/null +++ b/plugins/obs-nvenc/nvenc-d3d12.c @@ -0,0 +1,475 @@ +#include "nvenc-internal.h" +#include "nvenc-helpers.h" + +/* + * NVENC implementation using Direct3D 11 context and textures + */ + +/* ------------------------------------------------------------------------- */ +/* D3D11 Context/Device management */ + +static HANDLE get_lib(struct nvenc_data *enc, const char *lib) +{ + HMODULE mod = GetModuleHandleA(lib); + if (mod) + return mod; + + mod = LoadLibraryA(lib); + if (!mod) + error("Failed to load %s", lib); + return mod; +} + +typedef HRESULT(WINAPI *CREATEDXGIFACTORY2PROC)(UINT, REFIID, void **); + +bool d3d12_init(struct nvenc_data *enc, obs_data_t *settings) +{ + HMODULE dxgi = get_lib(enc, "DXGI.dll"); + HMODULE d3d12 = get_lib(enc, "D3D12.dll"); + CREATEDXGIFACTORY2PROC create_dxgi; + 
PFN_D3D12_CREATE_DEVICE create_device; + IDXGIFactory6 *factory; + IDXGIAdapter1 *adapter1; + ID3D12Device *device; + ID3D12Fence *fence; + ID3D12CommandQueue *command_queue; + ID3D12GraphicsCommandList *command_list; + ID3D12CommandAllocator *allocator; + HANDLE fence_event_handle; + + HRESULT hr; + + if (!dxgi || !d3d12) { + return false; + } + + create_dxgi = (CREATEDXGIFACTORY2PROC)GetProcAddress(dxgi, "CreateDXGIFactory2"); + create_device = (PFN_D3D12_CREATE_DEVICE)GetProcAddress(d3d12, "D3D12CreateDevice"); +#if 0 + bool create_debug = 0; + ID3D12Debug *debug_interface; + if (create_debug) { + PFN_D3D12_GET_DEBUG_INTERFACE get_debug_interface_func; + get_debug_interface_func = + (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(d3d12, "D3D12GetDebugInterface"); + if (SUCCEEDED(get_debug_interface_func(&IID_ID3D12Debug, (void **)&debug_interface))) { + debug_interface->lpVtbl->EnableDebugLayer(debug_interface); + + uint32_t useGPUBasedValidation = 1; + if (useGPUBasedValidation) { + ID3D12Debug1* debugInterface1; + if (SUCCEEDED((debug_interface->lpVtbl->QueryInterface( + debug_interface, &IID_ID3D12Debug1, &debugInterface1)))) { + debugInterface1->lpVtbl->SetEnableGPUBasedValidation(debugInterface1, true); + } + } + } + + IDXGIInfoQueue *dxgi_Info_queue; + CREATEDXGIFACTORY2PROC dxgi_get_debug_interface1; + dxgi_get_debug_interface1 = (CREATEDXGIFACTORY2PROC)GetProcAddress(dxgi, "DXGIGetDebugInterface1"); + HRESULT result = dxgi_get_debug_interface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_Info_queue); + if (FAILED(result)) { + } + + result = dxgi_Info_queue->lpVtbl->SetBreakOnSeverity(dxgi_Info_queue, DXGI_DEBUG_ALL, + DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE); + result = dxgi_Info_queue->lpVtbl->SetBreakOnSeverity(dxgi_Info_queue, DXGI_DEBUG_ALL, + DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE); + + DXGI_INFO_QUEUE_MESSAGE_ID hide[] = { + 80 /* IDXGISwapChain::GetContainingOutput: The swapchain's adapter does not control the output on which the 
swapchain's window resides. */ + , + }; + DXGI_INFO_QUEUE_FILTER filter = {}; + filter.DenyList.NumIDs = _countof(hide); + filter.DenyList.pIDList = hide; + dxgi_Info_queue->lpVtbl->AddStorageFilterEntries(dxgi_Info_queue, DXGI_DEBUG_DXGI, &filter); + } +#endif + + if (!create_dxgi || !create_device) { + error("Failed to load D3D12/DXGI procedures"); + return false; + } + + hr = create_dxgi(/* create_debug ? DXGI_CREATE_FACTORY_DEBUG :*/ 0, &IID_IDXGIFactory6, &factory); + if (FAILED(hr)) { + error_hr("CreateDXGIFactory1 failed"); + return false; + } + + hr = factory->lpVtbl->EnumAdapterByGpuPreference(factory, 0, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + &IID_IDXGIAdapter1, &adapter1); + factory->lpVtbl->Release(factory); + if (FAILED(hr)) { + error_hr("EnumAdapters failed"); + return false; + } + + hr = create_device((IUnknown *)adapter1, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + adapter1->lpVtbl->Release(adapter1); + if (FAILED(hr)) { + error_hr("D3D12CreateDevice failed"); + return false; + } +#if 0 + if (create_debug) { + ID3D12InfoQueue *pInfoQueue; + if (SUCCEEDED(device->lpVtbl->QueryInterface(device, &IID_ID3D12InfoQueue, &pInfoQueue))) { + D3D12_MESSAGE_SEVERITY Severities[] = {D3D12_MESSAGE_SEVERITY_INFO}; + D3D12_MESSAGE_ID DenyIds[] = { + D3D12_MESSAGE_ID_INVALID_DESCRIPTOR_HANDLE, + D3D12_MESSAGE_ID_CREATEGRAPHICSPIPELINESTATE_PS_OUTPUT_RT_OUTPUT_MISMATCH, + D3D12_MESSAGE_ID_COMMAND_LIST_DESCRIPTOR_TABLE_NOT_SET, + D3D12_MESSAGE_ID_RESOURCE_BARRIER_DUPLICATE_SUBRESOURCE_TRANSITIONS, + D3D12_MESSAGE_ID_RESOLVE_QUERY_INVALID_QUERY_STATE, + D3D12_MESSAGE_ID_CREATERESOURCE_STATE_IGNORED, + }; + + D3D12_INFO_QUEUE_FILTER NewFilter; + memset(&NewFilter, 0, sizeof(D3D12_INFO_QUEUE_FILTER)); + NewFilter.DenyList.NumSeverities = _countof(Severities); + NewFilter.DenyList.pSeverityList = Severities; + NewFilter.DenyList.NumIDs = _countof(DenyIds); + NewFilter.DenyList.pIDList = DenyIds; + + pInfoQueue->lpVtbl->PushStorageFilter(pInfoQueue, 
&NewFilter); + pInfoQueue->lpVtbl->Release(pInfoQueue); + } + } +#endif + hr = device->lpVtbl->CreateFence(device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void **)(&fence)); + if (FAILED(hr)) { + error_hr("D3D12 CreateFence failed"); + return false; + } + + D3D12_COMMAND_QUEUE_DESC QueueDesc; + memset(&QueueDesc, 0, sizeof(D3D12_COMMAND_QUEUE_DESC)); + QueueDesc.Type = D3D12_COMMAND_LIST_TYPE_COPY; + QueueDesc.NodeMask = 1; + hr = device->lpVtbl->CreateCommandQueue(device, &QueueDesc, &IID_ID3D12CommandQueue, (void **)(&command_queue)); + if (FAILED(hr)) { + error_hr("D3D12 CreateCommandQueue failed"); + return false; + } + + hr = device->lpVtbl->CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_COPY, &IID_ID3D12CommandAllocator, + (void **)(&allocator)); + if (FAILED(hr)) { + error_hr("D3D12 CreateCommandAllocator failed"); + return false; + } + + hr = device->lpVtbl->CreateCommandList(device, 1, D3D12_COMMAND_LIST_TYPE_COPY, allocator, NULL, + &IID_ID3D12GraphicsCommandList, (void **)(&command_list)); + if (FAILED(hr)) { + error_hr("D3D12 CreateCommandList failed"); + return false; + } + + fence_event_handle = CreateEvent(NULL, false, false, NULL); + + enc->device12 = device; + enc->fence = fence; + enc->command_queue = command_queue; + enc->fence_event_handle = fence_event_handle; + enc->allocator = allocator; + enc->command_list = command_list; + enc->next_fence_value = 0; + enc->last_completed_fence_value = 0; + return true; +} + +void d3d12_free(struct nvenc_data *enc) +{ + for (size_t i = 0; i < enc->input_textures.num; i++) { + ID3D12Resource *tex = enc->input_textures.array[i].tex; + tex->lpVtbl->Release(tex); + } + + if (enc->device12) { + enc->device12->lpVtbl->Release(enc->device12); + } +} + +/* ------------------------------------------------------------------------- */ +/* D3D11 Surface management */ + +static bool d3d12_texture_init(struct nvenc_data *enc, struct nv_texture *nvtex) +{ + ID3D12Device *const device = enc->device12; + const 
bool p010 = obs_encoder_video_tex_active(enc->encoder, VIDEO_FORMAT_P010); + + D3D12_RESOURCE_DESC desc; + memset(&desc, 0, sizeof(desc)); + desc.Width = enc->cx; + desc.Height = enc->cy; + desc.MipLevels = 1; + desc.DepthOrArraySize = 1; + desc.Format = p010 ? DXGI_FORMAT_P010 : DXGI_FORMAT_NV12; + desc.SampleDesc.Count = 1; + desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + desc.Flags = D3D12_RESOURCE_FLAG_NONE; + + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + ID3D12Resource *tex; + HRESULT hr = device->lpVtbl->CreateCommittedResource(device, &HeapProps, D3D12_HEAP_FLAG_NONE, &desc, + D3D12_RESOURCE_STATE_COPY_DEST, NULL, &IID_ID3D12Resource, + &tex); + if (FAILED(hr)) { + error_hr("Failed to create texture"); + return false; + } + + NV_ENC_REGISTER_RESOURCE res = {NV_ENC_REGISTER_RESOURCE_VER}; + res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; + res.resourceToRegister = tex; + res.width = enc->cx; + res.height = enc->cy; + res.bufferFormat = p010 ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT : NV_ENC_BUFFER_FORMAT_NV12; + res.bufferUsage = NV_ENC_INPUT_IMAGE; + res.pInputFencePoint = NULL; + if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { + tex->lpVtbl->Release(tex); + return false; + } + + nvtex->res = res.registeredResource; + nvtex->tex = tex; + nvtex->mapped_res = NULL; + return true; +} + +bool d3d12_init_textures(struct nvenc_data *enc) +{ + da_reserve(enc->textures, enc->buf_count); + for (uint32_t i = 0; i < enc->buf_count; i++) { + struct nv_texture texture; + if (!d3d12_texture_init(enc, &texture)) { + return false; + } + + da_push_back(enc->textures, &texture); + } + + return true; +} + +static void d3d12_texture_free(struct nvenc_data *enc, struct nv_texture *nvtex) +{ + + if (nvtex->res) { + if (nvtex->mapped_res) { + nv.nvEncUnmapInputResource(enc->session, nvtex->mapped_res); + } + nv.nvEncUnregisterResource(enc->session, nvtex->res); + ID3D12Resource *tex12 = (ID3D12Resource *)(nvtex->tex); + tex12->lpVtbl->Release(tex12); + } +} + +void d3d12_free_textures(struct nvenc_data *enc) +{ + for (size_t i = 0; i < enc->textures.num; i++) { + d3d12_texture_free(enc, &enc->textures.array[i]); + } +} + +bool d3d12_init_readback(struct nvenc_data *enc, struct nv_bitstream *bs) +{ + ID3D12Device *const device = enc->device12; + D3D12_HEAP_PROPERTIES HeapProps; + HeapProps.Type = D3D12_HEAP_TYPE_READBACK; + HeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + HeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + HeapProps.CreationNodeMask = 1; + HeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC ResourceDesc; + memset(&ResourceDesc, 0, sizeof(D3D12_RESOURCE_DESC)); + ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + ResourceDesc.Width = 2 * enc->cx * enc->cy; + ResourceDesc.Height = 1; + ResourceDesc.DepthOrArraySize = 1; + ResourceDesc.MipLevels = 1; + ResourceDesc.Format = DXGI_FORMAT_UNKNOWN; + ResourceDesc.SampleDesc.Count = 1; + ResourceDesc.SampleDesc.Quality = 
0; + ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + ID3D12Resource *tex = NULL; + HRESULT hr = device->lpVtbl->CreateCommittedResource(device, &HeapProps, D3D12_HEAP_FLAG_NONE, &ResourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, NULL, &IID_ID3D12Resource, + (void **)(&tex)); + if (FAILED(hr)) { + auto remoteReason = device->lpVtbl->GetDeviceRemovedReason(device); + error_hr("Failed to create texture"); + return false; + } + + NV_ENC_REGISTER_RESOURCE res = {NV_ENC_REGISTER_RESOURCE_VER}; + res.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; + res.resourceToRegister = tex; + res.width = enc->cx; + res.height = enc->cy; + res.bufferFormat = NV_ENC_BUFFER_FORMAT_U8; + res.bufferUsage = NV_ENC_OUTPUT_BITSTREAM; + res.pInputFencePoint = NULL; + if (NV_FAILED(nv.nvEncRegisterResource(enc->session, &res))) { + tex->lpVtbl->Release(tex); + return false; + } + + bs->ptr = res.registeredResource; + bs->tex = tex; + bs->mapped_res = NULL; + return true; +} + +void d3d12_free_readback(struct nvenc_data *enc, struct nv_bitstream *bs) +{ + if (bs->ptr) { + if (bs->mapped_res) { + nv.nvEncUnmapInputResource(enc->session, bs->mapped_res); + } + nv.nvEncUnregisterResource(enc->session, bs->ptr); + ID3D12Resource *tex12 = (ID3D12Resource *)(bs->tex); + tex12->lpVtbl->Release(tex12); + } +} + +/* ------------------------------------------------------------------------- */ +/* Actual encoding stuff */ + +static ID3D12Resource *get_tex_from_handle(struct nvenc_data *enc, uint32_t handle, IDXGIKeyedMutex **km_out) +{ + ID3D12Device *device = enc->device12; + ID3D12Resource *input_tex; + HRESULT hr; + + for (size_t i = 0; i < enc->input_textures.num; i++) { + struct handle_tex *ht = &enc->input_textures.array[i]; + if (ht->handle == handle) { + *km_out = ht->km; + return ht->tex; + } + } + + hr = device->lpVtbl->OpenSharedHandle(device, (HANDLE)(uintptr_t)handle, &IID_ID3D12Resource, &input_tex); + if (FAILED(hr)) { + 
error_hr("OpenSharedResource failed"); + return NULL; + } + + *km_out = NULL; + struct handle_tex new_ht = {handle, input_tex, NULL}; + da_push_back(enc->input_textures, &new_ht); + return input_tex; +} + +bool d3d12_encode(void *data, struct encoder_texture *texture, int64_t pts, uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet) +{ + struct nvenc_data *enc = data; + ID3D12Resource *input_tex; + ID3D12Resource *output_tex; + IDXGIKeyedMutex *km; + struct nv_texture *nvtex; + struct nv_bitstream *bs; + + ID3D12GraphicsCommandList *command_list = enc->command_list; + ID3D12CommandQueue *command_queue = enc->command_queue; + ID3D12Fence *fence = enc->fence; + + if (texture->handle == GS_INVALID_HANDLE) { + error("Encode failed: bad texture handle"); + *next_key = lock_key; + return false; + } + + bs = &enc->bitstreams.array[enc->next_bitstream]; + nvtex = &enc->textures.array[enc->next_bitstream]; + + input_tex = get_tex_from_handle(enc, texture->handle, &km); + output_tex = nvtex->tex; + + if (!input_tex) { + *next_key = lock_key; + return false; + } + + deque_push_back(&enc->dts_list, &pts, sizeof(pts)); + + /* ------------------------------------ */ + /* copy to output tex */ + + // km->lpVtbl->AcquireSync(km, lock_key, INFINITE); + + command_list->lpVtbl->CopyResource(command_list, output_tex, input_tex); + HRESULT hr = command_list->lpVtbl->Close(command_list); + if (FAILED(hr)) { + error_hr("CommandList(Close) failed"); + return false; + } + + command_queue->lpVtbl->ExecuteCommandLists(command_queue, 1, (ID3D12CommandList **)&command_list); + hr = command_queue->lpVtbl->Signal(command_queue, enc->fence, ++enc->next_fence_value); + if (FAILED(hr)) { + error_hr("CommandQeue(Signal) failed"); + return false; + } + + hr = fence->lpVtbl->SetEventOnCompletion(fence, enc->next_fence_value, enc->fence_event_handle); + if (FAILED(hr)) { + error_hr("Fence(SetEventOnCompletion) failed"); + return false; + } + + 
WaitForSingleObject(enc->fence_event_handle, INFINITE); + enc->last_completed_fence_value = enc->next_fence_value; + + hr = command_list->lpVtbl->Reset(command_list, enc->allocator, NULL); + if (FAILED(hr)) { + error_hr("CommandList(Reset) failed"); + return false; + } + + // km->lpVtbl->ReleaseSync(km, *next_key); + + /* ------------------------------------ */ + /* map output tex so nvenc can use it */ + + NV_ENC_MAP_INPUT_RESOURCE mapIn = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + mapIn.registeredResource = nvtex->res; + if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &mapIn))) { + return false; + } + + nvtex->mapped_res = mapIn.mappedResource; + + /* ------------------------------------ */ + /* do actual encode call */ + nvtex->input_resource.version = NV_ENC_INPUT_RESOURCE_D3D12_VER; + nvtex->input_resource.pInputBuffer = nvtex->mapped_res; + + NV_ENC_MAP_INPUT_RESOURCE mapOut = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + mapOut.registeredResource = bs->ptr; + if (NV_FAILED(nv.nvEncMapInputResource(enc->session, &mapOut))) { + return false; + } + + bs->mapped_res = mapOut.mappedResource; + bs->output_resource.version = NV_ENC_OUTPUT_RESOURCE_D3D12_VER; + bs->output_resource.pOutputBuffer = bs->mapped_res; + + return nvenc_encode_base_d3d12(enc, bs, nvtex, pts, packet, received_packet); +} diff --git a/plugins/obs-nvenc/nvenc-internal.h b/plugins/obs-nvenc/nvenc-internal.h index 2b233223b9ba8f..c953d81d359afb 100644 --- a/plugins/obs-nvenc/nvenc-internal.h +++ b/plugins/obs-nvenc/nvenc-internal.h @@ -9,8 +9,11 @@ #ifdef _WIN32 #define INITGUID #include +#include #include #include +#include +#include #else #include #endif @@ -89,9 +92,19 @@ struct nvenc_data { int packet_priority; #ifdef _WIN32 + bool is_use_d3d12; DARRAY(struct nv_texture) textures; - ID3D11Device *device; + ID3D11Device *device11; ID3D11DeviceContext *context; + + ID3D12Device *device12; + ID3D12CommandQueue *command_queue; + ID3D12Fence *fence; + uint64_t next_fence_value; + uint64_t 
last_completed_fence_value; + HANDLE fence_event_handle; + ID3D12GraphicsCommandList *command_list; + ID3D12CommandAllocator *allocator; #endif uint32_t cx; @@ -125,8 +138,8 @@ struct nvenc_data { struct handle_tex { #ifdef _WIN32 uint32_t handle; - ID3D11Texture2D *tex; - IDXGIKeyedMutex *km; + void *tex; // D3D11 is ID3D11Texture2D, D3D12 is ID3D12Resource + IDXGIKeyedMutex *km; // only for D3D11 #else GLuint tex_id; /* CUDA mappings */ @@ -137,7 +150,12 @@ struct handle_tex { /* Bitstream buffer */ struct nv_bitstream { - void *ptr; + // D3D11 and D3D12 + void *ptr; // register resource + // D3D12 + ID3D12Resource *tex; + void *mapped_res; + NV_ENC_OUTPUT_RESOURCE_D3D12 output_resource; }; /** Mapped resources **/ @@ -151,9 +169,12 @@ struct nv_cuda_surface { #ifdef _WIN32 /* DX11 textures */ struct nv_texture { - void *res; - ID3D11Texture2D *tex; + void *res; // register Resource + // D3D11 + void *tex; // D3D11 is ID3D11Texture2D, D3D12 is ID3D12Resource void *mapped_res; + // D3D12 + NV_ENC_INPUT_RESOURCE_D3D12 input_resource; }; #endif @@ -162,6 +183,8 @@ struct nv_texture { bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs, void *pic, int64_t pts, struct encoder_packet *packet, bool *received_packet); +bool nvenc_encode_base_d3d12(struct nvenc_data *enc, struct nv_bitstream *out, struct nv_texture *pic, int64_t pts, + struct encoder_packet *packet, bool *received_packet); /* ------------------------------------------------------------------------- */ /* Backend-specific functions */ @@ -176,6 +199,19 @@ void d3d11_free_textures(struct nvenc_data *enc); bool d3d11_encode(void *data, struct encoder_texture *texture, int64_t pts, uint64_t lock_key, uint64_t *next_key, struct encoder_packet *packet, bool *received_packet); + +bool d3d12_init(struct nvenc_data *enc, obs_data_t *settings); +void d3d12_free(struct nvenc_data *enc); + +bool d3d12_init_textures(struct nvenc_data *enc); +void d3d12_free_textures(struct nvenc_data *enc); + +bool 
d3d12_init_readback(struct nvenc_data *enc, struct nv_bitstream *bs); +void d3d12_free_readback(struct nvenc_data *enc, struct nv_bitstream *bs); + +bool d3d12_encode(void *data, struct encoder_texture *texture, int64_t pts, uint64_t lock_key, uint64_t *next_key, + struct encoder_packet *packet, bool *received_packet); + #endif /** CUDA **/ diff --git a/plugins/obs-nvenc/nvenc.c b/plugins/obs-nvenc/nvenc.c index fb389452591515..f06f6432dba93f 100644 --- a/plugins/obs-nvenc/nvenc.c +++ b/plugins/obs-nvenc/nvenc.c @@ -18,8 +18,11 @@ static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs) { - NV_ENC_CREATE_BITSTREAM_BUFFER buf = {NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; + if (enc->is_use_d3d12) { + return d3d12_init_readback(enc, bs); + } + NV_ENC_CREATE_BITSTREAM_BUFFER buf = {NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; if (NV_FAILED(nv.nvEncCreateBitstreamBuffer(enc->session, &buf))) { return false; } @@ -30,8 +33,12 @@ static bool nv_bitstream_init(struct nvenc_data *enc, struct nv_bitstream *bs) static void nv_bitstream_free(struct nvenc_data *enc, struct nv_bitstream *bs) { - if (bs->ptr) { - nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr); + if (enc->is_use_d3d12) { + d3d12_free_readback(enc, bs); + } else { + if (bs->ptr) { + nv.nvEncDestroyBitstreamBuffer(enc->session, bs->ptr); + } } } @@ -44,6 +51,12 @@ static const char *h264_nvenc_get_name(void *type_data) return "NVIDIA NVENC H.264"; } +static const char *h264_nvenc_get_name_d3d12(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC H.264 D3D12"; +} + static const char *h264_nvenc_soft_get_name(void *type_data) { UNUSED_PARAMETER(type_data); @@ -57,6 +70,12 @@ static const char *hevc_nvenc_get_name(void *type_data) return "NVIDIA NVENC HEVC"; } +static const char *hevc_nvenc_get_name_d3d12(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC HEVC D3D12"; +} + static const char *hevc_nvenc_soft_get_name(void *type_data) { 
UNUSED_PARAMETER(type_data); @@ -70,6 +89,12 @@ static const char *av1_nvenc_get_name(void *type_data) return "NVIDIA NVENC AV1"; } +static const char *av1_nvenc_get_name_d3d12(void *type_data) +{ + UNUSED_PARAMETER(type_data); + return "NVIDIA NVENC AV1 D3D12"; +} + static const char *av1_nvenc_soft_get_name(void *type_data) { UNUSED_PARAMETER(type_data); @@ -126,7 +151,12 @@ static bool init_session(struct nvenc_data *enc) params.device = enc->cu_ctx; params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; } else { - params.device = enc->device; + if (enc->is_use_d3d12) { + params.device = enc->device12; + } else { + params.device = enc->device11; + } + params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX; } #else @@ -158,8 +188,9 @@ static void initialize_params(struct nvenc_data *enc, const GUID *nv_preset, NV_ params->enablePTD = 1; params->encodeConfig = &enc->config; params->tuningInfo = nv_tuning; + params->bufferFormat = #ifdef NVENC_12_1_OR_LATER - params->splitEncodeMode = (NV_ENC_SPLIT_ENCODE_MODE)enc->props.split_encode; + params->splitEncodeMode = (NV_ENC_SPLIT_ENCODE_MODE)enc->props.split_encode; #endif } @@ -513,8 +544,11 @@ static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings) obs_encoder_set_last_error(enc->encoder, obs_module_text("Opts.Invalid")); return false; } - - if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { + enc->params.bufferFormat = is_10_bit(enc) ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT : NV_ENC_BUFFER_FORMAT_NV12; + NVENCSTATUS status = nv.nvEncInitializeEncoder(enc->session, &enc->params); + if (NV_FAILED(status)) { + const char *err = nv.nvEncGetLastErrorString(enc->session); + blog(LOG_WARNING, "nvenc init encoder failed %s ", err); return false; } @@ -632,8 +666,11 @@ static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings) obs_encoder_set_last_error(enc->encoder, obs_module_text("Opts.Invalid")); return false; } - - if (NV_FAILED(nv.nvEncInitializeEncoder(enc->session, &enc->params))) { + enc->params.bufferFormat = is_10_bit(enc) ? NV_ENC_BUFFER_FORMAT_YUV420_10BIT : NV_ENC_BUFFER_FORMAT_NV12; + NVENCSTATUS status = nv.nvEncInitializeEncoder(enc->session, &enc->params); + if (NV_FAILED(status)) { + const char *err = nv.nvEncGetLastErrorString(enc->session); + blog(LOG_WARNING, "nvenc init encoder failed %s ", err); return false; } @@ -845,9 +882,11 @@ static bool init_encoder(struct nvenc_data *enc, enum codec_type codec, obs_data return false; } -static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, obs_encoder_t *encoder, bool texture) +static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, obs_encoder_t *encoder, bool texture, + bool is_d3d12) { struct nvenc_data *enc = bzalloc(sizeof(*enc)); + enc->is_use_d3d12 = is_d3d12; enc->encoder = encoder; enc->codec = codec; enc->first_packet = true; @@ -873,8 +912,18 @@ static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, goto fail; #ifdef _WIN32 - if (texture ? 
!d3d11_init(enc, settings) : !init_cuda(encoder)) - goto fail; + if (texture) { + if (is_d3d12) { + if (!d3d12_init(enc, settings)) + goto fail; + } else { + if (!d3d11_init(enc, settings)) + goto fail; + } + } else { + if (!init_cuda(encoder)) + goto fail; + } #else if (!init_cuda(encoder)) goto fail; @@ -889,21 +938,32 @@ static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, if (!init_session(enc)) { goto fail; } + if (!init_encoder(enc, codec, settings, encoder)) { goto fail; } + if (!init_bitstreams(enc)) { goto fail; } #ifdef _WIN32 - if (texture ? !d3d11_init_textures(enc) : !cuda_init_surfaces(enc)) - goto fail; + if (texture) { + if (is_d3d12) { + if (!d3d12_init_textures(enc)) + goto fail; + } else { + if (!d3d11_init_textures(enc)) + goto fail; + } + } else { + if (!cuda_init_surfaces(enc)) + goto fail; + } #else if (!cuda_init_surfaces(enc)) goto fail; #endif - enc->codec = codec; return enc; @@ -913,7 +973,8 @@ static void *nvenc_create_internal(enum codec_type codec, obs_data_t *settings, return NULL; } -static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, obs_encoder_t *encoder, bool texture) +static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, obs_encoder_t *encoder, bool texture, + bool is_d3d12) { /* This encoder requires shared textures, this cannot be used on a * gpu other than the one OBS is currently running on. 
@@ -955,7 +1016,7 @@ static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, obs_ goto reroute; } - struct nvenc_data *enc = nvenc_create_internal(codec, settings, encoder, texture); + struct nvenc_data *enc = nvenc_create_internal(codec, settings, encoder, texture, is_d3d12); if (enc) { return enc; @@ -967,53 +1028,69 @@ static void *nvenc_create_base(enum codec_type codec, obs_data_t *settings, obs_ return NULL; } - switch (codec) { + /* switch (codec) { case CODEC_H264: return obs_encoder_create_rerouted(encoder, "obs_nvenc_h264_soft"); case CODEC_HEVC: return obs_encoder_create_rerouted(encoder, "obs_nvenc_hevc_soft"); case CODEC_AV1: return obs_encoder_create_rerouted(encoder, "obs_nvenc_av1_soft"); - } + }*/ return NULL; } static void *h264_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) { - return nvenc_create_base(CODEC_H264, settings, encoder, true); + return nvenc_create_base(CODEC_H264, settings, encoder, true, false); +} + +static void *h264_nvenc_create_d3d12(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_H264, settings, encoder, true, true); } #ifdef ENABLE_HEVC static void *hevc_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) { - return nvenc_create_base(CODEC_HEVC, settings, encoder, true); + return nvenc_create_base(CODEC_HEVC, settings, encoder, true, false); +} + +static void *hevc_nvenc_create_d3d12(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_HEVC, settings, encoder, true, true); } #endif static void *av1_nvenc_create(obs_data_t *settings, obs_encoder_t *encoder) { - return nvenc_create_base(CODEC_AV1, settings, encoder, true); + return nvenc_create_base(CODEC_AV1, settings, encoder, true, false); +} + +static void *av1_nvenc_create_d3d12(obs_data_t *settings, obs_encoder_t *encoder) +{ + return nvenc_create_base(CODEC_AV1, settings, encoder, true, true); } static void *h264_nvenc_soft_create(obs_data_t *settings, obs_encoder_t 
*encoder) { - return nvenc_create_base(CODEC_H264, settings, encoder, false); + return nvenc_create_base(CODEC_H264, settings, encoder, false, false); } #ifdef ENABLE_HEVC static void *hevc_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder) { - return nvenc_create_base(CODEC_HEVC, settings, encoder, false); + return nvenc_create_base(CODEC_HEVC, settings, encoder, false, false); } #endif static void *av1_nvenc_soft_create(obs_data_t *settings, obs_encoder_t *encoder) { - return nvenc_create_base(CODEC_AV1, settings, encoder, false); + return nvenc_create_base(CODEC_AV1, settings, encoder, false, false); } static bool get_encoded_packet(struct nvenc_data *enc, bool finalize); +static bool get_encoded_packet_d3d12(struct nvenc_data *enc, bool finalize); static void nvenc_destroy(void *data) { @@ -1023,7 +1100,11 @@ static void nvenc_destroy(void *data) NV_ENC_PIC_PARAMS params = {NV_ENC_PIC_PARAMS_VER}; params.encodePicFlags = NV_ENC_PIC_FLAG_EOS; nv.nvEncEncodePicture(enc->session, ¶ms); - get_encoded_packet(enc, true); + if (enc->is_use_d3d12) { + get_encoded_packet_d3d12(enc, true); + } else { + get_encoded_packet(enc, true); + } } for (size_t i = 0; i < enc->bitstreams.num; i++) { @@ -1033,8 +1114,13 @@ static void nvenc_destroy(void *data) nv.nvEncDestroyEncoder(enc->session); #ifdef _WIN32 - d3d11_free_textures(enc); - d3d11_free(enc); + if (enc->is_use_d3d12) { + d3d12_free_textures(enc); + d3d12_free(enc); + } else { + d3d11_free_textures(enc); + d3d11_free(enc); + } #else cuda_opengl_free(enc); #endif @@ -1179,6 +1265,129 @@ static bool get_encoded_packet(struct nvenc_data *enc, bool finalize) return true; } +static bool get_encoded_packet_d3d12(struct nvenc_data *enc, bool finalize) +{ + void *s = enc->session; + + da_resize(enc->packet_data, 0); + + if (!enc->buffers_queued) + return true; + if (!finalize && enc->buffers_queued < enc->output_delay) + return true; + + size_t count = finalize ? 
enc->buffers_queued : 1; + + for (size_t i = 0; i < count; i++) { + size_t cur_bs_idx = enc->cur_bitstream; + struct nv_bitstream *bs = &enc->bitstreams.array[cur_bs_idx]; +#ifdef _WIN32 + struct nv_texture *nvtex = enc->non_texture ? NULL : &enc->textures.array[cur_bs_idx]; + struct nv_cuda_surface *surf = enc->non_texture ? &enc->surfaces.array[cur_bs_idx] : NULL; +#else + struct nv_cuda_surface *surf = &enc->surfaces.array[cur_bs_idx]; +#endif + + /* ---------------- */ + + NV_ENC_LOCK_BITSTREAM lock = {NV_ENC_LOCK_BITSTREAM_VER}; + lock.outputBitstream = &bs->output_resource; + lock.doNotWait = false; + + NVENCSTATUS status = nv.nvEncLockBitstream(s, &lock); + if (NV_FAILED(status)) { + const char *err = nv.nvEncGetLastErrorString(enc->session); + blog(LOG_WARNING, "nvenc init encoder failed %s ", err); + return false; + } + + if (enc->first_packet) { + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {0}; + uint8_t buf[256]; + uint32_t size = 0; + + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + payload.spsppsBuffer = buf; + payload.inBufferSize = sizeof(buf); + payload.outSPSPPSPayloadSize = &size; + + nv.nvEncGetSequenceParams(s, &payload); + enc->header = bmemdup(buf, size); + enc->header_size = size; + enc->first_packet = false; + } + + da_copy_array(enc->packet_data, lock.bitstreamBufferPtr, lock.bitstreamSizeInBytes); + + enc->packet_pts = (int64_t)lock.outputTimeStamp; + enc->packet_keyframe = lock.pictureType == NV_ENC_PIC_TYPE_IDR; + + switch (lock.pictureType) { + case NV_ENC_PIC_TYPE_I: + case NV_ENC_PIC_TYPE_BI: + case NV_ENC_PIC_TYPE_IDR: +#ifdef NVENC_12_2_OR_LATER + case NV_ENC_PIC_TYPE_SWITCH: +#endif + enc->packet_priority = OBS_NAL_PRIORITY_HIGHEST; + break; + case NV_ENC_PIC_TYPE_P: + enc->packet_priority = OBS_NAL_PRIORITY_HIGH; + break; + case NV_ENC_PIC_TYPE_B: + case NV_ENC_PIC_TYPE_NONREF_P: + enc->packet_priority = OBS_NAL_PRIORITY_DISPOSABLE; + break; + default: + enc->packet_priority = OBS_NAL_PRIORITY_DISPOSABLE; + } + + if 
(NV_FAILED(nv.nvEncUnlockBitstream(s, lock.outputBitstream))) { + return false; + } + + /* ---------------- */ +#ifdef _WIN32 + if (nvtex && nvtex->mapped_res) { + NVENCSTATUS err; + err = nv.nvEncUnmapInputResource(s, nvtex->mapped_res); + if (nv_failed(enc->encoder, err, __FUNCTION__, "unmap")) { + return false; + } + nvtex->mapped_res = NULL; + } + + if (bs && bs->mapped_res) { + NVENCSTATUS err; + err = nv.nvEncUnmapInputResource(s, bs->mapped_res); + if (nv_failed(enc->encoder, err, __FUNCTION__, "unmap")) { + return false; + } + bs->mapped_res = NULL; + } +#endif + /* ---------------- */ + + if (surf && surf->mapped_res) { + NVENCSTATUS err; + err = nv.nvEncUnmapInputResource(s, surf->mapped_res); + if (nv_failed(enc->encoder, err, __FUNCTION__, "unmap")) { + return false; + } + surf->mapped_res = NULL; + } + + /* ---------------- */ + + if (++enc->cur_bitstream == enc->buf_count) + enc->cur_bitstream = 0; + + enc->buffers_queued--; + } + + return true; +} + struct roi_params { uint32_t mb_width; uint32_t mb_height; @@ -1356,6 +1565,94 @@ bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs, void *pi return true; } +bool nvenc_encode_base_d3d12(struct nvenc_data *enc, struct nv_bitstream *out, struct nv_texture *pic, int64_t pts, + struct encoder_packet *packet, bool *received_packet) +{ + NV_ENC_PIC_PARAMS params = {0}; + params.version = NV_ENC_PIC_PARAMS_VER; + params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + params.inputBuffer = &pic->input_resource; + params.inputTimeStamp = (uint64_t)pts; + params.inputWidth = enc->cx; + params.inputHeight = enc->cy; + params.inputPitch = enc->cx; + params.outputBitstream = &out->output_resource; + params.frameIdx = (uint32_t)pts; + params.completionEvent = NULL; + + if (enc->non_texture) { + params.bufferFmt = enc->surface_format; + } else { + params.bufferFmt = obs_encoder_video_tex_active(enc->encoder, VIDEO_FORMAT_P010) + ? 
NV_ENC_BUFFER_FORMAT_YUV420_10BIT + : NV_ENC_BUFFER_FORMAT_NV12; + } + +#ifdef NVENC_13_0_OR_LATER + if (enc->cll) { + if (enc->codec == CODEC_AV1) + params.codecPicParams.av1PicParams.pMaxCll = enc->cll; + else if (enc->codec == CODEC_HEVC) + params.codecPicParams.hevcPicParams.pMaxCll = enc->cll; + } + if (enc->mdi) { + if (enc->codec == CODEC_AV1) + params.codecPicParams.av1PicParams.pMasteringDisplay = enc->mdi; + else if (enc->codec == CODEC_HEVC) + params.codecPicParams.hevcPicParams.pMasteringDisplay = enc->mdi; + } +#endif + + /* Add ROI map if enabled */ + if (obs_encoder_has_roi(enc->encoder)) + add_roi(enc, ¶ms); + + NVENCSTATUS err = nv.nvEncEncodePicture(enc->session, ¶ms); + if (err != NV_ENC_SUCCESS && err != NV_ENC_ERR_NEED_MORE_INPUT) { + nv_failed(enc->encoder, err, __FUNCTION__, "nvEncEncodePicture"); + return false; + } + + enc->encode_started = true; + enc->buffers_queued++; + + if (++enc->next_bitstream == enc->buf_count) { + enc->next_bitstream = 0; + } + + /* ------------------------------------ */ + /* check for encoded packet and parse */ + + if (!get_encoded_packet_d3d12(enc, false)) { + return false; + } + + /* ------------------------------------ */ + /* output encoded packet */ + + if (enc->packet_data.num) { + int64_t dts; + deque_pop_front(&enc->dts_list, &dts, sizeof(dts)); + + /* subtract bframe delay from dts for H.264/HEVC */ + if (enc->codec != CODEC_AV1) + dts -= enc->props.bf * packet->timebase_num; + + *received_packet = true; + packet->data = enc->packet_data.array; + packet->size = enc->packet_data.num; + packet->type = OBS_ENCODER_VIDEO; + packet->pts = enc->packet_pts; + packet->dts = dts; + packet->keyframe = enc->packet_keyframe; + packet->priority = enc->packet_priority; + } else { + *received_packet = false; + } + + return true; +} + static void nvenc_soft_video_info(void *data, struct video_scale_info *info) { struct nvenc_data *enc = data; @@ -1408,6 +1705,26 @@ struct obs_encoder_info h264_nvenc_info = { 
.get_sei_data = nvenc_sei_data, }; +struct obs_encoder_info h264_nvenc_info_d3d12 = { + .id = "obs_nvenc_h264_tex_d3d12", + .codec = "h264", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI, + .get_name = h264_nvenc_get_name_d3d12, + .create = h264_nvenc_create_d3d12, + .destroy = nvenc_destroy, + .update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d12_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = h264_nvenc_defaults, + .get_properties = h264_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, +}; + #ifdef ENABLE_HEVC struct obs_encoder_info hevc_nvenc_info = { .id = "obs_nvenc_hevc_tex", @@ -1428,6 +1745,26 @@ struct obs_encoder_info hevc_nvenc_info = { .get_extra_data = nvenc_extra_data, .get_sei_data = nvenc_sei_data, }; + +struct obs_encoder_info hevc_nvenc_info_d3d12 = { + .id = "obs_nvenc_hevc_tex_d3d12", + .codec = "hevc", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI, + .get_name = hevc_nvenc_get_name_d3d12, + .create = hevc_nvenc_create_d3d12, + .destroy = nvenc_destroy, + .update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d12_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = hevc_nvenc_defaults, + .get_properties = hevc_nvenc_properties, + .get_extra_data = nvenc_extra_data, + .get_sei_data = nvenc_sei_data, +}; #endif struct obs_encoder_info av1_nvenc_info = { @@ -1449,6 +1786,25 @@ struct obs_encoder_info av1_nvenc_info = { .get_extra_data = nvenc_extra_data, }; +struct obs_encoder_info av1_nvenc_info_d3d12 = { + .id = "obs_nvenc_av1_tex_d3d12", + .codec = "av1", + .type = OBS_ENCODER_VIDEO, + .caps = OBS_ENCODER_CAP_PASS_TEXTURE | OBS_ENCODER_CAP_DYN_BITRATE | OBS_ENCODER_CAP_ROI, + .get_name = av1_nvenc_get_name_d3d12, + .create = av1_nvenc_create_d3d12, + .destroy = nvenc_destroy, + 
.update = nvenc_update, +#ifdef _WIN32 + .encode_texture2 = d3d12_encode, +#else + .encode_texture2 = cuda_opengl_encode, +#endif + .get_defaults = av1_nvenc_defaults, + .get_properties = av1_nvenc_properties, + .get_extra_data = nvenc_extra_data, +}; + struct obs_encoder_info h264_nvenc_soft_info = { .id = "obs_nvenc_h264_soft", .codec = "h264", @@ -1504,13 +1860,16 @@ struct obs_encoder_info av1_nvenc_soft_info = { void register_encoders(void) { obs_register_encoder(&h264_nvenc_info); + obs_register_encoder(&h264_nvenc_info_d3d12); obs_register_encoder(&h264_nvenc_soft_info); #ifdef ENABLE_HEVC obs_register_encoder(&hevc_nvenc_info); + obs_register_encoder(&hevc_nvenc_info_d3d12); obs_register_encoder(&hevc_nvenc_soft_info); #endif if (is_codec_supported(CODEC_AV1)) { obs_register_encoder(&av1_nvenc_info); + obs_register_encoder(&av1_nvenc_info_d3d12); obs_register_encoder(&av1_nvenc_soft_info); } } diff --git a/plugins/win-capture/plugin-main.c b/plugins/win-capture/plugin-main.c index 98a38efc4b7843..4e52bef1d183fc 100644 --- a/plugins/win-capture/plugin-main.c +++ b/plugins/win-capture/plugin-main.c @@ -131,7 +131,9 @@ bool obs_module_load(void) win8_or_above = ver.major > 6 || (ver.major == 6 && ver.minor >= 2); obs_enter_graphics(); - graphics_uses_d3d11 = gs_get_device_type() == GS_DEVICE_DIRECT3D_11; + // D3D12 duplicator capture maybe have issue, use shared D3D11 device instead + graphics_uses_d3d11 = gs_get_device_type() == GS_DEVICE_DIRECT3D_11 || + gs_get_device_type() == GS_DEVICE_DIRECT3D_12; obs_leave_graphics(); if (graphics_uses_d3d11)