Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ option(ENABLE_HEVC "Enable HEVC encoders" ON)
add_subdirectory(libobs)
if(OS_WINDOWS)
add_subdirectory(libobs-d3d11)
add_subdirectory(libobs-d3d12)
add_subdirectory(libobs-winrt)
endif()
add_subdirectory(libobs-opengl)
Expand Down
7 changes: 6 additions & 1 deletion cmake/windows/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,12 @@ function(set_target_properties_obs target)
elseif(target_type STREQUAL MODULE_LIBRARY)
set_target_properties(${target} PROPERTIES VERSION 0 SOVERSION ${OBS_VERSION_CANONICAL})

if(target STREQUAL libobs-d3d11 OR target STREQUAL libobs-opengl OR target STREQUAL libobs-winrt)
if(
target STREQUAL libobs-d3d11
OR target STREQUAL libobs-d3d12
OR target STREQUAL libobs-opengl
OR target STREQUAL libobs-winrt
)
set(target_destination "${OBS_EXECUTABLE_DESTINATION}")
elseif(target STREQUAL "obspython" OR target STREQUAL "obslua")
set(target_destination "${OBS_SCRIPT_PLUGIN_DESTINATION}")
Expand Down
2 changes: 1 addition & 1 deletion frontend/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ elseif(OS_FREEBSD OR OS_OPENBSD)
include(cmake/os-freebsd.cmake)
endif()

foreach(graphics_library IN ITEMS opengl metal d3d11)
foreach(graphics_library IN ITEMS opengl metal d3d11 d3d12)
string(TOUPPER ${graphics_library} graphics_library_U)
if(TARGET OBS::libobs-${graphics_library})
target_compile_definitions(
Expand Down
17 changes: 14 additions & 3 deletions frontend/OBSApp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1090,12 +1090,23 @@ void OBSApp::checkForUncleanShutdown()
const char *OBSApp::GetRenderModule() const
{
#if defined(_WIN32)
// open PIX for windows support
/* if (GetModuleHandle(L"WinPixGpuCapturer.dll") == 0) {
HMODULE hModule = LoadLibraryW(L"C:\\Program Files\\Microsoft PIX\\2509.25\\WinPixGpuCapturer.dll");
if (hModule) {
blog(LOG_INFO, "Load Pixel");
}
}*/
const char *renderer = config_get_string(appConfig, "Video", "Renderer");

return (astrcmpi(renderer, "Direct3D 11") == 0) ? DL_D3D11 : DL_OPENGL;
if (astrcmpi(renderer, "Direct3D 12") == 0) {
return DL_D3D12;
} else if (astrcmpi(renderer, "Direct3D 11") == 0) {
return DL_D3D11;
} else {
return DL_OPENGL;
}
#elif defined(__APPLE__) && defined(__aarch64__)
const char *renderer = config_get_string(appConfig, "Video", "Renderer");

return (astrcmpi(renderer, "Metal") == 0) ? DL_METAL : DL_OPENGL;
#else
return DL_OPENGL;
Expand Down
1 change: 1 addition & 0 deletions frontend/settings/OBSBasicSettings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,7 @@ void OBSBasicSettings::LoadRendererList()
const char *renderer = config_get_string(App()->GetAppConfig(), "Video", "Renderer");
#ifdef _WIN32
ui->renderer->addItem(QString("Direct3D 11"), QString("Direct3D 11"));
ui->renderer->addItem(QString("Direct3D 12"), QString("Direct3D 12"));
if (opt_allow_opengl || strcmp(renderer, "OpenGL") == 0) {
ui->renderer->addItem(QString("OpenGL"), QString("OpenGL"));
}
Expand Down
2 changes: 2 additions & 0 deletions libobs-d3d11/d3d11-subsystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2189,6 +2189,8 @@ void device_begin_frame(gs_device_t *device)
reset_duplicators();
}

// Frame-end hook for this backend. The body is empty, so calling it performs no work.
void device_end_frame(gs_device_t *device) {}

void device_begin_scene(gs_device_t *device)
{
clear_textures(device);
Expand Down
48 changes: 48 additions & 0 deletions libobs-d3d12/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
cmake_minimum_required(VERSION 3.28...3.30)

# Direct3D 12 graphics module for libobs, plus its namespaced alias.
add_library(libobs-d3d12 MODULE)
add_library(OBS::libobs-d3d12 ALIAS libobs-d3d12)

# Module sources. The list is kept in its hand-written order (formatter sorting disabled).
target_sources(
  libobs-d3d12
  PRIVATE # cmake-format: unsort
    d3d12-command-context.cpp
    d3d12-command-context.hpp
    d3d12-duplicator.cpp
    d3d12-indexbuffer.cpp
    d3d12-samplerstate.cpp
    d3d12-shader.cpp
    d3d12-shaderprocessor.cpp
    d3d12-shaderprocessor.hpp
    d3d12-stagesurf.cpp
    d3d12-subsystem.cpp
    d3d12-subsystem.hpp
    d3d12-texture2d.cpp
    d3d12-texture3d.cpp
    d3d12-vertexbuffer.cpp
    d3d12-zstencilbuffer.cpp
)

# Generate the module's resource script from the .rc.in template and add it to the module.
configure_file(cmake/windows/obs-module.rc.in libobs-d3d12.rc)
target_sources(libobs-d3d12 PRIVATE libobs-d3d12.rc)

# USE_GPU_PRIORITY is defined only when GPU_PRIORITY_VAL is truthy.
# GPU_PRIORITY_VAL itself is always defined and falls back to 0.
target_compile_definitions(
  libobs-d3d12
  PRIVATE
    $<$<BOOL:${GPU_PRIORITY_VAL}>:USE_GPU_PRIORITY>
    "$<IF:$<BOOL:${GPU_PRIORITY_VAL}>,GPU_PRIORITY_VAL=${GPU_PRIORITY_VAL},GPU_PRIORITY_VAL=0>"
)

# libobs plus the system libraries this module links against.
target_link_libraries(
  libobs-d3d12
  PRIVATE
    OBS::libobs
    d3d11
    d3d12
    dxguid
    d3dcompiler
    dxgi
    shcore
)

# Register the renderer as an enabled feature of libobs (project helper).
target_enable_feature(libobs "Direct3D 12 renderer")

# Apply the project-wide target setup; warnings are not promoted to errors for this module.
set_target_properties_obs(
  libobs-d3d12
  PROPERTIES
    FOLDER core
    VERSION 0
    SOVERSION ${OBS_VERSION_MAJOR}
    COMPILE_WARNING_AS_ERROR FALSE
)
217 changes: 217 additions & 0 deletions libobs-d3d12/Common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
//
// Developed by Minigraph
//
// Author: James Stanard
//

#pragma once

#include <util/sse-intrin.h>

#define INLINE __forceinline

// Small integer helpers for alignment and power-of-two arithmetic.
// All "alignment" parameters are expected to be powers of two unless stated otherwise,
// because the helpers turn them into bit masks (alignment - 1).
namespace Math {
// Rounds value up to the next multiple of (mask + 1). mask must be (power of two) - 1.
template<typename T> __forceinline T AlignUpWithMask(T value, size_t mask)
{
	return (T)(((size_t)value + mask) & ~mask);
}

// Rounds value down to the previous multiple of (mask + 1). mask must be (power of two) - 1.
template<typename T> __forceinline T AlignDownWithMask(T value, size_t mask)
{
	return (T)((size_t)value & ~mask);
}

// Rounds value up to the next multiple of alignment (a power of two).
template<typename T> __forceinline T AlignUp(T value, size_t alignment)
{
	return AlignUpWithMask(value, alignment - 1);
}

// Rounds value down to the previous multiple of alignment (a power of two).
template<typename T> __forceinline T AlignDown(T value, size_t alignment)
{
	return AlignDownWithMask(value, alignment - 1);
}

// Returns true when value is a multiple of alignment (a power of two).
template<typename T> __forceinline bool IsAligned(T value, size_t alignment)
{
	return 0 == ((size_t)value & (alignment - 1));
}

// Returns value / alignment rounded up. alignment may be any non-zero divisor.
template<typename T> __forceinline T DivideByMultiple(T value, size_t alignment)
{
	return (T)((value + alignment - 1) / alignment);
}

// Returns true when value has at most one bit set. Note that this is also true for 0.
template<typename T> __forceinline bool IsPowerOfTwo(T value)
{
	return 0 == (value & (value - 1));
}

// Returns true when value is an exact multiple of divisor. divisor must be non-zero.
template<typename T> __forceinline bool IsDivisible(T value, T divisor)
{
	return (value / divisor) * divisor == value;
}

// Returns ceil(log2(value)) for value >= 1, and 0 when value is 0.
__forceinline uint8_t Log2(uint64_t value)
{
	unsigned long mssb; // most significant set bit
	unsigned long lssb; // least significant set bit

	// If perfect power of two (only one set bit), return index of bit. Otherwise round up
	// fractional log by adding 1 to most significant set bit's index.
	if (_BitScanReverse64(&mssb, value) > 0 && _BitScanForward64(&lssb, value) > 0)
		return uint8_t(mssb + (mssb == lssb ? 0 : 1));
	else
		return 0;
}

// Rounds value up to the nearest power of two (0 stays 0).
// The result must be representable in T; larger inputs are not supported.
template<typename T> __forceinline T AlignPowerOfTwo(T value)
{
	// Shift a T-typed one: shifting the int literal 1 overflows for results >= 2^31,
	// which silently broke 64-bit callers.
	return value == 0 ? T(0) : T(T(1) << Log2(value));
}

} // namespace Math

// A faster version of memcpy that uses SSE2 loads and non-temporal (streaming) stores.
// TODO: Write an ARM variant if necessary.
//
// _Dest / _Source: 16-byte aligned, non-overlapping buffers (alignment is required by
//                  _mm_stream_si128 / _mm_load_si128; the ASSERTs below are disabled).
// NumQuadwords:    number of 16-byte units to copy; 0 copies nothing.
//
// Every path ends with _mm_sfence() so the streaming stores are ordered before any
// store issued after this function returns.
inline static void SIMDMemCopy(void *__restrict _Dest, const void *__restrict _Source, size_t NumQuadwords)
{
	//ASSERT(Math::IsAligned(_Dest, 16));
	//ASSERT(Math::IsAligned(_Source, 16));

	__m128i *__restrict Dest = (__m128i *__restrict)_Dest;
	const __m128i *__restrict Source = (const __m128i *__restrict)_Source;

	// Discover how many quadwords precede a cache line (64-byte) boundary of the source.
	// Copy them separately, never copying more than was requested.
	size_t InitialQuadwordCount = (4 - (((size_t)Source >> 4) & 3)) & 3;
	if (InitialQuadwordCount > NumQuadwords)
		InitialQuadwordCount = NumQuadwords;

	switch (InitialQuadwordCount) {
	case 3:
		_mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2)); // Fall through
	case 2:
		_mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1)); // Fall through
	case 1:
		_mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0)); // Fall through
	default:
		break;
	}

	// No early return here: when nothing is left, the code below does no copying and
	// still reaches the final fence.
	Dest += InitialQuadwordCount;
	Source += InitialQuadwordCount;
	NumQuadwords -= InitialQuadwordCount;

	const size_t CacheLines = NumQuadwords >> 2;

	if (CacheLines > 0) {
		// Prefetch up to the first ten source cache lines, farthest one first.
		const size_t PrefetchLines = CacheLines < 10 ? CacheLines : 10;
		for (size_t Line = PrefetchLines; Line > 0; --Line)
			_mm_prefetch((const char *)(Source + 4 * (Line - 1)), _MM_HINT_NTA);

		// Do four quadwords per loop to minimize stalls.
		for (size_t i = CacheLines; i > 0; --i) {
			// If this is a large copy, start prefetching future cache lines. This also prefetches the
			// trailing quadwords that are not part of a whole cache line.
			if (i >= 10)
				_mm_prefetch((const char *)(Source + 40), _MM_HINT_NTA);

			_mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0));
			_mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1));
			_mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2));
			_mm_stream_si128(Dest + 3, _mm_load_si128(Source + 3));

			Dest += 4;
			Source += 4;
		}
	}

	// Copy the remaining quadwords
	switch (NumQuadwords & 3) {
	case 3:
		_mm_stream_si128(Dest + 2, _mm_load_si128(Source + 2)); // Fall through
	case 2:
		_mm_stream_si128(Dest + 1, _mm_load_si128(Source + 1)); // Fall through
	case 1:
		_mm_stream_si128(Dest + 0, _mm_load_si128(Source + 0)); // Fall through
	default:
		break;
	}

	_mm_sfence();
}

// Fills NumQuadwords 16-byte quadwords at _Dest with FillVector using non-temporal
// (streaming) stores.
//
// _Dest:        16-byte aligned destination (required by _mm_stream_si128; the ASSERT
//               below is disabled).
// FillVector:   128-bit pattern written verbatim to every quadword.
// NumQuadwords: number of 16-byte units to write; 0 writes nothing.
inline static void SIMDMemFill(void *__restrict _Dest, __m128 FillVector, size_t NumQuadwords)
{
	// ASSERT(Math::IsAligned(_Dest, 16));

	const __m128i Source = _mm_castps_si128(FillVector);
	__m128i *__restrict Dest = (__m128i *__restrict)_Dest;

	// Quadwords needed to bring Dest up to the next cache line (64-byte) boundary.
	// Clamp to the requested size: without the clamp a short fill starting off-boundary
	// would write past the end and underflow NumQuadwords.
	size_t InitialQuadwordCount = (4 - (((size_t)Dest >> 4) & 3)) & 3;
	if (InitialQuadwordCount > NumQuadwords)
		InitialQuadwordCount = NumQuadwords;
	NumQuadwords -= InitialQuadwordCount;

	switch (InitialQuadwordCount) {
	case 3:
		_mm_stream_si128(Dest++, Source); // Fall through
	case 2:
		_mm_stream_si128(Dest++, Source); // Fall through
	case 1:
		_mm_stream_si128(Dest++, Source); // Fall through
	default:
		break;
	}

	size_t WholeCacheLines = NumQuadwords >> 2;

	// Do four quadwords per loop to minimize stalls.
	while (WholeCacheLines--) {
		_mm_stream_si128(Dest++, Source);
		_mm_stream_si128(Dest++, Source);
		_mm_stream_si128(Dest++, Source);
		_mm_stream_si128(Dest++, Source);
	}

	// Fill the remaining quadwords
	switch (NumQuadwords & 3) {
	case 3:
		_mm_stream_si128(Dest++, Source); // Fall through
	case 2:
		_mm_stream_si128(Dest++, Source); // Fall through
	case 1:
		_mm_stream_si128(Dest++, Source); // Fall through
	default:
		break;
	}

	// Order the streaming stores before any store issued after this function returns.
	_mm_sfence();
}
Loading
Loading