diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt index 2e769bb18..03f676a00 100644 --- a/31_HLSLPathTracer/CMakeLists.txt +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -5,6 +5,35 @@ if(NOT RES) endif() if(NBL_BUILD_IMGUI) + # PATH_TRACER_BUILD_MODE controls how EX31 packages triangle polygon-method variants. + # WALLTIME_OPTIMIZED keeps triangle polygon-method selection inside one shared shader module. + # This preserves the same runtime feature surface while keeping triangle methods on one heavy backend compile path. + # SPECIALIZED bakes Area, SolidAngle and ProjectedSolidAngle into separate triangle entrypoints. + # That does not add only thin entrypoints. It multiplies the heavy triangle-side path tracing instantiations and pushes more work into the DXC/SPIR-V backend. + # On AMD Ryzen 5 5600G with Radeon Graphics (6C/12T), + # a Visual Studio Debug x64 full rebuild of the SPIR-V project completed in: + # WALLTIME_OPTIMIZED = 12.785 s + # SPECIALIZED = 18.314 s + # SPECIALIZED is +5.529 s slower which is +43.25%. + # Equivalently, WALLTIME_OPTIMIZED is 30.19% faster than SPECIALIZED. + # This comes from multiplying the heavy triangle-side path tracing instantiations + # and pushing more work into the DXC/SPIR-V backend. + # Therefore the default stays WALLTIME_OPTIMIZED. + set(PATH_TRACER_BUILD_MODE "WALLTIME_OPTIMIZED" CACHE STRING + "Choose the EX31 precompiled shader layout. WALLTIME_OPTIMIZED keeps polygon-method selection inside the shared triangle shader module to reduce compile wall time. SPECIALIZED bakes triangle polygon methods into distinct entrypoints, which increases backend work and rebuild time but keeps those variants as separate precompiled entrypoints." + ) + set(PATH_TRACER_CACHE_ROOT "pipeline/cache" CACHE STRING + "Relative cache root written to path_tracer.runtime.json in the common bin directory. The runtime resolves this path relative to the JSON file location. 
Empty disables the generated dev-mode JSON and falls back to --pipeline-cache-dir or LocalAppData." + ) + set_property(CACHE PATH_TRACER_BUILD_MODE PROPERTY STRINGS WALLTIME_OPTIMIZED SPECIALIZED) + set(_PATH_TRACER_BUILD_MODE_VALUES WALLTIME_OPTIMIZED SPECIALIZED) + if(NOT PATH_TRACER_BUILD_MODE IN_LIST _PATH_TRACER_BUILD_MODE_VALUES) + message(FATAL_ERROR "Unsupported PATH_TRACER_BUILD_MODE='${PATH_TRACER_BUILD_MODE}'. Expected one of: ${_PATH_TRACER_BUILD_MODE_VALUES}") + endif() + if(IS_ABSOLUTE "${PATH_TRACER_CACHE_ROOT}") + message(FATAL_ERROR "PATH_TRACER_CACHE_ROOT must stay relative because the runtime resolves it against path_tracer.runtime.json") + endif() + set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" ) @@ -16,6 +45,125 @@ if(NBL_BUILD_IMGUI) ) nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + target_compile_definitions(${EXECUTABLE_NAME} PRIVATE PATH_TRACER_BUILD_CONFIG_NAME=\"$\") + if(PATH_TRACER_BUILD_MODE STREQUAL "SPECIALIZED") + target_compile_definitions(${EXECUTABLE_NAME} PRIVATE PATH_TRACER_BUILD_MODE_SPECIALIZED=1) + else() + target_compile_definitions(${EXECUTABLE_NAME} PRIVATE PATH_TRACER_BUILD_MODE_WALLTIME_OPTIMIZED=1) + endif() + if(NOT PATH_TRACER_CACHE_ROOT STREQUAL "") + string(REPLACE "\\" "/" PATH_TRACER_CACHE_ROOT_JSON "${PATH_TRACER_CACHE_ROOT}") + file(GENERATE + OUTPUT "$/path_tracer.runtime.json" + CONTENT "{\n \"cache_root\": \"${PATH_TRACER_CACHE_ROOT_JSON}\"\n}\n" + ) + unset(PATH_TRACER_CACHE_ROOT_JSON) + endif() + + set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen-flat") + get_filename_component(OUTPUT_DIRECTORY_ABSOLUTE "${OUTPUT_DIRECTORY}" ABSOLUTE) + target_compile_definitions(${EXECUTABLE_NAME} PRIVATE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT="${OUTPUT_DIRECTORY_ABSOLUTE}") + + file(GLOB_RECURSE EXAMPLE_HLSL_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/app_resources/hlsl/*.hlsl") + 
target_sources(${EXECUTABLE_NAME} PRIVATE ${EXAMPLE_HLSL_SOURCES}) + set_source_files_properties(${EXAMPLE_HLSL_SOURCES} PROPERTIES HEADER_FILE_ONLY ON) + + set(SM 6_8) + set(COMPILE_OPTIONS + -I "${CMAKE_CURRENT_SOURCE_DIR}/include" + -I "${CMAKE_CURRENT_SOURCE_DIR}/app_resources/hlsl" + -I "${OUTPUT_DIRECTORY_ABSOLUTE}/$" + -T "lib_${SM}" + -Wno-conversion + -Wno-sign-conversion + -Wno-float-conversion + -Wno-shorten-64-to-32 + -Wno-shadow + -Wno-literal-range + ) + + set(SPIRV_FOLDER "SPIRV") + macro(PATH_TRACER_APPEND_SPIRV_RULE) + set(options) + set(oneValueArgs INPUT KEY) + set(multiValueArgs COMPILE_OPTIONS) + cmake_parse_arguments(PATH_TRACER_RULE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if(PATH_TRACER_RULE_INPUT STREQUAL "" OR PATH_TRACER_RULE_KEY STREQUAL "") + message(FATAL_ERROR "PATH_TRACER_APPEND_SPIRV_RULE requires INPUT and KEY") + endif() + set(PATH_TRACER_RULE_JSON "{\"INPUT\":\"${PATH_TRACER_RULE_INPUT}\",\"KEY\":\"${PATH_TRACER_RULE_KEY}\"") + if(PATH_TRACER_RULE_COMPILE_OPTIONS) + set(PATH_TRACER_RULE_COMPILE_OPTIONS_JSON "") + foreach(PATH_TRACER_RULE_COMPILE_OPTION IN LISTS PATH_TRACER_RULE_COMPILE_OPTIONS) + string(APPEND PATH_TRACER_RULE_COMPILE_OPTIONS_JSON "\"${PATH_TRACER_RULE_COMPILE_OPTION}\",") + endforeach() + string(REGEX REPLACE ",$" "" PATH_TRACER_RULE_COMPILE_OPTIONS_JSON "${PATH_TRACER_RULE_COMPILE_OPTIONS_JSON}") + string(APPEND PATH_TRACER_RULE_JSON ",\"COMPILE_OPTIONS\":[${PATH_TRACER_RULE_COMPILE_OPTIONS_JSON}]") + endif() + string(APPEND PATH_TRACER_RULE_JSON "}") + list(APPEND SPIRV_RULE_OBJECTS "${PATH_TRACER_RULE_JSON}") + unset(PATH_TRACER_RULE_JSON) + unset(PATH_TRACER_RULE_COMPILE_OPTIONS_JSON) + endmacro() + + set(SPIRV_RULE_OBJECTS "") + set(PATH_TRACER_SCENE_SPHERE 0) + set(PATH_TRACER_SCENE_TRIANGLE 1) + set(PATH_TRACER_SCENE_RECTANGLE 2) + set(PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM 0) + set(PATH_TRACER_ENTRYPOINT_LINEAR 1) + set(PATH_TRACER_ENTRYPOINT_PERSISTENT 2) + # Keep the payload flat 
and explicit here. Once Nabla PR #988 lands, these per-rule compile axes should move to first-class packaged-variant support there. + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl" KEY "pt.compute.sphere" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_SPHERE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=0") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl" KEY "pt.compute.sphere.rwmc" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_SPHERE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=0") + if(PATH_TRACER_BUILD_MODE STREQUAL "SPECIALIZED") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl" KEY "pt.compute.triangle.linear" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_LINEAR}") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl" KEY "pt.compute.triangle.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_PERSISTENT}") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl" KEY "pt.compute.triangle.rwmc.linear" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_LINEAR}") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl" KEY "pt.compute.triangle.rwmc.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_PERSISTENT}") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl" KEY 
"pt.compute.rectangle.rwmc.linear" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_LINEAR}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl" KEY "pt.compute.rectangle.rwmc.persistent" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_PERSISTENT}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + else() + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.proxy.hlsl" KEY "pt.compute.triangle" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_TRIANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.triangle.rwmc.proxy.hlsl" KEY "pt.compute.triangle.rwmc" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_TRIANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.proxy.hlsl" KEY "pt.compute.rectangle.rwmc" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=1" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + endif() + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl" KEY "pt.compute.rectangle" COMPILE_OPTIONS "-DPT_VARIANT_USE_RWMC=0" "-DPT_VARIANT_SCENE_KIND=${PATH_TRACER_SCENE_RECTANGLE}" "-DPT_VARIANT_ENTRYPOINT_KIND=${PATH_TRACER_ENTRYPOINT_RUNTIME_UNIFORM}" "-DPT_VARIANT_RUNTIME_POLYGON_METHOD=1") + 
PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/resolve.comp.hlsl" KEY "pt.compute.resolve") + PATH_TRACER_APPEND_SPIRV_RULE(INPUT "app_resources/hlsl/spirv/pt.misc.proxy.hlsl" KEY "pt.misc") + string(JOIN ",\n" PATH_TRACER_RULES_BODY ${SPIRV_RULE_OBJECTS}) + set(SPIRV_RULES_JSON_TEMPLATE [=[ +[ +@PATH_TRACER_RULES_BODY@ +] +]=]) + string(CONFIGURE "${SPIRV_RULES_JSON_TEMPLATE}" SPIRV_RULES_JSON @ONLY) + unset(SPIRV_RULE_OBJECTS) + unset(PATH_TRACER_RULES_BODY) + set(ALL_SPIRV_KEYS "") + NBL_CREATE_NSC_COMPILE_RULES( + DISCARD_DEFAULT_GLOB + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS ${COMPILE_OPTIONS} + OUTPUT_VAR ALL_SPIRV_KEYS + INCLUDE "nbl/this_example/builtin/build/spirv/generated/PathTracerKeys.hpp" + NAMESPACE nbl::this_example::builtin::build + INPUTS ${SPIRV_RULES_JSON} + ) + set_target_properties(${EXECUTABLE_NAME}SPIRV PROPERTIES FOLDER "${SPIRV_FOLDER}") + set(SPIRV_KEYS ${ALL_SPIRV_KEYS}) + list(FILTER SPIRV_KEYS INCLUDE REGEX "\\.spv$") + + NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${SPIRV_KEYS} + ) if(NBL_EMBED_BUILTIN_RESOURCES) set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) @@ -34,6 +182,7 @@ if(NBL_BUILD_IMGUI) ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + set_target_properties(${_BR_TARGET_} PROPERTIES FOLDER "${SPIRV_FOLDER}") endif() endif() diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl new file mode 100644 index 000000000..be055bd83 --- /dev/null +++ 
b/31_HLSLPathTracer/app_resources/hlsl/compute.render.common.hlsl @@ -0,0 +1,62 @@ +#ifndef PATH_TRACER_USE_RWMC +#error PATH_TRACER_USE_RWMC must be defined before including compute.render.common.hlsl +#endif + +#ifndef PATH_TRACER_ENABLE_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#endif + +#ifndef PATH_TRACER_ENABLE_PERSISTENT +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#endif + +#if !PATH_TRACER_ENABLE_LINEAR && !PATH_TRACER_ENABLE_PERSISTENT +#error At least one path tracer entrypoint mode must be enabled +#endif + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#if PATH_TRACER_ENABLE_PERSISTENT +#include "nbl/builtin/hlsl/morton.hlsl" +#endif +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" +#include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" +#include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" +#include "render_common.hlsl" + +#if PATH_TRACER_USE_RWMC +#include "nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl" +#include "render_rwmc_common.hlsl" +#else +#include "nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl" +#endif + +#if PATH_TRACER_USE_RWMC +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D envMap; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState envSampler; + +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] Texture2D scramblebuf; +[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; + +#if PATH_TRACER_USE_RWMC +[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; +#endif + +#include "example_common.hlsl" +#include "rand_gen.hlsl" +#include "intersector.hlsl" 
+#include "material_system.hlsl" +#include "next_event_estimator.hlsl" + +using namespace nbl; +using namespace hlsl; diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl new file mode 100644 index 000000000..163efd952 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.linear.entrypoints.hlsl @@ -0,0 +1,25 @@ +#ifndef PATH_TRACER_ENTRYPOINT_NAME +#define PATH_TRACER_ENTRYPOINT_NAME main +#endif + +#ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#ifdef PATH_TRACER_RUNTIME_POLYGON_METHOD +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PATH_TRACER_RUNTIME_POLYGON_METHOD +#else +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#endif +#endif + +#if !PATH_TRACER_ENABLE_LINEAR +#error Linear entrypoint requested while PATH_TRACER_ENABLE_LINEAR is disabled +#endif + +[numthreads(RenderWorkgroupSize, 1, 1)] +[shader("compute")] +void PATH_TRACER_ENTRYPOINT_NAME(uint32_t3 threadID : SV_DispatchThreadID) +{ + pathtracer_render_variant::runLinear(threadID, PATH_TRACER_ENTRYPOINT_POLYGON_METHOD); +} + +#undef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#undef PATH_TRACER_ENTRYPOINT_NAME diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl new file mode 100644 index 000000000..45ea0a773 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute.render.persistent.entrypoints.hlsl @@ -0,0 +1,25 @@ +#ifndef PATH_TRACER_ENTRYPOINT_NAME +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistent +#endif + +#ifndef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#ifdef PATH_TRACER_RUNTIME_POLYGON_METHOD +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PATH_TRACER_RUNTIME_POLYGON_METHOD +#else +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#endif +#endif + +#if 
!PATH_TRACER_ENABLE_PERSISTENT +#error Persistent entrypoint requested while PATH_TRACER_ENABLE_PERSISTENT is disabled +#endif + +[numthreads(RenderWorkgroupSize, 1, 1)] +[shader("compute")] +void PATH_TRACER_ENTRYPOINT_NAME(uint32_t3 threadID : SV_DispatchThreadID) +{ + pathtracer_render_variant::runPersistent(PATH_TRACER_ENTRYPOINT_POLYGON_METHOD); +} + +#undef PATH_TRACER_ENTRYPOINT_POLYGON_METHOD +#undef PATH_TRACER_ENTRYPOINT_NAME diff --git a/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl new file mode 100644 index 000000000..e4cf22ed6 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/compute_render_scene_impl.hlsl @@ -0,0 +1,152 @@ +#ifndef PATH_TRACER_USE_RWMC +#error PATH_TRACER_USE_RWMC must be defined before including compute_render_scene_impl.hlsl +#endif + +namespace pathtracer_render_variant +{ +using namespace nbl; +using namespace hlsl; + +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = PTIsotropicInteraction; +using aniso_interaction = PTAnisotropicInteraction; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; + +using iso_config_t = PTIsoConfiguration; +using iso_microfacet_config_t = PTIsoMicrofacetConfiguration; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; +using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricIsotropic; +using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; +using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; + +using payload_type = Payload; +using ray_type = Ray; +using randgen_type = RandomUniformND; +using raygen_type = path_tracing::BasicRayGenerator; +using intersector_type = Intersector; +using material_system_type = MaterialSystem; +using nee_type = NextEventEstimator; + +#if PATH_TRACER_USE_RWMC 
+using accumulator_type = rwmc::CascadeAccumulator >; +#else +using accumulator_type = path_tracing::DefaultAccumulator; +#endif + +using pathtracer_type = path_tracing::Unidirectional; + +RenderPushConstants getRenderPushConstants() +{ +#if PATH_TRACER_USE_RWMC + return ::pc.renderPushConstants; +#else + return ::pc; +#endif +} + +void tracePixel(int32_t2 coords, NEEPolygonMethod polygonMethod) +{ + const RenderPushConstants renderPushConstants = getRenderPushConstants(); + + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + if (any(coords < int32_t2(0, 0)) || any(coords >= int32_t2(width, height))) + return; + + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + if (((renderPushConstants.depth - 1) >> MaxDepthLog2) > 0 || ((renderPushConstants.sampleCount - 1) >> MaxSamplesLog2) > 0) + { + ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(1.0, 0.0, 0.0, 1.0); + return; + } + + pathtracer_type pathtracer; + + uint2 scrambleDim; + ::scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + const float32_t2 pixOffsetParam = float32_t2(1.0, 1.0) / float32_t2(scrambleDim); + + float32_t4 NDC = float32_t4(texCoord * float32_t2(2.0, -2.0) + float32_t2(-1.0, 1.0), 0.0, 1.0); + float32_t3 camPos; + { + float32_t4 tmp = mul(renderPushConstants.invMVP, NDC); + camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + scene_type scene; + scene.updateLight(renderPushConstants.generalPurposeLightMatrix); + + raygen_type rayGen; + rayGen.pixOffsetParam = pixOffsetParam; + rayGen.camPos = camPos; + rayGen.NDC = NDC; + rayGen.invMVP = renderPushConstants.invMVP; + + pathtracer.scene = scene; + pathtracer.randGen = randgen_type::create(::scramblebuf[coords].rg, renderPushConstants.pSampleSequence); + pathtracer.nee.lights = lights; + pathtracer.nee.polygonMethod = polygonMethod; + pathtracer.materialSystem.bxdfs = bxdfs; + pathtracer.bxdfPdfThreshold = 0.0001; + 
pathtracer.lumaContributionThreshold = hlsl::dot(colorspace::scRGBtoXYZ[1], colorspace::eotf::sRGB(hlsl::promote(1.0 / 255.0))); + pathtracer.spectralTypeToLumaCoeffs = colorspace::scRGBtoXYZ[1]; + +#if PATH_TRACER_USE_RWMC + accumulator_type accumulator = accumulator_type::create(::pc.splattingParameters); +#else + accumulator_type accumulator = accumulator_type::create(); +#endif + + for (int i = 0; i < renderPushConstants.sampleCount; ++i) + { + const float32_t3 uvw = pathtracer.randGen(0u, i); + ray_type ray = rayGen.generate(uvw); + ray.initPayload(); + pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); + } + +#if PATH_TRACER_USE_RWMC + for (uint32_t i = 0; i < CascadeCount; ++i) + ::cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); +#else + ::outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); +#endif +} + +#if PATH_TRACER_ENABLE_LINEAR +void runLinear(uint32_t3 threadID, NEEPolygonMethod polygonMethod) +{ + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + tracePixel(int32_t2(threadID.x % width, threadID.x / width), polygonMethod); +} +#endif + +#if PATH_TRACER_ENABLE_PERSISTENT +void runPersistent(NEEPolygonMethod polygonMethod) +{ + uint32_t width, height, imageArraySize; + ::outImage.GetDimensions(width, height, imageArraySize); + const uint32_t numWorkgroupsX = width / RenderWorkgroupSizeSqrt; + const uint32_t numWorkgroupsY = height / RenderWorkgroupSizeSqrt; + + [loop] + for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < numWorkgroupsX * numWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) + { + const int32_t2 wgCoords = int32_t2(wgBase % numWorkgroupsX, wgBase / numWorkgroupsX); + morton::code mc; + mc.value = glsl::gl_LocalInvocationIndex().x; + const int32_t2 localCoords = _static_cast(mc); + tracePixel(wgCoords * int32_t2(RenderWorkgroupSizeSqrt, RenderWorkgroupSizeSqrt) + localCoords, 
polygonMethod); + } +} +#endif +} +#undef PATH_TRACER_USE_RWMC diff --git a/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl b/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl new file mode 100644 index 000000000..f36d98144 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/imgui.unified.hlsl @@ -0,0 +1,9 @@ +#define NBL_TEXTURES_BINDING_IX 0 +#define NBL_SAMPLER_STATES_BINDING_IX 1 +#define NBL_TEXTURES_SET_IX 0 +#define NBL_SAMPLER_STATES_SET_IX 0 +#define NBL_TEXTURES_COUNT 1 +#define NBL_SAMPLERS_COUNT 2 + +#include "nbl/ext/ImGui/builtin/hlsl/fragment.hlsl" +#include "nbl/ext/ImGui/builtin/hlsl/vertex.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl index 2a50c71a6..83b3e623e 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -9,7 +9,6 @@ using namespace hlsl; template struct ShapeSampling; -// Sphere only supports solid angle template struct ShapeSampling { @@ -304,10 +303,139 @@ struct ShapeSampling Shape rect; }; -// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE +// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_RECTANGLE +template +struct RuntimeShapeSamplingSelector; -template +template +struct RuntimeShapeSamplingSelector +{ + using scalar_type = T; + using vector3_type = vector; + using shape_type = Shape; + + template + static scalar_type deferredPdf(NEEPolygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + + template + static vector3_type generate_and_pdf(NEEPolygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, 
NBL_CONST_REF_ARG(vector3_type) xi) + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } +}; + +template +struct RuntimeShapeSamplingSelector +{ + using scalar_type = T; + using vector3_type = vector; + using shape_type = Shape; + + template + static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) + { + switch (polygonMethod) + { + case PPM_AREA: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + case PPM_SOLID_ANGLE: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + case PPM_APPROX_PROJECTED_SOLID_ANGLE: + default: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + } + } + + template + static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + switch (polygonMethod) + { + case PPM_AREA: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } + case PPM_SOLID_ANGLE: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } + case PPM_APPROX_PROJECTED_SOLID_ANGLE: + default: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } + } + } +}; + +template +struct RuntimeShapeSamplingSelector +{ + using scalar_type = T; + using vector3_type = vector; + 
using shape_type = Shape; + + template + static scalar_type deferredPdf(const NEEPolygonMethod polygonMethod, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(Ray) ray) + { + switch (polygonMethod) + { + case PPM_AREA: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + case PPM_SOLID_ANGLE: + case PPM_APPROX_PROJECTED_SOLID_ANGLE: + default: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template deferredPdf(ray); + } + } + } + + template + static vector3_type generate_and_pdf(const NEEPolygonMethod polygonMethod, NBL_REF_ARG(scalar_type) pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(shape_type) shape, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(Aniso) interaction, NBL_CONST_REF_ARG(vector3_type) xi) + { + switch (polygonMethod) + { + case PPM_AREA: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } + case PPM_SOLID_ANGLE: + case PPM_APPROX_PROJECTED_SOLID_ANGLE: + default: + { + const ShapeSampling sampling = ShapeSampling::create(shape); + return sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + } + } + } +}; + + +template struct NextEventEstimator { using scalar_type = typename Ray::scalar_type; @@ -324,7 +452,6 @@ struct NextEventEstimator using tolerance_method_type = Tolerance; using shape_type = Shape; - using shape_sampling_type = ShapeSampling; struct SampleQuotientReturn { @@ -344,24 +471,22 @@ struct NextEventEstimator object_handle_type getLightObjectID() NBL_CONST_MEMBER_FUNC { return lightObjectID; } }; using sample_quotient_return_type = SampleQuotientReturn; + using runtime_sampling_selector = RuntimeShapeSamplingSelector; template NBL_FUNC_REQUIRES(C::value && PST==PST_SPHERE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) 
+ shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type sphere = scene.getSphere(lightObjectID); - return shape_sampling_type::create(sphere); + return scene.getSphere(lightObjectID); } template NBL_FUNC_REQUIRES(C::value && PST==PST_TRIANGLE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type tri = scene.getTriangle(lightObjectID); - return shape_sampling_type::create(tri); + return scene.getTriangle(lightObjectID); } template NBL_FUNC_REQUIRES(C::value && PST==PST_RECTANGLE) - shape_sampling_type __getShapeSampling(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) + shape_type __getShape(uint32_t lightObjectID, NBL_CONST_REF_ARG(scene_type) scene) { - const shape_type rect = scene.getRectangle(lightObjectID); - return shape_sampling_type::create(rect); + return scene.getRectangle(lightObjectID); } scalar_type deferred_pdf(NBL_CONST_REF_ARG(scene_type) scene, light_id_type lightID, NBL_CONST_REF_ARG(ray_type) ray) @@ -369,8 +494,8 @@ struct NextEventEstimator if (lightID.id == 0u) return scalar_type(0.0); // env light pdf=0 const light_type light = lights[0u]; - const shape_sampling_type sampling = __getShapeSampling(light.objectID.id, scene); - return sampling.template deferredPdf(ray) / scalar_type(scene_type::SCENE_LIGHT_COUNT); + const shape_type shape = __getShape(light.objectID.id, scene); + return runtime_sampling_selector::template deferredPdf(polygonMethod, shape, ray) / scalar_type(scene_type::SCENE_LIGHT_COUNT); } template @@ -381,11 +506,11 @@ struct NextEventEstimator // use constant indices because with variables, driver (at least nvidia) seemed to nuke the light array and propagated constants throughout the code // which caused frame times to increase from 16ms to 85ms const light_type light = lights[0u]; - const shape_sampling_type 
sampling = __getShapeSampling(light.objectID.id, scene); + const shape_type shape = __getShape(light.objectID.id, scene); sample_quotient_return_type retval; scalar_type pdf, newRayMaxT; - const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, xi); + const vector3_type sampleL = runtime_sampling_selector::template generate_and_pdf(polygonMethod, pdf, newRayMaxT, shape, origin, interaction, xi); const vector3_type N = interaction.getN(); const scalar_type NdotL = nbl::hlsl::dot(N, sampleL); @@ -435,6 +560,7 @@ struct NextEventEstimator } light_type lights[scene_type::SCENE_LIGHT_COUNT]; + NEEPolygonMethod polygonMethod; }; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl index d556a7162..d69815fba 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -2,8 +2,6 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#pragma wave shader_stage(fragment) - // vertex shader is provided by the fullScreenTriangle extension #include using namespace nbl::hlsl; @@ -13,7 +11,8 @@ using namespace ext::FullScreenTriangle; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2DArray texture; [[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; -[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +[shader("pixel")] +float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 { return float32_t4(texture.Sample(samplerState, float3(vxAttr.uv, 0)).rgb, 1.0f); -} \ No newline at end of file +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl deleted file mode 100644 index 204020719..000000000 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ /dev/null @@ -1,222 +0,0 @@ -#include "nbl/builtin/hlsl/cpp_compat.hlsl" -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/random/pcg.hlsl" -#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" -#ifdef PERSISTENT_WORKGROUPS -#include "nbl/builtin/hlsl/morton.hlsl" -#endif - -#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" -#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" - -#include "nbl/builtin/hlsl/path_tracing/basic_ray_gen.hlsl" -#include "nbl/builtin/hlsl/path_tracing/unidirectional.hlsl" - -// add these defines (one at a time) using -D argument to dxc -// #define SPHERE_LIGHT -// #define TRIANGLE_LIGHT -// #define RECTANGLE_LIGHT - -#include "render_common.hlsl" -#include "resolve_common.hlsl" - -#ifdef RWMC_ENABLED -#include -#include -#endif - -#ifdef RWMC_ENABLED -[[vk::push_constant]] RenderRWMCPushConstants pc; -#else -[[vk::push_constant]] RenderPushConstants pc; -#endif - -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D envMap; // unused -[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] 
SamplerState envSampler; - -[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] Texture2D scramblebuf; -[[vk::combinedImageSampler]] [[vk::binding(1, 0)]] SamplerState scrambleSampler; - -[[vk::image_format("rgba16f")]] [[vk::binding(2, 0)]] RWTexture2DArray outImage; -[[vk::image_format("rgba16f")]] [[vk::binding(3, 0)]] RWTexture2DArray cascade; - -#include "example_common.hlsl" -#include "rand_gen.hlsl" -#include "intersector.hlsl" -#include "material_system.hlsl" -#include "next_event_estimator.hlsl" - -using namespace nbl; -using namespace hlsl; - -#ifdef SPHERE_LIGHT -#include "scene_sphere_light.hlsl" -#endif -#ifdef TRIANGLE_LIGHT -#include "scene_triangle_light.hlsl" -#endif -#ifdef RECTANGLE_LIGHT -#include "scene_rectangle_light.hlsl" -#endif - -NBL_CONSTEXPR NEEPolygonMethod POLYGON_METHOD = PPM_APPROX_PROJECTED_SOLID_ANGLE; - -int32_t2 getCoordinates() -{ - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); - return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); -} - -float32_t2 getTexCoords() -{ - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); - int32_t2 iCoords = getCoordinates(); - return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); -} - -using spectral_t = vector; -using ray_dir_info_t = bxdf::ray_dir_info::SBasic; -using iso_interaction = PTIsotropicInteraction; -using aniso_interaction = PTAnisotropicInteraction; -using sample_t = bxdf::SLightSample; -using iso_cache = bxdf::SIsotropicMicrofacetCache; -using aniso_cache = bxdf::SAnisotropicMicrofacetCache; -using quotient_pdf_t = sampling::quotient_and_pdf; - -using iso_config_t = PTIsoConfiguration; -using iso_microfacet_config_t = PTIsoMicrofacetConfiguration; - -using diffuse_bxdf_type = bxdf::reflection::SOrenNayar; -using conductor_bxdf_type = bxdf::reflection::SGGXIsotropic; -using dielectric_bxdf_type = 
bxdf::transmission::SGGXDielectricIsotropic; -using iri_conductor_bxdf_type = bxdf::reflection::SIridescent; -using iri_dielectric_bxdf_type = bxdf::transmission::SIridescent; - -using payload_type = Payload; -using ray_type = Ray; -using randgen_type = RandomUniformND; -using raygen_type = path_tracing::BasicRayGenerator; -using intersector_type = Intersector; -using material_system_type = MaterialSystem; -using nee_type = NextEventEstimator; - -#ifdef RWMC_ENABLED -using accumulator_type = rwmc::CascadeAccumulator >; -#else -#include "nbl/builtin/hlsl/path_tracing/default_accumulator.hlsl" -using accumulator_type = path_tracing::DefaultAccumulator; -#endif - -using pathtracer_type = path_tracing::Unidirectional; - -RenderPushConstants retireveRenderPushConstants() -{ -#ifdef RWMC_ENABLED - return pc.renderPushConstants; -#else - return pc; -#endif -} - -[numthreads(RenderWorkgroupSize, 1, 1)] -void main(uint32_t3 threadID : SV_DispatchThreadID) -{ - const RenderPushConstants renderPushConstants = retireveRenderPushConstants(); - - uint32_t width, height, imageArraySize; - outImage.GetDimensions(width, height, imageArraySize); -#ifdef PERSISTENT_WORKGROUPS - const uint32_t NumWorkgroupsX = width / RenderWorkgroupSizeSqrt; - const uint32_t NumWorkgroupsY = height / RenderWorkgroupSizeSqrt; - [loop] - for (uint32_t wgBase = glsl::gl_WorkGroupID().x; wgBase < NumWorkgroupsX*NumWorkgroupsY; wgBase += glsl::gl_NumWorkGroups().x) - { - const int32_t2 wgCoords = int32_t2(wgBase % NumWorkgroupsX, wgBase / NumWorkgroupsX); - morton::code mc; - mc.value = glsl::gl_LocalInvocationIndex().x; - const int32_t2 localCoords = _static_cast(mc); - const int32_t2 coords = wgCoords * int32_t2(RenderWorkgroupSizeSqrt,RenderWorkgroupSizeSqrt) + localCoords; -#else - const int32_t2 coords = getCoordinates(); -#endif - float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); - texCoord.y = 1.0 - texCoord.y; - - if (any(coords < int32_t2(0,0)) || any(coords >= int32_t2(width, 
height))) { -#ifdef PERSISTENT_WORKGROUPS - continue; -#else - return; -#endif - } - - if (((renderPushConstants.depth - 1) >> MaxDepthLog2) > 0 || ((renderPushConstants.sampleCount - 1) >> MaxSamplesLog2) > 0) - { - float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); - outImage[uint3(coords.x, coords.y, 0)] = pixelCol; -#ifdef PERSISTENT_WORKGROUPS - continue; -#else - return; -#endif - } - - // set up path tracer - pathtracer_type pathtracer; - - uint2 scrambleDim; - scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); - float32_t2 pixOffsetParam = (float2)1.0 / float2(scrambleDim); - - float32_t4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); - float32_t3 camPos; - { - float4 tmp = mul(renderPushConstants.invMVP, NDC); - camPos = tmp.xyz / tmp.w; - NDC.z = 1.0; - } - - scene_type scene; - scene.updateLight(renderPushConstants.generalPurposeLightMatrix); - - raygen_type rayGen; - rayGen.pixOffsetParam = pixOffsetParam; - rayGen.camPos = camPos; - rayGen.NDC = NDC; - rayGen.invMVP = renderPushConstants.invMVP; - - pathtracer.scene = scene; - pathtracer.randGen = randgen_type::create(scramblebuf[coords].rg, renderPushConstants.pSampleSequence); - pathtracer.nee.lights = lights; - pathtracer.materialSystem.bxdfs = bxdfs; - pathtracer.bxdfPdfThreshold = 0.0001; - pathtracer.lumaContributionThreshold = hlsl::dot(colorspace::scRGBtoXYZ[1], colorspace::eotf::sRGB(hlsl::promote(1.0 / 255.0))); - pathtracer.spectralTypeToLumaCoeffs = colorspace::scRGBtoXYZ[1]; - -#ifdef RWMC_ENABLED - accumulator_type accumulator = accumulator_type::create(pc.splattingParameters); -#else - accumulator_type accumulator = accumulator_type::create(); -#endif - // path tracing loop - for(int i = 0; i < renderPushConstants.sampleCount; ++i) - { - float32_t3 uvw = pathtracer.randGen(0u, i); - ray_type ray = rayGen.generate(uvw); - ray.initPayload(); - pathtracer.sampleMeasure(ray, i, renderPushConstants.depth, accumulator); - } - -#ifdef RWMC_ENABLED - for 
(uint32_t i = 0; i < CascadeCount; ++i) - cascade[uint3(coords.x, coords.y, i)] = float32_t4(accumulator.accumulation.data[i], 1.0f); -#else - outImage[uint3(coords.x, coords.y, 0)] = float32_t4(accumulator.accumulation, 1.0); -#endif - -#ifdef PERSISTENT_WORKGROUPS - } -#endif -} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index f69496c48..2b4f69f05 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -11,6 +11,7 @@ struct RenderPushConstants float32_t3x4 generalPurposeLightMatrix; int sampleCount; int depth; + uint32_t polygonMethod; uint64_t pSampleSequence; }; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl index 540aadf76..7f1794b15 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -1,6 +1,7 @@ #ifndef _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ #define _PATHTRACER_EXAMPLE_RENDER_RWMC_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "rwmc_common.hlsl" #include "nbl/builtin/hlsl/rwmc/SplattingParameters.hlsl" #include "render_common.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl index c0982e9f2..346ff7322 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -47,7 +47,8 @@ int32_t2 getImageExtents() } [numthreads(ResolveWorkgroupSizeX, ResolveWorkgroupSizeY, 1)] -void main(uint32_t3 threadID : SV_DispatchThreadID) +[shader("compute")] +void resolve(uint32_t3 threadID : SV_DispatchThreadID) { const int32_t2 coords = int32_t2(threadID.x, threadID.y); const int32_t2 imageExtents = getImageExtents(); diff --git 
a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl index 66fb20acb..ec13c0080 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -1,6 +1,7 @@ #ifndef _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_ #define _PATHTRACER_EXAMPLE_RESOLVE_COMMON_INCLUDED_ #include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "rwmc_common.hlsl" #include "nbl/builtin/hlsl/rwmc/ResolveParameters.hlsl" struct ResolvePushConstants @@ -10,6 +11,4 @@ struct ResolvePushConstants NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeX = 32u; NBL_CONSTEXPR uint32_t ResolveWorkgroupSizeY = 16u; -NBL_CONSTEXPR uint32_t CascadeCount = 6u; - #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl new file mode 100644 index 000000000..77020ce17 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_common.hlsl @@ -0,0 +1,8 @@ +#ifndef _PATHTRACER_EXAMPLE_RWMC_COMMON_INCLUDED_ +#define _PATHTRACER_EXAMPLE_RWMC_COMMON_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t CascadeCount = 6u; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl index 070a7c164..59a9f3c57 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/scene_base.hlsl @@ -1,7 +1,13 @@ -#ifndef _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ -#define _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ +#if !defined(PATHTRACER_SCENE_BASE_MULTI_INCLUDE) + #ifndef _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ + #define _PATHTRACER_EXAMPLE_SCENE_BASE_INCLUDED_ + #define PATHTRACER_SCENE_BASE_EMIT_BODY 1 + #endif +#else + #define PATHTRACER_SCENE_BASE_EMIT_BODY 1 +#endif -#include "example_common.hlsl" +#if PATHTRACER_SCENE_BASE_EMIT_BODY using namespace nbl; using namespace hlsl; @@ -73,4 
+79,5 @@ static const bxdfnode_type bxdfs[SceneBase::SCENE_BXDF_COUNT] = { bxdfnode_type::create(MaterialType::EMISSIVE, LightEminence) }; +#undef PATHTRACER_SCENE_BASE_EMIT_BODY #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.rectangle.rwmc.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ 
b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.sphere.rwmc.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl new file mode 100644 index 000000000..53f6cdd19 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.methods.shared.hlsl @@ -0,0 +1,48 @@ +#pragma once + +#if !defined(PT_VARIANT_USE_RWMC) || !defined(PT_VARIANT_ENTRYPOINT_KIND) +#error Missing triangle method compile options +#endif + +#define PT_VARIANT_ENTRYPOINT_LINEAR 1 +#define PT_VARIANT_ENTRYPOINT_PERSISTENT 2 +#define PATH_TRACER_USE_RWMC PT_VARIANT_USE_RWMC +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 0 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_PERSISTENT +#define PATH_TRACER_ENABLE_LINEAR 0 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#else +#error Unsupported PT_VARIANT_ENTRYPOINT_KIND +#endif + +#include "compute.render.common.hlsl" +#include "scene_triangle_light.hlsl" +#include "compute_render_scene_impl.hlsl" 
+ +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENTRYPOINT_NAME main +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#include "compute.render.linear.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainArea +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_AREA +#include "compute.render.linear.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainSolidAngle +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "compute.render.linear.entrypoints.hlsl" +#else +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistent +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_APPROX_PROJECTED_SOLID_ANGLE +#include "compute.render.persistent.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistentArea +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_AREA +#include "compute.render.persistent.entrypoints.hlsl" + +#define PATH_TRACER_ENTRYPOINT_NAME mainPersistentSolidAngle +#define PATH_TRACER_ENTRYPOINT_POLYGON_METHOD PPM_SOLID_ANGLE +#include "compute.render.persistent.entrypoints.hlsl" +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl 
b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.linear.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl new file mode 100644 index 000000000..5035e960d --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.persistent.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.triangle.methods.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.proxy.hlsl new file mode 100644 index 000000000..8e04bcfc1 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.triangle.rwmc.proxy.hlsl @@ -0,0 +1 @@ +#include "pt.compute.variant.shared.hlsl" diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl new file mode 100644 index 000000000..db5e335b4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.compute.variant.shared.hlsl @@ -0,0 +1,51 @@ +#pragma once + +#if !defined(PT_VARIANT_USE_RWMC) || !defined(PT_VARIANT_SCENE_KIND) || !defined(PT_VARIANT_ENTRYPOINT_KIND) || !defined(PT_VARIANT_RUNTIME_POLYGON_METHOD) +#error Missing path tracer variant compile options +#endif + +#define PT_VARIANT_SCENE_SPHERE 0 +#define PT_VARIANT_SCENE_TRIANGLE 1 +#define PT_VARIANT_SCENE_RECTANGLE 2 +#define PT_VARIANT_ENTRYPOINT_RUNTIME_UNIFORM 0 +#define PT_VARIANT_ENTRYPOINT_LINEAR 1 +#define PT_VARIANT_ENTRYPOINT_PERSISTENT 2 + +#define PATH_TRACER_USE_RWMC PT_VARIANT_USE_RWMC +#if PT_VARIANT_ENTRYPOINT_KIND == 
PT_VARIANT_ENTRYPOINT_RUNTIME_UNIFORM +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#define PATH_TRACER_ENABLE_LINEAR 1 +#define PATH_TRACER_ENABLE_PERSISTENT 0 +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_PERSISTENT +#define PATH_TRACER_ENABLE_LINEAR 0 +#define PATH_TRACER_ENABLE_PERSISTENT 1 +#else +#error Unsupported PT_VARIANT_ENTRYPOINT_KIND +#endif + +#include "compute.render.common.hlsl" +#if PT_VARIANT_SCENE_KIND == PT_VARIANT_SCENE_SPHERE +#include "scene_sphere_light.hlsl" +#elif PT_VARIANT_SCENE_KIND == PT_VARIANT_SCENE_TRIANGLE +#include "scene_triangle_light.hlsl" +#elif PT_VARIANT_SCENE_KIND == PT_VARIANT_SCENE_RECTANGLE +#include "scene_rectangle_light.hlsl" +#else +#error Unsupported PT_VARIANT_SCENE_KIND +#endif +#include "compute_render_scene_impl.hlsl" + +#if PT_VARIANT_RUNTIME_POLYGON_METHOD +#define PATH_TRACER_RUNTIME_POLYGON_METHOD ((NEEPolygonMethod)(pathtracer_render_variant::getRenderPushConstants().polygonMethod)) +#endif + +#if PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_RUNTIME_UNIFORM +#include "compute.render.linear.entrypoints.hlsl" +#include "compute.render.persistent.entrypoints.hlsl" +#elif PT_VARIANT_ENTRYPOINT_KIND == PT_VARIANT_ENTRYPOINT_LINEAR +#include "compute.render.linear.entrypoints.hlsl" +#else +#include "compute.render.persistent.entrypoints.hlsl" +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl new file mode 100644 index 000000000..8a2882574 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/spirv/pt.misc.proxy.hlsl @@ -0,0 +1,5 @@ +#include "present.frag.hlsl" + +#define pc ex31_imgui_pc +#include "imgui.unified.hlsl" +#undef pc diff --git a/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl 
b/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl new file mode 100644 index 000000000..bdcd5ae31 --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/builtin/hlsl/bxdf/ndf/microfacet_to_light_transform.hlsl @@ -0,0 +1,119 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_BXDF_NDF_MICROFACET_LIGHT_TRANSFORM_INCLUDED_ + +#include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/bxdf/common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace bxdf +{ +namespace ndf +{ + +enum MicrofacetTransformTypes : uint16_t +{ + MTT_REFLECT = 0b01, + MTT_REFRACT = 0b10, + MTT_REFLECT_REFRACT = 0b11 +}; + +namespace microfacet_transform_concepts +{ +#define NBL_CONCEPT_NAME QuantQuery +#define NBL_CONCEPT_TPLT_PRM_KINDS (typename) +#define NBL_CONCEPT_TPLT_PRM_NAMES (T) +#define NBL_CONCEPT_PARAM_0 (query, T) +NBL_CONCEPT_BEGIN(1) +#define query NBL_CONCEPT_PARAM_T NBL_CONCEPT_PARAM_0 +NBL_CONCEPT_END( + ((NBL_CONCEPT_REQ_TYPE)(T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getVdotHLdotH()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) + ((NBL_CONCEPT_REQ_EXPR_RET_TYPE)((query.getNeg_rcp2_refractionDenom()), ::nbl::hlsl::is_same_v, typename T::scalar_type)) /* accessor name must match DualMeasureQuantQuery and the query overload of createDualMeasureQuantity */ +); +#undef query +#include +} + +template +struct DualMeasureQuantQuery +{ + using scalar_type = T; + + template + static DualMeasureQuantQuery create(NBL_CONST_REF_ARG(Interaction) interaction, NBL_CONST_REF_ARG(MicrofacetCache) cache, scalar_type orientedEta) + { + DualMeasureQuantQuery retval; + retval.VdotHLdotH = cache.getVdotHLdotH(); + const scalar_type VdotH = cache.getVdotH(); + const scalar_type VdotH_etaLdotH = hlsl::mix(VdotH + orientedEta * cache.getLdotH(), + VdotH / orientedEta + cache.getLdotH(), + 
interaction.getPathOrigin() == PathOrigin::PO_SENSOR); + retval.neg_rcp2_refractionDenom = scalar_type(-1.0) / (VdotH_etaLdotH * VdotH_etaLdotH); + return retval; + } + + scalar_type getVdotHLdotH() NBL_CONST_MEMBER_FUNC { return VdotHLdotH; } + scalar_type getNeg_rcp2_refractionDenom() NBL_CONST_MEMBER_FUNC { return neg_rcp2_refractionDenom ; } + + scalar_type VdotHLdotH; + scalar_type neg_rcp2_refractionDenom; +}; + + +template +struct SDualMeasureQuant +{ + using value_type = T; + + T microfacetMeasure; + T projectedLightMeasure; +}; + +namespace impl +{ +template +struct createDualMeasureQuantity_helper +{ + using scalar_type = typename vector_traits::scalar_type; + + static SDualMeasureQuant __call(const T microfacetMeasure, scalar_type clampedNdotV, scalar_type clampedNdotL, scalar_type VdotHLdotH, scalar_type neg_rcp2_refractionDenom) + { + assert(clampedNdotV >= scalar_type(0.0) && clampedNdotL >= scalar_type(0.0)); + SDualMeasureQuant retval; + retval.microfacetMeasure = microfacetMeasure; + const bool transmitted = reflect_refract==MTT_REFRACT || (reflect_refract!=MTT_REFLECT && VdotHLdotH < scalar_type(0.0)); + retval.projectedLightMeasure = microfacetMeasure * hlsl::mix(scalar_type(0.25),VdotHLdotH*neg_rcp2_refractionDenom,transmitted)/clampedNdotV; + return retval; + } +}; +} + +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL) +{ + typename vector_traits::scalar_type dummy; + return impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,dummy,dummy); +} +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL, typename vector_traits::scalar_type VdotHLdotH, typename vector_traits::scalar_type neg_rcp2_refractionDenom) +{ + return 
impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,VdotHLdotH,neg_rcp2_refractionDenom); +} +template +SDualMeasureQuant createDualMeasureQuantity(const T specialMeasure, typename vector_traits::scalar_type clampedNdotV, typename vector_traits::scalar_type clampedNdotL, NBL_CONST_REF_ARG(Query) query) +{ + return impl::createDualMeasureQuantity_helper::__call(specialMeasure,clampedNdotV,clampedNdotL,query.getVdotHLdotH(),query.getNeg_rcp2_refractionDenom()); +} + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index f6f180a2c..f797d382f 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -7,12 +7,27 @@ #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/math/thin_lens_projection.hlsl" #include "nbl/this_example/common.hpp" +#include "nbl/this_example/builtin/build/spirv/generated/PathTracerKeys.hpp" #include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" #include "nbl/builtin/hlsl/sampling/quantized_sequence.hlsl" +#include "nbl/asset/utils/ISPIRVEntryPointTrimmer.h" +#include "nbl/system/ModuleLookupUtils.h" #include "app_resources/hlsl/render_common.hlsl" #include "app_resources/hlsl/render_rwmc_common.hlsl" #include "app_resources/hlsl/resolve_common.hlsl" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nlohmann/json.hpp" + using namespace nbl; using namespace core; using namespace hlsl; @@ -38,24 +53,32 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui ELG_COUNT }; + enum E_POLYGON_METHOD : uint8_t + { + EPM_AREA, + EPM_SOLID_ANGLE, + EPM_PROJECTED_SOLID_ANGLE, + EPM_COUNT + }; + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; constexpr static inline uint32_t MaxFramesInFlight = 5; + static constexpr size_t BinaryToggleCount = 2ull; + static constexpr std::string_view BuildConfigName = PATH_TRACER_BUILD_CONFIG_NAME; + 
static constexpr std::string_view RuntimeConfigFilename = "path_tracer.runtime.json"; static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; - static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; - static inline std::array PTHLSLShaderVariants = { - "SPHERE_LIGHT", - "TRIANGLE_LIGHT", - "RECTANGLE_LIGHT" - }; - static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; - static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { "ELG_SPHERE", "ELG_TRIANGLE", "ELG_RECTANGLE" }; + const char* polygonMethodNames[EPM_COUNT] = { + "Area", + "Solid Angle", + "Projected Solid Angle" + }; public: inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) @@ -63,17 +86,15 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool isComputeOnly() const override { return false; } - inline video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + inline core::bitflag getLogLevelMask() override { - video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); - retval.storagePushConstant16 = true; - return retval; + return core::bitflag(system::ILogger::ELL_INFO) | system::ILogger::ELL_WARNING | system::ILogger::ELL_PERFORMANCE | system::ILogger::ELL_ERROR; } - virtual SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + inline video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override { - auto retval = device_base_t::getPreferredDeviceFeatures(); - retval.pipelineExecutableInfo = true; + video::SPhysicalDeviceLimits retval = device_base_t::getRequiredDeviceLimits(); + retval.storagePushConstant16 = true; return retval; } @@ -107,6 +128,8 @@ class 
HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool onAppInitialized(smart_refctd_ptr&& system) override { + m_startupBeganAt = clock_t::now(); + // Init systems { m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); @@ -122,7 +145,6 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_semaphore) return logFail("Failed to create semaphore!"); } - // Create renderpass and init surface nbl::video::IGPURenderpass* renderpass; { @@ -165,7 +187,6 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) return logFail("Could not create Window & Surface or initialize the Surface!"); } - // Create command pool and buffers { auto gQueue = getGraphicsQueue(); @@ -176,7 +197,21 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) return logFail("Couldn't create Command Buffer!"); } - + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + m_intendedSubmit.queue = getGraphicsQueue(); + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } + initializePipelineCache(); ISampler::SParams samplerParams = { .AnisotropicFilter = 0 }; @@ -289,202 +324,101 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); - // Create Shaders - auto 
loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr - { - IAssetLoader::SAssetLoadParams lp = {}; - lp.workingDirectory = localInputCWD; - auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - { - m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - auto source = smart_refctd_ptr_static_cast(assets[0]); - // The down-cast should not fail! - assert(source); - - auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - CHLSLCompiler::SOptions options = {}; - options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; - options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; - options.spirvOptimizer = nullptr; -#ifndef _NBL_DEBUG - ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; - auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - options.spirvOptimizer = opt.get(); -#endif - options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; - options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); - options.preprocessorOptions.logger = m_logger.get(); - options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - - core::vector defines; - defines.reserve(3); - if (!defineMacro.empty()) - defines.push_back({ defineMacro, "" }); - if(persistentWorkGroups) - defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); - if(rwmc) - defines.push_back({ "RWMC_ENABLED", "" }); - - options.preprocessorOptions.extraDefines = defines; - - source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); - - auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); - if (!shader) - { - m_logger->log("HLSL shader creationed failed: %s!", 
ILogger::ELL_ERROR, pathToShader); - std::exit(-1); - } - - return shader; - }; - const uint32_t deviceMinSubgroupSize = m_device->getPhysicalDevice()->getLimits().minSubgroupSize; - const bool pipelineExecutableInfo = m_device->getEnabledFeatures().pipelineExecutableInfo; - auto getComputePipelineCreationParams = [deviceMinSubgroupSize, pipelineExecutableInfo](IShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams - { - IGPUComputePipeline::SCreationParams params = {}; - params.layout = pipelineLayout; - params.shader.shader = shader; - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.cached.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(hlsl::log2(float(deviceMinSubgroupSize))); - if (pipelineExecutableInfo) - { - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_STATISTICS; - params.flags |= IGPUComputePipeline::SCreationParams::FLAGS::CAPTURE_INTERNAL_REPRESENTATIONS; - } - return params; - }; + m_requiredSubgroupSize = static_cast(hlsl::log2(float(deviceMinSubgroupSize))); - // Create compute pipelines { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) - { - const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(RenderPushConstants) - }; - auto ptPipelineLayout = m_device->createPipelineLayout( - { &pcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - if (!ptPipelineLayout) - return logFail("Failed to create Pathtracing pipeline layout"); - - const nbl::asset::SPushConstantRange rwmcPcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(RenderRWMCPushConstants) - }; - auto rwmcPtPipelineLayout = m_device->createPipelineLayout( - { &rwmcPcRange, 1 }, - core::smart_refctd_ptr(gpuDescriptorSetLayout), - nullptr, - nullptr, - nullptr - ); - if 
(!rwmcPtPipelineLayout) - return logFail("Failed to create RWMC Pathtracing pipeline layout"); - - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); - auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) - return logFail("Failed to create HLSL compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPipelines[index]->getExecutableInfo()); - m_logger->log("%s Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) - return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPersistentWGPipelines[index]->getExecutableInfo()); - m_logger->log("%s PersistentWG Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - - // rwmc pipelines - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPipelinesRWMC[index]->getExecutableInfo()); - m_logger->log("%s RWMC Pipeline Executable Report:\n%s", 
ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - { - auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); - auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); - - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) - return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); - - if (m_device->getEnabledFeatures().pipelineExecutableInfo) - { - auto report = system::to_string(m_PTHLSLPersistentWGPipelinesRWMC[index]->getExecutableInfo()); - m_logger->log("%s RWMC PersistentWG Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, shaderNames[index], report.c_str()); - } - } - } + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderPushConstants) + }; + m_renderPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!m_renderPipelineLayout) + return logFail("Failed to create Pathtracing pipeline layout"); } - // Create resolve pipelines { const nbl::asset::SPushConstantRange pcRange = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, - .offset = 0, - .size = sizeof(ResolvePushConstants) + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) }; + m_rwmcRenderPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + if (!m_rwmcRenderPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); + } - auto pipelineLayout = m_device->createPipelineLayout( + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0u, + .size = 
sizeof(ResolvePushConstants) + }; + m_resolvePipelineState.layout = m_device->createPipelineLayout( { &pcRange, 1 }, core::smart_refctd_ptr(gpuDescriptorSetLayout) ); - - if (!pipelineLayout) { + if (!m_resolvePipelineState.layout) return logFail("Failed to create resolve pipeline layout"); - } + } - { - auto shader = loadAndCompileHLSLShader(ResolveShaderPath); - auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + const auto ensureRenderShaderLoaded = [this](const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc) -> bool + { + auto& shaderSlot = m_renderPipelines.getShaders(persistentWorkGroups, rwmc)[geometry]; + if (shaderSlot) + return true; + shaderSlot = loadRenderShader(geometry, persistentWorkGroups, rwmc); + return static_cast(shaderSlot); + }; + const auto ensureResolveShaderLoaded = [this]() -> bool + { + if (m_resolvePipelineState.shader) + return true; + m_resolvePipelineState.shader = loadPrecompiledShader(); + return static_cast(m_resolvePipelineState.shader); + }; - if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) - return logFail("Failed to create HLSL resolve compute pipeline!\n"); + const auto startupGeometry = static_cast(guiControlled.PTPipeline); + if (!ensureRenderShaderLoaded(startupGeometry, guiControlled.usePersistentWorkGroups, guiControlled.useRWMC)) + return logFail("Failed to load current precompiled compute shader variant"); + if (guiControlled.useRWMC && !ensureResolveShaderLoaded()) + return logFail("Failed to load precompiled resolve compute shader"); + + ensureRenderPipeline( + startupGeometry, + guiControlled.usePersistentWorkGroups, + guiControlled.useRWMC, + static_cast(guiControlled.polygonMethod) + ); + if (guiControlled.useRWMC) + ensureResolvePipeline(); - if (m_device->getEnabledFeatures().pipelineExecutableInfo) + for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry) + { + for (const auto persistentWorkGroups : { false, true 
}) + { + for (const auto rwmc : { false, true }) { - auto report = system::to_string(m_resolvePipeline->getExecutableInfo()); - m_logger->log("Resolve Pipeline Executable Report:\n%s", ILogger::ELL_PERFORMANCE, report.c_str()); + if (!ensureRenderShaderLoaded(static_cast(geometry), persistentWorkGroups, rwmc)) + return logFail("Failed to load precompiled compute shader variant"); } } } + if (!ensureResolveShaderLoaded()) + return logFail("Failed to load precompiled resolve compute shader"); // Create graphics pipeline { @@ -493,8 +427,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui if (!fsTriProtoPPln) return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); - // Load Fragment Shader - auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + auto fragmentShader = loadPrecompiledShader(); if (!fragmentShader) return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); @@ -510,9 +443,10 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui nullptr, nullptr ); - m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass(), 0u, {}, hlsl::SurfaceTransform::FLAG_BITS::IDENTITY_BIT, m_pipelineCache.object.get()); if (!m_presentPipeline) return logFail("Could not create Graphics Pipeline!"); + m_pipelineCache.dirty = true; } } @@ -742,65 +676,80 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui { // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` auto createBufferFromCacheFile = [this]( - system::path filename, - size_t bufferSize, - void *data, + const system::path& filePath, + size_t byteSize, + void* data, smart_refctd_ptr& buffer - ) -> std::pair, bool> + ) -> bool { ISystem::future_t> owenSamplerFileFuture; 
ISystem::future_t owenSamplerFileReadFuture; - size_t owenSamplerFileBytesRead; + size_t owenSamplerFileBytesRead = 0ull; - m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_READ); smart_refctd_ptr owenSamplerFile; if (owenSamplerFileFuture.wait()) { owenSamplerFileFuture.acquire().move_into(owenSamplerFile); if (!owenSamplerFile) - return { nullptr, false }; + return false; - owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, byteSize); if (owenSamplerFileReadFuture.wait()) { owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); - if (owenSamplerFileBytesRead < bufferSize) - { - buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); - return { owenSamplerFile, false }; - } + if (owenSamplerFileBytesRead < byteSize) + return false; - buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + buffer = asset::ICPUBuffer::create({ { byteSize }, data }); + return true; } } - return { owenSamplerFile, true }; + return false; }; - auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + auto writeBufferIntoCacheFile = [this](const system::path& filePath, size_t byteSize, const void* data) { + std::filesystem::create_directories(filePath.parent_path()); + + ISystem::future_t> owenSamplerFileFuture; ISystem::future_t owenSamplerFileWriteFuture; - size_t owenSamplerFileBytesWritten; + size_t owenSamplerFileBytesWritten = 0ull; - file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + m_system->createFile(owenSamplerFileFuture, filePath, IFile::ECF_WRITE); + if (!owenSamplerFileFuture.wait()) + return; + + smart_refctd_ptr file; + owenSamplerFileFuture.acquire().move_into(file); + if (!file) + return; + + file->write(owenSamplerFileWriteFuture, const_cast(data), 0, byteSize); if 
(owenSamplerFileWriteFuture.wait()) owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); }; constexpr uint32_t quantizedDimensions = MaxBufferDimensions / 3u; - constexpr size_t bufferSize = quantizedDimensions * MaxSamplesBuffer; using sequence_type = sampling::QuantizedSequence; - std::array data = {}; + constexpr size_t sequenceCount = quantizedDimensions * MaxSamplesBuffer; + constexpr size_t sequenceByteSize = sequenceCount * sizeof(sequence_type); + std::array data = {}; smart_refctd_ptr sampleSeq; - auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); - if (!cacheBufferResult.second) + const auto packagedOwenSamplerPath = sharedInputCWD / OwenSamplerFilePath; + const auto generatedOwenSamplerPath = sharedOutputCWD / OwenSamplerFilePath; + const bool cacheLoaded = + createBufferFromCacheFile(packagedOwenSamplerPath, sequenceByteSize, data.data(), sampleSeq) || + createBufferFromCacheFile(generatedOwenSamplerPath, sequenceByteSize, data.data(), sampleSeq); + if (!cacheLoaded) { core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); ICPUBuffer::SCreationParams params = {}; - params.size = quantizedDimensions * MaxSamplesBuffer * sizeof(sequence_type); + params.size = sequenceByteSize; sampleSeq = ICPUBuffer::create(std::move(params)); auto out = reinterpret_cast(sampleSeq->getPointer()); @@ -813,20 +762,31 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui const uint32_t sample = sampler.sample(dim, i); seq.set(offset, sample); } - if (cacheBufferResult.first) - writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + writeBufferIntoCacheFile(generatedOwenSamplerPath, sequenceByteSize, out); } IGPUBuffer::SCreationParams params = {}; params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = bufferSize; - - // we don't 
want to overcomplicate the example with multi-queue - m_utils->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, + params.size = sampleSeq->getSize(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.queue = queue; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, std::move(params), sampleSeq->getPointer() - ).move_into(m_sequenceBuffer); + ); + bufferFuture.wait(); + const auto uploadedBuffer = bufferFuture.get(); + if (!uploadedBuffer || !uploadedBuffer->get()) + return logFail("Failed to upload sequence buffer"); + m_sequenceBuffer = smart_refctd_ptr(*uploadedBuffer); m_sequenceBuffer->setObjectDebugName("Sequence buffer"); } @@ -941,15 +901,23 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; params.assetManager = m_assetMgr; - params.pipelineCache = nullptr; + params.pipelineCache = m_pipelineCache.object; params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); params.renderpass = smart_refctd_ptr(renderpass); params.streamingBuffer = nullptr; params.subpassIx = 0u; params.transfer = getTransferUpQueue(); params.utilities = m_utils; + params.spirv = nbl::ext::imgui::UI::SCreationParameters::PrecompiledShaders{ + .vertex = loadPrecompiledShader(), + .fragment = loadPrecompiledShader() + }; + if (!params.spirv->vertex || !params.spirv->fragment) + return logFail("Failed to load precompiled ImGui 
shaders"); { m_ui.manager = ext::imgui::UI::create(std::move(params)); + if (m_ui.manager) + m_pipelineCache.dirty = true; // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); @@ -978,41 +946,342 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui const auto aspectRatio = io.DisplaySize.x / io.DisplaySize.y; m_camera.setProjectionMatrix(hlsl::math::thin_lens::rhPerspectiveFovMatrix(hlsl::radians(guiControlled.fov), aspectRatio, guiControlled.zNear, guiControlled.zFar)); - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - - // create a window and insert the inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Controls"); - - ImGui::SameLine(); - - ImGui::Text("Camera"); + const ImGuiViewport* viewport = ImGui::GetMainViewport(); + const ImVec2 viewportPos = viewport->Pos; + const ImVec2 viewportSize = viewport->Size; + const ImGuiStyle& style = ImGui::GetStyle(); + const float panelMargin = 10.f; + const auto currentGeometry = static_cast(guiControlled.PTPipeline); + const auto requestedMethod = static_cast(guiControlled.polygonMethod); + const auto currentVariant = getRenderVariantInfo(currentGeometry, guiControlled.usePersistentWorkGroups, requestedMethod); + const size_t readyRenderPipelines = getReadyRenderPipelineCount(); + const size_t totalRenderPipelines = getKnownRenderPipelineCount(); + const size_t readyTotalPipelines = readyRenderPipelines + (m_resolvePipelineState.pipeline ? 
1ull : 0ull); + const size_t totalKnownPipelines = totalRenderPipelines + 1ull; + const size_t runningPipelineBuilds = getRunningPipelineBuildCount(); + const size_t queuedPipelineBuilds = m_pipelineCache.warmup.queue.size(); + const bool warmupInProgress = m_hasPathtraceOutput && !m_pipelineCache.warmup.loggedComplete; + const char* const effectiveEntryPoint = currentVariant.entryPoint; + struct SFloatSliderRow + { + const char* label; + float* value; + float min; + float max; + const char* format; + }; + struct SIntSliderRow + { + const char* label; + int* value; + int min; + int max; + }; + struct SCheckboxRow + { + const char* label; + bool* value; + }; + struct SComboRow + { + const char* label; + int* value; + const char* const* items; + int count; + }; + struct STextRow + { + const char* label; + std::string value; + }; + const auto calcMaxTextWidth = [](const auto& items, auto&& toText) -> float + { + float width = 0.f; + for (const auto& item : items) + width = std::max(width, ImGui::CalcTextSize(toText(item)).x); + return width; + }; + const auto makeReadyText = [](const size_t ready, const size_t total) -> std::string + { + return std::to_string(ready) + "/" + std::to_string(total); + }; + const auto makeRunQueueText = [](const size_t running, const size_t queued) -> std::string + { + return std::to_string(running) + " / " + std::to_string(queued); + }; + const std::string pipelineStatusText = !m_hasPathtraceOutput ? + "Building pipeline..." : + (warmupInProgress ? + ("Warmup " + std::to_string(readyTotalPipelines) + "/" + std::to_string(totalKnownPipelines)) : + "All pipelines ready"); + const std::string cacheStateText = m_pipelineCache.loadedFromDisk ? 
"loaded from disk" : "cold start"; + const std::string trimCacheText = std::to_string(m_pipelineCache.trimmedShaders.loadedFromDiskCount + m_pipelineCache.trimmedShaders.generatedCount) + " ready"; + const std::string parallelismText = std::to_string(m_pipelineCache.warmup.budget); + const std::string renderStateText = makeReadyText(readyTotalPipelines, totalKnownPipelines); + const std::string warmupStateText = makeRunQueueText(runningPipelineBuilds, queuedPipelineBuilds); + const std::string cursorText = "cursor " + std::to_string(static_cast(io.MousePos.x)) + " " + std::to_string(static_cast(io.MousePos.y)); + const SFloatSliderRow cameraFloatRows[] = { + { "move", &guiControlled.moveSpeed, 0.1f, 10.f, "%.2f" }, + { "rotate", &guiControlled.rotateSpeed, 0.1f, 10.f, "%.2f" }, + { "fov", &guiControlled.fov, 20.f, 150.f, "%.0f" }, + { "zNear", &guiControlled.zNear, 0.1f, 100.f, "%.2f" }, + { "zFar", &guiControlled.zFar, 110.f, 10000.f, "%.0f" }, + }; + const SComboRow renderComboRows[] = { + { "shader", &guiControlled.PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT }, + { "method", &guiControlled.polygonMethod, polygonMethodNames, EPM_COUNT }, + }; + const SIntSliderRow renderIntRows[] = { + { "spp", &guiControlled.spp, 1, MaxSamplesBuffer }, + { "depth", &guiControlled.depth, 1, MaxBufferDimensions / 4 }, + }; + const SCheckboxRow renderCheckboxRows[] = { + { "persistent WG", &guiControlled.usePersistentWorkGroups }, + }; + const SCheckboxRow rwmcCheckboxRows[] = { + { "enable", &guiControlled.useRWMC }, + }; + const SFloatSliderRow rwmcFloatRows[] = { + { "start", &guiControlled.rwmcParams.start, 1.0f, 32.0f, "%.3f" }, + { "base", &guiControlled.rwmcParams.base, 1.0f, 32.0f, "%.3f" }, + { "min rel.", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f, "%.3f" }, + { "kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f, "%.3f" }, + }; + const STextRow diagnosticsRows[] = { + { "geometry", shaderNames[currentGeometry] }, + { "req. 
method", polygonMethodNames[requestedMethod] }, + { "eff. method", polygonMethodNames[currentVariant.effectiveMethod] }, + { "entrypoint", effectiveEntryPoint }, + { "mode", PathTracerBuildModeName }, + { "config", std::string(BuildConfigName) }, + { "cache", cacheStateText }, + { "trim cache", trimCacheText }, + { "parallel", parallelismText }, + { "render", renderStateText }, + { "run/queue", warmupStateText }, + }; + const char* const standaloneTexts[] = { + "PATH_TRACER", + "Home camera End light", + pipelineStatusText.c_str(), + cursorText.c_str(), + }; + const char* const sliderPreviewTexts[] = { + "10000.000", + "1024.000", + effectiveEntryPoint, + PathTracerBuildModeName, + BuildConfigName.data(), + cacheStateText.c_str(), + renderStateText.c_str(), + warmupStateText.c_str(), + }; + const float maxStandaloneTextWidth = calcMaxTextWidth(standaloneTexts, [](const char* text) { return text; }); + const float maxLabelTextWidth = std::max({ + calcMaxTextWidth(cameraFloatRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderComboRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderIntRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(renderCheckboxRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(rwmcCheckboxRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(rwmcFloatRows, [](const auto& row) { return row.label; }), + calcMaxTextWidth(diagnosticsRows, [](const auto& row) { return row.label; }) + }); + const float comboPreviewWidth = std::max( + calcMaxTextWidth(shaderNames, [](const char* text) { return text; }), + calcMaxTextWidth(polygonMethodNames, [](const char* text) { return text; }) + ); + const float sliderPreviewWidth = calcMaxTextWidth(sliderPreviewTexts, [](const char* text) { return text; }); + const float tableLabelColumnWidth = std::ceil(maxLabelTextWidth + style.FramePadding.x * 2.f + style.CellPadding.x * 2.f); + const float tableValueColumnMinWidth = 
+ std::ceil(std::max(comboPreviewWidth, sliderPreviewWidth) + style.FramePadding.x * 2.f + style.ItemInnerSpacing.x + ImGui::GetFrameHeight() + 18.f); + const float sectionTableWidth = tableLabelColumnWidth + tableValueColumnMinWidth + style.CellPadding.x * 4.f + style.ItemSpacing.x; + const float contentWidth = std::max(maxStandaloneTextWidth, sectionTableWidth); + const float panelWidth = std::min( + std::ceil(contentWidth + style.WindowPadding.x * 2.f), + std::max(0.f, viewportSize.x - panelMargin * 2.f) + ); + const float panelMaxHeight = ImMax(300.0f, viewportSize.y * 0.84f); + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always); + ImGui::SetNextWindowSizeConstraints(ImVec2(panelWidth, 0.0f), ImVec2(panelWidth, panelMaxHeight)); + ImGui::SetNextWindowBgAlpha(0.72f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(5.f, 5.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 10.f); + ImGui::PushStyleVar(ImGuiStyleVar_FrameRounding, 4.f); + ImGui::PushStyleVar(ImGuiStyleVar_GrabRounding, 4.f); + ImGui::PushStyleVar(ImGuiStyleVar_ItemSpacing, ImVec2(5.f, 2.f)); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.08f, 0.10f, 0.13f, 0.88f)); + ImGui::PushStyleColor(ImGuiCol_Border, ImVec4(0.32f, 0.39f, 0.47f, 0.65f)); + ImGui::PushStyleColor(ImGuiCol_Header, ImVec4(0.18f, 0.28f, 0.36f, 0.92f)); + ImGui::PushStyleColor(ImGuiCol_HeaderHovered, ImVec4(0.24f, 0.36f, 0.46f, 0.96f)); + ImGui::PushStyleColor(ImGuiCol_HeaderActive, ImVec4(0.28f, 0.42f, 0.54f, 1.0f)); + + const ImGuiWindowFlags panelFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoResize; + + const auto beginSectionTable = [](const char* id) -> bool + { + return ImGui::BeginTable(id, 2, ImGuiTableFlags_SizingFixedFit); + }; + const auto setupSectionTable = [tableLabelColumnWidth]() -> void + { + 
ImGui::TableSetupColumn("label", ImGuiTableColumnFlags_WidthFixed, tableLabelColumnWidth); + ImGui::TableSetupColumn("value", ImGuiTableColumnFlags_WidthStretch); + }; + const auto sliderFloatRow = [](const SFloatSliderRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::SliderFloat("##value", row.value, row.min, row.max, row.format, ImGuiSliderFlags_AlwaysClamp); + ImGui::PopID(); + }; + const auto sliderIntRow = [](const SIntSliderRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::SliderInt("##value", row.value, row.min, row.max); + ImGui::PopID(); + }; + const auto comboRow = [](const SComboRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::SetNextItemWidth(-FLT_MIN); + ImGui::PushID(row.label); + ImGui::Combo("##value", row.value, row.items, row.count); + ImGui::PopID(); + }; + const auto checkboxRow = [](const SCheckboxRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::PushID(row.label); + ImGui::Checkbox("##value", row.value); + ImGui::PopID(); + }; + const auto textRow = [](const STextRow& row) -> void + { + ImGui::TableNextRow(); + ImGui::TableSetColumnIndex(0); + ImGui::TextUnformatted(row.label); + ImGui::TableSetColumnIndex(1); + ImGui::TextUnformatted(row.value.c_str()); + }; - ImGui::Text("Press Home to reset camera."); - ImGui::Text("Press End to reset light."); + if (ImGui::Begin("Path Tracer Controls", nullptr, panelFlags)) + { + ImGui::TextUnformatted("PATH_TRACER"); + ImGui::Separator(); + 
ImGui::TextDisabled("Home camera End light"); + if (!m_hasPathtraceOutput) + ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Building pipeline..."); + else if (warmupInProgress) + ImGui::TextColored(ImVec4(0.83f, 0.86f, 0.90f, 1.0f), "Warmup %zu/%zu", readyTotalPipelines, totalKnownPipelines); + else + ImGui::TextDisabled("All pipelines ready"); + ImGui::Dummy(ImVec2(0.f, 2.f)); + + if (ImGui::CollapsingHeader("Controls", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (ImGui::CollapsingHeader("Camera")) + { + if (beginSectionTable("##camera_controls_table")) + { + setupSectionTable(); + for (const auto& row : cameraFloatRows) + sliderFloatRow(row); + ImGui::EndTable(); + } + } - ImGui::SliderFloat("Move speed", &guiControlled.moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &guiControlled.rotateSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Fov", &guiControlled.fov, 20.f, 150.f); - ImGui::SliderFloat("zNear", &guiControlled.zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &guiControlled.zFar, 110.f, 10000.f); - ImGui::Combo("Shader", &guiControlled.PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); - ImGui::SliderInt("SPP", &guiControlled.spp, 1, MaxSamplesBuffer); - ImGui::SliderInt("Depth", &guiControlled.depth, 1, MaxBufferDimensions / 4); - ImGui::Checkbox("Persistent WorkGroups", &guiControlled.usePersistentWorkGroups); + if (ImGui::CollapsingHeader("Render", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (beginSectionTable("##render_controls_table")) + { + setupSectionTable(); + for (const auto& row : renderComboRows) + comboRow(row); + for (const auto& row : renderIntRows) + sliderIntRow(row); + for (const auto& row : renderCheckboxRows) + checkboxRow(row); + ImGui::EndTable(); + } + } - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGui::CollapsingHeader("RWMC", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (beginSectionTable("##rwmc_controls_table")) + { + setupSectionTable(); + for (const auto& row : rwmcCheckboxRows) + 
checkboxRow(row); + for (const auto& row : rwmcFloatRows) + sliderFloatRow(row); + ImGui::EndTable(); + } + } - ImGui::Text("\nRWMC settings:"); - ImGui::Checkbox("Enable RWMC", &guiControlled.useRWMC); - ImGui::SliderFloat("start", &guiControlled.rwmcParams.start, 1.0f, 32.0f); - ImGui::SliderFloat("base", &guiControlled.rwmcParams.base, 1.0f, 32.0f); - ImGui::SliderFloat("minReliableLuma", &guiControlled.rwmcParams.minReliableLuma, 0.1f, 1024.0f); - ImGui::SliderFloat("kappa", &guiControlled.rwmcParams.kappa, 0.1f, 1024.0f); + if (ImGui::CollapsingHeader("Diagnostics")) + { + if (beginSectionTable("##diagnostics_controls_table")) + { + setupSectionTable(); + for (const auto& row : diagnosticsRows) + textRow(row); + ImGui::EndTable(); + } + } + ImGui::Dummy(ImVec2(0.f, 2.f)); + ImGui::Separator(); + ImGui::TextDisabled("%s", cursorText.c_str()); + } + } ImGui::End(); + + if (!m_hasPathtraceOutput || warmupInProgress) + { + ImGui::SetNextWindowPos(ImVec2(viewportPos.x + viewportSize.x - panelMargin, viewportPos.y + panelMargin), ImGuiCond_Always, ImVec2(1.0f, 0.0f)); + ImGui::SetNextWindowBgAlpha(0.62f); + ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(12.f, 10.f)); + ImGui::PushStyleVar(ImGuiStyleVar_WindowRounding, 8.f); + ImGui::PushStyleColor(ImGuiCol_WindowBg, ImVec4(0.07f, 0.09f, 0.12f, 0.90f)); + const ImGuiWindowFlags overlayFlags = + ImGuiWindowFlags_NoDecoration | + ImGuiWindowFlags_NoSavedSettings | + ImGuiWindowFlags_NoMove | + ImGuiWindowFlags_NoNav | + ImGuiWindowFlags_AlwaysAutoResize | + ImGuiWindowFlags_NoInputs; + if (ImGui::Begin("##path_tracer_status_overlay", nullptr, overlayFlags)) + { + ImGui::TextUnformatted(pipelineStatusText.c_str()); + ImGui::Text("Run %zu Queue %zu", runningPipelineBuilds, queuedPipelineBuilds); + ImGui::Text("Cache: %s", m_pipelineCache.loadedFromDisk ? 
"disk" : "cold"); + } + ImGui::End(); + ImGui::PopStyleColor(1); + ImGui::PopStyleVar(2); + } + ImGui::PopStyleColor(5); + ImGui::PopStyleVar(5); } ); @@ -1112,6 +1381,14 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline void workLoopBody() override { + pollPendingPipelines(); + pumpPipelineWarmup(); + if (!m_loggedFirstFrameLoop) + { + logStartupEvent("first_frame_loop"); + m_loggedFirstFrameLoop = true; + } + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); // We block for semaphores for 2 reasons here: @@ -1164,6 +1441,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui rwmcPushConstants.renderPushConstants.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); rwmcPushConstants.renderPushConstants.depth = guiControlled.depth; rwmcPushConstants.renderPushConstants.sampleCount = guiControlled.rwmcParams.sampleCount = guiControlled.spp; + rwmcPushConstants.renderPushConstants.polygonMethod = guiControlled.polygonMethod; rwmcPushConstants.renderPushConstants.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); rwmcPushConstants.splattingParameters = rwmc::SPackedSplattingParameters::create(guiControlled.rwmcParams.base, guiControlled.rwmcParams.start, CascadeCount); } @@ -1173,10 +1451,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui pc.generalPurposeLightMatrix = hlsl::float32_t3x4(transpose(m_lightModelMatrix)); pc.sampleCount = guiControlled.spp; pc.depth = guiControlled.depth; + pc.polygonMethod = guiControlled.polygonMethod; pc.pSampleSequence = m_sequenceBuffer->getDeviceAddress(); } }; updatePathtracerPushConstants(); + bool producedRenderableOutput = false; // 
TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) { @@ -1240,15 +1520,18 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui 1 + (WindowDimensions.x * WindowDimensions.y - 1) / RenderWorkgroupSize; IGPUComputePipeline* pipeline = pickPTPipeline(); + if (pipeline) + { + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - - const uint32_t pushConstantsSize = guiControlled.useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); - const void* pushConstantsPtr = guiControlled.useRWMC ? reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); + const uint32_t pushConstantsSize = guiControlled.useRWMC ? sizeof(RenderRWMCPushConstants) : sizeof(RenderPushConstants); + const void* pushConstantsPtr = guiControlled.useRWMC ? 
reinterpret_cast(&rwmcPushConstants) : reinterpret_cast(&pc); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, pushConstantsSize, pushConstantsPtr); - cmdbuf->dispatch(dispatchSize, 1u, 1u); + cmdbuf->dispatch(dispatchSize, 1u, 1u); + producedRenderableOutput = !guiControlled.useRWMC; + } } // m_cascadeView synchronization - wait for previous compute shader to write into the cascade @@ -1283,15 +1566,28 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui (m_window->getHeight() + ResolveWorkgroupSizeY - 1) / ResolveWorkgroupSizeY ); - IGPUComputePipeline* pipeline = m_resolvePipeline.get(); + IGPUComputePipeline* pipeline = ensureResolvePipeline(); + if (pipeline) + { + resolvePushConstants.resolveParameters = rwmc::SResolveParameters::create(guiControlled.rwmcParams); - resolvePushConstants.resolveParameters = rwmc::SResolveParameters::create(guiControlled.rwmcParams); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0u, sizeof(ResolvePushConstants), &resolvePushConstants); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + producedRenderableOutput = true; + } + } - cmdbuf->dispatch(dispatchSize.x, dispatchSize.y, 1u); + if (producedRenderableOutput) + { + m_hasPathtraceOutput = true; + if (!m_loggedFirstRenderDispatch) + { + logStartupEvent("first_render_dispatch"); + m_loggedFirstRenderDispatch = true; + } } // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) @@ -1358,9 +1654,12 @@ class 
HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); - ext::FullScreenTriangle::recordDrawCall(cmdbuf); + if (m_hasPathtraceOutput) + { + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + } if (m_showUI) { @@ -1417,6 +1716,17 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } } + if (producedRenderableOutput && !m_loggedFirstRenderSubmit) + { + logStartupEvent("first_render_submit"); + m_loggedFirstRenderSubmit = true; + } + if (m_hasPathtraceOutput && !m_pipelineCache.warmup.started) + { + kickoffPipelineWarmup(); + } + maybeCheckpointPipelineCache(); + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); m_surface->present(m_currentImageAcquire.imageIndex, rendered); } @@ -1432,6 +1742,8 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui inline bool onAppTerminated() override { + waitForPendingPipelines(); + savePipelineCache(); return device_base_t::onAppTerminated(); } @@ -1524,16 +1836,1182 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui } private: + template + smart_refctd_ptr loadPrecompiledShader() + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = "app_resources"; + + const auto key = nbl::this_example::builtin::build::get_spirv_key(m_device.get()); + auto assetBundle = m_assetMgr->getAsset(key, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load precompiled shader: %s", ILogger::ELL_ERROR, 
key.c_str()); + return nullptr; + } - IGPUComputePipeline* pickPTPipeline() + auto shader = IAsset::castDown(assets[0]); + if (!shader) + { + m_logger->log("Failed to cast %s asset to IShader!", ILogger::ELL_ERROR, key.c_str()); + return nullptr; + } + + shader->setFilePathHint(std::string(std::string_view(ShaderKey.value))); + return shader; + } + + void logStartupEvent(const char* const eventName) + { + const auto elapsedMs = std::chrono::duration_cast(clock_t::now() - m_startupBeganAt).count(); + m_logger->log("PATH_TRACER_STARTUP %s_ms=%lld", ILogger::ELL_INFO, eventName, static_cast(elapsedMs)); + } + + std::optional tryGetPipelineCacheDirOverride() const + { + constexpr std::string_view prefix = "--pipeline-cache-dir="; + for (size_t i = 1ull; i < argv.size(); ++i) + { + const std::string_view arg = argv[i]; + if (arg.rfind(prefix, 0ull) == 0ull) + { + const auto value = arg.substr(prefix.size()); + if (!value.empty()) + return path(std::string(value)); + return std::nullopt; + } + if (arg == "--pipeline-cache-dir") + { + if (i + 1ull < argv.size()) + return path(argv[i + 1ull]); + return std::nullopt; + } + } + return std::nullopt; + } + + bool shouldClearPipelineCacheOnStartup() const + { + for (const auto& arg : argv) + { + if (arg == "--clear-pipeline-cache") + return true; + } + return false; + } + + static std::string hashToHex(const core::blake3_hash_t& hash) + { + static constexpr char digits[] = "0123456789abcdef"; + static constexpr size_t HexCharsPerByte = 2ull; + static constexpr uint32_t HighNibbleBitOffset = 4u; + static constexpr uint8_t NibbleMask = 0xfu; + const auto hashByteCount = sizeof(hash.data); + std::string retval; + retval.resize(hashByteCount * HexCharsPerByte); + for (size_t i = 0ull; i < hashByteCount; ++i) + { + const auto hexOffset = i * HexCharsPerByte; + retval[hexOffset] = digits[(hash.data[i] >> HighNibbleBitOffset) & NibbleMask]; + retval[hexOffset + 1ull] = digits[hash.data[i] & NibbleMask]; + } + return retval; + } + + 
path getDefaultPipelineCacheDir() const + { + if (const auto* localAppData = std::getenv("LOCALAPPDATA"); localAppData && localAppData[0] != '\0') + return path(localAppData) / "nabla/examples/31_HLSLPathTracer/pipeline/cache"; + return localOutputCWD / "pipeline/cache"; + } + + path getRuntimeConfigPath() const + { + return system::executableDirectory() / RuntimeConfigFilename; + } + + std::optional tryGetPipelineCacheDirFromRuntimeConfig() const + { + const auto configPath = getRuntimeConfigPath(); + if (!m_system->exists(configPath, IFile::ECF_READ)) + return std::nullopt; + + std::ifstream input(configPath); + if (!input.is_open()) + return std::nullopt; + + nlohmann::json json; + try + { + input >> json; + } + catch (const std::exception& e) + { + m_logger->log("Failed to parse PATH_TRACER runtime config %s: %s", ILogger::ELL_WARNING, configPath.string().c_str(), e.what()); + return std::nullopt; + } + + const auto cacheRootIt = json.find("cache_root"); + if (cacheRootIt == json.end() || !cacheRootIt->is_string()) + return std::nullopt; + + const auto cacheRoot = cacheRootIt->get(); + if (cacheRoot.empty()) + return std::nullopt; + + const path relativeRoot(cacheRoot); + if (relativeRoot.is_absolute()) + { + m_logger->log("Ignoring absolute cache_root in %s", ILogger::ELL_WARNING, configPath.string().c_str()); + return std::nullopt; + } + + return (configPath.parent_path() / relativeRoot).lexically_normal(); + } + + path getPipelineCacheRootDir() const + { + if (const auto overrideDir = tryGetPipelineCacheDirOverride(); overrideDir.has_value()) + return overrideDir.value(); + if (const auto runtimeConfigDir = tryGetPipelineCacheDirFromRuntimeConfig(); runtimeConfigDir.has_value()) + return runtimeConfigDir.value(); + return getDefaultPipelineCacheDir(); + } + + path getPipelineCacheBlobPath() const + { + const auto key = m_device->getPipelineCacheKey(); + return getPipelineCacheRootDir() / "blob" / BuildConfigName / (std::string(key.deviceAndDriverUUID) + 
".bin"); + } + + path getSpirvCacheDir() const { - IGPUComputePipeline* pipeline; - if (guiControlled.useRWMC) - pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[guiControlled.PTPipeline].get() : m_PTHLSLPipelinesRWMC[guiControlled.PTPipeline].get(); - else - pipeline = guiControlled.usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelines[guiControlled.PTPipeline].get() : m_PTHLSLPipelines[guiControlled.PTPipeline].get(); + return getPipelineCacheRootDir() / "spirv" / BuildConfigName; + } + + path getTrimmedShaderCachePath(const IShader* shader, const char* const entryPoint) const + { + core::blake3_hasher hasher; + hasher << std::string_view(shader ? shader->getFilepathHint() : std::string_view{}); + hasher << std::string_view(entryPoint); + return getSpirvCacheDir() / (hashToHex(static_cast(hasher)) + ".spv"); + } - return pipeline; + path getValidatedSpirvMarkerPath(const ICPUBuffer* spirvBuffer) const + { + auto contentHash = spirvBuffer->getContentHash(); + if (contentHash == ICPUBuffer::INVALID_HASH) + contentHash = spirvBuffer->computeContentHash(); + return getSpirvCacheDir() / (hashToHex(contentHash) + ".hash"); + } + + size_t getBackgroundPipelineBuildBudget() const + { + static constexpr uint32_t ReservedForegroundThreadCount = 1u; + const auto concurrency = std::thread::hardware_concurrency(); + if (concurrency > ReservedForegroundThreadCount) + return static_cast(concurrency - ReservedForegroundThreadCount); + return ReservedForegroundThreadCount; + } + + bool ensureCacheDirectoryExists(const path& dir, const char* const description) + { + if (dir.empty() || m_system->isDirectory(dir)) + return true; + + if (m_system->createDirectory(dir) || m_system->isDirectory(dir)) + return true; + + m_logger->log("Failed to create %s %s", ILogger::ELL_WARNING, description, dir.string().c_str()); + return false; + } + + bool finalizeCacheFile(const path& tempPath, const path& finalPath, const char* const description) + { + 
m_system->deleteFile(finalPath); + const auto ec = m_system->moveFileOrDirectory(tempPath, finalPath); + if (!ec) + return true; + + m_system->deleteFile(tempPath); + m_logger->log("Failed to finalize %s %s", ILogger::ELL_WARNING, description, finalPath.string().c_str()); + return false; + } + + void initializePipelineCache() + { + m_pipelineCache.blobPath = getPipelineCacheBlobPath(); + m_pipelineCache.trimmedShaders.rootDir = getSpirvCacheDir(); + m_pipelineCache.trimmedShaders.validationDir = getSpirvCacheDir(); + if (!m_pipelineCache.trimmedShaders.trimmer) + m_pipelineCache.trimmedShaders.trimmer = core::make_smart_refctd_ptr(); + const auto pipelineCacheRootDir = getPipelineCacheRootDir(); + std::error_code ec; + m_pipelineCache.loadedBytes = 0ull; + m_pipelineCache.loadedFromDisk = false; + m_pipelineCache.clearedOnStartup = shouldClearPipelineCacheOnStartup(); + m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; + m_pipelineCache.checkpointedAfterFirstSubmit = false; + m_pipelineCache.lastSaveAt = clock_t::now(); + if (shouldClearPipelineCacheOnStartup()) + { + if (m_system->isDirectory(pipelineCacheRootDir) && !m_system->deleteDirectory(pipelineCacheRootDir)) + m_logger->log("Failed to clear pipeline cache directory %s", ILogger::ELL_WARNING, pipelineCacheRootDir.string().c_str()); + else + m_logger->log("PATH_TRACER_PIPELINE_CACHE clear root=%s", ILogger::ELL_INFO, pipelineCacheRootDir.string().c_str()); + } + ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory"); + ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.rootDir, "trimmed shader cache directory"); + ensureCacheDirectoryExists(m_pipelineCache.trimmedShaders.validationDir, "validated shader cache directory"); + + std::vector initialData; + { + std::ifstream input(m_pipelineCache.blobPath, std::ios::binary | std::ios::ate); + if (input.is_open()) + { + const auto size = input.tellg(); + if (size > 0) + { + 
initialData.resize(static_cast(size)); + input.seekg(0, std::ios::beg); + input.read(reinterpret_cast(initialData.data()), static_cast(initialData.size())); + if (!input) + initialData.clear(); + } + } + } + + std::span initialDataSpan = {}; + if (!initialData.empty()) + { + initialDataSpan = { initialData.data(), initialData.size() }; + m_pipelineCache.loadedBytes = initialData.size(); + m_pipelineCache.loadedFromDisk = true; + } + + m_pipelineCache.object = m_device->createPipelineCache(initialDataSpan); + if (!m_pipelineCache.object && !initialData.empty()) + { + m_logger->log("Pipeline cache blob at %s was rejected. Falling back to empty cache.", ILogger::ELL_WARNING, m_pipelineCache.blobPath.string().c_str()); + m_pipelineCache.object = m_device->createPipelineCache(std::span{}); + } + if (!m_pipelineCache.object) + { + m_logger->log("Failed to create PATH_TRACER pipeline cache.", ILogger::ELL_WARNING); + return; + } + + m_pipelineCache.object->setObjectDebugName("PATH_TRACER Pipeline Cache"); + m_logger->log("PATH_TRACER pipeline cache path: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + m_logger->log("PATH_TRACER trimmed shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.rootDir.string().c_str()); + m_logger->log("PATH_TRACER validated shader cache path: %s", ILogger::ELL_INFO, m_pipelineCache.trimmedShaders.validationDir.string().c_str()); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE init clear=%u loaded_from_disk=%u loaded_bytes=%zu path=%s", + ILogger::ELL_INFO, + m_pipelineCache.clearedOnStartup ? 1u : 0u, + m_pipelineCache.loadedFromDisk ? 
1u : 0u, + m_pipelineCache.loadedBytes, + m_pipelineCache.blobPath.string().c_str() + ); + if (!initialData.empty()) + m_logger->log("Loaded PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + } + + smart_refctd_ptr tryLoadTrimmedShaderFromDisk(const IShader* sourceShader, const char* const entryPoint) + { + const auto cachePath = getTrimmedShaderCachePath(sourceShader, entryPoint); + std::ifstream input(cachePath, std::ios::binary | std::ios::ate); + if (!input.is_open()) + return nullptr; + + const auto size = input.tellg(); + if (size <= 0) + return nullptr; + + std::vector bytes(static_cast(size)); + input.seekg(0, std::ios::beg); + input.read(reinterpret_cast(bytes.data()), static_cast(bytes.size())); + if (!input) + return nullptr; + + auto buffer = ICPUBuffer::create({ { bytes.size() }, bytes.data() }); + if (!buffer) + return nullptr; + buffer->setContentHash(buffer->computeContentHash()); + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + m_pipelineCache.trimmedShaders.loadedBytes += bytes.size(); + ++m_pipelineCache.trimmedShaders.loadedFromDiskCount; + } + m_logger->log( + "PATH_TRACER_SHADER_CACHE load entrypoint=%s bytes=%zu path=%s", + ILogger::ELL_INFO, + entryPoint, + bytes.size(), + cachePath.string().c_str() + ); + return core::make_smart_refctd_ptr(std::move(buffer), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(sourceShader->getFilepathHint())); + } + + bool hasValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) const + { + return m_system->exists(getValidatedSpirvMarkerPath(spirvBuffer), IFile::ECF_READ); + } + + void saveValidatedSpirvMarker(const ICPUBuffer* spirvBuffer) + { + const auto markerPath = getValidatedSpirvMarkerPath(spirvBuffer); + if (!ensureCacheDirectoryExists(markerPath.parent_path(), "validated shader cache directory")) + return; + + auto tempPath = markerPath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + 
if (!output.is_open()) + { + m_logger->log("Failed to open validated shader marker temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output << "ok\n"; + output.flush(); + if (!output) + { + output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write validated shader marker %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + finalizeCacheFile(tempPath, markerPath, "validated shader marker"); + } + + bool ensurePreparedShaderValidated(const smart_refctd_ptr& preparedShader) + { + if (!preparedShader) + return false; + + auto* const content = preparedShader->getContent(); + if (!content) + return false; + + if (hasValidatedSpirvMarker(content)) + return true; + + if (!m_pipelineCache.trimmedShaders.trimmer->ensureValidated(content, m_logger.get())) + return false; + + saveValidatedSpirvMarker(content); + return true; + } + + void saveTrimmedShaderToDisk(const IShader* shader, const char* const entryPoint, const path& cachePath) + { + const auto* content = shader->getContent(); + if (!content || !content->getPointer() || cachePath.empty()) + return; + + if (!ensureCacheDirectoryExists(cachePath.parent_path(), "trimmed shader cache directory")) + return; + + auto tempPath = cachePath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + if (!output.is_open()) + { + m_logger->log("Failed to open trimmed shader cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output.write(reinterpret_cast(content->getPointer()), static_cast(content->getSize())); + output.flush(); + if (!output) + { + output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write trimmed shader cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + if (!finalizeCacheFile(tempPath, cachePath, "trimmed shader cache blob")) + return; + + { + std::lock_guard 
lock(m_pipelineCache.trimmedShaders.mutex); + m_pipelineCache.trimmedShaders.savedBytes += content->getSize(); + ++m_pipelineCache.trimmedShaders.savedToDiskCount; + } + m_logger->log( + "PATH_TRACER_SHADER_CACHE save entrypoint=%s bytes=%zu path=%s", + ILogger::ELL_INFO, + entryPoint, + content->getSize(), + cachePath.string().c_str() + ); + } + + smart_refctd_ptr getPreparedShaderForEntryPoint(const smart_refctd_ptr& shaderModule, const char* const entryPoint) + { + if (!shaderModule || shaderModule->getContentType() != IShader::E_CONTENT_TYPE::ECT_SPIRV) + return shaderModule; + + const auto cachePath = getTrimmedShaderCachePath(shaderModule.get(), entryPoint); + const auto cacheKey = cachePath.string(); + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + const auto found = m_pipelineCache.trimmedShaders.runtimeShaders.find(cacheKey); + if (found != m_pipelineCache.trimmedShaders.runtimeShaders.end()) + return found->second; + } + + const auto startedAt = clock_t::now(); + auto preparedShader = tryLoadTrimmedShaderFromDisk(shaderModule.get(), entryPoint); + bool cameFromDisk = static_cast(preparedShader); + bool wasTrimmed = false; + if (!preparedShader) + { + const core::set entryPoints = { asset::ISPIRVEntryPointTrimmer::EntryPoint{ .name = entryPoint, .stage = hlsl::ShaderStage::ESS_COMPUTE } }; + const auto result = m_pipelineCache.trimmedShaders.trimmer->trim(shaderModule->getContent(), entryPoints, nullptr); + if (!result) + { + m_logger->log("Failed to prepare trimmed PATH_TRACER shader for %s. 
Falling back to the original module.", ILogger::ELL_WARNING, entryPoint); + return shaderModule; + } + if (result.spirv) + { + result.spirv->setContentHash(result.spirv->computeContentHash()); + preparedShader = core::make_smart_refctd_ptr(core::smart_refctd_ptr(result.spirv), IShader::E_CONTENT_TYPE::ECT_SPIRV, std::string(shaderModule->getFilepathHint())); + } + else + preparedShader = shaderModule; + + saveTrimmedShaderToDisk(preparedShader.get(), entryPoint, cachePath); + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + ++m_pipelineCache.trimmedShaders.generatedCount; + } + wasTrimmed = (preparedShader != shaderModule); + } + + if (!ensurePreparedShaderValidated(preparedShader)) + { + m_logger->log("Prepared PATH_TRACER shader for %s is not valid SPIR-V", ILogger::ELL_ERROR, entryPoint); + return nullptr; + } + + { + std::lock_guard lock(m_pipelineCache.trimmedShaders.mutex); + const auto [it, inserted] = m_pipelineCache.trimmedShaders.runtimeShaders.emplace(cacheKey, preparedShader); + if (!inserted) + preparedShader = it->second; + } + + const auto wallMs = std::chrono::duration_cast(clock_t::now() - startedAt).count(); + m_logger->log( + "PATH_TRACER_SHADER_CACHE ready entrypoint=%s wall_ms=%lld from_disk=%u trimmed=%u", + ILogger::ELL_INFO, + entryPoint, + static_cast(wallMs), + cameFromDisk ? 1u : 0u, + wasTrimmed ? 
1u : 0u + ); + return preparedShader; + } + + void savePipelineCache() + { + if (!m_pipelineCache.object || !m_pipelineCache.dirty || m_pipelineCache.blobPath.empty()) + return; + + const auto saveStartedAt = clock_t::now(); + auto cpuCache = m_pipelineCache.object->convertToCPUCache(); + if (!cpuCache) + return; + + const auto& entries = cpuCache->getEntries(); + const auto found = entries.find(m_device->getPipelineCacheKey()); + if (found == entries.end() || !found->second.bin || found->second.bin->empty()) + return; + + if (!ensureCacheDirectoryExists(m_pipelineCache.blobPath.parent_path(), "pipeline cache directory")) + return; + + auto tempPath = m_pipelineCache.blobPath; + tempPath += ".tmp"; + { + std::ofstream output(tempPath, std::ios::binary | std::ios::trunc); + if (!output.is_open()) + { + m_logger->log("Failed to open pipeline cache temp file %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + output.write(reinterpret_cast(found->second.bin->data()), static_cast(found->second.bin->size())); + output.flush(); + if (!output) + { + output.close(); + m_system->deleteFile(tempPath); + m_logger->log("Failed to write pipeline cache blob to %s", ILogger::ELL_WARNING, tempPath.string().c_str()); + return; + } + } + + if (!finalizeCacheFile(tempPath, m_pipelineCache.blobPath, "pipeline cache blob")) + return; + + m_pipelineCache.dirty = false; + m_pipelineCache.savedBytes = found->second.bin->size(); + m_pipelineCache.newlyReadyPipelinesSinceLastSave = 0ull; + m_pipelineCache.lastSaveAt = clock_t::now(); + const auto saveElapsedMs = std::chrono::duration_cast(clock_t::now() - saveStartedAt).count(); + m_logger->log( + "PATH_TRACER_PIPELINE_CACHE save bytes=%zu wall_ms=%lld path=%s", + ILogger::ELL_INFO, + m_pipelineCache.savedBytes, + static_cast(saveElapsedMs), + m_pipelineCache.blobPath.string().c_str() + ); + m_logger->log("Saved PATH_TRACER pipeline cache blob: %s", ILogger::ELL_INFO, m_pipelineCache.blobPath.string().c_str()); + } + + 
void maybeCheckpointPipelineCache() // Saves the pipeline cache at selected moments; every early return below is one gating condition.
+    {
+        if (!m_pipelineCache.object || !m_pipelineCache.dirty) // nothing new to persist
+            return;
+
+        if (m_loggedFirstRenderSubmit && !m_pipelineCache.checkpointedAfterFirstSubmit) // one-shot checkpoint after the first render submit was logged
+        {
+            savePipelineCache();
+            m_pipelineCache.checkpointedAfterFirstSubmit = true; // set even when the save returned early
+            return;
+        }
+
+        if (!m_pipelineCache.warmup.started || m_pipelineCache.warmup.loggedComplete) // further checkpoints only while warm-up is in progress
+            return;
+
+        static constexpr size_t WarmupCheckpointThreshold = 4ull; // newly ready pipelines required before another save
+        if (m_pipelineCache.newlyReadyPipelinesSinceLastSave < WarmupCheckpointThreshold)
+            return;
+
+        const auto elapsedSinceLastSave = std::chrono::duration_cast(clock_t::now() - m_pipelineCache.lastSaveAt).count();
+        if (elapsedSinceLastSave < 1000ll) // rate limit; NOTE(review): presumably milliseconds - the duration type is not visible in this text
+            return;
+
+        savePipelineCache();
+    }
+    /*
+     * Picks the precompiled render shader for a light geometry.
+     *
+     * Spheres select on rwmc only. Triangles and the RWMC rectangle additionally select on
+     * persistentWorkGroups when PATH_TRACER_BUILD_MODE_SPECIALIZED is defined; otherwise
+     * persistentWorkGroups is not consulted. An unhandled geometry yields nullptr.
+     * NOTE(review): the template arguments that tell the loadPrecompiledShader() calls apart
+     * are missing from this text, so the concrete shader keys cannot be documented here.
+     */
+    smart_refctd_ptr loadRenderShader(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc)
+    {
+        switch (geometry)
+        {
+        case ELG_SPHERE:
+            if (rwmc)
+                return loadPrecompiledShader();
+            return loadPrecompiledShader();
+        case ELG_TRIANGLE:
+#if defined(PATH_TRACER_BUILD_MODE_SPECIALIZED)
+            if (rwmc)
+                return persistentWorkGroups ?
+                    loadPrecompiledShader() :
+                    loadPrecompiledShader();
+            return persistentWorkGroups ?
+                loadPrecompiledShader() :
+                loadPrecompiledShader();
+#else
+            if (rwmc)
+                return loadPrecompiledShader();
+            return loadPrecompiledShader();
+#endif
+        case ELG_RECTANGLE:
+#if defined(PATH_TRACER_BUILD_MODE_SPECIALIZED)
+            if (rwmc)
+                return persistentWorkGroups ?
+                    loadPrecompiledShader() :
+                    loadPrecompiledShader();
+#else
+            if (rwmc)
+                return loadPrecompiledShader();
+#endif
+            return loadPrecompiledShader(); // rectangle without RWMC, in either build mode
+        default:
+            return nullptr; // geometry value not handled above
+        }
+    }
+    /*
+     * Aliases for the per-variant shader / pipeline / pending-build tables.
+     * NOTE(review): the template arguments of these aliases are missing from this text.
+     * Usage elsewhere indexes the pipeline and future arrays as [geometry][method].
+     */
+    using pipeline_future_t = std::future>;
+    using shader_array_t = std::array, E_LIGHT_GEOMETRY::ELG_COUNT>;
+    using pipeline_method_array_t = std::array, EPM_COUNT>;
+    using pipeline_future_method_array_t = std::array;
+    using pipeline_array_t = std::array;
+    using pipeline_future_array_t = std::array;
+    struct SRenderPipelineStorage // shaders, built pipelines and in-flight builds for every (rwmc, persistentWorkGroups) combination
+    {
+        std::array, BinaryToggleCount> shaders = {};
+        std::array, BinaryToggleCount> pipelines = {};
+        std::array, BinaryToggleCount> pendingPipelines = {};
+        /* Converts a toggle to its table index: false -> 0, true -> 1. */
+        static constexpr size_t boolToIndex(const bool value)
+        {
+            return static_cast(value);
+        }
+        /* All accessors take (persistentWorkGroups, rwmc) but index the tables as [rwmc][persistentWorkGroups]. */
+        shader_array_t& getShaders(const bool persistentWorkGroups, const bool rwmc)
+        {
+            return shaders[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+
+        const shader_array_t& getShaders(const bool persistentWorkGroups, const bool rwmc) const
+        {
+            return shaders[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+
+        pipeline_array_t& getPipelines(const bool persistentWorkGroups, const bool rwmc)
+        {
+            return pipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+
+        const pipeline_array_t& getPipelines(const bool persistentWorkGroups, const bool rwmc) const
+        {
+            return pipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+
+        pipeline_future_array_t& getPendingPipelines(const bool persistentWorkGroups, const bool rwmc)
+        {
+            return pendingPipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+
+        const pipeline_future_array_t& getPendingPipelines(const bool persistentWorkGroups, const bool rwmc) const
+        {
+            return pendingPipelines[boolToIndex(rwmc)][boolToIndex(persistentWorkGroups)];
+        }
+    };
+    /* Everything needed to build and hold the single resolve pipeline. */
+    struct SResolvePipelineState
+    {
+        smart_refctd_ptr layout;
+        smart_refctd_ptr shader;
+        smart_refctd_ptr pipeline; // stays null until a build has completed successfully
+        pipeline_future_t
pendingPipeline; // in-flight asynchronous build of the resolve pipeline, if any
+    };
+    struct SWarmupJob // one queued request to build a pipeline during warm-up
+    {
+        enum class E_TYPE : uint8_t
+        {
+            Render,
+            Resolve
+        };
+
+        E_TYPE type = E_TYPE::Render;
+        E_LIGHT_GEOMETRY geometry = ELG_SPHERE; // the remaining fields are only read for Render jobs
+        bool persistentWorkGroups = false;
+        bool rwmc = false;
+        E_POLYGON_METHOD polygonMethod = EPM_PROJECTED_SOLID_ANGLE;
+    };
+    /* Bookkeeping for the on-disk pipeline cache, the prepared-shader cache and the background warm-up. */
+    struct SPipelineCacheState
+    {
+        struct STrimmedShaderCache
+        {
+            smart_refctd_ptr trimmer;
+            path rootDir;
+            path validationDir;
+            size_t loadedFromDiskCount = 0ull;
+            size_t generatedCount = 0ull;
+            size_t savedToDiskCount = 0ull;
+            size_t loadedBytes = 0ull;
+            size_t savedBytes = 0ull;
+            core::unordered_map> runtimeShaders; // prepared shaders already produced during this run, looked up by cache key
+            std::mutex mutex; // held while generatedCount and runtimeShaders are updated
+        } trimmedShaders;
+
+        struct SWarmupState
+        {
+            bool started = false;
+            bool loggedComplete = false; // set once the warmup_complete line has been logged
+            clock_t::time_point beganAt = clock_t::now();
+            size_t budget = 1ull; // upper bound on concurrently running builds while the queue is pumped
+            size_t queuedJobs = 0ull;
+            size_t launchedJobs = 0ull;
+            size_t skippedJobs = 0ull;
+            std::deque queue;
+        } warmup;
+
+        smart_refctd_ptr object;
+        path blobPath;
+        bool dirty = false; // true when a pipeline became ready since the last successful save
+        bool loadedFromDisk = false;
+        bool clearedOnStartup = false;
+        size_t loadedBytes = 0ull;
+        size_t savedBytes = 0ull;
+        size_t newlyReadyPipelinesSinceLastSave = 0ull;
+        bool checkpointedAfterFirstSubmit = false;
+        clock_t::time_point lastSaveAt = clock_t::now();
+    };
+    /* Compile-time mirror of the PATH_TRACER_BUILD_MODE_SPECIALIZED definition. */
+    static constexpr bool SpecializedBuildMode =
+#if defined(PATH_TRACER_BUILD_MODE_SPECIALIZED)
+        true;
+#else
+        false;
+#endif
+    /* Human-readable name of the active build mode. */
+    static constexpr const char* PathTracerBuildModeName =
+#if defined(PATH_TRACER_BUILD_MODE_SPECIALIZED)
+        "SPECIALIZED";
+#else
+        "WALLTIME_OPTIMIZED";
+#endif
+    /* Result of mapping a requested render variant onto a pipeline slot and a shader entry point. */
+    struct SRenderVariantInfo
+    {
+        E_POLYGON_METHOD effectiveMethod;
+        E_POLYGON_METHOD pipelineMethod; // used as the method index into the per-geometry pipeline tables
+        const char* entryPoint; // entry point name handed to the pipeline build
+    };
+    /* Entry point used by every variant that has no method-specific entry point. */
+    static constexpr const char* getDefaultRenderEntryPointName(const bool persistentWorkGroups)
+    {
+        return persistentWorkGroups ?
"mainPersistent" : "main";
+    }
+    /*
+     * Maps (geometry, persistentWorkGroups, requestedMethod) to a variant description.
+     *
+     * Spheres and rectangles always report EPM_SOLID_ANGLE with the default entry point.
+     * Triangles, when SpecializedBuildMode is false, keep the requested method as the effective
+     * method but share the EPM_PROJECTED_SOLID_ANGLE pipeline slot and the default entry point.
+     * Triangles in the specialized build get dedicated entry points for EPM_AREA and
+     * EPM_SOLID_ANGLE; any other method uses EPM_PROJECTED_SOLID_ANGLE with the default entry point.
+     * Any other geometry value falls back to EPM_PROJECTED_SOLID_ANGLE with the default entry point.
+     */
+    static constexpr SRenderVariantInfo getRenderVariantInfo(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const E_POLYGON_METHOD requestedMethod)
+    {
+        const char* const defaultEntryPoint = getDefaultRenderEntryPointName(persistentWorkGroups);
+        switch (geometry)
+        {
+        case ELG_SPHERE:
+            return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, defaultEntryPoint };
+        case ELG_TRIANGLE:
+            if (!SpecializedBuildMode) // one shared pipeline slot for all requested methods
+                return { requestedMethod, EPM_PROJECTED_SOLID_ANGLE, defaultEntryPoint };
+            switch (requestedMethod)
+            {
+            case EPM_AREA:
+                return { EPM_AREA, EPM_AREA, persistentWorkGroups ? "mainPersistentArea" : "mainArea" };
+            case EPM_SOLID_ANGLE:
+                return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, persistentWorkGroups ? "mainPersistentSolidAngle" : "mainSolidAngle" };
+            case EPM_PROJECTED_SOLID_ANGLE:
+            default:
+                return { EPM_PROJECTED_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE, defaultEntryPoint };
+            }
+        case ELG_RECTANGLE:
+            return { EPM_SOLID_ANGLE, EPM_SOLID_ANGLE, defaultEntryPoint };
+        default:
+            return { EPM_PROJECTED_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE, defaultEntryPoint };
+        }
+    }
+    /* Counts builds still in flight: slots that hold a valid future but no pipeline yet, render and resolve alike. */
+    size_t getRunningPipelineBuildCount() const
+    {
+        size_t count = 0ull;
+        const auto countPending = [&count](const pipeline_future_array_t& futures, const pipeline_array_t& pipelines) -> void
+        {
+            for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry)
+            {
+                for (auto method = 0u; method < EPM_COUNT; ++method)
+                {
+                    if (futures[geometry][method].valid() && !pipelines[geometry][method])
+                        ++count;
+                }
+            }
+        };
+        for (const auto rwmc : { false, true })
+        {
+            for (const auto persistentWorkGroups : { false, true })
+                countPending(m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc), m_renderPipelines.getPipelines(persistentWorkGroups, rwmc));
+        }
+        if (m_resolvePipelineState.pendingPipeline.valid() && !m_resolvePipelineState.pipeline) // the resolve build counts too
+            ++count;
+        return count;
+    }
+    /* Counts the distinct (geometry, persistentWorkGroups, rwmc, pipelineMethod) slots that getRenderVariantInfo can map to. */
+    size_t getKnownRenderPipelineCount() const
+    {
+        size_t count =
0ull;
+        bool seen[ELG_COUNT][BinaryToggleCount][BinaryToggleCount][EPM_COUNT] = {}; // marks pipeline slots that were already counted
+        for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry)
+        {
+            for (auto persistentWorkGroups = 0u; persistentWorkGroups < BinaryToggleCount; ++persistentWorkGroups)
+            {
+                for (auto rwmc = 0u; rwmc < BinaryToggleCount; ++rwmc)
+                {
+                    for (auto method = 0u; method < EPM_COUNT; ++method)
+                    {
+                        const auto pipelineMethod = static_cast(getRenderVariantInfo(
+                            static_cast(geometry),
+                            static_cast(persistentWorkGroups),
+                            static_cast(method)
+                        ).pipelineMethod);
+                        if (seen[geometry][persistentWorkGroups][rwmc][pipelineMethod]) // several requested methods can share one slot
+                            continue;
+                        seen[geometry][persistentWorkGroups][rwmc][pipelineMethod] = true;
+                        ++count;
+                    }
+                }
+            }
+        }
+        return count;
+    }
+    /* Counts the render pipeline slots that currently hold a built pipeline. */
+    size_t getReadyRenderPipelineCount() const
+    {
+        size_t count = 0ull;
+        const auto countReady = [&count](const pipeline_array_t& pipelines) -> void
+        {
+            for (const auto& perGeometry : pipelines)
+            {
+                for (const auto& pipeline : perGeometry)
+                {
+                    if (pipeline)
+                        ++count;
+                }
+            }
+        };
+        for (const auto rwmc : { false, true })
+        {
+            for (const auto persistentWorkGroups : { false, true })
+                countReady(m_renderPipelines.getPipelines(persistentWorkGroups, rwmc));
+        }
+        return count;
+    }
+    /*
+     * Appends a job to the warm-up queue unless an equivalent one is already queued.
+     * At most one Resolve job is kept. Two Render jobs are equivalent when geometry, both
+     * toggles and the pipelineMethod reported by getRenderVariantInfo all match.
+     */
+    void enqueueWarmupJob(const SWarmupJob& job)
+    {
+        for (const auto& existing : m_pipelineCache.warmup.queue)
+        {
+            if (existing.type != job.type)
+                continue;
+            if (existing.type == SWarmupJob::E_TYPE::Resolve) // same type and it is Resolve: already queued
+                return;
+            if (
+                existing.geometry == job.geometry &&
+                existing.persistentWorkGroups == job.persistentWorkGroups &&
+                existing.rwmc == job.rwmc &&
+                getRenderVariantInfo(existing.geometry, existing.persistentWorkGroups, existing.polygonMethod).pipelineMethod ==
+                getRenderVariantInfo(job.geometry, job.persistentWorkGroups, job.polygonMethod).pipelineMethod
+            )
+                return;
+        }
+        m_pipelineCache.warmup.queue.push_back(job);
+    }
+    /* Starts the build described by job when its slot is still empty; returns true only if a build is pending afterwards. */
+    bool launchWarmupJobIfNeeded(const SWarmupJob& job)
+    {
+        if (job.type == SWarmupJob::E_TYPE::Resolve)
+        {
+            if
(m_resolvePipelineState.pipeline || m_resolvePipelineState.pendingPipeline.valid()) // already built or already building
+                return false;
+            ensureResolvePipeline();
+            return m_resolvePipelineState.pendingPipeline.valid();
+        }
+
+        auto& pipelines = m_renderPipelines.getPipelines(job.persistentWorkGroups, job.rwmc);
+        auto& pendingPipelines = m_renderPipelines.getPendingPipelines(job.persistentWorkGroups, job.rwmc);
+        const auto methodIx = static_cast(getRenderVariantInfo(job.geometry, job.persistentWorkGroups, job.polygonMethod).pipelineMethod);
+        if (pipelines[job.geometry][methodIx] || pendingPipelines[job.geometry][methodIx].valid()) // slot already built or already building
+            return false;
+
+        ensureRenderPipeline(job.geometry, job.persistentWorkGroups, job.rwmc, job.polygonMethod);
+        return pendingPipelines[job.geometry][methodIx].valid(); // false when no build could be requested
+    }
+    /*
+     * Drains the warm-up queue while the number of running builds is below the budget.
+     * Once the queue is empty and no build is running, it logs the warmup_complete summary
+     * a single time, records the startup event and saves the pipeline cache.
+     */
+    void pumpPipelineWarmup()
+    {
+        if (!m_pipelineCache.warmup.started)
+            return;
+
+        while (!m_pipelineCache.warmup.queue.empty() && getRunningPipelineBuildCount() < m_pipelineCache.warmup.budget)
+        {
+            const auto job = m_pipelineCache.warmup.queue.front();
+            m_pipelineCache.warmup.queue.pop_front(); // the job leaves the queue whether or not it launches
+            if (launchWarmupJobIfNeeded(job))
+                ++m_pipelineCache.warmup.launchedJobs;
+            else
+                ++m_pipelineCache.warmup.skippedJobs;
+        }
+
+        if (!m_pipelineCache.warmup.loggedComplete && m_pipelineCache.warmup.queue.empty() && getRunningPipelineBuildCount() == 0ull)
+        {
+            m_pipelineCache.warmup.loggedComplete = true; // guarantees the summary below is emitted once per warm-up
+            const auto warmupElapsedMs = std::chrono::duration_cast(clock_t::now() - m_pipelineCache.warmup.beganAt).count();
+            const auto readyRenderPipelines = getReadyRenderPipelineCount();
+            const auto totalRenderPipelines = getKnownRenderPipelineCount();
+            m_logger->log(
+                "PATH_TRACER_PIPELINE_CACHE warmup_complete wall_ms=%lld queued_jobs=%zu launched_jobs=%zu skipped_jobs=%zu max_parallel=%zu ready_render=%zu total_render=%zu resolve_ready=%u",
+                ILogger::ELL_INFO,
+                static_cast(warmupElapsedMs),
+                m_pipelineCache.warmup.queuedJobs,
+                m_pipelineCache.warmup.launchedJobs,
+
m_pipelineCache.warmup.skippedJobs, + m_pipelineCache.warmup.budget, + readyRenderPipelines, + totalRenderPipelines, + m_resolvePipelineState.pipeline ? 1u : 0u + ); + logStartupEvent("pipeline_warmup_complete"); + savePipelineCache(); + } + } + + pipeline_future_t requestComputePipelineBuild(smart_refctd_ptr shaderModule, IGPUPipelineLayout* const pipelineLayout, const char* const entryPoint) + { + if (!shaderModule) + return {}; + + return std::async( + std::launch::async, + [ + this, + device = m_device, + pipelineCache = m_pipelineCache.object, + shader = std::move(shaderModule), + layout = smart_refctd_ptr(pipelineLayout), + requiredSubgroupSize = m_requiredSubgroupSize, + logger = m_logger.get(), + entryPointName = std::string(entryPoint), + cacheLoadedFromDisk = m_pipelineCache.loadedFromDisk + ]() -> smart_refctd_ptr + { + const auto startedAt = clock_t::now(); + auto preparedShader = getPreparedShaderForEntryPoint(shader, entryPointName.c_str()); + if (!preparedShader) + return nullptr; + smart_refctd_ptr pipeline; + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = preparedShader.get(); + params.shader.entryPoint = entryPointName.c_str(); + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = requiredSubgroupSize; + if (!device->createComputePipelines(pipelineCache.get(), { ¶ms, 1 }, &pipeline)) + { + if (logger) + logger->log("Failed to create precompiled path tracing pipeline for %s", ILogger::ELL_ERROR, entryPointName.c_str()); + return nullptr; + } + if (logger) + { + const auto wallMs = std::chrono::duration_cast(clock_t::now() - startedAt).count(); + logger->log( + "PATH_TRACER_PIPELINE_BUILD entrypoint=%s wall_ms=%lld cache_loaded_from_disk=%u", + ILogger::ELL_INFO, + entryPointName.c_str(), + static_cast(wallMs), + cacheLoadedFromDisk ? 
1u : 0u
+                    );
+                }
+                return pipeline;
+            }
+        );
+    }
+    /*
+     * Non-blocking check of one pending build.
+     * When the future is ready its result is moved into pipeline; a non-null result marks the
+     * pipeline cache dirty and bumps the "newly ready" counter. Nothing happens when there is
+     * no pending build or when the slot already holds a pipeline.
+     */
+    void pollPendingPipeline(pipeline_future_t& future, smart_refctd_ptr& pipeline)
+    {
+        if (!future.valid() || pipeline)
+            return;
+        if (future.wait_for(std::chrono::milliseconds(0)) != std::future_status::ready) // zero timeout: never blocks
+            return;
+        pipeline = future.get(); // get() leaves the future invalid; a failed build leaves pipeline null
+        if (pipeline)
+        {
+            m_pipelineCache.dirty = true;
+            ++m_pipelineCache.newlyReadyPipelinesSinceLastSave;
+        }
+    }
+    /* Polls every render slot and the resolve slot without blocking. */
+    void pollPendingPipelines()
+    {
+        for (const auto rwmc : { false, true })
+        {
+            for (const auto persistentWorkGroups : { false, true })
+            {
+                auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc);
+                auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc);
+                for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry)
+                {
+                    for (auto method = 0u; method < EPM_COUNT; ++method)
+                        pollPendingPipeline(pendingPipelines[geometry][method], pipelines[geometry][method]);
+                }
+            }
+        }
+        pollPendingPipeline(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline);
+    }
+    /*
+     * Blocks until every pending render and resolve build has finished and stores the results.
+     * Performs the same dirty-flag and counter bookkeeping as pollPendingPipeline for each
+     * slot that went from empty to holding a pipeline.
+     */
+    void waitForPendingPipelines()
+    {
+        auto waitAndStore = [](pipeline_future_t& future, smart_refctd_ptr& pipeline) -> void
+        {
+            if (!future.valid() || pipeline)
+                return;
+            future.wait();
+            pipeline = future.get();
+        };
+
+        for (const auto rwmc : { false, true })
+        {
+            for (const auto persistentWorkGroups : { false, true })
+            {
+                auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc);
+                auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc);
+                for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry)
+                {
+                    for (auto method = 0u; method < EPM_COUNT; ++method)
+                    {
+                        const auto hadPipeline = static_cast(pipelines[geometry][method]); // remember the state before waiting
+                        waitAndStore(pendingPipelines[geometry][method], pipelines[geometry][method]);
+                        const auto pipelineBecameReady = !hadPipeline && static_cast(pipelines[geometry][method]);
+                        m_pipelineCache.dirty = m_pipelineCache.dirty ||
pipelineBecameReady;
+                        m_pipelineCache.newlyReadyPipelinesSinceLastSave += pipelineBecameReady ? 1ull : 0ull;
+                    }
+                }
+            }
+        }
+        const auto hadResolvePipeline = static_cast(m_resolvePipelineState.pipeline); // same before/after bookkeeping for the resolve pipeline
+        waitAndStore(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline);
+        m_pipelineCache.dirty = m_pipelineCache.dirty || (!hadResolvePipeline && static_cast(m_resolvePipelineState.pipeline));
+        if (!hadResolvePipeline && static_cast(m_resolvePipelineState.pipeline))
+            ++m_pipelineCache.newlyReadyPipelinesSinceLastSave;
+    }
+    /*
+     * Returns the render pipeline for the requested variant if it is ready, otherwise nullptr.
+     *
+     * Never blocks: it polls the slot's pending build and, when no build is in flight, requests
+     * one using the shader stored for the geometry and the layout matching rwmc.
+     * NOTE(review): after a failed build the future is consumed and the slot stays empty, so the
+     * next call requests the same build again - confirm that this retry behaviour is intended.
+     */
+    IGPUComputePipeline* ensureRenderPipeline(const E_LIGHT_GEOMETRY geometry, const bool persistentWorkGroups, const bool rwmc, const E_POLYGON_METHOD polygonMethod)
+    {
+        auto& pipelines = m_renderPipelines.getPipelines(persistentWorkGroups, rwmc);
+        auto& pendingPipelines = m_renderPipelines.getPendingPipelines(persistentWorkGroups, rwmc);
+        const auto variantInfo = getRenderVariantInfo(geometry, persistentWorkGroups, polygonMethod);
+        const auto methodIx = static_cast(variantInfo.pipelineMethod); // slot index; may differ from the requested polygonMethod
+        auto& pipeline = pipelines[geometry][methodIx];
+        auto& future = pendingPipelines[geometry][methodIx];
+
+        pollPendingPipeline(future, pipeline);
+        if (pipeline)
+            return pipeline.get();
+
+        if (!future.valid()) // no build in flight for this slot
+        {
+            const auto& shaders = m_renderPipelines.getShaders(persistentWorkGroups, rwmc);
+            auto* const layout = rwmc ?
m_rwmcRenderPipelineLayout.get() : m_renderPipelineLayout.get();
+            future = requestComputePipelineBuild(shaders[geometry], layout, variantInfo.entryPoint);
+        }
+
+        return nullptr; // not ready yet; the caller is expected to ask again later
+    }
+    /* Resolve counterpart of ensureRenderPipeline: returns the pipeline when ready, otherwise requests a build of the "resolve" entry point and returns nullptr. */
+    IGPUComputePipeline* ensureResolvePipeline()
+    {
+        pollPendingPipeline(m_resolvePipelineState.pendingPipeline, m_resolvePipelineState.pipeline);
+        if (m_resolvePipelineState.pipeline)
+            return m_resolvePipelineState.pipeline.get();
+
+        if (!m_resolvePipelineState.pendingPipeline.valid())
+            m_resolvePipelineState.pendingPipeline = requestComputePipelineBuild(m_resolvePipelineState.shader, m_resolvePipelineState.layout.get(), "resolve");
+
+        return nullptr;
+    }
+    /*
+     * Resets the warm-up state, fills the warm-up queue and starts pumping it.
+     *
+     * Jobs for the geometry currently selected in the GUI are queued first, then the remaining
+     * geometries, then the resolve job. Within a geometry the GUI's current persistent / RWMC
+     * toggles are tried before their opposites. enqueueWarmupJob drops duplicates.
+     */
+    void kickoffPipelineWarmup()
+    {
+        m_pipelineCache.warmup.started = true;
+        m_pipelineCache.warmup.queue.clear();
+        m_pipelineCache.warmup.loggedComplete = false;
+        m_pipelineCache.warmup.beganAt = clock_t::now();
+        m_pipelineCache.warmup.budget = getBackgroundPipelineBuildBudget();
+        m_pipelineCache.warmup.queuedJobs = 0ull;
+        m_pipelineCache.warmup.launchedJobs = 0ull;
+        m_pipelineCache.warmup.skippedJobs = 0ull;
+        const auto currentGeometry = static_cast(guiControlled.PTPipeline);
+        const auto currentMethod = static_cast(guiControlled.polygonMethod);
+        const auto enqueueRenderVariants = [this, currentGeometry](const E_LIGHT_GEOMETRY geometry, const E_POLYGON_METHOD preferredMethod) -> void
+        {
+            const auto enqueueForMethods = [this, geometry](const std::initializer_list methods, const bool preferPersistent, const bool preferRWMC) -> void
+            {
+                const bool persistentOrder[2] = { preferPersistent, !preferPersistent }; // preferred toggle value first
+                const bool rwmcOrder[2] = { preferRWMC, !preferRWMC };
+                for (const auto method : methods)
+                {
+                    for (const auto persistentWorkGroups : persistentOrder)
+                    {
+                        for (const auto rwmc : rwmcOrder)
+                        {
+                            enqueueWarmupJob({
+                                .type = SWarmupJob::E_TYPE::Render,
+                                .geometry = geometry,
+                                .persistentWorkGroups = persistentWorkGroups,
+                                .rwmc = rwmc,
+                                .polygonMethod = method
+                            });
+                        }
+                    }
+                }
+            };
+
+            const bool
preferPersistent = geometry == currentGeometry ? guiControlled.usePersistentWorkGroups : false; // GUI toggles only steer the currently selected geometry
+            const bool preferRWMC = geometry == currentGeometry ? guiControlled.useRWMC : false;
+            switch (geometry)
+            {
+            case ELG_SPHERE:
+                enqueueForMethods({ EPM_SOLID_ANGLE }, preferPersistent, preferRWMC);
+                break;
+            case ELG_TRIANGLE:
+            { // all three methods are queued, with the preferred one first
+                switch (preferredMethod)
+                {
+                case EPM_AREA:
+                    enqueueForMethods({ EPM_AREA, EPM_SOLID_ANGLE, EPM_PROJECTED_SOLID_ANGLE }, preferPersistent, preferRWMC);
+                    break;
+                case EPM_SOLID_ANGLE:
+                    enqueueForMethods({ EPM_SOLID_ANGLE, EPM_AREA, EPM_PROJECTED_SOLID_ANGLE }, preferPersistent, preferRWMC);
+                    break;
+                case EPM_PROJECTED_SOLID_ANGLE:
+                default:
+                    enqueueForMethods({ EPM_PROJECTED_SOLID_ANGLE, EPM_AREA, EPM_SOLID_ANGLE }, preferPersistent, preferRWMC);
+                    break;
+                }
+                break;
+            }
+            case ELG_RECTANGLE:
+                enqueueForMethods({ EPM_SOLID_ANGLE }, preferPersistent, preferRWMC);
+                break;
+            default:
+                break;
+            }
+        };
+
+        enqueueRenderVariants(currentGeometry, currentMethod); // the geometry selected in the GUI goes to the front of the queue
+        for (auto geometry = 0u; geometry < ELG_COUNT; ++geometry)
+        {
+            const auto geometryEnum = static_cast(geometry);
+            if (geometryEnum == currentGeometry) // already queued above
+                continue;
+            enqueueRenderVariants(geometryEnum, currentMethod);
+        }
+        enqueueWarmupJob({ .type = SWarmupJob::E_TYPE::Resolve }); // the resolve pipeline is queued last
+        m_pipelineCache.warmup.queuedJobs = m_pipelineCache.warmup.queue.size(); // queue size after duplicates were dropped
+        const auto logicalConcurrency = std::thread::hardware_concurrency(); // only used in the log line below
+        m_logger->log(
+            "PATH_TRACER_PIPELINE_CACHE warmup_start queued_jobs=%zu max_parallel=%zu logical_threads=%u current_geometry=%u current_method=%u",
+            ILogger::ELL_INFO,
+            m_pipelineCache.warmup.queuedJobs,
+            m_pipelineCache.warmup.budget,
+            logicalConcurrency,
+            static_cast(currentGeometry),
+            static_cast(currentMethod)
+        );
+        pumpPipelineWarmup(); // launch the first batch right away
+    }
+    /* Returns the render pipeline matching the current GUI selection; nullptr while that pipeline is not ready. */
+    IGPUComputePipeline* pickPTPipeline()
+    {
+        return ensureRenderPipeline(
+            static_cast(guiControlled.PTPipeline),
+            guiControlled.usePersistentWorkGroups,
+            guiControlled.useRWMC,
+
static_cast(guiControlled.polygonMethod) + ); } private: @@ -1542,12 +3020,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui // gpu resources smart_refctd_ptr m_cmdPool; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; - std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; - smart_refctd_ptr m_resolvePipeline; + SRenderPipelineStorage m_renderPipelines; + smart_refctd_ptr m_renderPipelineLayout; + smart_refctd_ptr m_rwmcRenderPipelineLayout; + SResolvePipelineState m_resolvePipelineState; smart_refctd_ptr m_presentPipeline; + IPipelineBase::SUBGROUP_SIZE m_requiredSubgroupSize = IPipelineBase::SUBGROUP_SIZE::UNKNOWN; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; @@ -1590,7 +3068,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui video::CDumbPresentationOracle m_oracle; - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + uint16_t gcIndex = {}; struct GUIControllables { @@ -1599,6 +3077,7 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui float camYAngle = 165.f / 180.f * 3.14159f; float camXAngle = 32.f / 180.f * 3.14159f; int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int polygonMethod = EPM_PROJECTED_SOLID_ANGLE; int spp = 32; int depth = 3; rwmc::SResolveParameters::SCreateParams rwmcParams; @@ -1615,7 +3094,12 @@ class HLSLComputePathtracer final : public SimpleWindowedApplication, public Bui }; TransformRequestParams m_transformParams; - bool m_firstFrame = true; + clock_t::time_point m_startupBeganAt = clock_t::now(); + bool m_hasPathtraceOutput 
= false; + bool m_loggedFirstFrameLoop = false; + bool m_loggedFirstRenderDispatch = false; + bool m_loggedFirstRenderSubmit = false; + SPipelineCacheState m_pipelineCache; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; };