diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index f5d5206dc..66c720782 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -265,32 +265,37 @@ struct Unidirectional // #endif } + measure_type getSingleSampleMeasure(uint32_t sampleID, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + { + vector3_type uvw = rand3d(0u, sampleID, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); + + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, sampleID, ray, scene); + } + if (!hit) + missProgram(ray); + + return ray.payload.accumulation; + } + // Li measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) { measure_type Li = (measure_type)0.0; - scalar_type meanLumaSq = 0.0; + //scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { - vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? 
- ray_type ray = rayGen.generate(uvw); - - // bounces - bool hit = true; - bool rayAlive = true; - for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) - { - ray.intersectionT = numeric_limits::max; - ray.objectID = intersector_type::traceRay(ray, scene); - - hit = ray.objectID.id != -1; - if (hit) - rayAlive = closestHitProgram(1, i, ray, scene); - } - if (!hit) - missProgram(ray); - - measure_type accumulation = ray.payload.accumulation; + measure_type accumulation = getSingleSampleMeasure(i, depth, scene); scalar_type rcpSampleSize = 1.0 / (i + 1); Li += (accumulation - Li) * rcpSampleSize; @@ -302,6 +307,65 @@ struct Unidirectional return Li; } + struct RWMCCascadeSettings + { + uint32_t size; + uint32_t start; + uint32_t base; + }; + + void generateCascade(int32_t2 coords, uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(RWMCCascadeSettings) cascadeSettings, NBL_CONST_REF_ARG(scene_type) scene) + { + // TODO: move `MaxCascadeSize` somewhere else + const static uint32_t MaxCascadeSize = 10u; + float32_t4 cascadeEntry[MaxCascadeSize]; + for (int i = 0; i < MaxCascadeSize; ++i) + cascadeEntry[i] = float32_t4(0.0f, 0.0f, 0.0f, 0.0f); + + float lowerScale = cascadeSettings.start; + float upperScale = lowerScale * cascadeSettings.base; + + // most of this code is stolen from https://cg.ivd.kit.edu/publications/2018/rwmc/tool/split.cpp + for (uint32_t i = 0; i < numSamples; i++) + { + measure_type accumulation = getSingleSampleMeasure(i, depth, scene); + + const float luma = getLuma(accumulation); + + uint32_t lowerCascadeIndex = 0u; + while (!(luma < upperScale) && lowerCascadeIndex < cascadeSettings.size - 2) + { + lowerScale = upperScale; + upperScale *= cascadeSettings.base; + ++lowerCascadeIndex; + } + + float lowerCascadeLevelWeight; + float higherCascadeLevelWeight; + + if (luma <= lowerScale) + lowerCascadeLevelWeight = 1.0f; + else if (luma < upperScale) + lowerCascadeLevelWeight = max(0.0f, (lowerScale / luma - lowerScale / upperScale) / (1.0f - 
lowerScale / upperScale)); + else // Inf, NaN ... + lowerCascadeLevelWeight = 0.0f; + + if (luma < upperScale) + higherCascadeLevelWeight = max(0.0f, 1.0f - lowerCascadeLevelWeight); + else + higherCascadeLevelWeight = upperScale / luma; + + // TODO: compute the running average right away + cascadeEntry[lowerCascadeIndex] += float32_t4(accumulation * lowerCascadeLevelWeight, 1.0f); + } + + for (uint32_t i = 0; i < 6; i++) + { + cascadeEntry[i] /= float(numSamples); + cascade[uint3(coords.x, coords.y, i)] = cascadeEntry[i]; + } + } + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_SAMPLES_LOG2 = 10u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 81736f508..ee0486865 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -217,9 +217,23 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); - float32_t4 pixCol = float32_t4(color, 1.0); - outImage[coords] = pixCol; + bool useRWMC = bool(pc.useRWMC); + if (!useRWMC) + { + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + float32_t4 pixCol = float32_t4(color, 1.0); + outImage[coords] = pixCol; + } + else + { + pathtracer_type::RWMCCascadeSettings cascadeSettings; + cascadeSettings.size = pc.rwmcCascadeSize; + cascadeSettings.start = pc.rwmcCascadeStart; + cascadeSettings.base = pc.rwmcCascadeBase; + + // TODO: template parameter should be + pathtracer.generateCascade(coords, pc.sampleCount, pc.depth, cascadeSettings, scene); + } #ifdef PERSISTENT_WORKGROUPS } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 5e5cf89da..b54d28227 100644 --- 
a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -6,6 +6,10 @@ struct SPushConstants float32_t4x4 invMVP; int sampleCount; int depth; + uint32_t rwmcCascadeSize; + int useRWMC; + uint32_t rwmcCascadeStart; + uint32_t rwmcCascadeBase; }; [[vk::push_constant]] SPushConstants pc; @@ -19,5 +23,6 @@ struct SPushConstants [[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; [[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]][[vk::binding(1, 0)]] RWTexture2DArray cascade; #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl new file mode 100644 index 000000000..49c1e306c --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -0,0 +1,194 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include + +struct SPushConstants +{ + uint32_t cascadeCount; + float base; + uint32_t sampleCount; + float minReliableLuma; + float kappa; +}; + +[[vk::push_constant]] SPushConstants pc; +[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(1, 0)]] RWTexture2DArray cascade; + +using namespace nbl; +using namespace hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 512; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; + +struct RWMCReweightingParameters +{ + uint32_t lastCascadeIndex; + float initialEmin; // a minimum image brightness that we always consider reliable + float reciprocalBase; + float reciprocalN; + float reciprocalKappa; + float colorReliabilityFactor; + float NOverKappa; +}; + +RWMCReweightingParameters computeReweightingParameters(uint32_t cascadeCount, float base, uint32_t sampleCount, float minReliableLuma, float kappa) +{ + RWMCReweightingParameters retval; + retval.lastCascadeIndex = cascadeCount - 1u; + 
retval.initialEmin = minReliableLuma; + retval.reciprocalBase = 1.f / base; + const float N = float(sampleCount); + retval.reciprocalN = 1.f / N; + retval.reciprocalKappa = 1.f / kappa; + // if not interested in exact expected value estimation (kappa!=1.f), can usually accept a bit more variance relative to the image brightness we already have + // allow up to ~`base` times more energy in one sample to lessen bias in some cases + retval.colorReliabilityFactor = base + (1.f - base) * retval.reciprocalKappa; + retval.NOverKappa = N * retval.reciprocalKappa; + + return retval; +} + +struct RWMCCascadeSample +{ + float32_t3 centerValue; + float normalizedCenterLuma; + float normalizedNeighbourhoodAverageLuma; +}; + +// TODO: figure out what values pixels outside the image should have, 0.0f is incorrect +float32_t3 RWMCsampleCascadeTexel(int32_t2 currentCoord, int32_t2 offset, uint32_t cascadeIndex) +{ + const int32_t2 texelCoord = currentCoord + offset; + if (any(texelCoord < int32_t2(0, 0))) + return float32_t3(0.0f, 0.0f, 0.0f); + + float32_t4 output = cascade.Load(int32_t3(texelCoord, int32_t(cascadeIndex))); + return float32_t3(output.r, output.g, output.b); +} + +float32_t calcLuma(in float32_t3 col) +{ + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); +} + +RWMCCascadeSample RWMCSampleCascade(in int32_t2 coord, in uint cascadeIndex, in float reciprocalBaseI) +{ + float32_t3 neighbourhood[9]; + neighbourhood[0] = RWMCsampleCascadeTexel(coord, int32_t2(-1, -1), cascadeIndex); + neighbourhood[1] = RWMCsampleCascadeTexel(coord, int32_t2(0, -1), cascadeIndex); + neighbourhood[2] = RWMCsampleCascadeTexel(coord, int32_t2(1, -1), cascadeIndex); + neighbourhood[3] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 0), cascadeIndex); + neighbourhood[4] = RWMCsampleCascadeTexel(coord, int32_t2(0, 0), cascadeIndex); + neighbourhood[5] = RWMCsampleCascadeTexel(coord, int32_t2(1, 0), cascadeIndex); + neighbourhood[6] = RWMCsampleCascadeTexel(coord, int32_t2(-1, 1), cascadeIndex); + 
neighbourhood[7] = RWMCsampleCascadeTexel(coord, int32_t2(0, 1), cascadeIndex); + neighbourhood[8] = RWMCsampleCascadeTexel(coord, int32_t2(1, 1), cascadeIndex); + + // numerical robustness + float32_t3 excl_hood_sum = ((neighbourhood[0] + neighbourhood[1]) + (neighbourhood[2] + neighbourhood[3])) + + ((neighbourhood[5] + neighbourhood[6]) + (neighbourhood[7] + neighbourhood[8])); + + RWMCCascadeSample retval; + retval.centerValue = neighbourhood[4]; + retval.normalizedNeighbourhoodAverageLuma = retval.normalizedCenterLuma = calcLuma(neighbourhood[4]) * reciprocalBaseI; + retval.normalizedNeighbourhoodAverageLuma = (calcLuma(excl_hood_sum) * reciprocalBaseI + retval.normalizedNeighbourhoodAverageLuma) / 9.f; + return retval; +} + +float32_t3 RWMCReweight(in RWMCReweightingParameters params, in int32_t2 coord) +{ + float reciprocalBaseI = 1.f; + RWMCCascadeSample curr = RWMCSampleCascade(coord, 0u, reciprocalBaseI); + + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + float Emin = params.initialEmin; + + float prevNormalizedCenterLuma, prevNormalizedNeighbourhoodAverageLuma; + for (uint i = 0u; i <= params.lastCascadeIndex; i++) + { + const bool notFirstCascade = i != 0u; + const bool notLastCascade = i != params.lastCascadeIndex; + + RWMCCascadeSample next; + if (notLastCascade) + { + reciprocalBaseI *= params.reciprocalBase; + next = RWMCSampleCascade(coord, i + 1u, reciprocalBaseI); + } + + float reliability = 1.f; + // sample counting-based reliability estimation + if (params.reciprocalKappa <= 1.f) + { + float localReliability = curr.normalizedCenterLuma; + // reliability in 3x3 pixel block (see robustness) + float globalReliability = curr.normalizedNeighbourhoodAverageLuma; + if (notFirstCascade) + { + localReliability += prevNormalizedCenterLuma; + globalReliability += prevNormalizedNeighbourhoodAverageLuma; + } + if (notLastCascade) + { + localReliability += next.normalizedCenterLuma; + globalReliability += next.normalizedNeighbourhoodAverageLuma; + 
} + // check if above minimum sampling threshold (avg 9 sample occurrences in 3x3 neighbourhood), then use per-pixel reliability (NOTE: ternary op is in reverse) + reliability = globalReliability < params.reciprocalN ? globalReliability : localReliability; + { + const float accumLuma = calcLuma(accumulation); + if (accumLuma > Emin) + Emin = accumLuma; + + const float colorReliability = Emin * reciprocalBaseI * params.colorReliabilityFactor; + + reliability += colorReliability; + reliability *= params.NOverKappa; + reliability -= params.reciprocalKappa; + reliability = clamp(reliability * 0.5f, 0.f, 1.f); + } + } + accumulation += curr.centerValue * reliability; + + prevNormalizedCenterLuma = curr.normalizedCenterLuma; + prevNormalizedNeighbourhoodAverageLuma = curr.normalizedNeighbourhoodAverageLuma; + curr = next; + } + + return accumulation; +} + +int32_t2 getCoordinates() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +// this function is for testing purposes +// simply adds every cascade buffer, output should be nearly the same as output of default accumulator (RWMC off) +float32_t3 sumCascade(in const int32_t2 coords) +{ + float32_t3 accumulation = float32_t3(0.0f, 0.0f, 0.0f); + + for (int i = 0; i < 6; ++i) + { + float32_t4 cascadeLevel = cascade.Load(uint3(coords, i)); + accumulation += float32_t3(cascadeLevel.r, cascadeLevel.g, cascadeLevel.b); + } + + return accumulation; +} + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + const int32_t2 coords = getCoordinates(); + //float32_t3 color = sumCascade(coords); + + RWMCReweightingParameters reweightingParameters = computeReweightingParameters(pc.cascadeCount, pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa); + float32_t3 color = RWMCReweight(reweightingParameters, coords); + + outImage[coords] = float32_t4(color, 1.0f); +} diff --git 
a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 0dc5fc053..fb59d71cb 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -15,10 +15,25 @@ using namespace asset; using namespace ui; using namespace video; -struct PTPushConstant { +static constexpr uint32_t CascadeSize = 6u; +struct PTPushConstant +{ matrix4SIMD invMVP; int sampleCount; int depth; + const uint32_t rwmcCascadeSize = CascadeSize; + int useRWMC; + uint32_t rwmcCascadeStart; + uint32_t rwmcCascadeBase; +}; + +struct RWMCPushConstants +{ + const uint32_t cascadeSize = CascadeSize; + float base; + uint32_t sampleCount; + float minReliableLuma; + float kappa; }; // TODO: Add a QueryPool for timestamping once its ready @@ -60,6 +75,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string ReweightingShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ -256,7 +272,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return gpuDS; }; - std::array descriptorSet0Bindings = {}; + std::array descriptorSet0Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -268,6 +284,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + descriptorSet0Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = 
ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -292,6 +317,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + presentDescriptorSetBindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -496,6 +522,40 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } } + // Create reweighting pipeline + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RWMCPushConstants) + }; + + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0) + ); + + if (!pipelineLayout) { + return logFail("Failed to create reweighting pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ReweightingShaderPath); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_reweightingPipeline)) + return logFail("Failed to create HLSL reweighting compute pipeline!\n"); + } + + + } + // Create graphics pipeline { auto scRes = static_cast(m_surface->getSwapchainResources()); @@ -676,7 +736,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // create views for textures { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + auto 
createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { IGPUImage::SCreationParams imgInfo; imgInfo.format = colorFormat; imgInfo.type = IGPUImage::ET_2D; @@ -684,10 +744,19 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgInfo.extent.height = height; imgInfo.extent.depth = 1u; imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; imgInfo.samples = IGPUImage::ESCF_1_BIT; imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeSize; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } auto image = m_device->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); @@ -696,35 +765,54 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; imgViewInfo.image = std::move(img); imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; imgViewInfo.flags = static_cast(0u); imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; imgViewInfo.subresourceRange.baseArrayLayer = 0u; imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.subresourceRange.levelCount = 1u; + if (!useCascadeCreationParameters) + { + 
imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.viewType = IGPUImageView::ET_2D; + } + else + { + imgViewInfo.subresourceRange.layerCount = CascadeSize; + imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; + } + return m_device->createImageView(std::move(imgViewInfo)); }; auto params = envMap->getCreationParameters(); auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); m_envMapView = createHDRIImageView(envMap); m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); m_scrambleView = createHDRIImageView(scrambleMap); m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); outImg->setObjectDebugName("Output Image"); m_outImgView = createHDRIImageView(outImg); m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, true); + m_cascadeView->setObjectDebugName("Cascade View"); + + // TODO: change cascade layout to general } // create sequence buffer view @@ -855,22 +943,24 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto sampler1 = m_device->createSampler(samplerParams1); - std::array writeDSInfos = {}; + std::array writeDSInfos = {}; writeDSInfos[0].desc = m_outImgView; writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, 
ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_sequenceBufferView; + writeDSInfos[4].desc = m_scrambleView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[4].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[5].desc = m_outImgView; + writeDSInfos[5].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writeDescriptorSets = {}; + std::array writeDescriptorSets = {}; writeDescriptorSets[0] = { .dstSet = m_descriptorSet0.get(), .binding = 0, @@ -879,32 +969,39 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), - .binding = 0, + .dstSet = m_descriptorSet0.get(), + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[1] }; writeDescriptorSets[2] = { .dstSet = m_descriptorSet2.get(), - .binding = 1, + .binding = 0, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[2] }; 
writeDescriptorSets[3] = { .dstSet = m_descriptorSet2.get(), - .binding = 2, + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[3] }; writeDescriptorSets[4] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + writeDescriptorSets[5] = { .dstSet = m_presentDescriptorSet.get(), .binding = 0, .arrayElement = 0u, .count = 1u, - .info = &writeDSInfos[4] + .info = &writeDSInfos[5] }; m_device->updateDescriptorSets(writeDescriptorSets, {}); @@ -1000,6 +1097,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &useRWMC); + ImGui::SliderFloat("base", &rwmcPushConstants.base, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcPushConstants.minReliableLuma, 0.1f, 32.0f); + ImGui::SliderFloat("kappa", &rwmcPushConstants.kappa, 0.1f, 32.0f); + ImGui::End(); } ); @@ -1022,6 +1125,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_oracle.reportBeginFrameRecord(); m_camera.mapKeysToWASD(); + // set initial push constants contents + rwmcPushConstants.base = 8.0f; + rwmcPushConstants.sampleCount = spp; + rwmcPushConstants.minReliableLuma = 1.0f; + rwmcPushConstants.kappa = 5.0f; + + pc.rwmcCascadeStart = 1.0; + pc.rwmcCascadeBase = 8.0f; + return true; } @@ -1083,11 +1195,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); // disregard surface/swapchain transformation for now const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.useRWMC = useRWMC ? 
1 : 0; pc.sampleCount = spp; pc.depth = depth; + rwmcPushConstants.sampleCount = spp; + // safe to proceed // upload buffer data cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); @@ -1120,6 +1234,34 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); } + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade + if (useRWMC) + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + // cube envmap handle { IGPUComputePipeline* pipeline; @@ -1140,6 +1282,53 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); } + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? 
+ { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // reweighting + if(useRWMC) + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = nullptr; + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_reweightingPipeline.get() : nullptr; + + if (!pipeline) + { + m_logger->log("Reweighting pipeline is not valid", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RWMCPushConstants), &rwmcPushConstants); + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) { const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { @@ -1371,6 +1560,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + smart_refctd_ptr m_reweightingPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> 
m_cmdBufs; @@ -1388,6 +1578,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, smart_refctd_ptr m_envMapView, m_scrambleView; smart_refctd_ptr m_sequenceBufferView; smart_refctd_ptr m_outImgView; + smart_refctd_ptr m_cascadeView; // sync smart_refctd_ptr m_semaphore; @@ -1423,6 +1614,9 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int spp = 32; int depth = 3; bool usePersistentWorkGroups = false; + bool useRWMC = false; + RWMCPushConstants rwmcPushConstants; + PTPushConstant pc; bool m_firstFrame = true; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} };