Conversation
| .baseArrayLayer = 0u, | ||
| .layerCount = 1u | ||
| }, | ||
| .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, |
There was a problem hiding this comment.
oldLayout be UNDEFINED, you'll be overwriting all data anyway on the destination mip
| core::smart_refctd_ptr<IGPUBuffer> lumaTexelBuffer; | ||
| const auto lumaMapLastMip = lumaMapMipLevels - 1; | ||
| const auto lumaMapLastMipExtent = lumaMapImage->getMipSize(lumaMapLastMip); | ||
| const auto lumaMapLastTexelCount = lumaMapLastMipExtent.x * lumaMapLastMipExtent.y * lumaMapLastMipExtent.z; | ||
| { | ||
| IGPUImage::SBufferCopy region = {}; | ||
| region.imageSubresource.aspectMask = IImage::EAF_COLOR_BIT; | ||
| region.imageSubresource.mipLevel = lumaMapLastMip; | ||
| region.imageSubresource.baseArrayLayer = 0; | ||
| region.imageSubresource.layerCount = 1; | ||
| region.imageExtent = { lumaMapLastMipExtent.x, lumaMapLastMipExtent.y, lumaMapLastMipExtent.z }; | ||
|
|
||
| IGPUBuffer::SCreationParams bufferCreationParams = {}; | ||
| bufferCreationParams.size = lumaMapLastTexelCount * getTexelOrBlockBytesize(EF_R32_SFLOAT); | ||
| bufferCreationParams.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT; | ||
| lumaTexelBuffer = logicalDevice->createBuffer(std::move(bufferCreationParams)); | ||
| if (!lumaTexelBuffer) | ||
| { | ||
| if (auto* logger = logicalDevice->getLogger()) | ||
| logger->log("ScreenShot: failed to create GPU texel buffer.", system::ILogger::ELL_ERROR); | ||
| return; | ||
| } | ||
| auto gpuTexelBufferMemReqs = lumaTexelBuffer->getMemoryReqs(); | ||
| gpuTexelBufferMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDownStreamingMemoryTypeBits(); | ||
| if (!gpuTexelBufferMemReqs.memoryTypeBits) | ||
| { | ||
| if (auto* logger = logicalDevice->getLogger()) | ||
| logger->log("ScreenShot: no down-streaming memory type for texel buffer.", system::ILogger::ELL_ERROR); | ||
| return; | ||
| } | ||
| auto gpuTexelBufferMem = logicalDevice->allocate(gpuTexelBufferMemReqs, lumaTexelBuffer.get()); | ||
| if (!gpuTexelBufferMem.isValid()) | ||
| { | ||
| if (auto* logger = logicalDevice->getLogger()) | ||
| logger->log("ScreenShot: failed to allocate texel buffer memory.", system::ILogger::ELL_ERROR); | ||
| return; | ||
| } | ||
|
|
||
| IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {}; | ||
| decltype(info)::image_barrier_t barrier = {}; | ||
| info.imgBarriers = { &barrier, &barrier + 1 }; | ||
|
|
||
| { | ||
| barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT; | ||
| barrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; | ||
| barrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; | ||
| barrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT; | ||
| barrier.oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL; | ||
| barrier.newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; | ||
| barrier.image = lumaMapImage; | ||
| barrier.subresourceRange.aspectMask = IImage::EAF_COLOR_BIT; | ||
| barrier.subresourceRange.baseMipLevel = lumaMapMipLevels - 1; | ||
| barrier.subresourceRange.levelCount = 1u; | ||
| barrier.subresourceRange.baseArrayLayer = 0; | ||
| barrier.subresourceRange.layerCount = 1; | ||
| cmdBuf->pipelineBarrier(EDF_NONE,info); | ||
| } | ||
| cmdBuf->copyImageToBuffer(lumaMapImage,IImage::LAYOUT::TRANSFER_SRC_OPTIMAL,lumaTexelBuffer.get(),1,®ion); | ||
| } |
There was a problem hiding this comment.
I don't think you need to download the average luma back to CPU, use an accessor for the average luma in your shader code
There was a problem hiding this comment.
even if last mip level you compute is 2x1 or 2x2, the GPU can do a blilinearly filtered read right in the middle to get the average luma
| { | ||
| .barrier = { | ||
| .dep = { | ||
| .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, | ||
| .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, | ||
| .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, | ||
| .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ||
| } | ||
| }, | ||
| .image = lumaMapImage, | ||
| .subresourceRange = { | ||
| .aspectMask = IImage::EAF_COLOR_BIT, | ||
| .baseMipLevel = 0u, | ||
| .levelCount = lumaMapMipLevels - 1, | ||
| .baseArrayLayer = 0u, | ||
| .layerCount = 1u | ||
| }, | ||
| .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, | ||
| .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, | ||
| }, | ||
| { | ||
| .barrier = { | ||
| .dep = { | ||
| .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, | ||
| .srcAccessMask = ACCESS_FLAGS::TRANSFER_READ_BIT, | ||
| .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, | ||
| .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ||
| } | ||
| }, | ||
| .image = lumaMapImage, | ||
| .subresourceRange = { | ||
| .aspectMask = IImage::EAF_COLOR_BIT, | ||
| .baseMipLevel = lumaMapMipLevels - 1, | ||
| .levelCount = 1, | ||
| .baseArrayLayer = 0u, | ||
| .layerCount = 1u | ||
| }, | ||
| .oldLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL, | ||
| .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, | ||
| }, |
There was a problem hiding this comment.
whats the point of the second barrier?
Barriering against a texture upload/update before this function is user's responsibility so I don't think the second barrier belongs here
Also you're supposed to join barriers so the bits would just OR together
Finally destStageMasks of ALL_COMMANDS_BITS is overkill, only barrier against your own work, the barrier the user performs after should take care of this
| .barrier = { | ||
| .dep = { | ||
| .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, | ||
| .srcAccessMask = ACCESS_FLAGS::NONE, | ||
| .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, | ||
| .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS | ||
| } | ||
| }, | ||
| .image = warpMapImage, | ||
| .subresourceRange = { | ||
| .aspectMask = IImage::EAF_COLOR_BIT, | ||
| .baseMipLevel = 0, | ||
| .levelCount = 1, | ||
| .baseArrayLayer = 0u, | ||
| .layerCount = 1u | ||
| }, | ||
| .oldLayout = IImage::LAYOUT::UNDEFINED, | ||
| .newLayout = IImage::LAYOUT::GENERAL, | ||
| } |
There was a problem hiding this comment.
again this barrier should be user's resposibility
There was a problem hiding this comment.
your Session::getPrevBarrier(..) would basically add bits for computeWarpMap not to step on its own toes
| { | ||
| IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t barriers[] = { | ||
| { | ||
| .barrier = { | ||
| .dep = { | ||
| .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, | ||
| .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, | ||
| .dstStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS, | ||
| .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS | ||
| } | ||
| }, | ||
| .image = warpMapImage, | ||
| .subresourceRange = { | ||
| .aspectMask = IImage::EAF_COLOR_BIT, | ||
| .baseMipLevel = 0, | ||
| .levelCount = 1, | ||
| .baseArrayLayer = 0u, | ||
| .layerCount = 1u | ||
| }, | ||
| .oldLayout = IImage::LAYOUT::GENERAL, | ||
| .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, | ||
| } | ||
| }; | ||
| cmdBuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = barriers }); | ||
| } | ||
|
|
||
| if (!cmdBuf->end()) | ||
| { | ||
| if (auto* logger = logicalDevice->getLogger()) | ||
| logger->log("ScreenShot: failed to end command buffer.", system::ILogger::ELL_ERROR); | ||
| return; | ||
| } |
There was a problem hiding this comment.
responsibility of the user, and they'll have a helper in the form of SSession::getNextBarrier()
| // TODO: Add an option for corner sampling or centered sampling as boolean parameter | ||
| template <typename ScalarT, typename LuminanceAccessorT |
There was a problem hiding this comment.
adjust the comment, its the LuminanceAccessor that should tell you if you're supposed to corner sample (because the luma image needs to take that into account at the borders so it had to be made "that way")
| LuminanceAccessorT _map; | ||
| float32_t _rcpAvgLuma; | ||
| float32_t2 _rcpWarpSize; | ||
| uint16_t2 _mapSize; | ||
| uint16_t _mip2x1 : 15; | ||
| uint16_t _aspect2x1 : 1; | ||
|
|
||
| static HierarchicalWarpGenerator<ScalarT, LuminanceAccessorT> create(NBL_CONST_REF_ARG(LuminanceAccessorT) lumaMap, uint32_t2 mapSize, bool aspect2x1) | ||
| { | ||
| HierarchicalWarpGenerator<ScalarT, LuminanceAccessorT> result; | ||
| result._map = lumaMap; | ||
| result._mapSize = mapSize; | ||
| // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 | ||
| result._mip2x1 = _static_cast<uint16_t>(findMSB(mapSize.y)); | ||
| result._aspect2x1 = aspect2x1; | ||
| return result; | ||
| } |
There was a problem hiding this comment.
your create is not setting _rcpAvgLuma nor _rcpWarpSize
Also what's ofc its Luma size, the luma accessor is called mapmapSize ? The size of the lumaMap or the warp ?
There was a problem hiding this comment.
also why pass aspect2x1 if you can check it from the lumaMap or warpmap size ?
There was a problem hiding this comment.
_rcpWarpSize is an unused variable and should be removed
| hierarchical_image::LuminanceReadAccessor<LuminanceAccessorT, ScalarT> && | ||
| concepts::Warp<PostWarpT> | ||
| ) | ||
| struct HierarchicalWarpSampler |
There was a problem hiding this comment.
just call it ComposedHierarchicalSampler, no warp in the name
| sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| const warp_sample_type warpSample = _warpGenerator.generate(xi); | ||
| const WarpResult<codomain_type> postWarpResult = PostWarpT::warp(warpSample.value()); |
There was a problem hiding this comment.
you need to store a postWarp member copied/initialized in create, because while the samplers will have constant methods, they will be allowed to be stateful
| is_scalar_v<ScalarT> && | ||
| hierarchical_image::LuminanceReadAccessor<LuminanceAccessorT, ScalarT> && | ||
| concepts::Warp<PostWarpT> | ||
| ) |
There was a problem hiding this comment.
when #1001 gets merged, this sampler will be a BackwardTractableSampler because PDF is known cheaply in both directions
and your PostWarpT will be required to meet actually it will need to be BackwardTractableSampler as well to have a backwardPdfBijectiveSampler because you need to call generateInverse on it to get a backwardPdf https://github.com/Devsh-Graphics-Programming/Nabla/pull/969/changes#r2991518272
| density_type backwardPdf(codomain_type codomainVal, scalar_type rcpAvgLuma) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| return _map.load(codomainVal) * rcpAvgLuma; |
There was a problem hiding this comment.
but you already have a _rcpAvgLuma member ?
There was a problem hiding this comment.
also you need to use the texelFetch signature, same as your gather, to make sure you sample level 0 and with no bilinear interpolation, so this means turning your floating point cornerSampled normalized UV into integer texel coordinate
const uint16_t2 coord = codomainVal*vector2_type(_lastTexel) + promote<vector2_type>(0.5);| density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| return generate(xi).pdf(); | ||
| } |
There was a problem hiding this comment.
PR #1001 lets you pass a cache_Type from generate to forwardPdf
There was a problem hiding this comment.
and you'll be able to stuff rcpPmf in that cache, so here all you'd do would just be return (_mapSize.x*_mapSize.y)/cache.rcpPmf;
Edit: taking into account https://github.com/Devsh-Graphics-Programming/Nabla/pull/969/changes#r2991807597 it would be _lastTexel.x * _lastTexel.y / cache.rcpPmf
| is_scalar_v<ScalarT> && | ||
| hierarchical_image::LuminanceReadAccessor<LuminanceAccessorT, ScalarT> | ||
| ) | ||
| struct HierarchicalWarpGenerator |
There was a problem hiding this comment.
since thanks to PR #1001 warps will be indistinguishable from BackwardTractableSampler, this can be called HierarchicalSampler or HierarchicalLuminanceSampler and meet BackwardTractableSampler constraints
| result._map = lumaMap; | ||
| result._mapSize = mapSize; | ||
| // Note: We use mapSize.y here because the currently the map aspect ratio can only be 1x1 or 2x1 | ||
| result._mip2x1 = _static_cast<uint16_t>(findMSB(mapSize.y)); |
There was a problem hiding this comment.
_mip2x1 misleading name, _penultimateMip better
| struct HierarchicalWarpSampler | ||
| { | ||
| using warp_generator_type = HierarchicalWarpGenerator<ScalarT, LuminanceAccessorT>; | ||
| using warp_sample_type = typename warp_generator_type::sample_type; |
There was a problem hiding this comment.
could just assert that your PostWarpT::domain_type is same as the HierarhicalSampler::codomain_type (current warp_generator_type::codomain_type)
| using domain_type = vector2_type; | ||
| using codomain_type = vector3_type; |
There was a problem hiding this comment.
domain_type = warp_generator_type::domain_type and codomain_type = PostWarp::codomain_type you can static_assert or require that they have vector_traits<>::dimension 2 and 3 respectively
| sample_type generate(domain_type xi) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| const warp_sample_type warpSample = _warpGenerator.generate(xi); | ||
| const WarpResult<codomain_type> postWarpResult = PostWarpT::warp(warpSample.value()); | ||
| return sample_type::create(postWarpResult.dst, postWarpResult.density * warpSample.pdf()); | ||
| } | ||
|
|
||
| density_type forwardPdf(domain_type xi) NBL_CONST_MEMBER_FUNC |
There was a problem hiding this comment.
when #1001 comes, you'll need to put into this struct's cache:
- the
_warpGenerator.generate's cache members - the hierarchical output sample (since its hidden and intermediate value needed to call postWarp.forwardPdf)
postWarp's cache
| density_type backwardPdf(codomain_type codomainVal) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| return PostWarpT::backwardPdf(codomainVal, _rcpAvgLuma) * _warpGenerator.backwardPdf(codomainVal); | ||
| } |
There was a problem hiding this comment.
does your test even compile, its the _warpGenerator (hierarhical sampler) that currently cares about _rcpAvgLuma not the post-warp
anyway neither should care.
There was a problem hiding this comment.
Anyway the function is wrong, because you'd need to call uv = _postWarp.generateInverse(codomainVal) to get the uv to feed into _warpGenerator.backwardPdf(uv);
| template <typename ScalarT, typename LuminanceAccessorT, typename PostWarpT | ||
| NBL_PRIMARY_REQUIRES( | ||
| is_scalar_v<ScalarT> && | ||
| hierarchical_image::LuminanceReadAccessor<LuminanceAccessorT, ScalarT> && |
There was a problem hiding this comment.
LuminanceAccessorT should tell you the PostWarpT since the luma map had to be made with luminance weighed by 1/forwardPdf of the post-warp, so the warp mustl be strictly paired with the luma map
and then in turn the PostWarpT also tells you your scalar_type (its own typedef)
So only one tempalte parameter needed
| template <typename ScalarT, typename LuminanceAccessorT, typename HierarchicalSamplerT, typename PostWarpT | ||
| NBL_PRIMARY_REQUIRES(is_scalar_v<ScalarT> && | ||
| concepts::accessors::GenericReadAccessor<LuminanceAccessorT, ScalarT, float32_t2> && | ||
| hierarchical_image::WarpAccessor<HierarchicalSamplerT, ScalarT> && | ||
| concepts::Warp<PostWarpT>) |
There was a problem hiding this comment.
I think you can pretty much just template on LuminanceAccessorT for same reasons I outlined for the ComposedHierarchicalSampler you don't need PostWarpT or ScalarT
and I think there isn't much freedom in HierarchicalSamplerT , you can construct it from the other 3
There was a problem hiding this comment.
wait, but your HierarchicalSamplerT isn't a HierarchicalWarpGenerator at alll, such a misleading name, its a WarpmapAccessorT
| using domain_type = vector2_type; | ||
| using codomain_type = vector3_type; |
There was a problem hiding this comment.
same as with the ComposedHierarchicalSampler
| using vector4_type = vector<ScalarT, 4>; | ||
| using domain_type = vector2_type; | ||
| using codomain_type = vector3_type; | ||
| using weight_type = scalar_type; |
There was a problem hiding this comment.
after PR1001 you'll also need a density_type
| if (p.x == 0) | ||
| xi.x = xi.x * scalar_type(0.5) + scalar_type(0.5); | ||
| if (p.y == 0) | ||
| xi.y = xi.y * scalar_type(0.5) + scalar_type(0.5); | ||
| if (p.x == _mapSize.x - 1) | ||
| xi.x = xi.x * scalar_type(0.5); | ||
| if (p.y == _mapSize.y - 1) | ||
| xi.y = xi.y * scalar_type(0.5); | ||
|
|
||
| const vector2_type directionUV = (vector2_type(p.x, p.y) + xi) / _mapSize; |
There was a problem hiding this comment.
there's a bit of a problem with what you're returning (corner sampled) because the PostWarp is resolution agnostic, it doesn't know about corner sampling and expects a [0,1]^2 Unit Square to map to the Sphere where the boundaries touch
So I'd divide by _mapSize - promote(1) and subtract 0.5 from the p+xi before that so you get a normalized coordinate within the corner sampling
There was a problem hiding this comment.
This also means that you return _lastTexel.x * _lastTexel.y /rcpPmf as the PDF instead because of how the UV domain gets shrunk
| LuminanceAccessorT _map; | ||
| float32_t _rcpAvgLuma; | ||
| float32_t2 _rcpWarpSize; | ||
| uint16_t2 _mapSize; |
There was a problem hiding this comment.
you shold really store _lastTexel (_mapSize-1) instead of mapSize
| WarpmapSampler<ScalarT, LuminanceAccessorT, HierarchicalSamplerT, PostWarpT> result; | ||
| result._lumaMap = lumaMap; | ||
| result._warpMap = warpMap; | ||
| result._effectiveWarpArea = (warpSize.x - 1) * (warpSize.y - 1); |
There was a problem hiding this comment.
warpMap is using a custom concept anyway, add a resolution() getter to it
There was a problem hiding this comment.
also store _lastWarpTexel instead of _effectiveWarpArea
|
|
||
| template <typename ScalarT, typename LuminanceAccessorT, typename HierarchicalSamplerT, typename PostWarpT | ||
| NBL_PRIMARY_REQUIRES(is_scalar_v<ScalarT> && | ||
| concepts::accessors::GenericReadAccessor<LuminanceAccessorT, ScalarT, float32_t2> && |
There was a problem hiding this comment.
use LuminanceRead accessor, and add a getAvgValue method to the concept
| const vector2_type interpolant; | ||
| matrix<scalar_type, 4, 2> uvs; | ||
| _warpMap.gatherUv(xi, uvs, interpolant); |
There was a problem hiding this comment.
you know the warp size, make it your job to compute the interpolant and apply corner sampling, so you give the gatherUv(coord,uvs) method a coord thats in [0.5,WarpMapSize-0.5]^2
Lesss code for user to write and get wrong, see https://github.com/Devsh-Graphics-Programming/Nabla-Examples-and-Tests/pull/257/changes#r2991829432
| const scalar_type detInterpolJacobian = determinant(matrix<scalar_type, 2, 2>( | ||
| lerp(xDiffs[0], xDiffs[1], interpolant.y), // first column dFdx | ||
| yDiff // second column dFdy | ||
| )) * _effectiveWarpArea; | ||
|
|
||
| const scalar_type pdf = abs(warpResult.density / detInterpolJacobian); |
There was a problem hiding this comment.
postWarp density, xDiffs, yDiff and interpolant.y go in the cache between generate and forwardPdf
There was a problem hiding this comment.
also the uv put into the post warp (for the forward weight function)
| weight_type forwardWeight(domain_type xi) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| return generate(xi).value(); | ||
| } | ||
|
|
||
| weight_type backwardWeight(codomain_type direction) NBL_CONST_MEMBER_FUNC | ||
| { | ||
| vector2_type envmapUv = PostWarpT::inverseWarp(direction); | ||
| scalar_type luma; | ||
| _lumaMap.get(envmapUv, luma); | ||
| return luma * _rcpAvgLuma * PostWarpT::backwardDensity(direction); | ||
| } |
There was a problem hiding this comment.
no the forward and backward weights need to be identical, so:
forwardWeightmust returnlumaMap.get(generateCache.uv) * _rcpAvgLuma * generateCache.postWarpPdfbackwardWeightmust do what it does now*
- except that right now it doesn't apply corner sampling to
envmapUvwhich it must do
Description
Rework environment map importance sampling to vulkan and hlsl
Testing
Rework example 0 to use vulkan and hlsl
Unit Test Pull Request
TODO list: