Skip to content

Commit 5c519d0

Browse files
author
devsh
committed
core::makeRAIIExiter is literally the best thing since sliced bread
Fix bugs: - ReBAR only buffer transfers dereferencing a nullptr transfer cmbduf - BLAS and TLAS memory allocations latching on semaphores which will never signal if the command recording fails for some reason
1 parent 310eafd commit 5c519d0

File tree

1 file changed

+43
-12
lines changed

1 file changed

+43
-12
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3991,7 +3991,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
39913991
core::vector<buffer_mem_barrier_t> finalReleases;
39923992
finalReleases.reserve(buffersToUpload.size());
39933993
// do the uploads
3994-
if (!buffersToUpload.empty())
3994+
if (!buffersToUpload.empty() && xferCmdBuf)
39953995
{
39963996
xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Buffers START");
39973997
xferCmdBuf->cmdbuf->endDebugMarker();
@@ -4039,7 +4039,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
40394039
.range = range
40404040
});
40414041
}
4042-
if (!buffersToUpload.empty())
4042+
if (!buffersToUpload.empty() && xferCmdBuf)
40434043
{
40444044
xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Buffers END");
40454045
xferCmdBuf->cmdbuf->endDebugMarker();
@@ -4653,6 +4653,12 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
46534653
return false;
46544654
};
46554655
//
4656+
using scratch_allocator_t = std::remove_reference_t<decltype(*params.scratchForDeviceASBuild)>;
4657+
using addr_t = typename scratch_allocator_t::size_type;
4658+
core::vector<addr_t> scratchOffsets;
4659+
scratchOffsets.reserve(maxASCount);
4660+
core::vector<addr_t> scratchSizes;
4661+
scratchSizes.reserve(maxASCount);
46564662
auto recordBuildCommandsBase = [&](auto& buildInfos, auto& rangeInfos)->void
46574663
{
46584664
if (buildInfos.empty())
@@ -4665,13 +4671,25 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
46654671
bool success = !uniQueue || !deviceASBuildScratchPtr || pipelineBarrier(computeCmdBuf,{.memBarriers={&readGeometryOrInstanceInASBuildBarrier,1}},"Pipeline Barriers of Acceleration Structure backing Buffers failed!");
46664672
//
46674673
success = success && computeCmdBuf->cmdbuf->buildAccelerationStructures({buildInfos},rangeInfos.data());
4668-
if (!success)
4669-
for (const auto& info : buildInfos)
4674+
if (success)
46704675
{
4671-
const auto stagingFound = findInStaging.template operator()<ICPUTopLevelAccelerationStructure>(info.dstAS);
4672-
smart_refctd_ptr<const ICPUTopLevelAccelerationStructure> dummy; // already null at this point
4673-
markFailure("AS Build Command Recording",&dummy,&stagingFound->second);
4676+
submitsNeeded |= IQueue::FAMILY_FLAGS::COMPUTE_BIT;
4677+
// queue up a deferred allocation
4678+
params.scratchForDeviceASBuild->multi_deallocate(scratchOffsets.size(),scratchOffsets.data(),scratchSizes.data(),params.compute->getFutureScratchSemaphore());
4679+
}
4680+
else
4681+
{
4682+
// release right away
4683+
params.scratchForDeviceASBuild->multi_deallocate(scratchOffsets.size(),scratchOffsets.data(),scratchSizes.data());
4684+
for (const auto& info : buildInfos)
4685+
{
4686+
const auto stagingFound = findInStaging.template operator()<ICPUTopLevelAccelerationStructure>(info.dstAS);
4687+
smart_refctd_ptr<const ICPUTopLevelAccelerationStructure> dummy; // already null at this point
4688+
markFailure("AS Build Command Recording",&dummy,&stagingFound->second);
4689+
}
46744690
}
4691+
scratchOffsets.clear();
4692+
scratchSizes.clear();
46754693
buildInfos.clear();
46764694
rangeInfos.clear();
46774695
};
@@ -4813,8 +4831,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48134831
trackedBLASes.clear();
48144832
};
48154833
//
4816-
using scratch_allocator_t = std::remove_reference_t<decltype(*params.scratchForDeviceASBuild)>;
4817-
using addr_t = typename scratch_allocator_t::size_type;
48184834
const auto& limits = physDev->getLimits();
48194835
for (auto& tlasToBuild : tlasesToBuild)
48204836
{
@@ -4865,9 +4881,25 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48654881
constexpr uint32_t MaxAllocCount = 3;
48664882
addr_t offsets[MaxAllocCount] = {scratch_allocator_t::invalid_value,scratch_allocator_t::invalid_value,scratch_allocator_t::invalid_value};
48674883
const addr_t sizes[MaxAllocCount] = {tlasToBuild.second.scratchSize,instanceDataSize,sizeof(void*)*instanceCount};
4884+
const auto AllocCount = as->usesMotion() ? 3:2;
4885+
// clean up the allocation if we fail to make it to the end of loop for whatever reason
4886+
bool abortAllocation = true;
4887+
auto deallocSrc = core::makeRAIIExiter([&params,&scratchOffsets,&scratchSizes,AllocCount,&offsets,&sizes,&abortAllocation]()->void
4888+
{
4889+
// if got to end of loop queue up the release of memory, otherwise release right away
4890+
if (abortAllocation)
4891+
params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0]);
4892+
else
4893+
for (auto i=0; i<AllocCount; i++)
4894+
{
4895+
scratchOffsets.push_back(offsets[i]);
4896+
scratchSizes.push_back(sizes[i]);
4897+
}
4898+
}
4899+
);
4900+
// allocate out scratch or submit overflow
48684901
{
48694902
const addr_t alignments[MaxAllocCount] = {limits.minAccelerationStructureScratchOffsetAlignment,16,alignof(uint64_t)};
4870-
const auto AllocCount = as->usesMotion() ? 3:2;
48714903
// if fail then flush and keep trying till space is made
48724904
for (uint32_t t=0; params.scratchForDeviceASBuild->multi_allocate(AllocCount,&offsets[0],&sizes[0],&alignments[0])!=0u; t++)
48734905
if (t==1) // don't flush right away cause allocator not defragmented yet
@@ -4881,8 +4913,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
48814913
}
48824914
drainCompute();
48834915
}
4884-
// queue up a deferred allocation
4885-
params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0],params.compute->getFutureScratchSemaphore());
48864916
}
48874917
// stream the instance/geometry input in
48884918
const size_t trackedBLASesOffset = trackedBLASes.size();
@@ -4983,6 +5013,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
49835013
buildInfo.trackedBLASes = {reinterpret_cast<const p_p_BLAS_t&>(trackedBLASesOffset),trackedBLASes.size()-trackedBLASesOffset};
49845014
// no special extra byte offset into the instance buffer
49855015
rangeInfos.emplace_back(instanceCount,0u);
5016+
abortAllocation = false;
49865017
//
49875018
const bool willCompact = tlasToBuild.second.compact;
49885019
if (willCompact)

0 commit comments

Comments
 (0)