
Commit e0e452b
Author: devsh
Parent: a416a0c

add a codepath which sidesteps the transfer queue if Scratch Buffer is mapped


src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 51 additions & 7 deletions
@@ -3537,6 +3537,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 		}
 	}
 
+	// unfortunately can't count on large ReBAR heaps so we can't require the `scratchBuffer` to be mapped and writable
+	uint8_t* deviceASBuildScratchPtr = nullptr;
 	// check things necessary for building Acceleration Structures
 	if (reservations.willDeviceASBuild())
 	{
@@ -3554,8 +3556,9 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 			return retval;
 		}
 		constexpr buffer_usage_f asBuildScratchFlags = buffer_usage_f::EUF_STORAGE_BUFFER_BIT|buffer_usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT;
+		auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
 		// we use the scratch allocator both for scratch and uploaded geometry data
-		if (!params.scratchForDeviceASBuild->getBuffer()->getCreationParams().usage.hasFlags(asBuildScratchFlags|asBuildInputFlags))
+		if (!scratchBuffer->getCreationParams().usage.hasFlags(asBuildScratchFlags|asBuildInputFlags))
 		{
 			logger.log("An Acceleration Structure will be built on Device but scratch buffer doesn't have required usage flags!",system::ILogger::ELL_ERROR);
 			return retval;
@@ -3573,6 +3576,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 			logger.log("Acceleration Structure Scratch Device Memory Allocator cannot allocate with Physical Device's minimum required AS-build scratch alignment %u",system::ILogger::ELL_ERROR,minScratchAlignment);
 			return retval;
 		}
+		// returns a non-null pointer if the buffer is writeable directly by the host
+		deviceASBuildScratchPtr = reinterpret_cast<uint8_t*>(scratchBuffer->getBoundMemory().memory->getMappedPointer());
 	}
 	// the elusive and exotic host builds
 	if (reservations.willHostASBuild() && !params.scratchForHostASBuild)
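The whole fast path hinges on this probe: `getMappedPointer()` only returns non-null when the scratch allocation landed on host-visible memory (e.g. a ReBAR heap) and was mapped. A minimal raw-Vulkan sketch of the same probe, with `tryGetMappedScratch` and its `hostVisible` parameter as illustrative assumptions rather than Nabla API:

    // Illustrative raw-Vulkan analogue of the probe above (not Nabla API):
    // yields a host pointer into the buffer's backing memory, or nullptr when
    // the heap isn't HOST_VISIBLE (no ReBAR, pure DEVICE_LOCAL memory).
    #include <vulkan/vulkan.h>
    #include <cstdint>

    uint8_t* tryGetMappedScratch(VkDevice device, VkDeviceMemory memory, VkDeviceSize size, bool hostVisible)
    {
        if (!hostVisible) // recorded from the VkMemoryType the allocation came from
            return nullptr;
        void* ptr = nullptr;
        if (vkMapMemory(device,memory,0ull,size,0,&ptr)!=VK_SUCCESS)
            return nullptr;
        return static_cast<uint8_t*>(ptr);
    }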
@@ -4260,11 +4265,17 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 			const auto tlasCount = tlasesToBuild.size();
 			ownershipTransfers.reserve(blasCount+tlasCount);
 
+			auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
+			core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
+			const bool manualFlush = scratchBuffer->getBoundMemory().memory->haveToMakeVisible();
+			if (manualFlush) // BLAS builds do max 3 writes each, TLAS builds do max 2 writes each
+				flushRanges.reserve(hlsl::max<uint32_t>(blasCount*3,tlasCount*2));
+
 			// Right now we build all BLAS first, then all TLAS
 			// (didn't fancy horrible concurrency management taking compactions into account)
 			auto queryPool = device->createQueryPool({.queryCount=hlsl::max<uint32_t>(blasCount,tlasCount),.queryType=IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE});
-			// whether we actually reset more than we need shouldn't cost us anything
-			computeCmdBuf->cmdbuf->resetQueryPool(queryPool.get(),0,queryPool->getCreationParameters().queryCount);
+
+			// lambdas!
 
 			// Not messing around with listing AS backing buffers individually, ergonomics of that are null
 			const asset::SMemoryBarrier readASInASCompactBarrier = {
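`haveToMakeVisible()` flags a mapped allocation that lacks host coherence: every CPU write must later be flushed, and flush ranges have to respect the device's `nonCoherentAtomSize`, which is presumably what `align_non_coherent_tag` automates further down. A hedged raw-Vulkan sketch of that alignment rule (`makeFlushRange` is an illustrative name):

    // Expand [offset, offset+size) to nonCoherentAtomSize boundaries; per the
    // Vulkan spec a flush range's offset must be atom-aligned and its size a
    // multiple of the atom, or reach the end of the allocation
    // (clamp to the allocation's end in real code; VK_WHOLE_SIZE covers that).
    #include <vulkan/vulkan.h>

    VkMappedMemoryRange makeFlushRange(VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkDeviceSize atom)
    {
        VkMappedMemoryRange range = {};
        range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        range.memory = memory;
        range.offset = offset/atom*atom; // round down
        range.size = (offset+size+atom-1)/atom*atom-range.offset; // round up
        return range;
    }
    // Batched later via vkFlushMappedMemoryRanges(device,count,ranges).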
@@ -4471,13 +4482,18 @@ if (worstSize>minScratchSize)
 				{
 					recordBuildCommands();
 					// TODO: make sure compute acquires ownership of geometry data for the build
+					// if writing to scratch directly, flush the writes
+					if (!flushRanges.empty())
+					{
+						device->flushMappedMemoryRanges(flushRanges);
+						flushRanges.clear();
+					}
 					drainCompute();
 				}
 				// queue up a deferred allocation
 				params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0],params.compute->getFutureScratchSemaphore());
 			}
 			// stream the instance/geometry input in
-			// unfortunately can't count on large ReBAR heaps so we can't force the `scratchBuffer` to be mapped and writable
 			SBufferRange<IGPUBuffer> range = {.offset=offsets[1],.size=sizes[1],.buffer=smart_refctd_ptr<IGPUBuffer>(params.scratchForDeviceASBuild->getBuffer())};
 			{
 				bool success = true;
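Ordering is the point of this hunk: `flushMappedMemoryRanges` must run before the submit inside `drainCompute()`, because a queue submission only makes host writes that were already flushed visible to the device. A sketch of that per-batch discipline in raw Vulkan (`flushThenSubmit` and its parameters are illustrative):

    // Flush all accumulated non-coherent writes, then submit the batch that
    // reads them; vkQueueSubmit makes flushed host writes device-visible.
    #include <vulkan/vulkan.h>
    #include <vector>

    VkResult flushThenSubmit(VkDevice device, VkQueue compute, std::vector<VkMappedMemoryRange>& flushRanges, const VkSubmitInfo& submit, VkFence fence)
    {
        if (!flushRanges.empty())
        {
            const VkResult res = vkFlushMappedMemoryRanges(device,uint32_t(flushRanges.size()),flushRanges.data());
            if (res!=VK_SUCCESS)
                return res;
            flushRanges.clear(); // next batch starts accumulating fresh
        }
        return vkQueueSubmit(compute,1,&submit,fence);
    }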
@@ -4519,9 +4535,20 @@ if (worstSize>minScratchSize)
 				fillInstances.blasBuildMap = &reservations.m_blasBuildMap;
 				fillInstances.dedupBLASesUsed = &dedupBLASesUsed;
 				fillInstances.instances = instances;
-				success = success && params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstances);
+				if (deviceASBuildScratchPtr)
+				{
+					fillInstances(deviceASBuildScratchPtr+range.offset,0ull,range.size);
+					if (manualFlush)
+						flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,range.offset,range.size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
+				}
+				else if (params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstances))
+				{
+					// TODO: release and acquire ownership if necessary
+				}
+				else
+					success = false;
 			}
-			if (as->usesMotion())
+			if (success && as->usesMotion())
 			{
 				range.offset = offsets[2];
 				range.size = sizes[2];
@@ -4549,7 +4576,18 @@ if (worstSize>minScratchSize)
 				FillInstancePointers fillInstancePointers;
 				fillInstancePointers.instances = instances;
 				fillInstancePointers.instanceAddress = range.buffer->getDeviceAddress()+offsets[1];
-				success = success && params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstancePointers);
+				if (deviceASBuildScratchPtr)
+				{
+					fillInstancePointers(deviceASBuildScratchPtr+range.offset,0ull,range.size);
+					if (manualFlush)
+						flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,range.offset,range.size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
+				}
+				else if (params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstancePointers))
+				{
+					// TODO: release and acquire ownership if necessary
+				}
+				else
+					success = false;
 			}
 			// TODO: pipeline barrier & ownership release between xfer and compute
 			// current recording buffer may have changed
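Both call sites above repeat the same three-way dispatch. Distilled into one helper it reads as follows; this is a sketch assuming a fill functor with the `(dst, blockOffset, blockSize)` signature the diff uses, and `uploadViaMapOrStaging` is an illustrative name, not converter API:

    // Write through the mapped pointer when one exists (queuing a flush on
    // non-coherent memory), otherwise fall back to the staging-buffer upload.
    #include <cstdint>

    template<typename Fill, typename Stage, typename QueueFlush>
    bool uploadViaMapOrStaging(uint8_t* mappedBase, uint64_t offset, uint64_t size, bool manualFlush, Fill&& fill, Stage&& stage, QueueFlush&& queueFlush)
    {
        if (mappedBase) // host-visible scratch: sidestep the transfer queue
        {
            fill(mappedBase+offset,0ull,size);
            if (manualFlush) // remember the range for the pre-submit flush
                queueFlush(offset,size);
            return true;
        }
        // no mapping: stream through the transfer queue as before
        // (ownership release/acquire between queues is still TODO in the diff)
        return stage(offset,size);
    }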
@@ -4607,6 +4645,11 @@ if (worstSize>minScratchSize)
 			}
 			// finish the last batch
 			recordBuildCommands();
+			if (!flushRanges.empty())
+			{
+				device->flushMappedMemoryRanges(flushRanges);
+				flushRanges.clear();
+			}
 			computeCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Compact TLASes END");
 			computeCmdBuf->cmdbuf->endDebugMarker();
 		}
@@ -4617,6 +4660,7 @@ if (worstSize>minScratchSize)
 			// compact needs to wait for Build then record queries
 			if (!compactions.empty() &&
 				pipelineBarrier(computeCmdBuf,{.memBarriers={&readASInASCompactBarrier,1}},"Failed to sync Acceleration Structure builds with compactions!") &&
+				computeCmdBuf->cmdbuf->resetQueryPool(queryPool.get(),0,compactions.size()) &&
 				computeCmdBuf->cmdbuf->writeAccelerationStructureProperties(compactions,IQueryPool::TYPE::ACCELERATION_STRUCTURE_COMPACTED_SIZE,queryPool.get(),0)
 			)
 			{
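The reset also shrank from the whole pool to exactly `compactions.size()` queries, recorded right before the write that consumes them. The equivalent raw-Vulkan pairing looks like this (a sketch; in real code the KHR entry point is fetched via `vkGetDeviceProcAddr`):

    // Reset only the queries about to be written, in the same command buffer,
    // immediately before recording the compacted-size writes.
    #include <vulkan/vulkan.h>

    void recordCompactedSizeQueries(VkCommandBuffer cmd, VkQueryPool pool, uint32_t count, const VkAccelerationStructureKHR* ases)
    {
        vkCmdResetQueryPool(cmd,pool,0u,count);
        vkCmdWriteAccelerationStructuresPropertiesKHR(cmd,count,ases,
            VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,pool,0u);
    }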
