@@ -3537,6 +3537,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 }
 }
 
+// unfortunately can't count on large ReBAR heaps so we can't require the `scratchBuffer` to be mapped and writable
+uint8_t* deviceASBuildScratchPtr = nullptr;
 // check things necessary for building Acceleration Structures
 if (reservations.willDeviceASBuild())
 {
@@ -3554,8 +3556,9 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 	return retval;
 }
 constexpr buffer_usage_f asBuildScratchFlags = buffer_usage_f::EUF_STORAGE_BUFFER_BIT|buffer_usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT;
+auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
 // we use the scratch allocator both for scratch and uploaded geometry data
-if (!params.scratchForDeviceASBuild->getBuffer()->getCreationParams().usage.hasFlags(asBuildScratchFlags|asBuildInputFlags))
+if (!scratchBuffer->getCreationParams().usage.hasFlags(asBuildScratchFlags|asBuildInputFlags))
 {
 	logger.log("An Acceleration Structure will be built on Device but scratch buffer doesn't have required usage flags!",system::ILogger::ELL_ERROR);
 	return retval;
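Note on the check above: Vulkan itself mandates these usages — AS-build scratch must be usable as a storage buffer addressed by `VkDeviceAddress`, while the build inputs (geometry and instance data, which share this buffer here) need the dedicated build-input-read-only usage. A sketch of the raw-Vulkan flags that `asBuildScratchFlags|asBuildInputFlags` presumably maps to (`asBuildInputFlags` itself is defined outside this hunk):

```cpp
// Sketch: the VkBuffer usage flags behind the Nabla-side check (not the converter's code).
VkBufferUsageFlags scratchUsage =
	VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |                // scratch is read/written like a storage buffer
	VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;          // builds reference scratch via VkDeviceAddress
VkBufferUsageFlags buildInputUsage =
	VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR |
	VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;          // geometry/instance data read during builds
```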
@@ -3573,6 +3576,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 	logger.log("Acceleration Structure Scratch Device Memory Allocator cannot allocate with Physical Device's minimum required AS-build scratch alignment %u",system::ILogger::ELL_ERROR,minScratchAlignment);
 	return retval;
 }
+// returns a non-null pointer if the buffer is writable directly by the host
+deviceASBuildScratchPtr = reinterpret_cast<uint8_t*>(scratchBuffer->getBoundMemory().memory->getMappedPointer());
 }
 // the elusive and exotic host builds
 if (reservations.willHostASBuild() && !params.scratchForHostASBuild)
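The assignment of `deviceASBuildScratchPtr` above is what makes the rest of this PR opt-in: `getMappedPointer()` returns non-null only when the allocation backing the scratch buffer happens to be host-mapped (e.g. the allocator placed it on a mapped ReBAR heap), so the pointer doubles as the runtime switch between direct host writes and staged uploads. A condensed sketch of the control flow, using only accessors that appear in this diff:

```cpp
// Sketch: the nullable pointer gates the two upload paths used later in this PR.
uint8_t* deviceASBuildScratchPtr = nullptr;
if (reservations.willDeviceASBuild())
{
	auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
	// ... usage-flag and alignment validation elided ...
	// non-null only if the bound memory is currently host-mapped
	deviceASBuildScratchPtr = reinterpret_cast<uint8_t*>(
		scratchBuffer->getBoundMemory().memory->getMappedPointer());
}
// later: if (deviceASBuildScratchPtr) write in place, else updateBufferRangeViaStagingBuffer(...)
```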
@@ -4260,11 +4265,17 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 const auto tlasCount = tlasesToBuild.size();
 ownershipTransfers.reserve(blasCount+tlasCount);
 
+auto* scratchBuffer = params.scratchForDeviceASBuild->getBuffer();
+core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
+const bool manualFlush = scratchBuffer->getBoundMemory().memory->haveToMakeVisible();
+if (manualFlush) // BLAS builds do max 3 writes each, TLAS builds do max 2 writes each
+	flushRanges.reserve(hlsl::max<uint32_t>(blasCount*3,tlasCount*2));
+
 // Right now we build all BLAS first, then all TLAS
 // (didn't fancy horrible concurrency management taking compactions into account)
 auto queryPool = device->createQueryPool({.queryCount=hlsl::max<uint32_t>(blasCount,tlasCount),.queryType=IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE});
-// whether we actually reset more than we need shouldn't cost us anything
-computeCmdBuf->cmdbuf->resetQueryPool(queryPool.get(),0,queryPool->getCreationParameters().queryCount);
+
+// lambdas!
 
 // Not messing around with listing AS backing buffers individually, ergonomics of that are null
 const asset::SMemoryBarrier readASInASCompactBarrier = {
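`manualFlush` handles mapped memory that is not host-coherent: writes through the mapped pointer become device-visible only after an explicit flush, which is presumably what `haveToMakeVisible()` reports. The reserve bound is per batch, not a sum, because BLAS and TLAS builds never interleave (see the comment above), so `max(blasCount*3, tlasCount*2)` dirty ranges is the worst case. A sketch of the bookkeeping, reusing the calls visible in this diff:

```cpp
// Sketch: deferred-flush bookkeeping for non-coherent mapped scratch memory.
auto* memory = scratchBuffer->getBoundMemory().memory;
core::vector<ILogicalDevice::MappedMemoryRange> flushRanges;
const bool manualFlush = memory->haveToMakeVisible(); // false for HOST_COHERENT allocations

// after every host write into the mapped scratch:
if (manualFlush) // the tag presumably rounds offset/size out to nonCoherentAtomSize, as Vulkan requires
	flushRanges.emplace_back(memory,range.offset,range.size,
		ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);

// once per batch, before the device consumes the scratch:
if (!flushRanges.empty())
{
	device->flushMappedMemoryRanges(flushRanges); // one vkFlushMappedMemoryRanges call
	flushRanges.clear();
}
```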
@@ -4471,13 +4482,18 @@ if (worstSize>minScratchSize)
 {
 	recordBuildCommands();
 	// TODO: make sure compute acquires ownership of geometry data for the build
+	// if writing to scratch directly, flush the writes
+	if (!flushRanges.empty())
+	{
+		device->flushMappedMemoryRanges(flushRanges);
+		flushRanges.clear();
+	}
 	drainCompute();
 }
 // queue up a deferred allocation
 params.scratchForDeviceASBuild->multi_deallocate(AllocCount,&offsets[0],&sizes[0],params.compute->getFutureScratchSemaphore());
 }
 // stream the instance/geometry input in
-// unfortunately can't count on large ReBAR heaps so we can't force the `scratchBuffer` to be mapped and writable
 SBufferRange<IGPUBuffer> range = {.offset=offsets[1],.size=sizes[1],.buffer=smart_refctd_ptr<IGPUBuffer>(params.scratchForDeviceASBuild->getBuffer())};
 {
 	bool success = true;
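One ordering subtlety in the hunk above: the flush is pure host-side work, so it need not precede command recording, only the submission inside `drainCompute()` that allows the device to start reading scratch. Per batch, the host timeline is (sketch):

```cpp
// Sketch: per-batch host timeline established above.
recordBuildCommands();                            // record the AS build commands
if (!flushRanges.empty())
{
	device->flushMappedMemoryRanges(flushRanges); // make this batch's host writes visible
	flushRanges.clear();                          // the ranges are per-batch state
}
drainCompute();                                   // submit; only now may the GPU read scratch
```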
@@ -4519,9 +4535,20 @@ if (worstSize>minScratchSize)
 fillInstances.blasBuildMap = &reservations.m_blasBuildMap;
 fillInstances.dedupBLASesUsed = &dedupBLASesUsed;
 fillInstances.instances = instances;
-success = success && params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstances);
+if (deviceASBuildScratchPtr)
+{
+	fillInstances(deviceASBuildScratchPtr+range.offset,0ull,range.size);
+	if (manualFlush)
+		flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,range.offset,range.size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
+}
+else if (params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstances))
+{
+	// TODO: release and acquire ownership if necessary
+}
+else
+	success = false;
 }
-if (as->usesMotion())
+if (success && as->usesMotion())
 {
 	range.offset = offsets[2];
 	range.size = sizes[2];
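The replaced `success = success && ...` line becomes the three-way pattern used for every input stream from here on: write through the mapped pointer when it exists, otherwise fall back to the staging upload, and record failure only if the fallback fails too. The same functor serves both paths because `updateBufferRangeViaStagingBuffer` appears to invoke it with the `(dst, offsetInRange, blockSize)` shape the direct call uses. A distilled sketch, with `fill` standing in for `fillInstances`/`fillInstancePointers`:

```cpp
// Sketch: one filler functor, two delivery paths.
if (deviceASBuildScratchPtr)
{
	fill(deviceASBuildScratchPtr+range.offset,0ull,range.size); // write the mapped scratch in place
	if (manualFlush) // queue the dirty range for the batched flush
		flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,
			range.offset,range.size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
}
else if (params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fill))
{
	// TODO (per the diff): release/acquire queue-family ownership if transfer != compute
}
else
	success = false; // neither path could deliver the input
```

The new `success &&` guard on `as->usesMotion()` then short-circuits the remaining streams of an instance buffer whose first upload already failed.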
@@ -4549,7 +4576,18 @@ if (worstSize>minScratchSize)
 FillInstancePointers fillInstancePointers;
 fillInstancePointers.instances = instances;
 fillInstancePointers.instanceAddress = range.buffer->getDeviceAddress()+offsets[1];
-success = success && params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstancePointers);
+if (deviceASBuildScratchPtr)
+{
+	fillInstancePointers(deviceASBuildScratchPtr+range.offset,0ull,range.size);
+	if (manualFlush)
+		flushRanges.emplace_back(scratchBuffer->getBoundMemory().memory,range.offset,range.size,ILogicalDevice::MappedMemoryRange::align_non_coherent_tag);
+}
+else if (params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,fillInstancePointers))
+{
+	// TODO: release and acquire ownership if necessary
+}
+else
+	success = false;
 }
 // TODO: pipeline barrier & ownership release between xfer and compute
 // current recording buffer may have changed
@@ -4607,6 +4645,11 @@ if (worstSize>minScratchSize)
 }
 // finish the last batch
 recordBuildCommands();
+if (!flushRanges.empty())
+{
+	device->flushMappedMemoryRanges(flushRanges);
+	flushRanges.clear();
+}
 computeCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Compact TLASes END");
 computeCmdBuf->cmdbuf->endDebugMarker();
 }
@@ -4617,6 +4660,7 @@ if (worstSize>minScratchSize)
 // compact needs to wait for Build then record queries
 if (!compactions.empty() &&
 	pipelineBarrier(computeCmdBuf,{.memBarriers={&readASInASCompactBarrier,1}},"Failed to sync Acceleration Structure builds with compactions!") &&
+	computeCmdBuf->cmdbuf->resetQueryPool(queryPool.get(),0,compactions.size()) &&
 	computeCmdBuf->cmdbuf->writeAccelerationStructureProperties(compactions,IQueryPool::TYPE::ACCELERATION_STRUCTURE_COMPACTED_SIZE,queryPool.get(),0)
 )
 {
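The relocated `resetQueryPool` is a correctness-preserving tightening rather than just a micro-optimization: Vulkan requires each query to be reset before it is written, and recording the reset inside the short-circuit chain touches only the `compactions.size()` queries actually used, and only when compactions exist at all. Roughly the raw-Vulkan sequence, with `cmdbuf`, `queryPool`, `compactionCount` and `compactedASes` as illustrative placeholders:

```cpp
// Sketch: raw-Vulkan equivalent of the tail of the new &&-chain.
vkCmdResetQueryPool(cmdbuf, queryPool, 0u, compactionCount); // queries must be reset before use
vkCmdWriteAccelerationStructuresPropertiesKHR(cmdbuf,
	compactionCount, compactedASes,                          // one query per AS being measured
	VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR,
	queryPool, 0u);                                          // firstQuery = 0
```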