Skip to content

Commit 3433067

Browse files
author
devsh
committed
added commandbuffer debug markers to CAssetConverter, sketch out the TLAS build and compaction procedure
1 parent b2282f2 commit 3433067

File tree

2 files changed

+103
-18
lines changed

2 files changed

+103
-18
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ class CAssetConverter : public core::IReferenceCounted
4141
asset::ICPUBuffer,
4242
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4343
asset::ICPUBottomLevelAccelerationStructure,
44-
asset::ICPUTopLevelAccelerationStructure,
4544
#endif
45+
asset::ICPUTopLevelAccelerationStructure,
4646
asset::ICPUImage,
4747
asset::ICPUBufferView,
4848
asset::ICPUImageView,

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 102 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3565,12 +3565,18 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
35653565
return true;
35663566
};
35673567

3568+
// cache some state so we don't need to look it up again later
3569+
auto xferCmdBuf = params.transfer->getCommandBufferForRecording();
3570+
3571+
using buffer_mem_barrier_t = IGPUCommandBuffer::SBufferMemoryBarrier<IGPUCommandBuffer::SOwnershipTransferBarrier>;
35683572
// upload Buffers
35693573
auto& buffersToUpload = reservations.m_bufferConversions;
35703574
{
3571-
core::vector<IGPUCommandBuffer::SBufferMemoryBarrier<IGPUCommandBuffer::SOwnershipTransferBarrier>> ownershipTransfers;
3575+
core::vector<buffer_mem_barrier_t> ownershipTransfers;
35723576
ownershipTransfers.reserve(buffersToUpload.size());
35733577
// do the uploads
3578+
if (!buffersToUpload.empty())
3579+
xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Buffers");
35743580
for (auto& item : buffersToUpload)
35753581
{
35763582
auto* buffer = item.gpuObj;
@@ -3585,6 +3591,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
35853591
bool success = ownerQueueFamily!=QueueFamilyInvalid;
35863592
// do the upload
35873593
success = success && params.utilities->updateBufferRangeViaStagingBuffer(*params.transfer,range,item.canonical->getPointer());
3594+
// current recording buffer may have changed
3595+
xferCmdBuf = params.transfer->getCommandBufferForRecording();
35883596
// let go of canonical asset (may free RAM)
35893597
item.canonical = nullptr;
35903598
if (!success)
@@ -3608,14 +3616,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
36083616
.range = range
36093617
});
36103618
}
3619+
if (!buffersToUpload.empty())
3620+
xferCmdBuf->cmdbuf->endDebugMarker();
36113621
buffersToUpload.clear();
36123622
// release ownership
36133623
if (!ownershipTransfers.empty())
3614-
pipelineBarrier(params.transfer->getCommandBufferForRecording(),{.memBarriers={},.bufBarriers=ownershipTransfers},"Ownership Releases of Buffers Failed");
3624+
pipelineBarrier(xferCmdBuf,{.memBarriers={},.bufBarriers=ownershipTransfers},"Ownership Releases of Buffers Failed");
36153625
}
36163626

3617-
// some state so we don't need to look later
3618-
auto xferCmdBuf = params.transfer->getCommandBufferForRecording();
36193627
// whether we actually get around to doing that depends on validity and success of transfers
36203628
const bool shouldDoSomeCompute = reqQueueFlags.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
36213629
// the flag check stops us dereferencing an invalid pointer
@@ -3729,6 +3737,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
37293737
transferBarriers.reserve(MaxMipLevelsPastBase);
37303738
computeBarriers.reserve(MaxMipLevelsPastBase);
37313739
// finally go over the images
3740+
xferCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Upload Images");
37323741
for (auto& item : imagesToUpload)
37333742
{
37343743
// basics
@@ -4114,15 +4123,47 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
41144123
}
41154124
}
41164125
}
4126+
xferCmdBuf->cmdbuf->endDebugMarker();
41174127
imagesToUpload.clear();
41184128
}
41194129

4120-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4121-
// BLAS builds
4122-
core::unordered_map<IGPUBottomLevelAccelerationStructure*,smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> compactedBLASMap;
4123-
auto& blasToBuild = reservations.m_blasConversions[0];
4124-
if (const auto blasCount = blasToBuild.size(); blasCount)
4130+
// Host builds are unsupported
4131+
assert(reservations.m_blasConversions[1].empty() && reservations.m_tlasConversions[1].empty());
4132+
4133+
// Acceleration Structures
4134+
if (reservations.willDeviceASBuild())
41254135
{
4136+
// we release BLAS and TLAS Storage Buffer ownership at the same time, because BLASes about to be released may need to be read by TLAS builds
4137+
core::vector<buffer_mem_barrier_t> ownershipTransfers;
4138+
// the already compacted BLASes need to be written into the TLASes using them
4139+
core::unordered_map<IGPUBottomLevelAccelerationStructure*,smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> compactedBLASMap;
4140+
4141+
// Device Builds
4142+
auto& blasesToBuild = reservations.m_blasConversions[0];
4143+
auto& tlasesToBuild = reservations.m_tlasConversions[0];
4144+
const auto blasCount = blasesToBuild.size();
4145+
const auto tlasCount = tlasesToBuild.size();
4146+
ownershipTransfers.reserve(blasCount+tlasCount);
4147+
4148+
// Right now we build all BLAS first, then all TLAS
4149+
// (didn't fancy horrible concurrency management taking compactions into account)
4150+
auto queryPool = device->createQueryPool({.queryCount=hlsl::max<uint32_t>(blasCount,tlasCount),.queryType=IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE});
4151+
// resetting more queries than we actually need shouldn't cost us anything
4152+
computeCmdBuf->cmdbuf->resetQueryPool(queryPool.get(),0,queryPool->getCreationParameters().queryCount);
4153+
4154+
// Not messing around with listing AS backing buffers individually, ergonomics of that are null
4155+
const asset::SMemoryBarrier readASInASCompactBarrier = {
4156+
.srcStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,
4157+
.srcAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,
4158+
.dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,
4159+
.dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT
4160+
};
4161+
4162+
// Device BLAS builds
4163+
if (blasCount)
4164+
{
4165+
compactedBLASMap.reserve(blasCount);
4166+
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
41264167
constexpr auto GeometryIsAABBFlag = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
41274168

41284169
core::vector<IGPUBottomLevelAccelerationStructure::DeviceBuildInfo> buildInfos; buildInfos.reserve(blasCount);
@@ -4143,7 +4184,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
41434184
triangles.reserve(totalTriGeoCount);
41444185
triangles.reserve(totalAABBGeoCount);
41454186
}
4146-
#if 0
41474187
for (auto& item : blasToBuild)
41484188
{
41494189
auto* as = item.gpuObj;
@@ -4175,15 +4215,60 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
41754215
}
41764216
}
41774217
#endif
4178-
}
4218+
blasesToBuild.clear();
4219+
}
41794220

4180-
// TLAS builds
4181-
auto& tlasToBuild = reservations.m_tlasConversions[0];
4182-
if (!tlasToBuild.empty())
4183-
{
4221+
// Device TLAS builds
4222+
if (tlasCount)
4223+
{
4224+
computeCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Build TLASes");
4225+
// A single pipeline barrier to ensure BLASes build before TLASes is needed
4226+
const asset::SMemoryBarrier readBLASInTLASBuildBarrier = {
4227+
// the last use of the source BLAS could have been a build or a compaction
4228+
.srcStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,
4229+
.srcAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,
4230+
.dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,
4231+
.dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT
4232+
};
4233+
if (blasCount==0 || pipelineBarrier(computeCmdBuf,{.memBarriers={&readBLASInTLASBuildBarrier,1}},"Failed to sync BLAS with TLAS build!"))
4234+
{
4235+
core::vector<IGPUTopLevelAccelerationStructure*> compactions;
4236+
compactions.reserve(tlasCount);
4237+
// build
4238+
for (const auto& tlasToBuild : tlasesToBuild)
4239+
{
4240+
// allocate scratch
4241+
// check dependents
4242+
// stream build infos
4243+
// record builds
4244+
// record compaction queries
4245+
}
4246+
computeCmdBuf->cmdbuf->endDebugMarker();
4247+
// no longer need this info
4248+
compactedBLASMap.clear();
4249+
// compact
4250+
computeCmdBuf->cmdbuf->beginDebugMarker("Asset Converter Compact TLASes");
4251+
// compact needs to wait for Build
4252+
if (!compactions.empty() && pipelineBarrier(computeCmdBuf,{.memBarriers={&readASInASCompactBarrier,1}},"Failed to sync Acceleration Structure builds with compactions!"))
4253+
{
4254+
// drain compute
4255+
// get queries
4256+
for (auto* tlas : compactions)
4257+
{
4258+
// recreate Acceleration Structure
4259+
// record compaction
4260+
// insert into compaction map
4261+
}
4262+
}
4263+
}
4264+
computeCmdBuf->cmdbuf->endDebugMarker();
4265+
tlasesToBuild.clear();
4266+
}
4267+
4268+
// release ownership
4269+
if (!ownershipTransfers.empty())
4270+
pipelineBarrier(computeCmdBuf,{.memBarriers={},.bufBarriers=ownershipTransfers},"Ownership Releases of Acceleration Structure backing Buffers failed!");
41844271
}
4185-
compactedBLASMap.clear();
4186-
#endif
41874272

41884273
const bool computeSubmitIsNeeded = submitsNeeded.hasFlags(IQueue::FAMILY_FLAGS::COMPUTE_BIT);
41894274
// first submit transfer

0 commit comments

Comments
 (0)