@@ -3565,12 +3565,18 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3565
3565
return true ;
3566
3566
};
3567
3567
3568
+ // some state so we don't need to look later
3569
+ auto xferCmdBuf = params.transfer ->getCommandBufferForRecording ();
3570
+
3571
+ using buffer_mem_barrier_t = IGPUCommandBuffer::SBufferMemoryBarrier<IGPUCommandBuffer::SOwnershipTransferBarrier>;
3568
3572
// upload Buffers
3569
3573
auto & buffersToUpload = reservations.m_bufferConversions ;
3570
3574
{
3571
- core::vector<IGPUCommandBuffer::SBufferMemoryBarrier<IGPUCommandBuffer::SOwnershipTransferBarrier> > ownershipTransfers;
3575
+ core::vector<buffer_mem_barrier_t > ownershipTransfers;
3572
3576
ownershipTransfers.reserve (buffersToUpload.size ());
3573
3577
// do the uploads
3578
+ if (!buffersToUpload.empty ())
3579
+ xferCmdBuf->cmdbuf ->beginDebugMarker (" Asset Converter Upload Buffers" );
3574
3580
for (auto & item : buffersToUpload)
3575
3581
{
3576
3582
auto * buffer = item.gpuObj ;
@@ -3585,6 +3591,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3585
3591
bool success = ownerQueueFamily!=QueueFamilyInvalid;
3586
3592
// do the upload
3587
3593
success = success && params.utilities ->updateBufferRangeViaStagingBuffer (*params.transfer ,range,item.canonical ->getPointer ());
3594
+ // current recording buffer may have changed
3595
+ xferCmdBuf = params.transfer ->getCommandBufferForRecording ();
3588
3596
// let go of canonical asset (may free RAM)
3589
3597
item.canonical = nullptr ;
3590
3598
if (!success)
@@ -3608,14 +3616,14 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3608
3616
.range = range
3609
3617
});
3610
3618
}
3619
+ if (!buffersToUpload.empty ())
3620
+ xferCmdBuf->cmdbuf ->endDebugMarker ();
3611
3621
buffersToUpload.clear ();
3612
3622
// release ownership
3613
3623
if (!ownershipTransfers.empty ())
3614
- pipelineBarrier (params. transfer -> getCommandBufferForRecording () ,{.memBarriers ={},.bufBarriers =ownershipTransfers}," Ownership Releases of Buffers Failed" );
3624
+ pipelineBarrier (xferCmdBuf ,{.memBarriers ={},.bufBarriers =ownershipTransfers}," Ownership Releases of Buffers Failed" );
3615
3625
}
3616
3626
3617
- // some state so we don't need to look later
3618
- auto xferCmdBuf = params.transfer ->getCommandBufferForRecording ();
3619
3627
// whether we actually get around to doing that depends on validity and success of transfers
3620
3628
const bool shouldDoSomeCompute = reqQueueFlags.hasFlags (IQueue::FAMILY_FLAGS::COMPUTE_BIT);
3621
3629
// the flag check stops us dereferencing an invalid pointer
@@ -3729,6 +3737,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3729
3737
transferBarriers.reserve (MaxMipLevelsPastBase);
3730
3738
computeBarriers.reserve (MaxMipLevelsPastBase);
3731
3739
// finally go over the images
3740
+ xferCmdBuf->cmdbuf ->beginDebugMarker (" Asset Converter Upload Images" );
3732
3741
for (auto & item : imagesToUpload)
3733
3742
{
3734
3743
// basics
@@ -4114,15 +4123,47 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4114
4123
}
4115
4124
}
4116
4125
}
4126
+ xferCmdBuf->cmdbuf ->endDebugMarker ();
4117
4127
imagesToUpload.clear ();
4118
4128
}
4119
4129
4120
- # ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4121
- // BLAS builds
4122
- core::unordered_map<IGPUBottomLevelAccelerationStructure*,smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> compactedBLASMap;
4123
- auto & blasToBuild = reservations. m_blasConversions [ 0 ];
4124
- if (const auto blasCount = blasToBuild. size (); blasCount )
4130
+ // Host builds are unsupported
4131
+ assert (reservations. m_blasConversions [ 1 ]. empty () && reservations. m_tlasConversions [ 1 ]. empty ());
4132
+
4133
+ // Acceleration Structures
4134
+ if (reservations. willDeviceASBuild () )
4125
4135
{
4136
+ // we release BLAS and TLAS Storage Buffer ownership at the same time, because BLASes about to be released may need to be read by TLAS builds
4137
+ core::vector<buffer_mem_barrier_t > ownershipTransfers;
4138
+ // the already compacted BLASes need to be written into the TLASes using them
4139
+ core::unordered_map<IGPUBottomLevelAccelerationStructure*,smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> compactedBLASMap;
4140
+
4141
+ // Device Builds
4142
+ auto & blasesToBuild = reservations.m_blasConversions [0 ];
4143
+ auto & tlasesToBuild = reservations.m_tlasConversions [0 ];
4144
+ const auto blasCount = blasesToBuild.size ();
4145
+ const auto tlasCount = tlasesToBuild.size ();
4146
+ ownershipTransfers.reserve (blasCount+tlasCount);
4147
+
4148
+ // Right now we build all BLAS first, then all TLAS
4149
+ // (didn't fancy horrible concurrency management taking compactions into account)
4150
+ auto queryPool = device->createQueryPool ({.queryCount =hlsl::max<uint32_t >(blasCount,tlasCount),.queryType =IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE});
4151
+ // whether we actually reset more than we need shouldn't cost us anything
4152
+ computeCmdBuf->cmdbuf ->resetQueryPool (queryPool.get (),0 ,queryPool->getCreationParameters ().queryCount );
4153
+
4154
+ // Not messing around with listing AS backing buffers individually, ergonomics of that are null
4155
+ const asset::SMemoryBarrier readASInASCompactBarrier = {
4156
+ .srcStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,
4157
+ .srcAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,
4158
+ .dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,
4159
+ .dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT
4160
+ };
4161
+
4162
+ // Device BLAS builds
4163
+ if (blasCount)
4164
+ {
4165
+ compactedBLASMap.reserve (blasCount);
4166
+ #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4126
4167
constexpr auto GeometryIsAABBFlag = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
4127
4168
4128
4169
core::vector<IGPUBottomLevelAccelerationStructure::DeviceBuildInfo> buildInfos; buildInfos.reserve (blasCount);
@@ -4143,7 +4184,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4143
4184
triangles.reserve (totalTriGeoCount);
4144
4185
triangles.reserve (totalAABBGeoCount);
4145
4186
}
4146
- #if 0
4147
4187
for (auto & item : blasToBuild)
4148
4188
{
4149
4189
auto * as = item.gpuObj ;
@@ -4175,15 +4215,60 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4175
4215
}
4176
4216
}
4177
4217
#endif
4178
- }
4218
+ blasesToBuild.clear ();
4219
+ }
4179
4220
4180
- // TLAS builds
4181
- auto & tlasToBuild = reservations.m_tlasConversions [0 ];
4182
- if (!tlasToBuild.empty ())
4183
- {
4221
+ // Device TLAS builds
4222
+ if (tlasCount)
4223
+ {
4224
+ computeCmdBuf->cmdbuf ->beginDebugMarker (" Asset Converter Build TLASes" );
4225
+ // A single pipeline barrier to ensure BLASes build before TLASes is needed
4226
+ const asset::SMemoryBarrier readBLASInTLASBuildBarrier = {
4227
+ // the last use of the source BLAS could have been a build or a compaction
4228
+ .srcStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,
4229
+ .srcAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,
4230
+ .dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,
4231
+ .dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT
4232
+ };
4233
+ if (blasCount==0 || pipelineBarrier (computeCmdBuf,{.memBarriers ={&readBLASInTLASBuildBarrier,1 }}," Failed to sync BLAS with TLAS build!" ))
4234
+ {
4235
+ core::vector<IGPUTopLevelAccelerationStructure*> compactions;
4236
+ compactions.reserve (tlasCount);
4237
+ // build
4238
+ for (const auto & tlasToBuild : tlasesToBuild)
4239
+ {
4240
+ // allocate scratch
4241
+ // check dependents
4242
+ // stream build infos
4243
+ // record builds
4244
+ // record compaction queries
4245
+ }
4246
+ computeCmdBuf->cmdbuf ->endDebugMarker ();
4247
+ // no longer need this info
4248
+ compactedBLASMap.clear ();
4249
+ // compact
4250
+ computeCmdBuf->cmdbuf ->beginDebugMarker (" Asset Converter Compact TLASes" );
4251
+ // compact needs to wait for Build
4252
+ if (!compactions.empty () && pipelineBarrier (computeCmdBuf,{.memBarriers ={&readASInASCompactBarrier,1 }}," Failed to sync Acceleration Structure builds with compactions!" ))
4253
+ {
4254
+ // drain compute
4255
+ // get queries
4256
+ for (auto * tlas : compactions)
4257
+ {
4258
+ // recreate Acceleration Structure
4259
+ // record compaction
4260
+ // insert into compaction map
4261
+ }
4262
+ }
4263
+ }
4264
+ computeCmdBuf->cmdbuf ->endDebugMarker ();
4265
+ tlasesToBuild.clear ();
4266
+ }
4267
+
4268
+ // release ownership
4269
+ if (!ownershipTransfers.empty ())
4270
+ pipelineBarrier (computeCmdBuf,{.memBarriers ={},.bufBarriers =ownershipTransfers}," Ownership Releases of Acceleration Structure backing Buffers failed!" );
4184
4271
}
4185
- compactedBLASMap.clear ();
4186
- #endif
4187
4272
4188
4273
const bool computeSubmitIsNeeded = submitsNeeded.hasFlags (IQueue::FAMILY_FLAGS::COMPUTE_BIT);
4189
4274
// first submit transfer
0 commit comments