@@ -3377,7 +3377,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3377
3377
{
3378
3378
if (reqQueueFlags.hasFlags (IQueue::FAMILY_FLAGS::TRANSFER_BIT) && (!params.utilities || params.utilities ->getLogicalDevice ()!=device))
3379
3379
{
3380
- logger.log (" Transfer Capability required for this conversion and no compatible `utilities` provided!" , system::ILogger::ELL_ERROR);
3380
+ logger.log (" Transfer Capability required for this conversion and no compatible `utilities` provided!" ,system::ILogger::ELL_ERROR);
3381
3381
return retval;
3382
3382
}
3383
3383
@@ -3406,6 +3406,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3406
3406
};
3407
3407
// If the transfer queue will be used, the transfer Intended Submit Info must be valid and utilities must be provided
3408
3408
auto reqTransferQueueCaps = IQueue::FAMILY_FLAGS::TRANSFER_BIT;
3409
+ // Depth/Stencil transfers need Graphics Capabilities, so make sure the queue chosen for transfers also has them!
3409
3410
if (reservations.m_queueFlags .hasFlags (IQueue::FAMILY_FLAGS::GRAPHICS_BIT))
3410
3411
reqTransferQueueCaps |= IQueue::FAMILY_FLAGS::GRAPHICS_BIT;
3411
3412
if (invalidIntended (reqTransferQueueCaps,params.transfer ))
@@ -3428,7 +3429,52 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3428
3429
}
3429
3430
}
3430
3431
3431
- // wipe gpu item in staging cache (this may drop it as well if it was made for only a root asset == no users)
3432
+ // check things necessary for building Acceleration Structures
3433
+ using buffer_usage_f = IGPUBuffer::E_USAGE_FLAGS;
3434
+ if (reservations.m_ASBuildScratchUsages !=buffer_usage_f::EUF_NONE)
3435
+ {
3436
+ if (!params.scratchForDeviceASBuild )
3437
+ {
3438
+ logger.log (" An Acceleration Structure will be built on Device but no scratch allocator provided!" ,system::ILogger::ELL_ERROR);
3439
+ return retval;
3440
+ }
3441
+ // TODO: do the build input buffers also need `EUF_STORAGE_BUFFER_BIT` ?
3442
+ constexpr buffer_usage_f asBuildInputFlags = buffer_usage_f::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT|buffer_usage_f::EUF_TRANSFER_DST_BIT|buffer_usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT;
3443
+ // we may use the staging buffer directly to skip an extra copy on small enough geometries
3444
+ if (!params.utilities ->getDefaultUpStreamingBuffer ()->getBuffer ()->getCreationParams ().usage .hasFlags (asBuildInputFlags))
3445
+ {
3446
+ logger.log (" An Acceleration Structure will be built on Device but Default UpStreaming Buffer from IUtilities doesn't have required usage flags!" ,system::ILogger::ELL_ERROR);
3447
+ return retval;
3448
+ }
3449
+ constexpr buffer_usage_f asBuildScratchFlags = buffer_usage_f::EUF_STORAGE_BUFFER_BIT|buffer_usage_f::EUF_SHADER_DEVICE_ADDRESS_BIT;
3450
+ // we use the scratch allocator both for scratch and uploaded geometry data
3451
+ if (!params.scratchForDeviceASBuild ->getBuffer ()->getCreationParams ().usage .hasFlags (asBuildScratchFlags|asBuildInputFlags))
3452
+ {
3453
+ logger.log (" An Acceleration Structure will be built on Device but scratch buffer doesn't have required usage flags!" ,system::ILogger::ELL_ERROR);
3454
+ return retval;
3455
+ }
3456
+ const auto & addrAlloc = params.scratchForDeviceASBuild ->getAddressAllocator ();
3457
+ // could have used an address allocator trait to work this out, same verbosity
3458
+ if (addrAlloc.get_allocated_size ()+addrAlloc.get_free_size ()<reservations.m_minASBuildScratchSize [0 ])
3459
+ {
3460
+ logger.log (" Acceleration Structure Scratch Device Memory Allocator not large enough!" ,system::ILogger::ELL_ERROR);
3461
+ return retval;
3462
+ }
3463
+ }
3464
+ // the elusive and exotic host builds
3465
+ if (reservations.m_willHostBuildSomeAS && !params.scratchForHostASBuild )
3466
+ {
3467
+ logger.log (" An Acceleration Structure will be built on the Host but no Scratch Memory Allocator provided!" , system::ILogger::ELL_ERROR);
3468
+ return retval;
3469
+ }
3470
+ // and compacting
3471
+ if (reservations.m_willCompactSomeAS && !params.compactedASAllocator )
3472
+ {
3473
+ logger.log (" An Acceleration Structure will be compacted but no Device Memory Allocator provided!" , system::ILogger::ELL_ERROR);
3474
+ return retval;
3475
+ }
3476
+
3477
+ //
3432
3478
auto findInStaging = [&reservations]<Asset AssetType>(const typename asset_traits<AssetType>::video_t * gpuObj)->core ::blake3_hash_t *
3433
3479
{
3434
3480
auto & stagingCache = std::get<SReserveResult::staging_cache_t <AssetType>>(reservations.m_stagingCaches );
@@ -3547,9 +3593,9 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3547
3593
const auto computeFamily = shouldDoSomeCompute ? params.compute ->queue ->getFamilyIndex ():IQueue::FamilyIgnored;
3548
3594
// whenever transfer needs to do a submit overflow because it ran out of memory for streaming an image, we can already submit the recorded mip-map compute shader dispatches
3549
3595
auto computeCmdBuf = shouldDoSomeCompute ? params.compute ->getCommandBufferForRecording ():nullptr ;
3550
- auto drainCompute = [¶ms,shouldDoSomeCompute, &computeCmdBuf](const std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignal={})->auto
3596
+ auto drainCompute = [¶ms,&computeCmdBuf](const std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> extraSignal={})->auto
3551
3597
{
3552
- if (!shouldDoSomeCompute || computeCmdBuf->cmdbuf ->empty ())
3598
+ if (!computeCmdBuf || computeCmdBuf->cmdbuf ->empty ())
3553
3599
return IQueue::RESULT::SUCCESS;
3554
3600
// before we overflow submit we need to inject extra wait semaphores
3555
3601
auto & waitSemaphoreSpan = params.compute ->waitSemaphores ;
@@ -3568,6 +3614,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
3568
3614
IQueue::RESULT res = params.compute ->submit (computeCmdBuf,extraSignal);
3569
3615
if (res!=IQueue::RESULT::SUCCESS)
3570
3616
return res;
3617
+ // set to empty so we don't grow over and over again
3618
+ waitSemaphoreSpan = {};
3571
3619
if (!params.compute ->beginNextCommandBuffer (computeCmdBuf))
3572
3620
return IQueue::RESULT::OTHER_ERROR;
3573
3621
return res;
@@ -4039,7 +4087,65 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4039
4087
imagesToUpload.clear ();
4040
4088
}
4041
4089
4042
- // TODO: build BLASes and TLASes
4090
+ // BLAS builds
4091
+ auto & blasToBuild = std::get<SReserveResult::conversion_requests_t <ICPUBottomLevelAccelerationStructure>>(reservations.m_conversionRequests );
4092
+ if (const auto blasCount = blasToBuild.size (); blasCount)
4093
+ {
4094
+ constexpr auto GeometryIsAABBFlag = ICPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT;
4095
+
4096
+ core::vector<IGPUBottomLevelAccelerationStructure::DeviceBuildInfo> buildInfos; buildInfos.reserve (blasCount);
4097
+ core::vector<IGPUBottomLevelAccelerationStructure::DeviceBuildInfo> rangeInfo; rangeInfo.reserve (blasCount);
4098
+ core::vector<IGPUBottomLevelAccelerationStructure::Triangles<const IGPUBuffer>> triangles;
4099
+ core::vector<IGPUBottomLevelAccelerationStructure::AABBs<const IGPUBuffer>> aabbs;
4100
+ {
4101
+ size_t totalTriGeoCount = 0 ;
4102
+ size_t totalAABBGeoCount = 0 ;
4103
+ for (auto & item : blasToBuild)
4104
+ {
4105
+ const size_t geoCount = item.canonical ->getGeometryCount ();
4106
+ if (item.canonical ->getBuildFlags ().hasFlags (GeometryIsAABBFlag))
4107
+ totalAABBGeoCount += geoCount;
4108
+ else
4109
+ totalTriGeoCount += geoCount;
4110
+ }
4111
+ triangles.reserve (totalTriGeoCount);
4112
+ triangles.reserve (totalAABBGeoCount);
4113
+ }
4114
+ for (auto & item : blasToBuild)
4115
+ {
4116
+ auto * as = item.gpuObj ;
4117
+ auto pFoundHash = findInStaging.operator ()<ICPUBottomLevelAccelerationStructure>(as);
4118
+ if (item.asBuildParams .host )
4119
+ {
4120
+ auto dOp = device->createDeferredOperation ();
4121
+ //
4122
+ if (!device->buildAccelerationStructure (dOp.get (),info,range))
4123
+ {
4124
+ markFailureInStaging (gpuObj,pFoundHash);
4125
+ continue ;
4126
+ }
4127
+ }
4128
+ else
4129
+ {
4130
+ auto & buildInfo = buildInfo.emplace_back ({
4131
+ .buildFlags = item.buildFlags ,
4132
+ .geometryCount = item.canonical ->getGeometryCount (),
4133
+ // this is not an update
4134
+ .srcAS = nullptr ,
4135
+ .dstAS = as.get ()
4136
+ });
4137
+ if (item.canonical ->getBuildFlags ().hasFlags (GeometryIsAABBFlag))
4138
+ buildInfo.aabbs = nullptr ;
4139
+ else
4140
+ buildInfo.triangles = nullptr ;
4141
+ computeCmdBuf->cmdbuf ->buildAccelerationStructures (buildInfo,rangeInfo);
4142
+ }
4143
+ }
4144
+ }
4145
+
4146
+ // TLAS builds
4147
+ auto & tlasToBuild = std::get<SReserveResult::conversion_requests_t <ICPUTopLevelAccelerationStructure>>(reservations.m_conversionRequests );
4148
+ if (!tlasToBuild.empty ())
4043
4149
{
4044
4150
}
4045
4151
@@ -4100,6 +4206,10 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4100
4206
// rescan all the GPU objects and find out if they depend on anything that failed, if so add to failure set
4101
4207
bool depsMissing = false ;
4102
4208
// only go over types we could actually break via missing upload/build (i.e. pipelines are unbreakable)
4209
+ if constexpr (std::is_same_v<AssetType,ICPUTopLevelAccelerationStructure>)
4210
+ {
4211
+ // there's no lifetime tracking (refcounting) from TLAS to BLAS, so one just must trust the pre-TLAS-build input validation to do its job
4212
+ }
4103
4213
if constexpr (std::is_same_v<AssetType,ICPUBufferView>)
4104
4214
depsMissing = missingDependent.operator ()<ICPUBuffer>(item.first ->getUnderlyingBuffer ());
4105
4215
if constexpr (std::is_same_v<AssetType,ICPUImageView>)
@@ -4141,8 +4251,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4141
4251
depsMissing = missingDependent.operator ()<ICPUBufferView>(static_cast <const IGPUBufferView*>(untypedDesc));
4142
4252
break ;
4143
4253
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
4144
- _NBL_TODO ( );
4145
- [[fallthrough]] ;
4254
+ depsMissing = missingDependent. operator ()<ICPUTopLevelAccelerationStructure>( static_cast < const ICPUTopLevelAccelerationStructure*>(untypedDesc) );
4255
+ break ;
4146
4256
default :
4147
4257
assert (false );
4148
4258
depsMissing = true ;
@@ -4170,8 +4280,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
4170
4280
// again, need to go bottom up so we can check dependencies being successes
4171
4281
mergeCache.operator ()<ICPUBuffer>();
4172
4282
mergeCache.operator ()<ICPUImage>();
4173
- // mergeCache.operator()<ICPUBottomLevelAccelerationStructure>();
4174
- // mergeCache.operator()<ICPUTopLevelAccelerationStructure>();
4283
+ mergeCache.operator ()<ICPUBottomLevelAccelerationStructure>();
4284
+ mergeCache.operator ()<ICPUTopLevelAccelerationStructure>();
4175
4285
mergeCache.operator ()<ICPUBufferView>();
4176
4286
mergeCache.operator ()<ICPUImageView>();
4177
4287
mergeCache.operator ()<ICPUShader>();
0 commit comments