@@ -2759,10 +2759,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2759
2759
// BLAS and TLAS creation is somewhat delayed by buffer creation and allocation
2760
2760
struct DeferredASCreationParams
2761
2761
{
2762
- const IAccelerationStructure* canonical;
2763
2762
asset_cached_t <ICPUBuffer> storage = {};
2764
- uint64_t patchIx = 0 ;
2765
- uint64_t uniqueCopyGroupID = 0 ;
2766
2763
uint64_t scratchSize = 0 ;
2767
2764
uint64_t buildSize = 0 ;
2768
2765
};
@@ -2931,7 +2928,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2931
2928
2932
2929
// we need to save the buffer in a side-channel for later
2933
2930
auto & out = accelerationStructureParams[IsTLAS][entry.second .firstCopyIx +i];
2934
- out.canonical = as;
2935
2931
// this is where it gets a bit weird, we need to create a buffer to back the acceleration structure
2936
2932
{
2937
2933
IGPUBuffer::SCreationParams params = {};
@@ -2950,8 +2946,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2950
2946
continue ;
2951
2947
}
2952
2948
}
2953
- out.patchIx = patchIx;
2954
- out.uniqueCopyGroupID = uniqueCopyGroupID;
2955
2949
out.scratchSize = sizes.buildScratchSize ;
2956
2950
out.buildSize = buildSize;
2957
2951
}
@@ -3386,7 +3380,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3386
3380
}
3387
3381
3388
3382
// clear what we don't need
3389
- conversionRequests.gpuObjUniqueCopyGroupIDs .clear ();
3383
+ if constexpr (!std::is_base_of_v<IAccelerationStructure,AssetType>)
3384
+ conversionRequests.gpuObjUniqueCopyGroupIDs .clear ();
3390
3385
// This gets deferred till AFTER the Buffer Memory Allocations and Binding
3391
3386
if constexpr (!std::is_base_of_v<IAccelerationStructure,AssetType> && !std::is_base_of_v<IDeviceMemoryBacked,typename asset_traits<AssetType>::video_t >)
3392
3387
{
@@ -3418,7 +3413,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3418
3413
bufferConversions.propagateToCaches (std::get<dfs_cache<ICPUBuffer>>(dfsCaches),std::get<SReserveResult::staging_cache_t <ICPUBuffer>>(retval.m_stagingCaches ));
3419
3414
// Deal with Deferred Creation of Acceleration structures
3420
3415
{
3421
- auto createAccelerationStructures = [&]<typename AccelerationStructure>()->void
3416
+ auto createAccelerationStructures = [&]<typename AccelerationStructure>(conversions_t <AccelerationStructure>& requests )->void
3422
3417
{
3423
3418
constexpr bool IsTLAS = std::is_same_v<AccelerationStructure,ICPUTopLevelAccelerationStructure>;
3424
3419
//
@@ -3428,63 +3423,70 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3428
3423
else
3429
3424
pConversions = retval.m_blasConversions ;
3430
3425
// we enqueue the conversions AFTER making sure that the BLAS / TLAS can actually be created
3431
- for (size_t i= 0 ; i<accelerationStructureParams[IsTLAS]. size (); i++ )
3432
- if ( const auto & deferredParams=accelerationStructureParams[IsTLAS][i]; deferredParams. storage )
3426
+ for (auto & entry : requests. contentHashToCanonical )
3427
+ for ( auto i= 0ull ; i<entry. second . copyCount ; i++ )
3433
3428
{
3434
- const auto canonical = static_cast <const AccelerationStructure*>(deferredParams.canonical );
3435
- const auto & dfsNode = std::get<dfs_cache<AccelerationStructure>>(dfsCaches).nodes [deferredParams.patchIx ];
3436
- const auto & patch = dfsNode.patch ;
3437
- // create the AS
3438
- const auto bufSz = deferredParams.storage .get ()->getSize ();
3439
- IGPUAccelerationStructure::SCreationParams baseParams;
3440
- {
3441
- using create_f = IGPUAccelerationStructure::SCreationParams::FLAGS;
3442
- baseParams = {
3443
- .bufferRange = {.offset =0 ,.size =bufSz,.buffer =deferredParams.storage .value },
3444
- .flags = patch.isMotion ? create_f::MOTION_BIT:create_f::NONE
3445
- };
3446
- }
3447
- smart_refctd_ptr<typename asset_traits<AccelerationStructure>::video_t > as;
3448
- CAssetConverter::SReserveResult::SConvReqTLAS::cpu_to_gpu_blas_map_t blasInstanceMap;
3449
- if constexpr (IsTLAS)
3450
- {
3451
- // check if the BLASes we want to use for the instances were successfully allocated and created
3452
- AssetVisitor<GetDependantVisit<ICPUTopLevelAccelerationStructure>> visitor = {
3453
- {inputs,dfsCaches,&blasInstanceMap},
3454
- {canonical,deferredParams.uniqueCopyGroupID },
3455
- patch
3456
- };
3457
- if (!visitor ())
3458
- {
3459
- inputs.logger .log (
3460
- " Failed to find all GPU Bottom Level Acceleration Structures needed to build TLAS %8llx%8llx%8llx%8llx" ,
3461
- system::ILogger::ELL_ERROR// ,hashAsU64[0],hashAsU64[1],hashAsU64[2],hashAsU64[3]
3462
- );
3463
- continue ;
3464
- }
3465
- as = device->createTopLevelAccelerationStructure ({std::move (baseParams),patch.maxInstances });
3466
- }
3467
- else
3468
- as = device->createBottomLevelAccelerationStructure (std::move (baseParams));
3469
- if (!as)
3429
+ const auto reqIx = entry.second .firstCopyIx +i;
3430
+ if (const auto & deferredParams=accelerationStructureParams[IsTLAS][reqIx]; deferredParams.storage )
3470
3431
{
3471
- inputs.logger .log (" Failed to Create Acceleration Structure." ,system::ILogger::ELL_ERROR);
3472
- continue ;
3432
+ const auto * canonical = entry.second .canonicalAsset ;
3433
+ const auto & dfsNode = std::get<dfs_cache<AccelerationStructure>>(dfsCaches).nodes [entry.second .patchIndex .value ];
3434
+ const auto & patch = dfsNode.patch ;
3435
+ // create the AS
3436
+ const auto bufSz = deferredParams.storage .get ()->getSize ();
3437
+ IGPUAccelerationStructure::SCreationParams baseParams;
3438
+ {
3439
+ using create_f = IGPUAccelerationStructure::SCreationParams::FLAGS;
3440
+ baseParams = {
3441
+ .bufferRange = {.offset =0 ,.size =bufSz,.buffer =deferredParams.storage .value },
3442
+ .flags = patch.isMotion ? create_f::MOTION_BIT:create_f::NONE
3443
+ };
3444
+ }
3445
+ smart_refctd_ptr<typename asset_traits<AccelerationStructure>::video_t > as;
3446
+ CAssetConverter::SReserveResult::SConvReqTLAS::cpu_to_gpu_blas_map_t blasInstanceMap;
3447
+ if constexpr (IsTLAS)
3448
+ {
3449
+ // check if the BLASes we want to use for the instances were successfully allocated and created
3450
+ AssetVisitor<GetDependantVisit<ICPUTopLevelAccelerationStructure>> visitor = {
3451
+ {inputs,dfsCaches,&blasInstanceMap},
3452
+ {canonical,requests.gpuObjUniqueCopyGroupIDs [reqIx]},
3453
+ patch
3454
+ };
3455
+ if (!visitor ())
3456
+ {
3457
+ const auto hashAsU64 = reinterpret_cast <const uint64_t *>(entry.first .data );
3458
+ inputs.logger .log (
3459
+ " Failed to find all GPU Bottom Level Acceleration Structures needed to build TLAS %8llx%8llx%8llx%8llx" ,
3460
+ system::ILogger::ELL_ERROR,hashAsU64[0 ],hashAsU64[1 ],hashAsU64[2 ],hashAsU64[3 ]
3461
+ );
3462
+ continue ;
3463
+ }
3464
+ as = device->createTopLevelAccelerationStructure ({std::move (baseParams),patch.maxInstances });
3465
+ }
3466
+ else
3467
+ as = device->createBottomLevelAccelerationStructure (std::move (baseParams));
3468
+ if (!as)
3469
+ {
3470
+ inputs.logger .log (" Failed to Create Acceleration Structure." ,system::ILogger::ELL_ERROR);
3471
+ continue ;
3472
+ }
3473
+ // file the request for conversion
3474
+ auto & request = pConversions[patch.hostBuild ][as.get ()];
3475
+ request.canonical = smart_refctd_ptr<const AccelerationStructure>(canonical);
3476
+ request.scratchSize = deferredParams.scratchSize ;
3477
+ request.compact = patch.compactAfterBuild ;
3478
+ request.buildFlags = static_cast <uint16_t >(patch.getBuildFlags (canonical).value );
3479
+ request.buildSize = deferredParams.buildSize ;
3480
+ if constexpr (IsTLAS)
3481
+ request.instanceMap = std::move (blasInstanceMap);
3482
+ requests.assign (entry.first ,entry.second .firstCopyIx ,i,std::move (as));
3473
3483
}
3474
- // file the request for conversion
3475
- auto & request = pConversions[patch.hostBuild ][as.get ()];
3476
- request.canonical = smart_refctd_ptr<const AccelerationStructure>(canonical);
3477
- request.scratchSize = deferredParams.scratchSize ;
3478
- request.compact = patch.compactAfterBuild ;
3479
- request.buildFlags = static_cast <uint16_t >(patch.getBuildFlags (canonical).value );
3480
- request.buildSize = deferredParams.buildSize ;
3481
- if constexpr (IsTLAS)
3482
- request.instanceMap = std::move (blasInstanceMap);
3483
3484
}
3485
+ requests.gpuObjUniqueCopyGroupIDs .clear ();
3484
3486
};
3485
- createAccelerationStructures.template operator ()<ICPUBottomLevelAccelerationStructure>();
3487
+ createAccelerationStructures.template operator ()<ICPUBottomLevelAccelerationStructure>(blasConversions );
3486
3488
blasConversions.propagateToCaches (std::get<dfs_cache<ICPUBottomLevelAccelerationStructure>>(dfsCaches),std::get<SReserveResult::staging_cache_t <ICPUBottomLevelAccelerationStructure>>(retval.m_stagingCaches ));
3487
- createAccelerationStructures.template operator ()<ICPUTopLevelAccelerationStructure>();
3489
+ createAccelerationStructures.template operator ()<ICPUTopLevelAccelerationStructure>(tlasConversions );
3488
3490
tlasConversions.propagateToCaches (std::get<dfs_cache<ICPUTopLevelAccelerationStructure>>(dfsCaches),std::get<SReserveResult::staging_cache_t <ICPUTopLevelAccelerationStructure>>(retval.m_stagingCaches ));
3489
3491
}
3490
3492
// enqueue successfully created images with data to upload for conversion
@@ -3577,6 +3579,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3577
3579
retval.m_tlasConversions [i].erase (gpuObj);
3578
3580
if constexpr (std::is_same_v<AssetType,ICPUImage>)
3579
3581
retval.m_imageConversions .erase (gpuObj);
3582
+ // TODO: erase from `retval.m_gpuObjects` as well
3580
3583
return true ;
3581
3584
}
3582
3585
// still referenced, keep it around
0 commit comments