@@ -1873,6 +1873,8 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1873
1873
// returns whether there are any writes to do
1874
1874
bool finalizeWrites (IGPUDescriptorSet* dstSet)
1875
1875
{
1876
+ for (auto & deferredWrite : deferredTLASWrites)
1877
+ deferredWrite.dstSet = dstSet;
1876
1878
if (writes.empty ())
1877
1879
return false ;
1878
1880
// now infos can't move in memory anymore
@@ -1889,6 +1891,7 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1889
1891
// okay to do non-owning, cache has ownership
1890
1892
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes = {};
1891
1893
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos = {};
1894
+ core::vector<CAssetConverter::SReserveResult::SDeferredTLASWrite> deferredTLASWrites;
1892
1895
// has to be public because of aggregate init, but it's only for internal usage!
1893
1896
uint32_t lastBinding;
1894
1897
uint32_t lastElement;
@@ -1946,6 +1949,12 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1946
1949
else
1947
1950
writes.back ().count ++;
1948
1951
lastElement = element;
1952
+ // the RLE will always finish a write because a single binding can only be a single descriptor type; it's important that the TLAS path happens after that check
1953
+ if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1954
+ {
1955
+ deferredTLASWrites.push_back ({nullptr ,binding.data ,element,depObj});
1956
+ return true ;
1957
+ }
1949
1958
//
1950
1959
auto & outInfo = infos.emplace_back ();
1951
1960
outInfo.desc = std::move (depObj);
@@ -2607,20 +2616,20 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2607
2616
);
2608
2617
2609
2618
// work out mapping of `conversionRequests` to multiple GPU objects and their copy groups via counting sort
2610
- auto exclScanConvReqs = [&]()->size_t
2611
- {
2612
- size_t sum = 0 ;
2613
- for (auto & entry : conversionRequests)
2614
- {
2615
- entry.second .firstCopyIx = sum;
2616
- sum += entry.second .copyCount ;
2617
- }
2618
- return sum;
2619
- };
2620
2619
const auto gpuObjUniqueCopyGroupIDs = [&]()->core ::vector<size_t >
2621
2620
{
2622
2621
core::vector<size_t > retval;
2623
2622
// now assign storage offsets via exclusive scan and put the `uniqueGroupID` mappings in sorted order
2623
+ auto exclScanConvReqs = [&]()->size_t // exclusive prefix sum of `copyCount` over `conversionRequests`; returns the grand total
2624
+ {
2625
+ size_t sum = 0 ; // running total of the `copyCount` of every entry visited so far
2626
+ for (auto & entry : conversionRequests)
2627
+ {
2628
+ entry.second .firstCopyIx = sum; // this entry's first index = sum of all preceding entries' counts
2629
+ sum += entry.second .copyCount ;
2630
+ }
2631
+ return sum; // used by the caller to size `retval`
2632
+ };
2624
2633
retval.resize (exclScanConvReqs ());
2625
2634
//
2626
2635
dfsCache.for_each ([&inputs,&retval,&conversionRequests](const instance_t <AssetType>& instance, dfs_cache<AssetType>::created_t & created)->void
@@ -2644,8 +2653,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
2644
2653
exclScanConvReqs ();
2645
2654
return retval;
2646
2655
}();
2647
- core::vector<asset_cached_t <AssetType>> gpuObjects (gpuObjUniqueCopyGroupIDs.size ());
2648
2656
2657
+ core::vector<asset_cached_t <AssetType>> gpuObjects (gpuObjUniqueCopyGroupIDs.size ());
2649
2658
// Only warn once to reduce log spam
2650
2659
auto assign = [&]<bool GPUObjectWhollyImmutable=false >(const core::blake3_hash_t & contentHash, const size_t baseIx, const size_t copyIx, asset_cached_t <AssetType>::type&& gpuObj)->bool
2651
2660
{
@@ -3258,6 +3267,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3258
3267
// fail
3259
3268
ds = nullptr ;
3260
3269
}
3270
+ else
3271
+ retval.m_deferredTLASDescriptorWrites .insert (visitor.deferredTLASWrites .begin (),visitor.deferredTLASWrites .end ());
3261
3272
}
3262
3273
else
3263
3274
inputs.logger .log (" Failed to create Descriptor Pool suited for Layout %s" ,system::ILogger::ELL_ERROR,layout->getObjectDebugName ());
@@ -3350,10 +3361,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3350
3361
// Both so we can hash in O(Depth) and not O(Depth^2) but also so we have all the possible dependants ready.
3351
3362
// If two Asset chains are independent then we order them from most catastrophic failure to least.
3352
3363
dedupCreateProp.operator ()<ICPUBuffer>();
3353
- #ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
3354
3364
dedupCreateProp.operator ()<ICPUBottomLevelAccelerationStructure>();
3355
3365
dedupCreateProp.operator ()<ICPUTopLevelAccelerationStructure>();
3356
- #endif
3357
3366
dedupCreateProp.operator ()<ICPUImage>();
3358
3367
// now allocate the memory for buffers and images
3359
3368
deferredAllocator.finalize ();
@@ -5006,7 +5015,7 @@ if (worstSize>minScratchSize)
5006
5015
.arrayElement = i-firstElementOffset
5007
5016
});
5008
5017
// was scheduled to write some TLAS to this binding, but TLAS is now null
5009
- depsMissing = foundWrite!=reservations.m_deferredTLASDescriptorWrites .end () && !foundWrite->second ;
5018
+ depsMissing = foundWrite!=reservations.m_deferredTLASDescriptorWrites .end () && !foundWrite->tlas ;
5010
5019
break ;
5011
5020
}
5012
5021
default :
@@ -5067,7 +5076,8 @@ if (worstSize>minScratchSize)
5067
5076
auto * pInfo = infos.data ();
5068
5077
for (auto & inWrite : tlasWriteMap)
5069
5078
{
5070
- auto & tlas = inWrite.second ;
5079
+ // I know what I'm doing, this member has no influence on the set key hash
5080
+ auto & tlas = const_cast <smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>(inWrite.tlas );
5071
5081
assert (tlas);
5072
5082
if (missingDependent.operator ()<ICPUTopLevelAccelerationStructure>(tlas.get ()))
5073
5083
{
@@ -5078,9 +5088,9 @@ if (worstSize>minScratchSize)
5078
5088
tlas = foundCompacted->second ;
5079
5089
pInfo->desc = tlas;
5080
5090
writes.push_back ({
5081
- .dstSet = inWrite.first . dstSet ,
5082
- .binding = inWrite.first . binding ,
5083
- .arrayElement = inWrite.first . arrayElement ,
5091
+ .dstSet = inWrite.dstSet ,
5092
+ .binding = inWrite.binding ,
5093
+ .arrayElement = inWrite.arrayElement ,
5084
5094
.count = 1 ,
5085
5095
.info = pInfo++
5086
5096
});
@@ -5090,7 +5100,7 @@ if (worstSize>minScratchSize)
5090
5100
// if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5091
5101
if (!writes.empty () && !device->updateDescriptorSets (writes,{}))
5092
5102
for (auto & inWrite : tlasWriteMap)
5093
- inWrite.second = nullptr ;
5103
+ const_cast <smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>( inWrite.tlas ) = nullptr ;
5094
5104
}
5095
5105
mergeCache.operator ()<ICPUDescriptorSet>();
5096
5106
// needed for the IGPUDescriptorSets to check if TLAS exists/was written, can be released now
0 commit comments