@@ -612,8 +612,9 @@ class AssetVisitor : public CRTP
612
612
const IDescriptorSetLayoutBase::CBindingRedirect::storage_range_index_t storageRangeIx (j);
613
613
const auto binding = redirect.getBinding (storageRangeIx);
614
614
const uint32_t count = redirect.getCount (storageRangeIx);
615
- // this is where the descriptors have their flattened place in a unified array
616
- const auto * infos = allInfos.data ()+redirect.getStorageOffset (storageRangeIx).data ;
615
+ // this is where the descriptors have their flattened place in a unified array
616
+ const auto storageBaseOffset = redirect.getStorageOffset (storageRangeIx);
617
+ const auto * infos = allInfos.data ()+storageBaseOffset.data ;
617
618
for (uint32_t el=0u ; el<count; el++)
618
619
{
619
620
const auto & info = infos[el];
@@ -702,7 +703,7 @@ class AssetVisitor : public CRTP
702
703
case IDescriptor::EC_ACCELERATION_STRUCTURE:
703
704
{
704
705
auto tlas = static_cast <const ICPUTopLevelAccelerationStructure*>(untypedDesc);
705
- if (!descend (tlas,{tlas},type,binding,el))
706
+ if (!descend (tlas,{tlas},type,binding,el,storageBaseOffset ))
706
707
return false ;
707
708
break ;
708
709
}
@@ -1164,6 +1165,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAcc
1164
1165
// extras from the patch
1165
1166
hasher << lookup.patch ->hostBuild ;
1166
1167
hasher << lookup.patch ->compactAfterBuild ;
1168
+ hasher << (lookup.patch ->isMotion ? lookup.patch ->maxInstances :0u );
1167
1169
const auto instances = asset->getInstances ();
1168
1170
hasher << instances.size ();
1169
1171
AssetVisitor<HashVisit<ICPUTopLevelAccelerationStructure>> visitor = {
@@ -1883,7 +1885,7 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1883
1885
// okay to do non-owning, cache has ownership
1884
1886
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes = {};
1885
1887
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos = {};
1886
- CAssetConverter::SReserveResult:: deferred_tlas_write_set_t deferredTLASWrites ;
1888
+ core::vector<IGPUDescriptorSetLayout::CBindingRedirect:: storage_offset_t > potentialTLASRewrites = {} ;
1887
1889
// has to be public because of aggregate init, but its only for internal usage!
1888
1890
uint32_t lastBinding;
1889
1891
uint32_t lastElement;
@@ -1941,17 +1943,6 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1941
1943
else
1942
1944
writes.back ().count ++;
1943
1945
lastElement = element;
1944
- // the RLE will always finish a write because a single binding can only be a single descriptor type, important that the TLAS path happens after that check
1945
- if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1946
- {
1947
- // not built yet?
1948
- if (depObj->)
1949
- {
1950
- const auto [where,inserted] = deferredTLASWrites.insert ({binding.data ,element,depObj});
1951
- assert (inserted);
1952
- return true ;
1953
- }
1954
- }
1955
1946
//
1956
1947
auto & outInfo = infos.emplace_back ();
1957
1948
outInfo.desc = std::move (depObj);
@@ -1962,10 +1953,18 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
1962
1953
if (IDescriptor::GetTypeCategory (type)==IDescriptor::E_CATEGORY::EC_BUFFER)
1963
1954
{
1964
1955
// outInfo.info.buffer = std::get<0>(argTuple);
1965
- outInfo.info .buffer .offset = std::get<0 >(argTuple).offset ;
1956
+ outInfo.info .buffer .offset = std::get<0 >(argTuple).offset ;
1966
1957
outInfo.info .buffer .size = std::get<0 >(argTuple).size ;
1967
1958
}
1968
1959
}
1960
+ // mark potential TLAS rewrites (with compaction) so we don't have to scan entire descriptor set for potentially compacted TLASes
1961
+ if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1962
+ if (depObj->getPendingBuildVer ()==0 ) // means not built yet, so compactable by next `convert` run
1963
+ {
1964
+ auto storageOffset = std::get<0 >(argTuple);
1965
+ storageOffset.data += element;
1966
+ potentialTLASRewrites.push_back (storageOffset);
1967
+ }
1969
1968
if constexpr (std::is_same_v<DepType,ICPUImageView>)
1970
1969
{
1971
1970
outInfo.info .image .imageLayout = std::get<0 >(argTuple);
@@ -3366,7 +3365,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3366
3365
ds = nullptr ;
3367
3366
}
3368
3367
else
3369
- retval.m_deferredTLASDescriptorWrites [ds.get ()] = std::move (visitor.deferredTLASWrites );
3368
+ for (const auto storageIx : visitor.potentialTLASRewrites )
3369
+ retval.m_potentialTLASRewrites .insert ({ds.get (),storageIx});
3370
3370
}
3371
3371
else
3372
3372
inputs.logger .log (" Failed to create Descriptor Pool suited for Layout %s" ,system::ILogger::ELL_ERROR,layout->getObjectDebugName ());
@@ -3453,9 +3453,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3453
3453
);
3454
3454
continue ;
3455
3455
}
3456
- // is there any reason for it to be more?
3457
- const uint32_t maxInstances = canonical->getInstances ().size ();
3458
- as = device->createTopLevelAccelerationStructure ({std::move (baseParams),maxInstances});
3456
+ as = device->createTopLevelAccelerationStructure ({std::move (baseParams),patch.maxInstances });
3459
3457
}
3460
3458
else
3461
3459
as = device->createBottomLevelAccelerationStructure (std::move (baseParams));
@@ -3564,9 +3562,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3564
3562
retval.m_tlasConversions [i].erase (entry.first );
3565
3563
if constexpr (std::is_same_v<AssetType,ICPUImage>)
3566
3564
retval.m_imageConversions .erase (entry.first );
3567
- // because Descriptor Sets don't hold onto TLASes yet, we need to drop the TLASes in deferred descriptor writes
3568
- if constexpr (std::is_same_v<AssetType,ICPUDescriptorSet>)
3569
- retval.m_deferredTLASDescriptorWrites .erase (entry.first );
3570
3565
return true ;
3571
3566
}
3572
3567
// still referenced, keep it around
@@ -3604,7 +3599,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
3604
3599
else
3605
3600
{
3606
3601
smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure> gpuBLAS;
3607
- // TODO
3602
+ // TODO: figure out the BLAS that will be used, (this requires UUID)
3608
3603
retval.m_blasBuildMap .insert (foundBLAS,{cpuBLAS,{std::move (gpuBLAS),1 ,1 }});
3609
3604
}
3610
3605
}
@@ -5231,30 +5226,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5231
5226
depsMissing = missingDependent.template operator ()<ICPUBufferView>(static_cast <const IGPUBufferView*>(untypedDesc));
5232
5227
break ;
5233
5228
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
5234
- {
5235
- const auto * tlas = static_cast <const IGPUTopLevelAccelerationStructure*>(untypedDesc);
5236
- // successfully written a TLAS into the binding, nothing to check
5237
- if (tlas)
5238
- break ;
5239
- // we have a null TLAS in the binding, and we have to check if we were supposed to have one in it
5240
- using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5241
- const redirect_t & redirect = layout->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5242
- const auto bindingRange = redirect.findBindingStorageIndex (redirect_t::storage_offset_t (i));
5243
- const auto firstElementOffset = redirect.getStorageOffset (bindingRange).data ;
5244
- auto foundSet = reservations.m_deferredTLASDescriptorWrites .find (item.first );
5245
- if (foundSet!=reservations.m_deferredTLASDescriptorWrites .end ())
5246
- {
5247
- const auto foundWrite = foundSet->second .find ({
5248
- .binding = redirect.getBinding (bindingRange).data ,
5249
- .arrayElement = i-firstElementOffset
5250
- });
5251
- // was scheduled to write some TLAS to this binding, but TLAS is now null
5252
- depsMissing = foundWrite!=foundSet->second .end () && !foundWrite->tlas ;
5253
- }
5254
- else
5255
- depsMissing = true ;
5229
+ depsMissing = missingDependent.template operator ()<ICPUTopLevelAccelerationStructure>(static_cast <const IGPUTopLevelAccelerationStructure*>(untypedDesc));
5256
5230
break ;
5257
- }
5258
5231
default :
5259
5232
assert (false );
5260
5233
depsMissing = true ;
@@ -5305,53 +5278,45 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
5305
5278
mergeCache.template operator ()<ICPURenderpass>();
5306
5279
mergeCache.template operator ()<ICPUGraphicsPipeline>();
5307
5280
// write the TLASes into Descriptor Set finally
5308
- if (auto & tlasWriteDSMap =reservations.m_deferredTLASDescriptorWrites ; !tlasWriteDSMap .empty ())
5281
+ if (auto & tlasRewriteSet =reservations.m_potentialTLASRewrites ; !tlasRewriteSet .empty ())
5309
5282
{
5310
5283
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
5311
- core::vector<IGPUDescriptorSet::SDescriptorInfo> infos;
5312
- for (auto & tlasWriteMap : tlasWriteDSMap)
5284
+ writes.reserve (tlasRewriteSet.size ());
5285
+ core::vector<IGPUDescriptorSet::SDescriptorInfo> infos (tlasRewriteSet.size ());
5286
+ auto * pInfo = infos.data ();
5287
+ for (auto & entry : tlasRewriteSet)
5313
5288
{
5314
- writes.clear ();
5315
- infos.clear ();
5316
- auto * dstSet = tlasWriteMap.first ;
5317
- for (auto & inWrite : tlasWriteMap.second )
5289
+ auto * const dstSet = entry.dstSet ;
5290
+ // we need to check if the descriptor set itself didn't get deleted in the meantime
5291
+ auto & stagingCache = std::get<SReserveResult::staging_cache_t <ICPUDescriptorSet>>(reservations.m_stagingCaches );
5292
+ const auto found = stagingCache.find (dstSet);
5293
+ if (found==stagingCache.end ())
5294
+ continue ;
5295
+ // rewtrieve the binding from the TLAS
5296
+ const auto * const tlas = static_cast <const IGPUTopLevelAccelerationStructure*>(dstSet->getAllDescriptors (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE)[entry.storageOffset .data ].get ());
5297
+ assert (tlas);
5298
+ // only rewrite if successfully compacted
5299
+ if (const auto foundCompacted=compactedTLASMap.find (tlas); foundCompacted!=compactedTLASMap.end ())
5318
5300
{
5319
- // I know what I'm doing, this member has no influence on the set key hash or equal comparison operator
5320
- auto & tlas = const_cast <smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>(inWrite.tlas );
5321
- assert (tlas);
5322
- if (missingDependent.template operator ()<ICPUTopLevelAccelerationStructure>(tlas.get ()))
5323
- {
5324
- tlas = {};
5325
- continue ;
5326
- }
5327
- if (const auto foundCompacted=compactedTLASMap.find (tlas.get ()); foundCompacted!=compactedTLASMap.end ())
5328
- tlas = foundCompacted->second ;
5329
- infos.emplace_back ().desc = std::move (tlas);
5301
+ pInfo->desc = foundCompacted->second ;
5302
+ using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5303
+ const redirect_t & redirect = dstSet->getLayout ()->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5304
+ const auto bindingRange = redirect.findBindingStorageIndex (entry.storageOffset );
5305
+ const auto firstElementOffset = redirect.getStorageOffset (bindingRange);
5330
5306
writes.push_back ({
5331
5307
.dstSet = dstSet,
5332
- .binding = inWrite.binding ,
5333
- .arrayElement = inWrite.arrayElement ,
5334
- .count = 1
5308
+ .binding = redirect.getBinding (bindingRange).data ,
5309
+ .arrayElement = entry.storageOffset .data -firstElementOffset.data ,
5310
+ .count = 1 ,
5311
+ .info = pInfo++
5335
5312
});
5336
5313
}
5337
- //
5338
- auto * pInfo = infos.data ();
5339
- for (auto & outWrite : writes)
5340
- outWrite.info = pInfo++;
5341
- // if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5342
- if (!writes.empty () && !device->updateDescriptorSets (writes,{}))
5343
- {
5344
- auto * pHash = findInStaging.template operator ()<ICPUDescriptorSet>(dstSet);
5345
- smart_refctd_ptr<const ICPUDescriptorSet> dummy;
5346
- markFailureInStaging (" writing TLAS to Descriptor Set binding" ,dummy,dstSet,pHash);
5347
- }
5348
5314
}
5349
- // not strictly necessary, just provoking refcounting bugs right away if they exist
5350
- compactedTLASMap.clear ();
5315
+ // if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5316
+ if (!writes.empty () && !device->updateDescriptorSets (writes,{}))
5317
+ logger.log (" Failed to write one of the compacted TLASes into a Descriptor Set, all Descriptor Sets will still use non-compacted TLASes" ,system::ILogger::ELL_ERROR);
5351
5318
}
5352
5319
mergeCache.template operator ()<ICPUDescriptorSet>();
5353
- // needed for the IGPUDescriptorSets to check if TLAS exists/was written, can be released now
5354
- reservations.m_deferredTLASDescriptorWrites .clear ();
5355
5320
// mergeCache.template operator()<ICPUFramebuffer>();
5356
5321
5357
5322
// no submit was necessary, so should signal the extra semaphores from the host
0 commit comments