Skip to content

Commit b2282f2

Browse files
author
devsh
committed
compacted TLAS substitution in cache and descriptor sets
1 parent 31c9a61 commit b2282f2

File tree

1 file changed

+19
-25
lines changed

1 file changed

+19
-25
lines changed

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3401,10 +3401,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
34013401
auto device = m_params.device;
34023402
const auto reqQueueFlags = reservations.getRequiredQueueFlags();
34033403

3404-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
34053404
// compacted TLASes need to be substituted in cache and Descriptor Sets
3406-
core::unordered_map<IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
3407-
#endif
3405+
core::unordered_map<const IGPUTopLevelAccelerationStructure*,smart_refctd_ptr<IGPUTopLevelAccelerationStructure>> compactedTLASMap;
34083406
// Anything to do?
34093407
if (reqQueueFlags.value!=IQueue::FAMILY_FLAGS::NONE)
34103408
{
@@ -4221,19 +4219,18 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
42214219
}
42224220

42234221

4224-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
42254222
// Descriptor Sets need their TLAS descriptors substituted if they've been compacted
42264223
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> tlasRewrites; tlasRewrites.reserve(compactedTLASMap.size());
42274224
core::vector<IGPUDescriptorSet::SDescriptorInfo> tlasInfos; tlasInfos.reserve(compactedTLASMap.size());
4228-
#endif
42294225
// want to check if deps successfully exist
42304226
auto missingDependent = [&reservations]<Asset AssetType>(const typename asset_traits<AssetType>::video_t* dep)->bool
42314227
{
42324228
auto& stagingCache = std::get<SReserveResult::staging_cache_t<AssetType>>(reservations.m_stagingCaches);
42334229
auto found = stagingCache.find(const_cast<asset_traits<AssetType>::video_t*>(dep));
4230+
// this only checks if whether we had to convert and failed
42344231
if (found!=stagingCache.end() && found->second.value==CHashCache::NoContentHash)
42354232
return true;
4236-
// dependent might be in readCache of one or more converters, so if in doubt assume its okay
4233+
// but the dependent might be in readCache of one or more converters, so if in doubt assume its okay
42374234
return false;
42384235
};
42394236
// insert items into cache if overflows handled fine and commandbuffers ready to be recorded
@@ -4250,12 +4247,11 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
42504247
// rescan all the GPU objects and find out if they depend on anything that failed, if so add to failure set
42514248
bool depsMissing = false;
42524249
// only go over types we could actually break via missing upload/build (i.e. pipelines are unbreakable)
4253-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
42544250
if constexpr (IsTLAS)
42554251
{
4256-
// there's no lifetime tracking (refcounting) from TLAS to BLAS, so one just must trust the pre-TLAS-build input validation to do its job
4252+
// A built TLAS cannot be queried about the BLASes it contains, so just trust the pre-TLAS-build input validation did its job
42574253
}
4258-
#endif
4254+
42594255
if constexpr (std::is_same_v<AssetType,ICPUBufferView>)
42604256
depsMissing = missingDependent.operator()<ICPUBuffer>(item.first->getUnderlyingBuffer());
42614257
if constexpr (std::is_same_v<AssetType,ICPUImageView>)
@@ -4271,9 +4267,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
42714267
if (samplers[i])
42724268
depsMissing = missingDependent.operator()<ICPUSampler>(samplers[i].get());
42734269
}
4274-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
42754270
const auto tlasRewriteOldSize = tlasRewrites.size();
4276-
#endif
42774271
for (auto i=0u; !depsMissing && i<static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); i++)
42784272
{
42794273
const auto type = static_cast<asset::IDescriptor::E_TYPE>(i);
@@ -4299,7 +4293,6 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
42994293
case asset::IDescriptor::EC_BUFFER_VIEW:
43004294
depsMissing = missingDependent.operator()<ICPUBufferView>(static_cast<const IGPUBufferView*>(untypedDesc));
43014295
break;
4302-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
43034296
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
43044297
{
43054298
const auto* tlas = static_cast<const IGPUTopLevelAccelerationStructure*>(untypedDesc);
@@ -4314,33 +4307,30 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
43144307
const redirect_t& redirect = layout->getDescriptorRedirect(IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
43154308
const auto bindingRange = redirect.findBindingStorageIndex(redirect_t::storage_offset_t(i));
43164309
const auto firstElementOffset = redirect.getStorageOffset(bindingRange).data;
4317-
tlasRewrites.push_back({
4318-
.set = item.first,
4310+
tlasRewrites.push_back(IGPUDescriptorSet::SWriteDescriptorSet{
4311+
.dstSet = item.first,
43194312
.binding = redirect.getBinding(bindingRange).data,
43204313
.arrayElement = i-firstElementOffset,
43214314
.count = 1, // write them one by one, no point optimizing
4322-
.info = nullptr // for now
4315+
.info = nullptr // for now, will set once the vector of infos stops growing
43234316
});
43244317
tlasInfos.emplace_back().desc = smart_refctd_ptr<IGPUTopLevelAccelerationStructure>(found->second);
43254318
}
43264319
break;
43274320
}
4328-
#endif
43294321
default:
43304322
assert(false);
43314323
depsMissing = true;
43324324
break;
43334325
}
43344326
}
43354327
}
4336-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
43374328
// don't bother overwriting a Descriptor Set that won't be marked as successfully converted (inserted into write cache)
43384329
if (depsMissing)
43394330
{
43404331
tlasRewrites.resize(tlasRewriteOldSize);
43414332
tlasInfos.resize(tlasRewriteOldSize);
43424333
}
4343-
#endif
43444334
}
43454335
auto* pGpuObj = item.first;
43464336
if (depsMissing)
@@ -4351,16 +4341,16 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
43514341
item.second.value = {};
43524342
continue;
43534343
}
4354-
if (!params.writeCache(item.second)) // TODO: let the user know the pointer too?
4355-
continue;
4356-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4344+
// For TLASes we need to write the compacted TLAS and not the intermediate build to the Cache
43574345
if constexpr (IsTLAS)
43584346
{
43594347
auto found = compactedTLASMap.find(pGpuObj);
43604348
if (found!=compactedTLASMap.end())
43614349
pGpuObj = found->second.get();
43624350
}
4363-
#endif
4351+
// We have success now, but ask callback if we write to the new cache.
4352+
if (!params.writeCache(item.second)) // TODO: let the user know the pointer to the GPU Object too?
4353+
continue;
43644354
asset_cached_t<AssetType> cached;
43654355
cached.value = core::smart_refctd_ptr<typename asset_traits<AssetType>::video_t>(pGpuObj);
43664356
cache.m_reverseMap.emplace(pGpuObj,item.second);
@@ -4385,17 +4375,21 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
43854375
mergeCache.operator()<ICPURenderpass>();
43864376
mergeCache.operator()<ICPUGraphicsPipeline>();
43874377
mergeCache.operator()<ICPUDescriptorSet>();
4388-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
43894378
// deal with rewriting the TLASes with compacted ones
43904379
{
4380+
// not strictly necessary, just provoking refcounting bugs right away if they exist
43914381
compactedTLASMap.clear();
43924382
auto* infoIt = tlasInfos.data();
4383+
// writes map 1:1 with infos, the lazy way, can finally write the pointer as vector stops growing
43934384
for (auto& write : tlasRewrites)
43944385
write.info = infoIt++;
43954386
if (!tlasRewrites.empty())
4396-
device->updateDescriptorSets(tlasRewrites,{});
4387+
{
4388+
const bool success = device->updateDescriptorSets(tlasRewrites,{});
4389+
// There's no point in any fault handling, everything we have done should have been valid
4390+
assert(success);
4391+
}
43974392
}
4398-
#endif
43994393
// mergeCache.operator()<ICPUFramebuffer>();
44004394

44014395
// no submit was necessary, so should signal the extra semaphores from the host

0 commit comments

Comments
 (0)