Skip to content

Commit 99e473d

Browse files
author
devsh
committed
Ok, so descriptor sets can actually track TLASes which are yet-unbuilt, makes life a lot easier.
add `IGPUTopLevelAccelerationStructure::getPendingBuildVer()` to detect if TLAS built yet also make sure the maxInstanceCount gets hashed properly
1 parent 69df18a commit 99e473d

File tree

3 files changed

+59
-90
lines changed

3 files changed

+59
-90
lines changed

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
667667

668668
//
669669
using build_ver_t = uint32_t;
670+
//
671+
inline build_ver_t getPendingBuildVer() const {return m_pendingBuildVer;}
670672
// this gets called when execution is sure to happen 100%, e.g. not during command recording but during submission
671673
inline build_ver_t registerNextBuildVer()
672674
{

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,22 +1131,24 @@ class CAssetConverter : public core::IReferenceCounted
11311131
{
11321132
inline bool operator==(const SDeferredTLASWrite& other) const
11331133
{
1134-
return binding==other.binding && arrayElement==other.arrayElement;
1134+
return dstSet==other.dstSet && storageOffset.data==other.storageOffset.data;
11351135
}
11361136

1137-
uint32_t binding;
1138-
uint32_t arrayElement;
1139-
core::smart_refctd_ptr<IGPUTopLevelAccelerationStructure> tlas;
1137+
IGPUDescriptorSet* dstSet;
1138+
// binding and array element rolled up into one
1139+
IGPUDescriptorSetLayout::CBindingRedirect::storage_offset_t storageOffset;
11401140
};
11411141
struct SDeferredTLASWriteHasher
11421142
{
11431143
inline size_t operator()(const SDeferredTLASWrite& write) const
11441144
{
1145-
return std::hash<uint64_t>()((uint64_t(write.binding)<<32)|write.arrayElement);
1145+
size_t retval = write.storageOffset.data;
1146+
core::hash_combine(retval,write.dstSet);
1147+
return retval;
11461148
}
11471149
};
1148-
using deferred_tlas_write_set_t = core::unordered_set<SDeferredTLASWrite,SDeferredTLASWriteHasher>;
1149-
core::unordered_map<IGPUDescriptorSet*,deferred_tlas_write_set_t> m_deferredTLASDescriptorWrites;
1150+
using compacted_tlas_rewrite_set_t = core::unordered_set<SDeferredTLASWrite,SDeferredTLASWriteHasher>;
1151+
compacted_tlas_rewrite_set_t m_potentialTLASRewrites;
11501152

11511153
//
11521154
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 48 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -612,8 +612,9 @@ class AssetVisitor : public CRTP
612612
const IDescriptorSetLayoutBase::CBindingRedirect::storage_range_index_t storageRangeIx(j);
613613
const auto binding = redirect.getBinding(storageRangeIx);
614614
const uint32_t count = redirect.getCount(storageRangeIx);
615-
// this is where the descriptors have their flattened place in a unified array
616-
const auto* infos = allInfos.data()+redirect.getStorageOffset(storageRangeIx).data;
615+
// this is where the descriptors have their flattened place in a unified array
616+
const auto storageBaseOffset = redirect.getStorageOffset(storageRangeIx);
617+
const auto* infos = allInfos.data()+storageBaseOffset.data;
617618
for (uint32_t el=0u; el<count; el++)
618619
{
619620
const auto& info = infos[el];
@@ -702,7 +703,7 @@ class AssetVisitor : public CRTP
702703
case IDescriptor::EC_ACCELERATION_STRUCTURE:
703704
{
704705
auto tlas = static_cast<const ICPUTopLevelAccelerationStructure*>(untypedDesc);
705-
if (!descend(tlas,{tlas},type,binding,el))
706+
if (!descend(tlas,{tlas},type,binding,el,storageBaseOffset))
706707
return false;
707708
break;
708709
}
@@ -1164,6 +1165,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAcc
11641165
// extras from the patch
11651166
hasher << lookup.patch->hostBuild;
11661167
hasher << lookup.patch->compactAfterBuild;
1168+
hasher << (lookup.patch->isMotion ? lookup.patch->maxInstances:0u);
11671169
const auto instances = asset->getInstances();
11681170
hasher << instances.size();
11691171
AssetVisitor<HashVisit<ICPUTopLevelAccelerationStructure>> visitor = {
@@ -1883,7 +1885,7 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
18831885
// okay to do non-owning, cache has ownership
18841886
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes = {};
18851887
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos = {};
1886-
CAssetConverter::SReserveResult::deferred_tlas_write_set_t deferredTLASWrites;
1888+
core::vector<IGPUDescriptorSetLayout::CBindingRedirect::storage_offset_t> potentialTLASRewrites = {};
18871889
// has to be public because of aggregate init, but its only for internal usage!
18881890
uint32_t lastBinding;
18891891
uint32_t lastElement;
@@ -1941,17 +1943,6 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
19411943
else
19421944
writes.back().count++;
19431945
lastElement = element;
1944-
// the RLE will always finish a write because a single binding can only be a single descriptor type, important that the TLAS path happens after that check
1945-
if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1946-
{
1947-
// not built yet?
1948-
if (depObj->)
1949-
{
1950-
const auto [where,inserted] = deferredTLASWrites.insert({binding.data,element,depObj});
1951-
assert(inserted);
1952-
return true;
1953-
}
1954-
}
19551946
//
19561947
auto& outInfo = infos.emplace_back();
19571948
outInfo.desc = std::move(depObj);
@@ -1962,10 +1953,18 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
19621953
if (IDescriptor::GetTypeCategory(type)==IDescriptor::E_CATEGORY::EC_BUFFER)
19631954
{
19641955
//outInfo.info.buffer = std::get<0>(argTuple);
1965-
outInfo.info.buffer.offset= std::get<0>(argTuple).offset;
1956+
outInfo.info.buffer.offset = std::get<0>(argTuple).offset;
19661957
outInfo.info.buffer.size = std::get<0>(argTuple).size;
19671958
}
19681959
}
1960+
// mark potential TLAS rewrites (with compaction) so we don't have to scan entire descriptor set for potentially compacted TLASes
1961+
if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1962+
if (depObj->getPendingBuildVer()==0) // means not built yet, so compactable by next `convert` run
1963+
{
1964+
auto storageOffset = std::get<0>(argTuple);
1965+
storageOffset.data += element;
1966+
potentialTLASRewrites.push_back(storageOffset);
1967+
}
19691968
if constexpr (std::is_same_v<DepType,ICPUImageView>)
19701969
{
19711970
outInfo.info.image.imageLayout = std::get<0>(argTuple);
@@ -3366,7 +3365,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33663365
ds = nullptr;
33673366
}
33683367
else
3369-
retval.m_deferredTLASDescriptorWrites[ds.get()] = std::move(visitor.deferredTLASWrites);
3368+
for (const auto storageIx : visitor.potentialTLASRewrites)
3369+
retval.m_potentialTLASRewrites.insert({ds.get(),storageIx});
33703370
}
33713371
else
33723372
inputs.logger.log("Failed to create Descriptor Pool suited for Layout %s",system::ILogger::ELL_ERROR,layout->getObjectDebugName());
@@ -3453,9 +3453,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
34533453
);
34543454
continue;
34553455
}
3456-
// is there any reason for it to be more?
3457-
const uint32_t maxInstances = canonical->getInstances().size();
3458-
as = device->createTopLevelAccelerationStructure({std::move(baseParams),maxInstances});
3456+
as = device->createTopLevelAccelerationStructure({std::move(baseParams),patch.maxInstances});
34593457
}
34603458
else
34613459
as = device->createBottomLevelAccelerationStructure(std::move(baseParams));
@@ -3564,9 +3562,6 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
35643562
retval.m_tlasConversions[i].erase(entry.first);
35653563
if constexpr (std::is_same_v<AssetType,ICPUImage>)
35663564
retval.m_imageConversions.erase(entry.first);
3567-
// because Descriptor Sets don't hold onto TLASes yet, we need to drop the TLASes in deferred descriptor writes
3568-
if constexpr (std::is_same_v<AssetType,ICPUDescriptorSet>)
3569-
retval.m_deferredTLASDescriptorWrites.erase(entry.first);
35703565
return true;
35713566
}
35723567
// still referenced, keep it around
@@ -3604,7 +3599,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
36043599
else
36053600
{
36063601
smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure> gpuBLAS;
3607-
// TODO
3602+
// TODO: figure out the BLAS that will be used, (this requires UUID)
36083603
retval.m_blasBuildMap.insert(foundBLAS,{cpuBLAS,{std::move(gpuBLAS),1,1}});
36093604
}
36103605
}
@@ -5231,30 +5226,8 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
52315226
depsMissing = missingDependent.template operator()<ICPUBufferView>(static_cast<const IGPUBufferView*>(untypedDesc));
52325227
break;
52335228
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
5234-
{
5235-
const auto* tlas = static_cast<const IGPUTopLevelAccelerationStructure*>(untypedDesc);
5236-
// successfully written a TLAS into the binding, nothing to check
5237-
if (tlas)
5238-
break;
5239-
// we have a null TLAS in the binding, and we have to check if we were supposed to have one in it
5240-
using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5241-
const redirect_t& redirect = layout->getDescriptorRedirect(IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5242-
const auto bindingRange = redirect.findBindingStorageIndex(redirect_t::storage_offset_t(i));
5243-
const auto firstElementOffset = redirect.getStorageOffset(bindingRange).data;
5244-
auto foundSet = reservations.m_deferredTLASDescriptorWrites.find(item.first);
5245-
if (foundSet!=reservations.m_deferredTLASDescriptorWrites.end())
5246-
{
5247-
const auto foundWrite = foundSet->second.find({
5248-
.binding = redirect.getBinding(bindingRange).data,
5249-
.arrayElement = i-firstElementOffset
5250-
});
5251-
// was scheduled to write some TLAS to this binding, but TLAS is now null
5252-
depsMissing = foundWrite!=foundSet->second.end() && !foundWrite->tlas;
5253-
}
5254-
else
5255-
depsMissing = true;
5229+
depsMissing = missingDependent.template operator()<ICPUTopLevelAccelerationStructure>(static_cast<const IGPUTopLevelAccelerationStructure*>(untypedDesc));
52565230
break;
5257-
}
52585231
default:
52595232
assert(false);
52605233
depsMissing = true;
@@ -5305,53 +5278,45 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
53055278
mergeCache.template operator()<ICPURenderpass>();
53065279
mergeCache.template operator()<ICPUGraphicsPipeline>();
53075280
// write the TLASes into Descriptor Set finally
5308-
if (auto& tlasWriteDSMap=reservations.m_deferredTLASDescriptorWrites; !tlasWriteDSMap.empty())
5281+
if (auto& tlasRewriteSet=reservations.m_potentialTLASRewrites; !tlasRewriteSet.empty())
53095282
{
53105283
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
5311-
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos;
5312-
for (auto& tlasWriteMap : tlasWriteDSMap)
5284+
writes.reserve(tlasRewriteSet.size());
5285+
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos(tlasRewriteSet.size());
5286+
auto* pInfo = infos.data();
5287+
for (auto& entry : tlasRewriteSet)
53135288
{
5314-
writes.clear();
5315-
infos.clear();
5316-
auto* dstSet = tlasWriteMap.first;
5317-
for (auto& inWrite : tlasWriteMap.second)
5289+
auto* const dstSet = entry.dstSet;
5290+
// we need to check if the descriptor set itself didn't get deleted in the meantime
5291+
auto& stagingCache = std::get<SReserveResult::staging_cache_t<ICPUDescriptorSet>>(reservations.m_stagingCaches);
5292+
const auto found = stagingCache.find(dstSet);
5293+
if (found==stagingCache.end())
5294+
continue;
5295+
// rewtrieve the binding from the TLAS
5296+
const auto* const tlas = static_cast<const IGPUTopLevelAccelerationStructure*>(dstSet->getAllDescriptors(IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE)[entry.storageOffset.data].get());
5297+
assert(tlas);
5298+
// only rewrite if successfully compacted
5299+
if (const auto foundCompacted=compactedTLASMap.find(tlas); foundCompacted!=compactedTLASMap.end())
53185300
{
5319-
// I know what I'm doing, this member has no influence on the set key hash or equal comparison operator
5320-
auto& tlas = const_cast<smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>(inWrite.tlas);
5321-
assert(tlas);
5322-
if (missingDependent.template operator()<ICPUTopLevelAccelerationStructure>(tlas.get()))
5323-
{
5324-
tlas = {};
5325-
continue;
5326-
}
5327-
if (const auto foundCompacted=compactedTLASMap.find(tlas.get()); foundCompacted!=compactedTLASMap.end())
5328-
tlas = foundCompacted->second;
5329-
infos.emplace_back().desc = std::move(tlas);
5301+
pInfo->desc = foundCompacted->second;
5302+
using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5303+
const redirect_t& redirect = dstSet->getLayout()->getDescriptorRedirect(IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5304+
const auto bindingRange = redirect.findBindingStorageIndex(entry.storageOffset);
5305+
const auto firstElementOffset = redirect.getStorageOffset(bindingRange);
53305306
writes.push_back({
53315307
.dstSet = dstSet,
5332-
.binding = inWrite.binding,
5333-
.arrayElement = inWrite.arrayElement,
5334-
.count = 1
5308+
.binding = redirect.getBinding(bindingRange).data,
5309+
.arrayElement = entry.storageOffset.data-firstElementOffset.data,
5310+
.count = 1,
5311+
.info = pInfo++
53355312
});
53365313
}
5337-
//
5338-
auto* pInfo = infos.data();
5339-
for (auto& outWrite : writes)
5340-
outWrite.info = pInfo++;
5341-
// if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5342-
if (!writes.empty() && !device->updateDescriptorSets(writes,{}))
5343-
{
5344-
auto* pHash = findInStaging.template operator()<ICPUDescriptorSet>(dstSet);
5345-
smart_refctd_ptr<const ICPUDescriptorSet> dummy;
5346-
markFailureInStaging("writing TLAS to Descriptor Set binding",dummy,dstSet,pHash);
5347-
}
53485314
}
5349-
// not strictly necessary, just provoking refcounting bugs right away if they exist
5350-
compactedTLASMap.clear();
5315+
// if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5316+
if (!writes.empty() && !device->updateDescriptorSets(writes,{}))
5317+
logger.log("Failed to write one of the compacted TLASes into a Descriptor Set, all Descriptor Sets will still use non-compacted TLASes",system::ILogger::ELL_ERROR);
53515318
}
53525319
mergeCache.template operator()<ICPUDescriptorSet>();
5353-
// needed for the IGPUDescriptorSets to check if TLAS exists/was written, can be released now
5354-
reservations.m_deferredTLASDescriptorWrites.clear();
53555320
// mergeCache.template operator()<ICPUFramebuffer>();
53565321

53575322
// no submit was necessary, so should signal the extra semaphores from the host

0 commit comments

Comments
 (0)