Skip to content

Commit 1d80b6e

Browse files
author
devsh
committed
fully implement the deferred write of TLASes to Descriptor Sets
1 parent a66c565 commit 1d80b6e

File tree

2 files changed

+35
-21
lines changed

2 files changed

+35
-21
lines changed

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,11 +1130,15 @@ class CAssetConverter : public core::IReferenceCounted
11301130
//
11311131
struct SDeferredTLASWrite
11321132
{
1133-
inline bool operator==(const SDeferredTLASWrite& other) const = default;
1133+
inline bool operator==(const SDeferredTLASWrite& other) const
1134+
{
1135+
return dstSet==other.dstSet && binding==other.binding && arrayElement==other.arrayElement;
1136+
}
11341137

11351138
IGPUDescriptorSet* dstSet;
11361139
uint32_t binding;
11371140
uint32_t arrayElement;
1141+
core::smart_refctd_ptr<IGPUTopLevelAccelerationStructure> tlas;
11381142
};
11391143
struct SDeferredTLASWriteHasher
11401144
{
@@ -1146,7 +1150,7 @@ class CAssetConverter : public core::IReferenceCounted
11461150
return retval;
11471151
}
11481152
};
1149-
core::unordered_map<SDeferredTLASWrite,core::smart_refctd_ptr<IGPUTopLevelAccelerationStructure>,SDeferredTLASWriteHasher> m_deferredTLASDescriptorWrites;
1153+
core::unordered_set<SDeferredTLASWrite,SDeferredTLASWriteHasher> m_deferredTLASDescriptorWrites;
11501154

11511155
//
11521156
core::bitflag<IQueue::FAMILY_FLAGS> m_queueFlags = IQueue::FAMILY_FLAGS::NONE;

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,6 +1873,8 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
18731873
// returns whether there are any writes to do
18741874
bool finalizeWrites(IGPUDescriptorSet* dstSet)
18751875
{
1876+
for (auto& deferredWrite : deferredTLASWrites)
1877+
deferredWrite.dstSet = dstSet;
18761878
if (writes.empty())
18771879
return false;
18781880
// now infos can't move in memory anymore
@@ -1889,6 +1891,7 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
18891891
// okay to do non-owning, cache has ownership
18901892
core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes = {};
18911893
core::vector<IGPUDescriptorSet::SDescriptorInfo> infos = {};
1894+
core::vector<CAssetConverter::SReserveResult::SDeferredTLASWrite> deferredTLASWrites;
18921895
// has to be public because of aggregate init, but its only for internal usage!
18931896
uint32_t lastBinding;
18941897
uint32_t lastElement;
@@ -1946,6 +1949,12 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
19461949
else
19471950
writes.back().count++;
19481951
lastElement = element;
1952+
// the RLE will always finish a write because a single binding can only be a single descriptor type; it is important that the TLAS path happens after that check
1953+
if constexpr (std::is_same_v<DepType,ICPUTopLevelAccelerationStructure>)
1954+
{
1955+
deferredTLASWrites.push_back({nullptr,binding.data,element,depObj});
1956+
return true;
1957+
}
19491958
//
19501959
auto& outInfo = infos.emplace_back();
19511960
outInfo.desc = std::move(depObj);
@@ -2607,20 +2616,20 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
26072616
);
26082617

26092618
// work out mapping of `conversionRequests` to multiple GPU objects and their copy groups via counting sort
2610-
auto exclScanConvReqs = [&]()->size_t
2611-
{
2612-
size_t sum = 0;
2613-
for (auto& entry : conversionRequests)
2614-
{
2615-
entry.second.firstCopyIx = sum;
2616-
sum += entry.second.copyCount;
2617-
}
2618-
return sum;
2619-
};
26202619
const auto gpuObjUniqueCopyGroupIDs = [&]()->core::vector<size_t>
26212620
{
26222621
core::vector<size_t> retval;
26232622
// now assign storage offsets via exclusive scan and put the `uniqueGroupID` mappings in sorted order
2623+
auto exclScanConvReqs = [&]()->size_t
2624+
{
2625+
size_t sum = 0;
2626+
for (auto& entry : conversionRequests)
2627+
{
2628+
entry.second.firstCopyIx = sum;
2629+
sum += entry.second.copyCount;
2630+
}
2631+
return sum;
2632+
};
26242633
retval.resize(exclScanConvReqs());
26252634
//
26262635
dfsCache.for_each([&inputs,&retval,&conversionRequests](const instance_t<AssetType>& instance, dfs_cache<AssetType>::created_t& created)->void
@@ -2644,8 +2653,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
26442653
exclScanConvReqs();
26452654
return retval;
26462655
}();
2647-
core::vector<asset_cached_t<AssetType>> gpuObjects(gpuObjUniqueCopyGroupIDs.size());
26482656

2657+
core::vector<asset_cached_t<AssetType>> gpuObjects(gpuObjUniqueCopyGroupIDs.size());
26492658
// Only warn once to reduce log spam
26502659
auto assign = [&]<bool GPUObjectWhollyImmutable=false>(const core::blake3_hash_t& contentHash, const size_t baseIx, const size_t copyIx, asset_cached_t<AssetType>::type&& gpuObj)->bool
26512660
{
@@ -3258,6 +3267,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
32583267
// fail
32593268
ds = nullptr;
32603269
}
3270+
else
3271+
retval.m_deferredTLASDescriptorWrites.insert(visitor.deferredTLASWrites.begin(),visitor.deferredTLASWrites.end());
32613272
}
32623273
else
32633274
inputs.logger.log("Failed to create Descriptor Pool suited for Layout %s",system::ILogger::ELL_ERROR,layout->getObjectDebugName());
@@ -3350,10 +3361,8 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
33503361
// Both so we can hash in O(Depth) and not O(Depth^2) but also so we have all the possible dependants ready.
33513362
// If two Asset chains are independent then we order them from most catastrophic failure to least.
33523363
dedupCreateProp.operator()<ICPUBuffer>();
3353-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
33543364
dedupCreateProp.operator()<ICPUBottomLevelAccelerationStructure>();
33553365
dedupCreateProp.operator()<ICPUTopLevelAccelerationStructure>();
3356-
#endif
33573366
dedupCreateProp.operator()<ICPUImage>();
33583367
// now allocate the memory for buffers and images
33593368
deferredAllocator.finalize();
@@ -5006,7 +5015,7 @@ if (worstSize>minScratchSize)
50065015
.arrayElement = i-firstElementOffset
50075016
});
50085017
// was scheduled to write some TLAS to this binding, but TLAS is now null
5009-
depsMissing = foundWrite!=reservations.m_deferredTLASDescriptorWrites.end() && !foundWrite->second;
5018+
depsMissing = foundWrite!=reservations.m_deferredTLASDescriptorWrites.end() && !foundWrite->tlas;
50105019
break;
50115020
}
50125021
default:
@@ -5067,7 +5076,8 @@ if (worstSize>minScratchSize)
50675076
auto* pInfo = infos.data();
50685077
for (auto& inWrite : tlasWriteMap)
50695078
{
5070-
auto& tlas = inWrite.second;
5079+
// I know what I'm doing, this member has no influence on the set key hash
5080+
auto& tlas = const_cast<smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>(inWrite.tlas);
50715081
assert(tlas);
50725082
if (missingDependent.operator()<ICPUTopLevelAccelerationStructure>(tlas.get()))
50735083
{
@@ -5078,9 +5088,9 @@ if (worstSize>minScratchSize)
50785088
tlas = foundCompacted->second;
50795089
pInfo->desc = tlas;
50805090
writes.push_back({
5081-
.dstSet = inWrite.first.dstSet,
5082-
.binding = inWrite.first.binding,
5083-
.arrayElement = inWrite.first.arrayElement,
5091+
.dstSet = inWrite.dstSet,
5092+
.binding = inWrite.binding,
5093+
.arrayElement = inWrite.arrayElement,
50845094
.count = 1,
50855095
.info = pInfo++
50865096
});
@@ -5090,7 +5100,7 @@ if (worstSize>minScratchSize)
50905100
// if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
50915101
if (!writes.empty() && !device->updateDescriptorSets(writes,{}))
50925102
for (auto& inWrite : tlasWriteMap)
5093-
inWrite.second = nullptr;
5103+
const_cast<smart_refctd_ptr<IGPUTopLevelAccelerationStructure>&>(inWrite.tlas) = nullptr;
50945104
}
50955105
mergeCache.operator()<ICPUDescriptorSet>();
50965106
// needed for the IGPUDescriptorSets to check if TLAS exists/was written, can be released now

0 commit comments

Comments
 (0)