@@ -455,6 +455,7 @@ class AssetVisitor : public CRTP
455
455
for (size_t i=0 ; i<blasInstances.size (); i++)
456
456
{
457
457
const auto * blas = blasInstances[i].getBase ().blas .get ();
458
+ // TODO: can one disable instances during builds?
458
459
if (!blas)
459
460
return false ;
460
461
CAssetConverter::patch_t <ICPUBottomLevelAccelerationStructure> patch = {blas};
@@ -1145,41 +1146,40 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUBuffer> loo
1145
1146
}
1146
1147
bool CAssetConverter::CHashCache::hash_impl::operator ()(lookup_t <ICPUBottomLevelAccelerationStructure> lookup)
1147
1148
{
1149
+ hasher << lookup.patch ->isMotion ;
1150
+ // overridden flags
1151
+ hasher << lookup.patch ->getBuildFlags (lookup.asset );
1148
1152
// extras from the patch
1149
1153
hasher << lookup.patch ->hostBuild ;
1150
1154
hasher << lookup.patch ->compactAfterBuild ;
1151
- // overriden flags
1152
- hasher << lookup.patch ->isMotion ;
1153
- hasher << lookup.patch ->getBuildFlags (lookup.asset );
1154
1155
// finally the contents
1155
- // TODO: hasher << lookup.asset->getContentHash();
1156
+ hasher << lookup.asset ->getContentHash ();
1156
1157
return true ;
1157
1158
}
1158
1159
bool CAssetConverter::CHashCache::hash_impl::operator ()(lookup_t <ICPUTopLevelAccelerationStructure> lookup)
1159
1160
{
1161
+ hasher << lookup.patch ->isMotion ;
1162
+ // overridden flags
1160
1163
const auto * asset = lookup.asset ;
1161
- #if 0
1162
- //
1164
+ hasher << lookup.patch ->getBuildFlags (asset);
1165
+ // extras from the patch
1166
+ hasher << lookup.patch ->hostBuild ;
1167
+ hasher << lookup.patch ->compactAfterBuild ;
1168
+ const auto instances = asset->getInstances ();
1169
+ hasher << instances.size ();
1163
1170
AssetVisitor<HashVisit<ICPUTopLevelAccelerationStructure>> visitor = {
1164
1171
*this ,
1165
1172
{asset,static_cast <const PatchOverride*>(patchOverride)->uniqueCopyGroupID },
1166
1173
*lookup.patch
1167
1174
};
1168
1175
if (!visitor ())
1169
1176
return false ;
1170
- // extras from the patch
1171
- hasher << lookup.patch->hostBuild;
1172
- hasher << lookup.patch->compactAfterBuild;
1173
- // overriden flags
1174
- hasher << lookup.patch->isMotion;
1175
- hasher << lookup.patch->getBuildFlags(lookup.asset);
1176
- const auto instances = asset->getInstances();
1177
1177
// important two passes do not give identical data due to variable length polymorphic array being hashed
1178
1178
for (const auto & instance : instances)
1179
1179
hasher << instance.getType ();
1180
1180
for (const auto & instance : instances)
1181
1181
{
1182
- std::visit([&hasher ](const auto& typedInstance)->void
1182
+ std::visit ([&](const auto & typedInstance)->void
1183
1183
{
1184
1184
using instance_t = std::decay_t <decltype (typedInstance)>;
1185
1185
// the BLAS pointers (the BLAS contents already get hashed via asset visitor and `getDependent`, it's only the metadata we need to hash)
@@ -1188,7 +1188,6 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAcc
1188
1188
instance.instance
1189
1189
);
1190
1190
}
1191
- #endif
1192
1191
return true ;
1193
1192
}
1194
1193
bool CAssetConverter::CHashCache::hash_impl::operator ()(lookup_t <ICPUImage> lookup)
@@ -4650,7 +4649,7 @@ if (worstSize>minScratchSize)
4650
4649
if (newWritten>=blockSize)
4651
4650
return bytesWritten;
4652
4651
auto found = blasBuildMap->find (instance.getBase ().blas .get ());
4653
- assert (found!=blasBuildMap. end ());
4652
+ assert (found!=blasBuildMap-> end ());
4654
4653
const auto & blas = found->second .gpuBLAS ;
4655
4654
dst = IGPUTopLevelAccelerationStructure::writeInstance (dst,instance,blas.get ()->getReferenceForDeviceOperations ());
4656
4655
dedupBLASesUsed->emplace (blas);
@@ -4752,6 +4751,7 @@ if (worstSize>minScratchSize)
4752
4751
else
4753
4752
compactedOwnershipReleaseIndices.push_back (~0u );
4754
4753
}
4754
+ reservations.m_blasBuildMap .clear ();
4755
4755
// finish the last batch
4756
4756
recordBuildCommands ();
4757
4757
if (!flushRanges.empty ())
@@ -4918,9 +4918,7 @@ if (worstSize>minScratchSize)
4918
4918
}
4919
4919
4920
4920
4921
- // Descriptor Sets need their TLAS descriptors substituted if they've been compacted
4922
- core::vector<IGPUDescriptorSet::SWriteDescriptorSet> tlasRewrites; tlasRewrites.reserve (compactedTLASMap.size ());
4923
- core::vector<IGPUDescriptorSet::SDescriptorInfo> tlasInfos; tlasInfos.reserve (compactedTLASMap.size ());
4921
+ // Descriptor Sets need their TLAS descriptors substituted if they've been compacted
4924
4922
// want to check if deps successfully exist
4925
4923
auto missingDependent = [&reservations]<Asset AssetType>(const typename asset_traits<AssetType>::video_t * dep)->bool
4926
4924
{
@@ -4966,7 +4964,6 @@ if (worstSize>minScratchSize)
4966
4964
if (samplers[i])
4967
4965
depsMissing = missingDependent.operator ()<ICPUSampler>(samplers[i].get ());
4968
4966
}
4969
- const auto tlasRewriteOldSize = tlasRewrites.size ();
4970
4967
for (auto i=0u ; !depsMissing && i<static_cast <uint32_t >(asset::IDescriptor::E_TYPE::ET_COUNT); i++)
4971
4968
{
4972
4969
const auto type = static_cast <asset::IDescriptor::E_TYPE>(i);
@@ -4995,27 +4992,21 @@ if (worstSize>minScratchSize)
4995
4992
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
4996
4993
{
4997
4994
const auto * tlas = static_cast <const IGPUTopLevelAccelerationStructure*>(untypedDesc);
4998
- depsMissing = missingDependent.operator ()<ICPUTopLevelAccelerationStructure>(tlas);
4999
- if (!depsMissing)
5000
- {
5001
- // TODO: Descriptor sets and other things still hold old non-compacted TLASes which balloons our peak memory usage, theoretically can drop them as soon as the compaction submit is done. Maybe defer writing TLAS into acceleration structure until conversions are done?
5002
- auto found = compactedTLASMap.find (tlas);
5003
- if (found==compactedTLASMap.end ())
5004
- break ;
5005
- // written TLAS got compacted, so queue the descriptor for update
5006
- using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5007
- const redirect_t & redirect = layout->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5008
- const auto bindingRange = redirect.findBindingStorageIndex (redirect_t::storage_offset_t (i));
5009
- const auto firstElementOffset = redirect.getStorageOffset (bindingRange).data ;
5010
- tlasRewrites.push_back (IGPUDescriptorSet::SWriteDescriptorSet{
5011
- .dstSet = item.first ,
5012
- .binding = redirect.getBinding (bindingRange).data ,
5013
- .arrayElement = i-firstElementOffset,
5014
- .count = 1 , // write them one by one, no point optimizing
5015
- .info = nullptr // for now, will set once the vector of infos stops growing
5016
- });
5017
- tlasInfos.emplace_back ().desc = smart_refctd_ptr<IGPUTopLevelAccelerationStructure>(found->second );
5018
- }
4995
+ // successfully written a TLAS into the binding, nothing to check
4996
+ if (tlas)
4997
+ break ;
4998
+ // we have a null TLAS in the binding, and we have to check if we were supposed to have one in it
4999
+ using redirect_t = IDescriptorSetLayoutBase::CBindingRedirect;
5000
+ const redirect_t & redirect = layout->getDescriptorRedirect (IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE);
5001
+ const auto bindingRange = redirect.findBindingStorageIndex (redirect_t::storage_offset_t (i));
5002
+ const auto firstElementOffset = redirect.getStorageOffset (bindingRange).data ;
5003
+ const auto foundWrite = reservations.m_deferredTLASDescriptorWrites .find ({
5004
+ .dstSet = item.first ,
5005
+ .binding = redirect.getBinding (bindingRange).data ,
5006
+ .arrayElement = i-firstElementOffset
5007
+ });
5008
+ // was scheduled to write some TLAS to this binding, but TLAS is now null
5009
+ depsMissing = foundWrite!=reservations.m_deferredTLASDescriptorWrites .end () && !foundWrite->second ;
5019
5010
break ;
5020
5011
}
5021
5012
default :
@@ -5025,12 +5016,6 @@ if (worstSize>minScratchSize)
5025
5016
}
5026
5017
}
5027
5018
}
5028
- // don't bother overwriting a Descriptor Set that won't be marked as successfully converted (inserted into write cache)
5029
- if (depsMissing)
5030
- {
5031
- tlasRewrites.resize (tlasRewriteOldSize);
5032
- tlasInfos.resize (tlasRewriteOldSize);
5033
- }
5034
5019
}
5035
5020
auto * pGpuObj = item.first ;
5036
5021
if (depsMissing)
@@ -5040,7 +5025,6 @@ if (worstSize>minScratchSize)
5040
5025
item.second .value = {};
5041
5026
continue ;
5042
5027
}
5043
- // TODO: we could just hotswap the `pGpuObj` in staging and write it to Descriptor Set here instead
5044
5028
// The BLASes don't need to do this, because no-one checks for them as dependents and we can substitute the `item.first` in the staging cache right away
5045
5029
// For TLASes we need to write the compacted TLAS and not the intermediate build to the Cache
5046
5030
if constexpr (IsTLAS)
@@ -5074,23 +5058,43 @@ if (worstSize>minScratchSize)
5074
5058
mergeCache.operator ()<ICPUComputePipeline>();
5075
5059
mergeCache.operator ()<ICPURenderpass>();
5076
5060
mergeCache.operator ()<ICPUGraphicsPipeline>();
5077
- mergeCache.operator ()<ICPUDescriptorSet>();
5078
- // TODO: should be done during `mergeCache.operator()<ICPUDescriptorSet>`
5079
- // deal with rewriting the TLASes with compacted ones
5061
+ // write the TLASes into Descriptor Set finally
5062
+ if (auto & tlasWriteMap=reservations.m_deferredTLASDescriptorWrites ; !tlasWriteMap.empty ())
5080
5063
{
5081
- // not strictly necessary, just provoking refcounting bugs right away if they exist
5082
- compactedTLASMap.clear ();
5083
- auto * infoIt = tlasInfos.data ();
5084
- // writes map 1:1 with infos, the lazy way, can finally write the pointer as vector stops growing
5085
- for (auto & write : tlasRewrites)
5086
- write.info = infoIt++;
5087
- if (!tlasRewrites.empty ())
5064
+ core::vector<IGPUDescriptorSet::SWriteDescriptorSet> writes;
5065
+ writes.reserve (tlasWriteMap.size ());
5066
+ core::vector<IGPUDescriptorSet::SDescriptorInfo> infos (tlasWriteMap.size ()); // one slot per potential write; `writes` has only been reserved at this point, so `writes.size()` is still 0 and must not be used to size this
5067
+ auto * pInfo = infos.data ();
5068
+ for (auto & inWrite : tlasWriteMap)
5088
5069
{
5089
- const bool success = device->updateDescriptorSets (tlasRewrites,{});
5090
- // There's no point in any fault handling, everything we have done should have been valid
5091
- assert (success);
5070
+ auto & tlas = inWrite.second ;
5071
+ assert (tlas);
5072
+ if (missingDependent.operator ()<ICPUTopLevelAccelerationStructure>(tlas.get ()))
5073
+ {
5074
+ tlas = nullptr ;
5075
+ continue ;
5076
+ }
5077
+ if (const auto foundCompacted=compactedTLASMap.find (tlas.get ()); foundCompacted!=compactedTLASMap.end ())
5078
+ tlas = foundCompacted->second ;
5079
+ pInfo->desc = tlas;
5080
+ writes.push_back ({
5081
+ .dstSet = inWrite.first .dstSet ,
5082
+ .binding = inWrite.first .binding ,
5083
+ .arrayElement = inWrite.first .arrayElement ,
5084
+ .count = 1 ,
5085
+ .info = pInfo++
5086
+ });
5092
5087
}
5088
+ // not strictly necessary, just provoking refcounting bugs right away if they exist
5089
+ compactedTLASMap.clear ();
5090
+ // if the descriptor write fails, we make the Descriptor Sets behave as-if the TLAS build failed (dep is missing)
5091
+ if (!writes.empty () && !device->updateDescriptorSets (writes,{}))
5092
+ for (auto & inWrite : tlasWriteMap)
5093
+ inWrite.second = nullptr ;
5093
5094
}
5095
+ mergeCache.operator ()<ICPUDescriptorSet>();
5096
+ // needed for the IGPUDescriptorSets to check if TLAS exists/was written, can be released now
5097
+ reservations.m_deferredTLASDescriptorWrites .clear ();
5094
5098
// mergeCache.operator()<ICPUFramebuffer>();
5095
5099
5096
5100
// no submit was necessary, so should signal the extra semaphores from the host
0 commit comments