Skip to content

Commit bde8b9f

Browse files
author
devsh
committed
Fix the handling of allowDataAccess, maxInstanceCount and motion bits in the Converter
1 parent d409894 commit bde8b9f

File tree

4 files changed

+94
-35
lines changed

4 files changed

+94
-35
lines changed

include/nbl/asset/IAccelerationStructure.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class IBottomLevelAccelerationStructure : public AccelerationStructure
6060
// Provided by VK_NV_displacement_micromap
6161
ALLOW_DISPLACEMENT_MICROMAP_UPDATE_BIT = 0x1u<<9u,
6262
// Provided by VK_KHR_ray_tracing_position_fetch
63-
ALLOW_DATA_ACCESS_KHR = 0x1u<<11u,
63+
ALLOW_DATA_ACCESS = 0x1u<<11u,
6464
};
6565
static inline bool validBuildFlags(const core::bitflag<BUILD_FLAGS> flags)
6666
{

include/nbl/asset/ICPUAccelerationStructure.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ class ICPUTopLevelAccelerationStructure final : public ITopLevelAccelerationStru
361361
inline bool usesMotion() const override
362362
{
363363
for (const auto& instance : *m_instances)
364-
if (instance.getType()!=INSTANCE_TYPE::STATIC)
364+
if (instance.getType()!=INSTANCE_TYPE::STATIC || instance.getBase().blas && instance.getBase().blas->usesMotion())
365365
return true;
366366
return false;
367367
}

include/nbl/video/utilities/CAssetConverter.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,7 @@ class CAssetConverter : public core::IReferenceCounted
3939
asset::ICPUSampler,
4040
asset::ICPUShader,
4141
asset::ICPUBuffer,
42-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
4342
asset::ICPUBottomLevelAccelerationStructure,
44-
#endif
4543
asset::ICPUTopLevelAccelerationStructure,
4644
asset::ICPUImage,
4745
asset::ICPUBufferView,
@@ -173,10 +171,10 @@ class CAssetConverter : public core::IReferenceCounted
173171
Invalid = 3
174172
};
175173

174+
uint8_t isMotion : 1 = false;
176175
//! select build flags
177176
uint8_t allowUpdate : 1 = false;
178177
uint8_t allowCompaction : 1 = false;
179-
uint8_t allowDataAccess : 1 = false;
180178
BuildPreference preference : 2 = BuildPreference::Invalid;
181179
uint8_t lowMemory : 1 = false;
182180
//! things that control the build
@@ -192,9 +190,9 @@ class CAssetConverter : public core::IReferenceCounted
192190
if (_this.preference!=other.preference || _this.preference==BuildPreference::Invalid)
193191
return {false,_this};
194192
CRTP retval = _this;
193+
retval.isMotion |= other.isMotion;
195194
retval.allowUpdate |= other.allowUpdate;
196195
retval.allowCompaction |= other.allowCompaction;
197-
retval.allowDataAccess |= other.allowDataAccess;
198196
retval.lowMemory |= other.lowMemory;
199197
// Host Builds are presumed to be "beter quality" and lower staging resource pressure,
200198
// we may change the behaviour here in the future
@@ -212,10 +210,14 @@ class CAssetConverter : public core::IReferenceCounted
212210
using build_flags_t = asset::ICPUBottomLevelAccelerationStructure::BUILD_FLAGS;
213211
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUBottomLevelAccelerationStructure* blas) const;
214212

213+
uint8_t allowDataAccess : 1 = false;
214+
215215
protected:
216216
inline std::pair<bool,this_t> combine(const this_t& other) const
217217
{
218-
return combine_impl<this_t>(*this,other);
218+
auto retval = combine_impl<this_t>(*this,other);
219+
retval.second.allowDataAccess |= other.allowDataAccess;
220+
return retval;
219221
}
220222
};
221223
template<>
@@ -227,10 +229,14 @@ class CAssetConverter : public core::IReferenceCounted
227229
using build_flags_t = asset::ICPUTopLevelAccelerationStructure::BUILD_FLAGS;
228230
core::bitflag<build_flags_t> getBuildFlags(const asset::ICPUTopLevelAccelerationStructure* tlas) const;
229231

232+
uint32_t maxInstances = 0;
233+
230234
protected:
231235
inline std::pair<bool,this_t> combine(const this_t& other) const
232236
{
233-
return combine_impl<this_t>(*this,other);
237+
auto retval = combine_impl<this_t>(*this,other);
238+
retval.second.maxInstances = std::max(maxInstances,other.maxInstances);
239+
return retval;
234240
}
235241
};
236242
template<>
@@ -542,10 +548,8 @@ class CAssetConverter : public core::IReferenceCounted
542548
virtual const patch_t<asset::ICPUSampler>* operator()(const lookup_t<asset::ICPUSampler>&) const = 0;
543549
virtual const patch_t<asset::ICPUShader>* operator()(const lookup_t<asset::ICPUShader>&) const = 0;
544550
virtual const patch_t<asset::ICPUBuffer>* operator()(const lookup_t<asset::ICPUBuffer>&) const = 0;
545-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
546551
virtual const patch_t<asset::ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUBottomLevelAccelerationStructure>&) const = 0;
547552
virtual const patch_t<asset::ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<asset::ICPUTopLevelAccelerationStructure>&) const = 0;
548-
#endif
549553
virtual const patch_t<asset::ICPUImage>* operator()(const lookup_t<asset::ICPUImage>&) const = 0;
550554
virtual const patch_t<asset::ICPUBufferView>* operator()(const lookup_t<asset::ICPUBufferView>&) const = 0;
551555
virtual const patch_t<asset::ICPUImageView>* operator()(const lookup_t<asset::ICPUImageView>&) const = 0;

src/nbl/video/utilities/CAssetConverter.cpp

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,11 @@ bool CAssetConverter::acceleration_structure_patch_base::valid(const ILogicalDev
109109
const auto& features = device->getEnabledFeatures();
110110
if (!features.accelerationStructure)
111111
return false;
112+
//
113+
if (isMotion && !features.rayTracingMotionBlur)
114+
return false;
112115
// just make the flags agree/canonicalize
113116
allowCompaction = allowCompaction || compactAfterBuild;
114-
// on a second thought, if someone asked for BLAS with data access, they probably intend to use it
115-
const auto& limits = device->getPhysicalDevice()->getLimits();
116-
if (allowDataAccess && !limits.rayTracingPositionFetch)
117-
return false;
118117
// can always build with the device
119118
if (hostBuild)
120119
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION_HOST_READY
@@ -134,25 +133,26 @@ CAssetConverter::patch_impl_t<ICPUBottomLevelAccelerationStructure>::patch_impl_
134133
if (flags.hasFlags(build_flags_t::PREFER_FAST_TRACE_BIT|build_flags_t::PREFER_FAST_BUILD_BIT))
135134
return;
136135

136+
isMotion = blas->usesMotion();
137137
allowUpdate = flags.hasFlags(build_flags_t::ALLOW_UPDATE_BIT);
138138
allowCompaction = flags.hasFlags(build_flags_t::ALLOW_COMPACTION_BIT);
139-
allowDataAccess = flags.hasFlags(build_flags_t::ALLOW_DATA_ACCESS_KHR);
140139
if (flags.hasFlags(build_flags_t::PREFER_FAST_TRACE_BIT))
141140
preference = BuildPreference::FastTrace;
142141
else if (flags.hasFlags(build_flags_t::PREFER_FAST_BUILD_BIT))
143142
preference = BuildPreference::FastBuild;
144143
else
145144
preference = BuildPreference::None;
146145
lowMemory = flags.hasFlags(build_flags_t::LOW_MEMORY_BIT);
146+
allowDataAccess = flags.hasFlags(build_flags_t::ALLOW_DATA_ACCESS);
147147
}
148148
auto CAssetConverter::patch_impl_t<ICPUBottomLevelAccelerationStructure>::getBuildFlags(const ICPUBottomLevelAccelerationStructure* blas) const -> core::bitflag<build_flags_t>
149149
{
150-
constexpr build_flags_t OverridableMask = build_flags_t::LOW_MEMORY_BIT|build_flags_t::PREFER_FAST_TRACE_BIT|build_flags_t::PREFER_FAST_BUILD_BIT|build_flags_t::ALLOW_COMPACTION_BIT|build_flags_t::ALLOW_UPDATE_BIT|build_flags_t::ALLOW_DATA_ACCESS_KHR;
150+
constexpr build_flags_t OverridableMask = build_flags_t::LOW_MEMORY_BIT|build_flags_t::PREFER_FAST_TRACE_BIT|build_flags_t::PREFER_FAST_BUILD_BIT|build_flags_t::ALLOW_COMPACTION_BIT|build_flags_t::ALLOW_UPDATE_BIT|build_flags_t::ALLOW_DATA_ACCESS;
151151
auto flags = blas->getBuildFlags()&(~OverridableMask);
152152
if (lowMemory)
153153
flags |= build_flags_t::LOW_MEMORY_BIT;
154154
if (allowDataAccess)
155-
flags |= build_flags_t::ALLOW_DATA_ACCESS_KHR;
155+
flags |= build_flags_t::ALLOW_DATA_ACCESS;
156156
if (allowCompaction)
157157
flags |= build_flags_t::ALLOW_COMPACTION_BIT;
158158
if (allowUpdate)
@@ -172,6 +172,10 @@ auto CAssetConverter::patch_impl_t<ICPUBottomLevelAccelerationStructure>::getBui
172172
}
173173
bool CAssetConverter::patch_impl_t<ICPUBottomLevelAccelerationStructure>::valid(const ILogicalDevice* device)
174174
{
175+
// on a second thought, if someone asked for BLAS with data access, they probably intend to use it
176+
const auto& limits = device->getPhysicalDevice()->getLimits();
177+
if (allowDataAccess && !limits.rayTracingPositionFetch)
178+
return false;
175179
return acceleration_structure_patch_base::valid(device);
176180
}
177181
CAssetConverter::patch_impl_t<ICPUTopLevelAccelerationStructure>::patch_impl_t(const ICPUTopLevelAccelerationStructure* tlas)
@@ -183,16 +187,17 @@ CAssetConverter::patch_impl_t<ICPUTopLevelAccelerationStructure>::patch_impl_t(c
183187
if (flags.hasFlags(build_flags_t::PREFER_FAST_TRACE_BIT|build_flags_t::PREFER_FAST_BUILD_BIT))
184188
return;
185189

190+
isMotion = tlas->usesMotion();
186191
allowUpdate = flags.hasFlags(build_flags_t::ALLOW_UPDATE_BIT);
187192
allowCompaction = flags.hasFlags(build_flags_t::ALLOW_COMPACTION_BIT);
188-
allowDataAccess = false;
189193
if (flags.hasFlags(build_flags_t::PREFER_FAST_TRACE_BIT))
190194
preference = BuildPreference::FastTrace;
191195
else if (flags.hasFlags(build_flags_t::PREFER_FAST_BUILD_BIT))
192196
preference = BuildPreference::FastBuild;
193197
else
194198
preference = BuildPreference::None;
195199
lowMemory = flags.hasFlags(build_flags_t::LOW_MEMORY_BIT);
200+
maxInstances = tlas->getInstances().size();
196201
}
197202
auto CAssetConverter::patch_impl_t<ICPUTopLevelAccelerationStructure>::getBuildFlags(const ICPUTopLevelAccelerationStructure* tlas) const -> core::bitflag<build_flags_t>
198203
{
@@ -441,7 +446,6 @@ class AssetVisitor : public CRTP
441446
}
442447

443448
private:
444-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
445449
// there is no `impl()` overload taking `ICPUTopLevelAccelerationStructure` same as there is no `ICPUmage`
446450
inline bool impl(const instance_t<ICPUTopLevelAccelerationStructure>& instance, const CAssetConverter::patch_t<ICPUTopLevelAccelerationStructure>& userPatch)
447451
{
@@ -454,14 +458,11 @@ class AssetVisitor : public CRTP
454458
if (!blas)
455459
return false;
456460
CAssetConverter::patch_t<ICPUBottomLevelAccelerationStructure> patch = {blas};
457-
if (userPatch.allowDataAccess) // TODO: check if all BLAS within TLAS need to have the flag ON vs OFF or only some
458-
patch.allowDataAccess = true;
459461
if (!descend(blas,std::move(patch),i))
460462
return false;
461463
}
462464
return true;
463465
}
464-
#endif
465466
inline bool impl(const instance_t<ICPUBufferView>& instance, const CAssetConverter::patch_t<ICPUBufferView>& userPatch)
466467
{
467468
const auto* dep = instance.asset->getUnderlyingBuffer();
@@ -698,15 +699,13 @@ class AssetVisitor : public CRTP
698699
return false;
699700
break;
700701
}
701-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
702702
case IDescriptor::EC_ACCELERATION_STRUCTURE:
703703
{
704704
auto tlas = static_cast<const ICPUTopLevelAccelerationStructure*>(untypedDesc);
705705
if (!descend(tlas,{tlas},type,binding,el))
706706
return false;
707707
break;
708708
}
709-
#endif
710709
default:
711710
assert(false);
712711
return false;
@@ -961,6 +960,45 @@ class DFSVisitor
961960
core::stack<patched_instance_t>& stack;
962961
};
963962

963+
// because we need to iterate over all BLAS' patches to check is one of them got patched with Motion
964+
class CheckBLASPatchMotions
965+
{
966+
public:
967+
using AssetType = ICPUTopLevelAccelerationStructure;
968+
969+
const CAssetConverter::SInputs& inputs;
970+
const dfs_cache<ICPUBottomLevelAccelerationStructure>& visitedBLASes;
971+
bool isMotion = false;
972+
973+
protected:
974+
template<typename DepType>
975+
void nullOptional() const {}
976+
977+
inline size_t getDependantUniqueCopyGroupID(const size_t usersGroupCopyID, const AssetType* user, const ICPUBottomLevelAccelerationStructure* dep) const
978+
{
979+
return inputs.getDependantUniqueCopyGroupID(usersGroupCopyID,user,dep);
980+
}
981+
982+
bool descend_impl(
983+
const instance_t<AssetType>& user, const CAssetConverter::patch_t<AssetType>& userPatch,
984+
const instance_t<ICPUBottomLevelAccelerationStructure>& dep, const CAssetConverter::patch_t<ICPUBottomLevelAccelerationStructure>& soloPatch,
985+
const uint32_t instanceIndex // not the custom index, its literally just an ordinal in `getInstances()`
986+
)
987+
{
988+
// find matching patch in dfsCache
989+
const auto patchIx = visitedBLASes.find(dep,soloPatch);
990+
// must be found, must have been visited
991+
assert(bool(patchIx));
992+
// want to stop the visits after finding first BLAS with motion
993+
if (visitedBLASes.nodes[patchIx.value].patch.isMotion)
994+
{
995+
isMotion = true;
996+
return false;
997+
}
998+
return true;
999+
}
1000+
};
1001+
9641002
// go forth and find first patch that matches
9651003
class PatchOverride final : public CAssetConverter::CHashCache::IPatchOverride
9661004
{
@@ -993,10 +1031,8 @@ class PatchOverride final : public CAssetConverter::CHashCache::IPatchOverride
9931031
inline const patch_t<ICPUSampler>* operator()(const lookup_t<ICPUSampler>& lookup) const override {return impl(lookup);}
9941032
inline const patch_t<ICPUShader>* operator()(const lookup_t<ICPUShader>& lookup) const override {return impl(lookup);}
9951033
inline const patch_t<ICPUBuffer>* operator()(const lookup_t<ICPUBuffer>& lookup) const override {return impl(lookup);}
996-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
9971034
inline const patch_t<ICPUBottomLevelAccelerationStructure>* operator()(const lookup_t<ICPUBottomLevelAccelerationStructure>& lookup) const override {return impl(lookup);}
9981035
inline const patch_t<ICPUTopLevelAccelerationStructure>* operator()(const lookup_t<ICPUTopLevelAccelerationStructure>& lookup) const override {return impl(lookup);}
999-
#endif
10001036
inline const patch_t<ICPUImage>* operator()(const lookup_t<ICPUImage>& lookup) const override {return impl(lookup);}
10011037
inline const patch_t<ICPUBufferView>* operator()(const lookup_t<ICPUBufferView>& lookup) const override {return impl(lookup);}
10021038
inline const patch_t<ICPUImageView>* operator()(const lookup_t<ICPUImageView>& lookup) const override {return impl(lookup);}
@@ -1107,13 +1143,13 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUBuffer> loo
11071143
hasher.update(&patchedParams,sizeof(patchedParams)) << lookup.asset->getContentHash();
11081144
return true;
11091145
}
1110-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
11111146
bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUBottomLevelAccelerationStructure> lookup)
11121147
{
11131148
// extras from the patch
11141149
hasher << lookup.patch->hostBuild;
11151150
hasher << lookup.patch->compactAfterBuild;
11161151
// overriden flags
1152+
hasher << lookup.patch->isMotion;
11171153
hasher << lookup.patch->getBuildFlags(lookup.asset);
11181154
// finally the contents
11191155
//TODO: hasher << lookup.asset->getContentHash();
@@ -1122,6 +1158,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUBottomLevel
11221158
bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAccelerationStructure> lookup)
11231159
{
11241160
const auto* asset = lookup.asset;
1161+
#if 0
11251162
//
11261163
AssetVisitor<HashVisit<ICPUTopLevelAccelerationStructure>> visitor = {
11271164
*this,
@@ -1134,6 +1171,7 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAcc
11341171
hasher << lookup.patch->hostBuild;
11351172
hasher << lookup.patch->compactAfterBuild;
11361173
// overriden flags
1174+
hasher << lookup.patch->isMotion;
11371175
hasher << lookup.patch->getBuildFlags(lookup.asset);
11381176
const auto instances = asset->getInstances();
11391177
// important two passes to not give identical data due to variable length polymorphic array being hashed
@@ -1150,9 +1188,9 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUTopLevelAcc
11501188
instance.instance
11511189
);
11521190
}
1191+
#endif
11531192
return true;
11541193
}
1155-
#endif
11561194
bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUImage> lookup)
11571195
{
11581196
// failed promotion
@@ -1564,10 +1602,8 @@ void CAssetConverter::CHashCache::eraseStale(const IPatchOverride* patchOverride
15641602
rehash.operator()<ICPUBufferView>();
15651603
rehash.operator()<ICPUImage>();
15661604
rehash.operator()<ICPUImageView>();
1567-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
15681605
rehash.operator()<ICPUBottomLevelAccelerationStructure>();
15691606
rehash.operator()<ICPUTopLevelAccelerationStructure>();
1570-
#endif
15711607
// only once all the descriptor types have been hashed, we can hash sets
15721608
rehash.operator()<ICPUDescriptorSet>();
15731609
// naturally any pipeline depends on shaders and pipeline cache
@@ -1617,12 +1653,11 @@ class GetDependantVisitBase
16171653
template<Asset AssetType>
16181654
class GetDependantVisit;
16191655

1620-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
16211656
template<>
16221657
class GetDependantVisit<ICPUTopLevelAccelerationStructure> : public GetDependantVisitBase<ICPUTopLevelAccelerationStructure>
16231658
{
16241659
public:
1625-
// because of the deferred building of TLASes and lack of lifetime tracking between them, nothing to do on some passes
1660+
// because of zero access to the lifetime tracking between TLASes and BLASes, do nothing
16261661
//core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>* const outBLASes;
16271662

16281663
protected:
@@ -1639,7 +1674,7 @@ class GetDependantVisit<ICPUTopLevelAccelerationStructure> : public GetDependant
16391674
return true;
16401675
}
16411676
};
1642-
#endif
1677+
16431678
template<>
16441679
class GetDependantVisit<ICPUBufferView> : public GetDependantVisitBase<ICPUBufferView>
16451680
{
@@ -2072,11 +2107,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
20722107
case ICPUImageView::AssetType:
20732108
visit.operator()<ICPUImageView>(entry);
20742109
break;
2075-
#ifdef NBL_ACCELERATION_STRUCTURE_CONVERSION
20762110
case ICPUTopLevelAccelerationStructure::AssetType:
20772111
visit.operator()<ICPUTopLevelAccelerationStructure>(entry);
20782112
break;
2079-
#endif
20802113
// these assets have no dependants, should have never been pushed on the stack
20812114
default:
20822115
assert(false);
@@ -2165,6 +2198,28 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
21652198
}
21662199
}
21672200
);
2201+
// special pass to propagate Motion Acceleration Structure flag upwards from BLAS to referencing TLAS
2202+
std::get<dfs_cache<ICPUTopLevelAccelerationStructure>>(dfsCaches).for_each([device,&inputs,&dfsCaches](const instance_t<ICPUTopLevelAccelerationStructure>& assetInstance, dfs_cache<ICPUTopLevelAccelerationStructure>::created_t& created)->void
2203+
{
2204+
auto& patch = created.patch;
2205+
// we already have motion, can stop searching
2206+
if (patch.isMotion)
2207+
return;
2208+
auto visitor = AssetVisitor<CheckBLASPatchMotions>{
2209+
{
2210+
.inputs = inputs,
2211+
.visitedBLASes = std::get<dfs_cache<ICPUBottomLevelAccelerationStructure>>(dfsCaches)
2212+
},
2213+
// construct a casted instance type
2214+
{assetInstance.asset,assetInstance.uniqueCopyGroupID},
2215+
patch
2216+
};
2217+
// don't care about success, I've abused the termination criteria, will return false sometimes
2218+
visitor();
2219+
// I don't need to check if the new patch is valid, because we checked if the Motion Raytracing feature is enabled when checking BLASes for validity
2220+
patch.isMotion = visitor.isMotion;
2221+
}
2222+
);
21682223
}
21692224
//! `inputsMetadata` is now constant!
21702225
//! `dfsCache` keys are now constant!

0 commit comments

Comments
 (0)