Skip to content

Commit 5b6e20e

Browse files
author
devsh
committed
keep a pending TLAS build BLAS tracking set linked list
Make the Acceleration Structure Copy Structs strongly typed
1 parent 02c0d94 commit 5b6e20e

12 files changed

+268
-171
lines changed

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 113 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -98,39 +98,6 @@ class IGPUAccelerationStructure : public IBackendObject
9898
}
9999
};
100100

101-
// copies
102-
enum class COPY_MODE : uint8_t
103-
{
104-
CLONE = 0,
105-
COMPACT = 1,
106-
SERIALIZE = 2,
107-
DESERIALIZE = 3,
108-
};
109-
struct CopyInfo
110-
{
111-
const IGPUAccelerationStructure* src = nullptr;
112-
IGPUAccelerationStructure* dst = nullptr;
113-
COPY_MODE mode = COPY_MODE::CLONE;
114-
};
115-
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
116-
struct CopyToMemoryInfo
117-
{
118-
const IGPUAccelerationStructure* src = nullptr;
119-
asset::SBufferBinding<BufferType> dst = nullptr;
120-
COPY_MODE mode = COPY_MODE::SERIALIZE;
121-
};
122-
using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
123-
using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
124-
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
125-
struct CopyFromMemoryInfo
126-
{
127-
asset::SBufferBinding<const BufferType> src = nullptr;
128-
IGPUAccelerationStructure* dst = nullptr;
129-
COPY_MODE mode = COPY_MODE::DESERIALIZE;
130-
};
131-
using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
132-
using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
133-
134101
// this will return false also if your deferred operation is not ready yet, so please use in combination with `isPending()`
135102
virtual bool wasCopySuccessful(const IDeferredOperation* const deferredOp) = 0;
136103

@@ -176,6 +143,30 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat
176143

177144
inline bool usesMotion() const override {return m_params.flags.hasFlags(SCreationParams::FLAGS::MOTION_BIT);}
178145

146+
// copies
147+
struct CopyInfo
148+
{
149+
const IGPUBottomLevelAccelerationStructure* src = nullptr;
150+
IGPUAccelerationStructure* dst = nullptr;
151+
bool compact = false;
152+
};
153+
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
154+
struct CopyToMemoryInfo
155+
{
156+
const IGPUBottomLevelAccelerationStructure* src = nullptr;
157+
asset::SBufferBinding<BufferType> dst = nullptr;
158+
};
159+
using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
160+
using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
161+
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
162+
struct CopyFromMemoryInfo
163+
{
164+
asset::SBufferBinding<const BufferType> src = nullptr;
165+
IGPUBottomLevelAccelerationStructure* dst = nullptr;
166+
};
167+
using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
168+
using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
169+
179170
// read the comments in the .hlsl file, AABB builds ignore certain fields
180171
using BuildRangeInfo = hlsl::acceleration_structures::bottom_level::BuildRangeInfo; // TODO: rename to GeometryRangeInfo, and make `BuildRangeInfo = const GeometryRangeInfo*`
181172
using DirectBuildRangeRangeInfos = const BuildRangeInfo* const*;
@@ -388,6 +379,34 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
388379
//
389380
inline uint32_t getMaxInstanceCount() const {return m_maxInstanceCount;}
390381

382+
// copies
383+
struct CopyInfo
384+
{
385+
const IGPUTopLevelAccelerationStructure* src = nullptr;
386+
IGPUTopLevelAccelerationStructure* dst = nullptr;
387+
bool compact = false;
388+
};
389+
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
390+
struct CopyToMemoryInfo
391+
{
392+
const IGPUTopLevelAccelerationStructure* src = nullptr;
393+
asset::SBufferBinding<BufferType> dst = nullptr;
394+
// [optional] Query the tracked BLASes
395+
core::smart_refctd_dynamic_array<core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> trackedBLASes = nullptr;
396+
};
397+
using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
398+
using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
399+
template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
400+
struct CopyFromMemoryInfo
401+
{
402+
asset::SBufferBinding<const BufferType> src = nullptr;
403+
IGPUTopLevelAccelerationStructure* dst = nullptr;
404+
// [optional] Provide info about what BLAS references to hold onto after the copy. For performance make sure the list is compact (without repeated elements).
405+
std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
406+
};
407+
using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
408+
using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
409+
391410
// read the comments in the .hlsl file
392411
using BuildRangeInfo = hlsl::acceleration_structures::top_level::BuildRangeInfo;
393412
using DirectBuildRangeRangeInfos = const BuildRangeInfo*;
@@ -677,61 +696,87 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
677696
//
678697
using blas_smart_ptr_t = core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>;
679698
// returns number of tracked BLASes if `tracked==nullptr` otherwise writes `*count` tracked BLASes from `first` into `*tracked`
680-
inline build_ver_t getTrackedBLASes(uint32_t* count, blas_smart_ptr_t* tracked, const uint32_t first=0) const
699+
inline void getPendingBuildTrackedBLASes(uint32_t* count, blas_smart_ptr_t* tracked, const build_ver_t buildVer) const
681700
{
682701
if (!count)
683-
return 0;
702+
return;
684703
// stop multiple threads messing with us
685704
std::lock_guard lk(m_trackingLock);
686-
const uint32_t toWrite = std::min<uint32_t>(std::max<uint32_t>(m_trackedBLASes.size(),first)-first,tracked ? (*count):0xffFFffFFu);
687-
*count = toWrite;
688-
if (tracked && toWrite)
689-
{
690-
auto it = m_trackedBLASes.begin();
691-
// cmon its an unordered map, iterator should have operator +=
692-
for (auto i=0; i<first; i++)
693-
it++;
694-
for (auto i=0; i<toWrite; i++)
695-
*(tracked++) = *(it++);
696-
}
697-
return m_completedBuildVer;
705+
auto pBLASes = getPendingBuildTrackedBLASes(buildVer);
706+
*count = pBLASes ? pBLASes->size():0;
707+
if (!tracked || !pBLASes)
708+
return;
709+
for (auto it=pBLASes->begin(); it!=pBLASes->end(); it++)
710+
*(tracked++) = *(it++);
698711
}
699-
// Useful if TLAS got built externally as well, returns if there were no later builds that preempted us setting the result here
712+
// Useful if TLAS got built externally as well
700713
template<typename Iterator>
701-
inline bool setTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer)
714+
inline void insertTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer)
702715
{
716+
if (buildVer==0)
717+
return;
703718
// stop multiple threads messing with us
704719
std::lock_guard lk(m_trackingLock);
705-
// stop out of order callbacks
706-
if (buildVer<=m_completedBuildVer)
707-
return false;
708-
m_completedBuildVer = buildVer;
709-
// release already tracked BLASes
710-
m_trackedBLASes.clear();
711-
// sanity check, TODO: this should be an atomic_max on the `m_pendingBuildVer`
712-
if (m_completedBuildVer>m_pendingBuildVer)
713-
m_pendingBuildVer = m_completedBuildVer;
720+
// insert in the right order
721+
auto prev = m_pendingBuilds.before_begin();
722+
for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>buildVer; prev=it++) {}
723+
auto inserted = m_pendingBuilds.emplace_after(prev);
714724
// now fill the contents
715-
m_trackedBLASes.insert(begin,end);
716-
return true;
725+
inserted->BLASes.insert(begin,end);
726+
inserted->ordinal = buildVer;
727+
}
728+
template<typename Iterator>
729+
inline build_ver_t pushTrackedBLASes(const Iterator begin, const Iterator end)
730+
{
731+
const auto buildVer = registerNextBuildVer();
732+
insertTrackedBLASes<Iterator>(begin,end,buildVer);
733+
return buildVer;
717734
}
718-
// a little utility to make sure nothing from this build version and before gets tracked
719-
inline bool clearTrackedBLASes(const build_ver_t buildVer)
735+
// a little utility to make sure nothing from before this build version gets tracked
736+
inline void clearTrackedBLASes(const build_ver_t buildVer)
720737
{
721-
return setTrackedBLASes<const blas_smart_ptr_t*>(nullptr,nullptr,buildVer);
738+
// stop multiple threads messing with us
739+
std::lock_guard lk(m_trackingLock);
740+
clearTrackedBLASes_impl(buildVer);
722741
}
723742

724743
protected:
725744
inline IGPUTopLevelAccelerationStructure(core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)
726745
: Base(), IGPUAccelerationStructure(std::move(dev),std::move(params)),
727-
m_maxInstanceCount(params.maxInstanceCount),m_trackedBLASes() {}
728-
746+
m_maxInstanceCount(params.maxInstanceCount) {}
729747
const uint32_t m_maxInstanceCount;
748+
749+
private:
750+
friend class IGPUCommandBuffer;
751+
// Looks up the tracking set whose `ordinal` equals `buildVer`; returns nullptr when no such build is recorded.
// Does not take `m_trackingLock` itself, the public overload calls it with the lock already held.
inline const core::unordered_set<blas_smart_ptr_t>* getPendingBuildTrackedBLASes(const build_ver_t buildVer) const
{
	for (const auto& pending : m_pendingBuilds)
	{
		if (pending.ordinal==buildVer)
			return &pending.BLASes;
	}
	return nullptr;
}
758+
inline void clearTrackedBLASes_impl(const build_ver_t buildVer)
759+
{
760+
// find first element less or equal to `buildVer`
761+
auto prev = m_pendingBuilds.before_begin();
762+
for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>=buildVer; prev=it++) {}
763+
m_pendingBuilds.erase_after(prev,m_pendingBuilds.end());
764+
}
765+
766+
std::atomic<build_ver_t> m_pendingBuildVer = 0;
730767
// TODO: maybe replace with new readers/writers lock
731768
mutable std::mutex m_trackingLock;
732-
std::atomic<build_ver_t> m_pendingBuildVer = 0;
733-
build_ver_t m_completedBuildVer = 0;
734-
core::unordered_set<blas_smart_ptr_t> m_trackedBLASes;
769+
// TODO: this definitely needs improving with MultiEventTimelines (which also can track deferred Host ops) but then one needs to track semaphore signal-wait deps so we know what "state copy" a compaction wants
770+
// Deferred Op must complete AFTER a submit, otherwise race condition.
771+
// If we make a linked list of pending builds, then we just need to pop completed builds (traverse until current found)
772+
struct STrackingInfo
773+
{
774+
core::unordered_set<blas_smart_ptr_t> BLASes;
775+
// the build version this set of BLASes was recorded under (see `insertTrackedBLASes`)
776+
build_ver_t ordinal;
777+
};
778+
// a little misleading, the element is the most recently completed one
779+
core::forward_list<STrackingInfo> m_pendingBuilds;
735780
};
736781

737782
}

include/nbl/video/IGPUCommandBuffer.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,12 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
321321
}
322322

323323
//! acceleration structure transfers
324-
bool copyAccelerationStructure(const IGPUAccelerationStructure::CopyInfo& copyInfo);
325-
bool copyAccelerationStructureToMemory(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo);
326-
bool copyAccelerationStructureFromMemory(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo);
324+
template<typename AccelerationStructure> requires std::is_base_of_v<IGPUAccelerationStructure,AccelerationStructure>
325+
bool copyAccelerationStructure(const AccelerationStructure::CopyInfo& copyInfo);
326+
template<typename AccelerationStructure> requires std::is_base_of_v<IGPUAccelerationStructure,AccelerationStructure>
327+
bool copyAccelerationStructureToMemory(const AccelerationStructure::DeviceCopyToMemoryInfo& copyInfo);
328+
template<typename AccelerationStructure> requires std::is_base_of_v<IGPUAccelerationStructure,AccelerationStructure>
329+
bool copyAccelerationStructureFromMemory(const AccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo);
327330

328331
//! state setup
329332
bool bindComputePipeline(const IGPUComputePipeline* const pipeline);
@@ -549,7 +552,31 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
549552
bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs);
550553

551554
// in case you want the commandbuffer to hold onto things as long as its not RESET
552-
bool recordReferences(const std::span<const IReferenceCounted*> refs);
555+
template<typename Iterator>
556+
inline bool recordReferences(Iterator begin, const Iterator end)
557+
{
558+
auto oit = reserveReferences(std::distance(begin,end));
559+
if (oit)
560+
while (begin!=end)
561+
*(oit++) = core::smart_refctd_ptr<const core::IReferenceCounted>(*(begin++));
562+
return oit;
563+
}
564+
// Convenience overload: forwards the span's range to the iterator based `recordReferences`.
inline bool recordReferences(const std::span<const IReferenceCounted*> refs)
{
	return recordReferences(refs.begin(),refs.end());
}
565+
566+
// in case you want the commandbuffer to overwrite the BLAS tracking, e.g. you recorded TLAS building commands directly using `getNativeHandle()` to get the commandbuffer
567+
template<typename Iterator>
568+
inline bool recordBLASReferenceOverwrite(IGPUTopLevelAccelerationStructure* tlas, Iterator beginBLASes, const Iterator endBLASes)
569+
{
570+
const auto size = std::distance(beginBLASes,endBLASes);
571+
auto oit = reserveReferences(size);
572+
if (oit)
573+
{
574+
m_TLASToBLASReferenceSets[tlas] = {oit,size};
575+
while (beginBLASes!=endBLASes)
576+
*(oit++) = core::smart_refctd_ptr<const core::IReferenceCounted>(*(beginBLASes++));
577+
}
578+
return oit;
579+
}
553580

554581
virtual bool insertDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0;
555582
virtual bool beginDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0;
@@ -640,9 +667,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
640667
const uint64_t* const pIndirectOffsets, const uint32_t* const pIndirectStrides, const uint32_t* const pMaxInstanceCounts
641668
) = 0;
642669

643-
virtual bool copyAccelerationStructure_impl(const IGPUAccelerationStructure::CopyInfo& copyInfo) = 0;
644-
virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure::DeviceCopyToMemoryInfo& copyInfo) = 0;
645-
virtual bool copyAccelerationStructureFromMemory_impl(const IGPUAccelerationStructure::DeviceCopyFromMemoryInfo& copyInfo) = 0;
670+
virtual bool copyAccelerationStructure_impl(const IGPUAccelerationStructure* src, IGPUAccelerationStructure* dst, const bool compact) = 0;
671+
virtual bool copyAccelerationStructureToMemory_impl(const IGPUAccelerationStructure* src, const asset::SBufferBinding<IGPUBuffer>& dst) = 0;
672+
virtual bool copyAccelerationStructureFromMemory_impl(const asset::SBufferBinding<const IGPUBuffer>& src, IGPUAccelerationStructure* dst) = 0;
646673

647674
virtual bool bindComputePipeline_impl(const IGPUComputePipeline* const pipeline) = 0;
648675
virtual bool bindGraphicsPipeline_impl(const IGPUGraphicsPipeline* const pipeline) = 0;
@@ -875,12 +902,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
875902
template<typename IndirectCommand> requires nbl::is_any_of_v<IndirectCommand, hlsl::DrawArraysIndirectCommand_t, hlsl::DrawElementsIndirectCommand_t>
876903
bool invalidDrawIndirectCount(const asset::SBufferBinding<const IGPUBuffer>& indirectBinding, const asset::SBufferBinding<const IGPUBuffer>& countBinding, const uint32_t maxDrawCount, const uint32_t stride);
877904

905+
core::smart_refctd_ptr<const core::IReferenceCounted>* reserveReferences(const uint32_t size);
878906

879907
// This bound descriptor set record doesn't include the descriptor sets whose layout has _any_ one of its bindings
880908
// created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT
881909
// or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT.
882910
core::unordered_map<const IGPUDescriptorSet*,uint64_t> m_boundDescriptorSetsRecord;
883-
911+
884912
// If the user wants the builds to be tracking, and make the TLAS remember the BLASes that have been built into it.
885913
// NOTE: We know that a TLAS may be rebuilt multiple times per frame on purpose and not only the final BLASes need to be kept alive till submission finishes.
886914
// However, the Command Pool already tracks resources referenced in the Build Infos, so we only need pointers into those records.
@@ -905,6 +933,13 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
905933
NBL_ENUM_ADD_BITWISE_OPERATORS(IGPUCommandBuffer::USAGE);
906934

907935
#ifndef _NBL_VIDEO_I_GPU_COMMAND_BUFFER_CPP_
936+
extern template bool IGPUCommandBuffer::copyAccelerationStructure<IGPUBottomLevelAccelerationStructure>(const IGPUBottomLevelAccelerationStructure::CopyInfo&);
937+
extern template bool IGPUCommandBuffer::copyAccelerationStructure<IGPUTopLevelAccelerationStructure>(const IGPUTopLevelAccelerationStructure::CopyInfo&);
938+
extern template bool IGPUCommandBuffer::copyAccelerationStructureToMemory<IGPUBottomLevelAccelerationStructure>(const IGPUBottomLevelAccelerationStructure::DeviceCopyToMemoryInfo&);
939+
extern template bool IGPUCommandBuffer::copyAccelerationStructureToMemory<IGPUTopLevelAccelerationStructure>(const IGPUTopLevelAccelerationStructure::DeviceCopyToMemoryInfo&);
940+
extern template bool IGPUCommandBuffer::copyAccelerationStructureFromMemory<IGPUBottomLevelAccelerationStructure>(const IGPUBottomLevelAccelerationStructure::DeviceCopyFromMemoryInfo&);
941+
extern template bool IGPUCommandBuffer::copyAccelerationStructureFromMemory<IGPUTopLevelAccelerationStructure>(const IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo&);
942+
908943
extern template uint32_t IGPUCommandBuffer::buildAccelerationStructures_common<IGPUBottomLevelAccelerationStructure::DeviceBuildInfo,IGPUBottomLevelAccelerationStructure::DirectBuildRangeRangeInfos>(
909944
const std::span<const IGPUBottomLevelAccelerationStructure::DeviceBuildInfo>, IGPUBottomLevelAccelerationStructure::DirectBuildRangeRangeInfos, const IGPUBuffer* const
910945
);

0 commit comments

Comments
 (0)