@@ -98,39 +98,6 @@ class IGPUAccelerationStructure : public IBackendObject
	}
};

-// copies
-enum class COPY_MODE : uint8_t
-{
-	CLONE = 0,
-	COMPACT = 1,
-	SERIALIZE = 2,
-	DESERIALIZE = 3,
-};
-struct CopyInfo
-{
-	const IGPUAccelerationStructure* src = nullptr;
-	IGPUAccelerationStructure* dst = nullptr;
-	COPY_MODE mode = COPY_MODE::CLONE;
-};
-template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
-struct CopyToMemoryInfo
-{
-	const IGPUAccelerationStructure* src = nullptr;
-	asset::SBufferBinding<BufferType> dst = nullptr;
-	COPY_MODE mode = COPY_MODE::SERIALIZE;
-};
-using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
-using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
-template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
-struct CopyFromMemoryInfo
-{
-	asset::SBufferBinding<const BufferType> src = nullptr;
-	IGPUAccelerationStructure* dst = nullptr;
-	COPY_MODE mode = COPY_MODE::DESERIALIZE;
-};
-using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
-using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
-
// this will return false also if your deferred operation is not ready yet, so please use in combination with `isPending()`
virtual bool wasCopySuccessful(const IDeferredOperation* const deferredOp) = 0;

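The comment on `wasCopySuccessful` implies a polling pattern. Below is a minimal sketch of that pattern; it assumes the `nbl::video` namespace, that `isPending()` is available on the deferred operation, and that `deferredOp` came from a host-side copy issued elsewhere, so treat it as illustrative rather than as the library's documented API.

    // Illustrative only: distinguish "still running" from "finished and failed".
    enum class CopyStatus { Pending, Succeeded, Failed };
    CopyStatus queryCopyStatus(const nbl::video::IGPUAccelerationStructure* as, nbl::video::IDeferredOperation* deferredOp)
    {
        if (deferredOp->isPending())
            return CopyStatus::Pending; // `wasCopySuccessful` would also report false here
        return as->wasCopySuccessful(deferredOp) ? CopyStatus::Succeeded : CopyStatus::Failed;
    }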
@@ -176,6 +143,30 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat

inline bool usesMotion() const override {return m_params.flags.hasFlags(SCreationParams::FLAGS::MOTION_BIT);}

+// copies
+struct CopyInfo
+{
+	const IGPUBottomLevelAccelerationStructure* src = nullptr;
+	IGPUAccelerationStructure* dst = nullptr;
+	bool compact = false;
+};
+template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
+struct CopyToMemoryInfo
+{
+	const IGPUBottomLevelAccelerationStructure* src = nullptr;
+	asset::SBufferBinding<BufferType> dst = nullptr;
+};
+using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
+using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
+template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
+struct CopyFromMemoryInfo
+{
+	asset::SBufferBinding<const BufferType> src = nullptr;
+	IGPUBottomLevelAccelerationStructure* dst = nullptr;
+};
+using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
+using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
+
// read the comments in the .hlsl file, AABB builds ignore certain fields
using BuildRangeInfo = hlsl::acceleration_structures::bottom_level::BuildRangeInfo; // TODO: rename to GeometryRangeInfo, and make `BuildRangeInfo = const GeometryRangeInfo*`
using DirectBuildRangeRangeInfos = const BuildRangeInfo* const *;
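The per-class copy structs above replace the old mode enum with a single `compact` flag. A hedged sketch of filling `CopyInfo` for a compacting BLAS-to-BLAS copy follows; the helper name is illustrative, the destination is assumed to have been created with the queried compacted size, and the command that actually consumes the struct is not shown.

    // Illustrative only: fill out the new BLAS CopyInfo for a compaction copy.
    nbl::video::IGPUBottomLevelAccelerationStructure::CopyInfo makeCompactionCopy(
        const nbl::video::IGPUBottomLevelAccelerationStructure* builtSrc,
        nbl::video::IGPUBottomLevelAccelerationStructure* compactedDst)
    {
        nbl::video::IGPUBottomLevelAccelerationStructure::CopyInfo info = {};
        info.src = builtSrc;      // must already be built
        info.dst = compactedDst;  // assumed sized from a compacted-size query
        info.compact = true;      // plays the role of the removed COPY_MODE::COMPACT
        return info;
    }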
@@ -388,6 +379,34 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
//
inline uint32_t getMaxInstanceCount() const {return m_maxInstanceCount;}

+// copies
+struct CopyInfo
+{
+	const IGPUTopLevelAccelerationStructure* src = nullptr;
+	IGPUTopLevelAccelerationStructure* dst = nullptr;
+	bool compact = false;
+};
+template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
+struct CopyToMemoryInfo
+{
+	const IGPUTopLevelAccelerationStructure* src = nullptr;
+	asset::SBufferBinding<BufferType> dst = nullptr;
+	// [optional] Query the tracked BLASes
+	core::smart_refctd_dynamic_array<core::smart_refctd_ptr<IGPUBottomLevelAccelerationStructure>> trackedBLASes = nullptr;
+};
+using DeviceCopyToMemoryInfo = CopyToMemoryInfo<IGPUBuffer>;
+using HostCopyToMemoryInfo = CopyToMemoryInfo<asset::ICPUBuffer>;
+template<typename BufferType> requires (!std::is_const_v<BufferType> && std::is_base_of_v<asset::IBuffer,BufferType>)
+struct CopyFromMemoryInfo
+{
+	asset::SBufferBinding<const BufferType> src = nullptr;
+	IGPUTopLevelAccelerationStructure* dst = nullptr;
+	// [optional] Provide info about what BLAS references to hold onto after the copy. For performance make sure the list is compact (without repeated elements).
+	std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
+};
+using DeviceCopyFromMemoryInfo = CopyFromMemoryInfo<IGPUBuffer>;
+using HostCopyFromMemoryInfo = CopyFromMemoryInfo<asset::ICPUBuffer>;
+
// read the comments in the .hlsl file
using BuildRangeInfo = hlsl::acceleration_structures::top_level::BuildRangeInfo;
using DirectBuildRangeRangeInfos = const BuildRangeInfo*;
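The TLAS variants additionally let the caller hand over (or retrieve) the set of BLASes the structure should keep referencing. Below is a hedged sketch of preparing a device `CopyFromMemoryInfo` for deserialization; the `SBufferBinding` field names (`offset`, `buffer`) and the surrounding helper are assumptions for illustration, and the actual copy command is issued elsewhere.

    // Illustrative only: deserialize a TLAS from a device buffer and tell it which BLASes to hold onto.
    nbl::video::IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo makeDeserializeInfo(
        nbl::core::smart_refctd_ptr<const nbl::video::IGPUBuffer> serialized,
        nbl::video::IGPUTopLevelAccelerationStructure* tlas,
        std::span<const nbl::video::IGPUBottomLevelAccelerationStructure*> referencedBLASes)
    {
        nbl::video::IGPUTopLevelAccelerationStructure::DeviceCopyFromMemoryInfo info = {};
        info.src.offset = 0ull;                // assumed SBufferBinding layout: offset + buffer
        info.src.buffer = std::move(serialized);
        info.dst = tlas;
        // per the header comment above, keep this list compact (no repeated elements)
        info.trackedBLASes = referencedBLASes;
        return info;
    }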
@@ -677,61 +696,87 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
//
using blas_smart_ptr_t = core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>;
// returns number of tracked BLASes if `tracked==nullptr` otherwise writes `*count` tracked BLASes from `first` into `*tracked`
-inline build_ver_t getTrackedBLASes(uint32_t* count, blas_smart_ptr_t* tracked, const uint32_t first=0) const
+inline void getPendingBuildTrackedBLASes(uint32_t* count, blas_smart_ptr_t* tracked, const build_ver_t buildVer) const
{
	if (!count)
-		return 0;
+		return;
	// stop multiple threads messing with us
	std::lock_guard lk(m_trackingLock);
-	const uint32_t toWrite = std::min<uint32_t>(std::max<uint32_t>(m_trackedBLASes.size(),first)-first,tracked ? (*count):0xffFFffFFu);
-	*count = toWrite;
-	if (tracked && toWrite)
-	{
-		auto it = m_trackedBLASes.begin();
-		// cmon its an unordered map, iterator should have operator +=
-		for (auto i=0; i<first; i++)
-			it++;
-		for (auto i=0; i<toWrite; i++)
-			*(tracked++) = *(it++);
-	}
-	return m_completedBuildVer;
+	auto pBLASes = getPendingBuildTrackedBLASes(buildVer);
+	*count = pBLASes ? pBLASes->size():0;
+	if (!tracked || !pBLASes)
+		return;
+	for (auto it=pBLASes->begin(); it!=pBLASes->end(); it++)
+		*(tracked++) = *(it++);
}
-// Useful if TLAS got built externally as well, returns if there were no later builds that preempted us setting the result here
+// Useful if TLAS got built externally as well
template<typename Iterator>
-inline bool setTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer)
+inline void insertTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer)
{
+	if (buildVer==0)
+		return;
	// stop multiple threads messing with us
	std::lock_guard lk(m_trackingLock);
-	// stop out of order callbacks
-	if (buildVer<=m_completedBuildVer)
-		return false;
-	m_completedBuildVer = buildVer;
-	// release already tracked BLASes
-	m_trackedBLASes.clear();
-	// sanity check, TODO: this should be an atomic_max on the `m_pendingBuildVer`
-	if (m_completedBuildVer>m_pendingBuildVer)
-		m_pendingBuildVer = m_completedBuildVer;
+	// insert in the right order
+	auto prev = m_pendingBuilds.before_begin();
+	for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>buildVer; prev=it++) {}
+	auto inserted = m_pendingBuilds.emplace_after(prev);
	// now fill the contents
-	m_trackedBLASes.insert(begin,end);
-	return true;
+	inserted->BLASes.insert(begin,end);
+	inserted->ordinal = buildVer;
+}
+template<typename Iterator>
+inline build_ver_t pushTrackedBLASes(const Iterator begin, const Iterator end)
+{
+	const auto buildVer = registerNextBuildVer();
+	insertTrackedBLASes<Iterator>(begin,end,buildVer);
+	return buildVer;
}
-// a little utility to make sure nothing from this build version and before gets tracked
-inline bool clearTrackedBLASes(const build_ver_t buildVer)
+// a little utility to make sure nothing from before this build version gets tracked
+inline void clearTrackedBLASes(const build_ver_t buildVer)
{
-	return setTrackedBLASes<const blas_smart_ptr_t*>(nullptr,nullptr,buildVer);
+	// stop multiple threads messing with us
+	std::lock_guard lk(m_trackingLock);
+	clearTrackedBLASes_impl(buildVer);
}

protected:
inline IGPUTopLevelAccelerationStructure(core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)
	: Base(), IGPUAccelerationStructure(std::move(dev),std::move(params)),
-	m_maxInstanceCount(params.maxInstanceCount),m_trackedBLASes() {}
-
+	m_maxInstanceCount(params.maxInstanceCount) {}
const uint32_t m_maxInstanceCount;
+
+private:
+friend class IGPUCommandBuffer;
+inline const core::unordered_set<blas_smart_ptr_t>* getPendingBuildTrackedBLASes(const build_ver_t buildVer) const
+{
+	const auto found = std::find_if(m_pendingBuilds.begin(),m_pendingBuilds.end(),[buildVer](const auto& item)->bool{return item.ordinal==buildVer;});
+	if (found==m_pendingBuilds.end())
+		return nullptr;
+	return &found->BLASes;
+}
+inline void clearTrackedBLASes_impl(const build_ver_t buildVer)
+{
+	// find first element less or equal to `buildVer`
+	auto prev = m_pendingBuilds.before_begin();
+	for (auto it=std::next(prev); it!=m_pendingBuilds.end()&&it->ordinal>=buildVer; prev=it++) {}
+	m_pendingBuilds.erase_after(prev,m_pendingBuilds.end());
+}
+
+std::atomic<build_ver_t> m_pendingBuildVer = 0;
// TODO: maybe replace with new readers/writers lock
mutable std::mutex m_trackingLock;
-std::atomic<build_ver_t> m_pendingBuildVer = 0;
-build_ver_t m_completedBuildVer = 0;
-core::unordered_set<blas_smart_ptr_t> m_trackedBLASes;
+// TODO: this definitely needs improving with MultiEventTimelines (which also can track deferred Host ops) but then one needs to track semaphore signal-wait deps so we know what "state copy" a compaction wants
+// Deferred Op must complete AFTER a submit, otherwise race condition.
+// If we make a linked list of pending builds, then we just need to pop completed builds (traverse until current found)
+struct STrackingInfo
+{
+	core::unordered_set<blas_smart_ptr_t> BLASes;
+	// when the build got
+	build_ver_t ordinal;
+};
+// a little misleading, the element is the most recently completed one
+core::forward_list<STrackingInfo> m_pendingBuilds;
};

}
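The reworked tracking API is versioned: each externally issued build registers the BLASes it references under a build ordinal, and later queries and cleanup are done against that ordinal. A hedged usage sketch follows; the `tlas` and `blases` variables, the container choice, and the accessibility of `blas_smart_ptr_t` are assumptions, and engine headers plus <vector> are assumed to be included.

    // Illustrative only: record the BLASes referenced by an externally issued TLAS build,
    // read them back later, and finally drop tracking data from builds older than this one.
    using blas_ptr_t = nbl::video::IGPUTopLevelAccelerationStructure::blas_smart_ptr_t;

    void trackExternalBuild(nbl::video::IGPUTopLevelAccelerationStructure* tlas, const std::vector<blas_ptr_t>& blases)
    {
        // registers a new build version and associates the BLAS set with it
        const auto buildVer = tlas->pushTrackedBLASes(blases.begin(), blases.end());

        // two-call pattern: first ask for the count, then fetch the smart pointers
        uint32_t count = 0u;
        tlas->getPendingBuildTrackedBLASes(&count, nullptr, buildVer);
        std::vector<blas_ptr_t> tracked(count);
        tlas->getPendingBuildTrackedBLASes(&count, tracked.data(), buildVer);

        // drop tracking info from builds older than `buildVer` (the version itself is kept)
        tlas->clearTrackedBLASes(buildVer);
    }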