Skip to content

Commit 302710f

Browse files
author
devsh
committed
clean up a bit and implement BLAS tracking info for Host Copies
1 parent 0f42726 commit 302710f

File tree

4 files changed

+104
-33
lines changed

4 files changed

+104
-33
lines changed

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,17 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
750750
const uint32_t m_maxInstanceCount;
751751

752752
private:
753+
struct DynamicUpCastingSpanIterator
754+
{
755+
inline bool operator!=(const DynamicUpCastingSpanIterator& other) const {return ptr!=other.ptr;}
756+
757+
inline DynamicUpCastingSpanIterator operator++() {return {ptr++};}
758+
759+
inline const IGPUBottomLevelAccelerationStructure* operator*() const {return dynamic_cast<const IGPUBottomLevelAccelerationStructure*>(ptr->get());}
760+
761+
std::span<const core::smart_refctd_ptr<const core::IReferenceCounted>>::iterator ptr;
762+
};
763+
friend class ILogicalDevice;
753764
friend class IQueue;
754765
inline const core::unordered_set<blas_smart_ptr_t>* getPendingBuildTrackedBLASes(const build_ver_t buildVer) const
755766
{

include/nbl/video/ILogicalDevice.h

Lines changed: 85 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -593,18 +593,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
593593
auto tlas = set.first;
594594
// we know the build is completed immediately after performing it, so we get our pending stamp then
595595
// ideally we should get our build version when the work of the deferred op gets executed for the first time
596-
using iterator = decltype(set.second)::iterator;
597-
struct CustomIterator
598-
{
599-
inline bool operator!=(const CustomIterator& other) const {return ptr!=other.ptr;}
600-
601-
inline CustomIterator operator++() {return {ptr++};}
602-
603-
inline const IGPUBottomLevelAccelerationStructure* operator*() const {return dynamic_cast<const IGPUBottomLevelAccelerationStructure*>(ptr->get());}
604-
605-
iterator ptr;
606-
};
607-
const auto buildVer = tlas->pushTrackedBLASes<CustomIterator>({set.second.begin()},{set.second.end()});
596+
const auto buildVer = tlas->pushTrackedBLASes<IGPUTopLevelAccelerationStructure::DynamicUpCastingSpanIterator>({set.second.begin()},{set.second.end()});
608597
tlas->clearTrackedBLASes(buildVer);
609598
}
610599
}
@@ -622,10 +611,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
622611
if constexpr (IsTLAS)
623612
{
624613
const auto blasCount = info.trackedBLASes.size();
625-
if (blasCount)
626-
callback.m_TLASToBLASReferenceSets[info.dstAS] = {oit-blasCount,blasCount};
627-
else
628-
callback.m_TLASToBLASReferenceSets[info.dstAS] = {};
614+
callback.m_TLASToBLASReferenceSets[info.dstAS] = {oit-blasCount,blasCount};
629615
}
630616
}
631617
if constexpr (IsTLAS)
@@ -685,10 +671,42 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
685671
}
686672
auto result = copyAccelerationStructure_impl(deferredOperation,copyInfo);
687673
if (result==DEFERRABLE_RESULT::DEFERRED)
674+
{
688675
deferredOperation->m_resourceTracking.insert(deferredOperation->m_resourceTracking.begin(),{
689676
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.src),
690677
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.dst)
691678
});
679+
constexpr bool IsTLAS = std::is_same_v<AccelerationStructure,IGPUTopLevelAccelerationStructure>;
680+
if constexpr (IsTLAS)
681+
{
682+
struct TLASCallback
683+
{
684+
// upon completion set the BLASes tracked
685+
inline void operator()(IDeferredOperation*) const
686+
{
687+
// not sure if even legal, but it would deadlock us
688+
if (src==dst)
689+
return;
690+
uint32_t buildVer;
691+
{
692+
// stop multiple threads messing with us
693+
std::lock_guard lk(src->m_trackingLock);
694+
// we know the build is completed immediately after performing it, so we get our pending stamp then
695+
// ideally we should get the BLAS set from the Source TLAS when the work of the deferred op gets executed for the first time
696+
const auto* pSrcBLASes = src->getPendingBuildTrackedBLASes(src->getPendingBuildVer());
697+
const std::span<IGPUTopLevelAccelerationStructure::blas_smart_ptr_t> emptySpan = {};
698+
buildVer = pSrcBLASes ? dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->end()):dst->pushTrackedBLASes(emptySpan.begin(),emptySpan.end());
699+
}
700+
dst->clearTrackedBLASes(buildVer);
701+
}
702+
703+
// the rawpointers are already smartpointers in whatever else the `fillTracking` declared above writes
704+
const IGPUTopLevelAccelerationStructure* src;
705+
IGPUTopLevelAccelerationStructure* dst;
706+
} callback = {.src=copyInfo.src,.dst=copyInfo.dst};
707+
deferredOperation->m_callback = std::move(callback);
708+
}
709+
}
692710

693711

694712
return result!=DEFERRABLE_RESULT::SOME_ERROR;
@@ -713,10 +731,39 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
713731
}
714732
auto result = copyAccelerationStructureToMemory_impl(deferredOperation,copyInfo);
715733
if (result==DEFERRABLE_RESULT::DEFERRED)
734+
{
716735
deferredOperation->m_resourceTracking.insert(deferredOperation->m_resourceTracking.begin(),{
717736
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.src),
718737
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.dst.buffer)
719738
});
739+
constexpr bool IsTLAS = std::is_same_v<AccelerationStructure,IGPUTopLevelAccelerationStructure>;
740+
if constexpr (IsTLAS)
741+
{
742+
struct TLASCallback
743+
{
744+
// upon completion set the BLASes tracked
745+
inline void operator()(IDeferredOperation*) const
746+
{
747+
// stop multiple threads messing with us
748+
std::lock_guard lk(src->m_trackingLock);
749+
// we know the build is completed immediately after performing it, so we get our pending stamp then
750+
// ideally we should get the BLAS set from the Source TLAS when the work of the deferred op gets executed for the first time
751+
const auto ver = src->getPendingBuildVer();
752+
uint32_t count = dst->size();
753+
src->getPendingBuildTrackedBLASes(&count,dst->data(),ver);
754+
if (count>dst->size())
755+
logger->log("BLAS output array too small, should be %d, only wrote out %d BLAS references to destination",system::ILogger::ELL_ERROR,count,dst->size());
756+
}
757+
758+
// device keeps it alive for entire lifetime of the callback
759+
system::ILogger* logger;
760+
// the rawpointers are already smartpointers in whatever else the `fillTracking` declared above writes
761+
const IGPUTopLevelAccelerationStructure* src;
762+
core::smart_refctd_dynamic_array<IGPUTopLevelAccelerationStructure::blas_smart_ptr_t> dst;
763+
} callback = {.logger=m_logger.get(),.src=copyInfo.src,.dst=copyInfo.trackedBLASes};
764+
deferredOperation->m_callback = std::move(callback);
765+
}
766+
}
720767
return result!=DEFERRABLE_RESULT::SOME_ERROR;
721768
}
722769
template<typename AccelerationStructure> requires std::is_base_of_v<IGPUAccelerationStructure,AccelerationStructure>
@@ -739,10 +786,32 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
739786
}
740787
auto result = copyAccelerationStructureFromMemory_impl(deferredOperation,copyInfo);
741788
if (result==DEFERRABLE_RESULT::DEFERRED)
789+
{
742790
deferredOperation->m_resourceTracking.insert(deferredOperation->m_resourceTracking.begin(),{
743791
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.src.buffer),
744792
core::smart_refctd_ptr<const IReferenceCounted>(copyInfo.dst)
745793
});
794+
constexpr bool IsTLAS = std::is_same_v<AccelerationStructure,IGPUTopLevelAccelerationStructure>;
795+
if constexpr (IsTLAS)
796+
{
797+
const size_t offset = deferredOperation->m_resourceTracking.size();
798+
deferredOperation->m_resourceTracking.insert(deferredOperation->m_resourceTracking.end(),copyInfo.trackedBLASes.begin(),copyInfo.trackedBLASes.end());
799+
struct TLASCallback
800+
{
801+
// upon completion set the BLASes tracked
802+
inline void operator()(IDeferredOperation*) const
803+
{
804+
const auto buildVer = dst->pushTrackedBLASes<IGPUTopLevelAccelerationStructure::DynamicUpCastingSpanIterator>({src->begin()},{src->end()});
805+
dst->clearTrackedBLASes(buildVer);
806+
}
807+
808+
// the rawpointers are already smartpointers in whatever else the `fillTracking` declared above writes
809+
std::span<const core::smart_refctd_ptr<const IReferenceCounted>> src;
810+
IGPUTopLevelAccelerationStructure* dst;
811+
} callback = {.src={deferredOperation->m_resourceTracking.data()+offset,copyInfo.trackedBLASes.size()},.dst=copyInfo.dst};
812+
deferredOperation->m_callback = std::move(callback);
813+
}
814+
}
746815
return result!=DEFERRABLE_RESULT::SOME_ERROR;
747816
}
748817

include/nbl/video/IQueue.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class IQueue : public core::Interface, public core::Unmovable
125125
class DeferredSubmitCallback final
126126
{
127127
//
128-
core::unordered_map<IGPUTopLevelAccelerationStructure*,IGPUTopLevelAccelerationStructure::build_ver_t> m_TLASBuilds;
128+
core::unordered_map<IGPUTopLevelAccelerationStructure*,IGPUTopLevelAccelerationStructure::build_ver_t> m_TLASOverwrites;
129129
//
130130
using smart_ptr = core::smart_refctd_ptr<IBackendObject>;
131131
core::smart_refctd_dynamic_array<smart_ptr> m_resources;

src/nbl/video/IQueue.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -177,18 +177,9 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info)
177177
case 0:
178178
{
179179
const IGPUCommandBuffer::TLASTrackingWrite& op = std::get<0>(var);
180-
using iterator = decltype(op.src)::iterator;
181-
struct CustomIterator
182-
{
183-
inline bool operator!=(const CustomIterator& other) const { return ptr != other.ptr; }
184-
185-
inline CustomIterator operator++() { return { ptr++ }; }
186180

187-
inline const IGPUBottomLevelAccelerationStructure* operator*() const { return dynamic_cast<const IGPUBottomLevelAccelerationStructure*>(ptr->get()); }
188-
189-
iterator ptr;
190-
};
191-
m_readTLASVersions[op.dst] = m_TLASBuilds[op.dst] = op.dst->pushTrackedBLASes<CustomIterator>({op.src.begin()},{op.src.end()});
181+
using iterator = decltype(op.src)::iterator;
182+
m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = op.dst->pushTrackedBLASes<IGPUTopLevelAccelerationStructure::DynamicUpCastingSpanIterator>({op.src.begin()},{op.src.end()});
192183
break;
193184
}
194185
case 1:
@@ -201,8 +192,8 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info)
201192
// stop multiple threads messing with us
202193
std::lock_guard lk(op.src->m_trackingLock);
203194
const auto* pSrcBLASes = op.src->getPendingBuildTrackedBLASes(ver);
204-
assert(pSrcBLASes);
205-
m_readTLASVersions[op.dst] = m_TLASBuilds[op.dst] = op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->end());
195+
const std::span<IGPUTopLevelAccelerationStructure::blas_smart_ptr_t> emptySpan = {};
196+
m_readTLASVersions[op.dst] = m_TLASOverwrites[op.dst] = pSrcBLASes ? op.dst->pushTrackedBLASes(pSrcBLASes->begin(),pSrcBLASes->end()):op.dst->pushTrackedBLASes(emptySpan.begin(),emptySpan.end());
206197
break;
207198
}
208199
case 2:
@@ -230,10 +221,10 @@ IQueue::DeferredSubmitCallback::DeferredSubmitCallback(const SSubmitInfo& info)
230221

231222
IQueue::DeferredSubmitCallback& IQueue::DeferredSubmitCallback::operator=(DeferredSubmitCallback&& other)
232223
{
233-
m_TLASBuilds = std::move(other.m_TLASBuilds);
224+
m_TLASOverwrites = std::move(other.m_TLASOverwrites);
234225
m_resources = std::move(other.m_resources);
235226
m_callback = std::move(other.m_callback);
236-
other.m_TLASBuilds.clear();
227+
other.m_TLASOverwrites.clear();
237228
other.m_resources = nullptr;
238229
other.m_callback = {};
239230
return *this;
@@ -243,7 +234,7 @@ IQueue::DeferredSubmitCallback& IQueue::DeferredSubmitCallback::operator=(Deferr
243234
void IQueue::DeferredSubmitCallback::operator()()
244235
{
245236
// all builds started before ours will now get overwritten (not exactly true, but without a better tracking system, this is the best we can do for now)
246-
for (const auto& build : m_TLASBuilds)
237+
for (const auto& build : m_TLASOverwrites)
247238
build.first->clearTrackedBLASes(build.second);
248239
// then free all resources
249240
m_resources = nullptr;

0 commit comments

Comments
 (0)