Skip to content

Commit 0b306e0

Browse files
author
devsh
committed
change the callback for updateBufferRangeViaStagingBuffer from a std::function to an actual interface class (may need more than one method in the future)
But also provide a path for lambdas to be used
1 parent 8d30513 commit 0b306e0

File tree

1 file changed

+64
-12
lines changed

1 file changed

+64
-12
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,31 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
297297
// updateBufferRangeViaStagingBuffer
298298
// --------------
299299

300-
/* Callback signature used for upstreaming requests, `dst` is already pre-scrolled it points at the start of the staging block. */
301-
using data_production_callback_t = void(void* /*dst*/, const size_t /*offsetInRange*/, const size_t /*blockSize*/);
300+
//! Used in `updateBufferRangeViaStagingBuffer` to provide data on demand
class IUpstreamingDataProducer
{
	public:
		// Virtual so that a producer which happens to be owned through a base pointer is still destroyed correctly
		virtual ~IUpstreamingDataProducer() = default;

		//! Writes the next piece of data into staging memory.
		//! \param dst Already pre-scrolled, it points at the start of the staging block.
		//! \param offsetInRange Offset of this block within the range being filled. You can be sure that subsequent calls
		//!		to this function will happen "in order", meaning the next call's `offsetInRange` equals the last call's
		//!		`offsetInRange` incremented by its return value.
		//! \param blockSize Number of bytes that may be written at `dst`.
		//! \return The number of bytes written, must be more than 0 and less than or equal to `blockSize`,
		//!		this is to not have to handle stopping writing mid-struct for example.
		virtual uint32_t operator()(void* dst, const size_t offsetInRange, const uint32_t blockSize) = 0;
};

//! Adapts any callable invocable as `uint32_t(void*,size_t,uint32_t)` to `IUpstreamingDataProducer`, useful for wrapping lambdas.
//! `F` is expected to be a non-reference type, the callable is stored by value.
template<typename F>
class CUpstreamingDataProducerLambdaWrapper final : public IUpstreamingDataProducer
{
		F f;

	public:
		// steals an r-value callable
		inline CUpstreamingDataProducerLambdaWrapper(F&& _f) : f(std::move(_f)) {}
		// copies an l-value callable, the caller's object is left untouched
		inline CUpstreamingDataProducerLambdaWrapper(const F& _f) : f(_f) {}

		inline uint32_t operator()(void* dst, const size_t offsetInRange, const uint32_t blockSize) override {return f(dst,offsetInRange,blockSize);}
};

//! Deduces the callable's type so callers don't have to spell out the wrapper's template argument.
//! Accepts both l-values (copied) and r-values (moved), `F` is decayed so the wrapper never ends up holding a reference.
template<typename F>
static inline CUpstreamingDataProducerLambdaWrapper<std::decay_t<F>> wrapUpstreamingDataProducerLambda(F&& f)
{
	return CUpstreamingDataProducerLambdaWrapper<std::decay_t<F>>(std::forward<F>(f));
}
302325

303326
//! Fills ranges allocated in the staging buffer using the callback and records the commands needed to copy the data from the staging buffer to `bufferRange.buffer`
304327
//! If the allocation from staging memory fails due to large buffer size or fragmentation then this function may need to submit the command buffer via the `submissionQueue`.
@@ -314,7 +337,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
314337
//! * nextSubmit must be valid (see `SIntendedSubmitInfo::valid()`)
315338
//! * bufferRange must be valid (see `SBufferRange::isValid()`)
316339
//! * data must not be nullptr
317-
inline bool updateBufferRangeViaStagingBuffer(SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, std::function<data_production_callback_t>& callback)
340+
inline bool updateBufferRangeViaStagingBuffer(SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, IUpstreamingDataProducer& callback)
318341
{
319342
if (!bufferRange.isValid() || !bufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_DST_BIT))
320343
{
@@ -329,6 +352,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
329352
const auto& limits = m_device->getPhysicalDevice()->getLimits();
330353
// TODO: Why did we settle on `/4` ? It was something about worst case fragmentation due to alignment in General Purpose Address Allocator. But need to remember what exactly.
331354
const uint32_t optimalTransferAtom = core::min<uint32_t>(limits.maxResidentInvocations*OptimalCoalescedInvocationXferSize,m_defaultUploadBuffer->get_total_size()/4);
355+
const auto minBlockSize = m_defaultUploadBuffer->getAddressAllocator().min_size();
332356

333357
// no pipeline barriers necessary because write and optional flush happens before submit, and memory allocation is reclaimed after fence signal
334358
for (size_t uploadedSize=0ull; uploadedSize<bufferRange.size;)
@@ -338,15 +362,28 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
338362
// how large we can make the allocation
339363
uint32_t maxFreeBlock = m_defaultUploadBuffer.get()->max_size();
340364
// get allocation size
341-
const uint32_t allocationSize = getAllocationSizeForStreamingBuffer(size,m_allocationAlignment,maxFreeBlock,optimalTransferAtom);
365+
uint32_t allocationSize = getAllocationSizeForStreamingBuffer(size,m_allocationAlignment,maxFreeBlock,optimalTransferAtom);
342366
// make sure we dont overrun the destination buffer due to padding
343-
const uint32_t subSize = core::min(allocationSize,size);
367+
uint32_t subSize = core::min(allocationSize,size);
344368
// cannot use `multi_place` because of the extra padding size we could have added
345369
uint32_t localOffset = StreamingTransientDataBufferMT<>::invalid_value;
346370
m_defaultUploadBuffer.get()->multi_allocate(std::chrono::steady_clock::now()+std::chrono::microseconds(500u),1u,&localOffset,&allocationSize,&m_allocationAlignment);
347371
// copy only the unpadded part
348372
if (localOffset!=StreamingTransientDataBufferMT<>::invalid_value)
349-
callback(reinterpret_cast<uint8_t*>(m_defaultUploadBuffer->getBufferPointer())+localOffset,uploadedSize,subSize);
373+
{
374+
const uint32_t bytesWritten = callback(reinterpret_cast<uint8_t*>(m_defaultUploadBuffer->getBufferPointer())+localOffset,uploadedSize,subSize);
375+
assert(bytesWritten>0 && bytesWritten<=subSize);
376+
// Highly Experimental, enable at own risk!
377+
if constexpr (false)
378+
// Reclaim the unused space if both the used part and the unused part are large enough to be their own independent free blocks in the allocator
379+
if (const uint32_t unusedSize=subSize-bytesWritten; bytesWritten>=minBlockSize && unusedSize>=minBlockSize)
380+
{
381+
const uint32_t unusedOffset = localOffset+bytesWritten;
382+
m_defaultUploadBuffer.get()->multi_deallocate(1u,&unusedOffset,&unusedSize);
383+
allocationSize = bytesWritten;
384+
}
385+
subSize = bytesWritten;
386+
}
350387
else
351388
{
352389
const auto completed = nextSubmit.getFutureScratchSemaphore();
@@ -378,7 +415,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
378415
}
379416
// Overload to make invokers not care about l-value or r-value: it accepts a temporary (or moved) submit info and/or data producer.
// Inside the body both `nextSubmit` and `callback` are named parameters, hence l-values, so the call below forwards to the
// overload taking them by l-value reference and returns its result unchanged.
380417
template<typename IntendedSubmitInfo> requires std::is_same_v<std::decay_t<IntendedSubmitInfo>,SIntendedSubmitInfo>
381-
inline bool updateBufferRangeViaStagingBuffer(IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, std::function<data_production_callback_t>&& callback)
418+
inline bool updateBufferRangeViaStagingBuffer(IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, IUpstreamingDataProducer&& callback)
382419
{
383420
return updateBufferRangeViaStagingBuffer(nextSubmit,bufferRange,callback);
384421
}
@@ -395,11 +432,26 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
395432
template<typename IntendedSubmitInfo> requires std::is_same_v<std::decay_t<IntendedSubmitInfo>,SIntendedSubmitInfo>
inline bool updateBufferRangeViaStagingBuffer(IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, const void* data)
{
	// Uploads from a plain CPU-side pointer by wrapping a `memcpy` in a data producer and forwarding to the producer-based overload.
	// `data` is read at offsets covering the whole of `bufferRange.size`.
	// We check the guarantees of our documentation with the asserts while we're at it
#ifdef _NBL_DEBUG
	size_t prevRangeEnd = 0;
#endif

	const bool retval = updateBufferRangeViaStagingBuffer(nextSubmit,bufferRange,wrapUpstreamingDataProducerLambda(
		[&](void* dst, const size_t offsetInRange, const uint32_t blockSize) -> uint32_t
		{
#ifdef _NBL_DEBUG
			// blocks must be requested back to back, without gaps or overlaps
			assert(offsetInRange==prevRangeEnd);
			prevRangeEnd = offsetInRange+blockSize;
#endif
			memcpy(dst,reinterpret_cast<const uint8_t*>(data)+offsetInRange,blockSize);
			// a plain copy can always fill the whole block
			return blockSize;
		}
	));
#ifdef _NBL_DEBUG
	// Full coverage of the range is only guaranteed when the upload succeeded, a failed call (e.g. invalid `bufferRange`)
	// may never invoke the producer and must report `false` instead of tripping this assert.
	assert(!retval || prevRangeEnd==bufferRange.size);
#endif
	return retval;
}
404456

405457
//! This only needs a valid queue in `submit`

0 commit comments

Comments
 (0)