@@ -297,8 +297,31 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
297
297
// updateBufferRangeViaStagingBuffer
298
298
// --------------
299
299
300
- /* Callback signature used for upstreaming requests, `dst` is already pre-scrolled it points at the start of the staging block. */
301
- using data_production_callback_t = void (void * /* dst*/ , const size_t /* offsetInRange*/ , const size_t /* blockSize*/ );
300
+ // ! Used in `updateBufferRangeViaStagingBuffer` to provide data on demand
301
+ class IUpstreamingDataProducer
302
+ {
303
+ public:
304
+ // Returns the number of bytes written; must be greater than 0 and less than or equal to `blockSize` — this is so you don't have to handle stopping writing mid-struct, for example.
305
+ // `dst` is already pre-scrolled, i.e. it points at the start of the staging block
306
+ // You can be sure that subsequent calls to this function will happen "in order", meaning the next call's `offsetInRange` equals the last call's `offsetInRange` incremented by the return value
307
+ virtual uint32_t operator ()(void * dst, const size_t offsetInRange, const uint32_t blockSize) = 0;
308
+ };
309
+ // useful for wrapping lambdas
310
+ template <typename F>
311
+ class CUpstreamingDataProducerLambdaWrapper final : public IUpstreamingDataProducer
312
+ {
313
+ F f;
314
+
315
+ public:
316
+ inline CUpstreamingDataProducerLambdaWrapper (F&& _f) : f(std::move(_f)) {}
317
+
318
+ inline uint32_t operator ()(void * dst, const size_t offsetInRange, const uint32_t blockSize) override {return f (dst,offsetInRange,blockSize);}
319
+ };
320
+ template <typename F>
321
+ static inline CUpstreamingDataProducerLambdaWrapper<F> wrapUpstreamingDataProducerLambda (F&& f)
322
+ {
323
+ return CUpstreamingDataProducerLambdaWrapper<F>(std::move (f));
324
+ }
302
325
303
326
// ! Fills ranges with callback allocated in stagingBuffer and records the commands needed to copy the data from stagingBuffer to `bufferRange.buffer`
304
327
// ! If the allocation from staging memory fails due to large buffer size or fragmentation then this function may need to submit the command buffer via the `submissionQueue`.
@@ -314,7 +337,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
314
337
// ! * nextSubmit must be valid (see `SIntendedSubmitInfo::valid()`)
315
338
// ! * bufferRange must be valid (see `SBufferRange::isValid()`)
316
339
// ! * data must not be nullptr
317
- inline bool updateBufferRangeViaStagingBuffer (SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, std::function< data_production_callback_t > & callback)
340
+ inline bool updateBufferRangeViaStagingBuffer (SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, IUpstreamingDataProducer & callback)
318
341
{
319
342
if (!bufferRange.isValid () || !bufferRange.buffer ->getCreationParams ().usage .hasFlags (asset::IBuffer::EUF_TRANSFER_DST_BIT))
320
343
{
@@ -329,6 +352,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
329
352
const auto & limits = m_device->getPhysicalDevice ()->getLimits ();
330
353
// TODO: Why did we settle on `/4` ? It was something about worst case fragmentation due to alignment in General Purpose Address Allocator. But need to remember what exactly.
331
354
const uint32_t optimalTransferAtom = core::min<uint32_t >(limits.maxResidentInvocations *OptimalCoalescedInvocationXferSize,m_defaultUploadBuffer->get_total_size ()/4 );
355
+ const auto minBlockSize = m_defaultUploadBuffer->getAddressAllocator ().min_size ();
332
356
333
357
// no pipeline barriers necessary because write and optional flush happens before submit, and memory allocation is reclaimed after fence signal
334
358
for (size_t uploadedSize=0ull ; uploadedSize<bufferRange.size ;)
@@ -338,15 +362,28 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
338
362
// how large we can make the allocation
339
363
uint32_t maxFreeBlock = m_defaultUploadBuffer.get ()->max_size ();
340
364
// get allocation size
341
- const uint32_t allocationSize = getAllocationSizeForStreamingBuffer (size,m_allocationAlignment,maxFreeBlock,optimalTransferAtom);
365
+ uint32_t allocationSize = getAllocationSizeForStreamingBuffer (size,m_allocationAlignment,maxFreeBlock,optimalTransferAtom);
342
366
// make sure we dont overrun the destination buffer due to padding
343
- const uint32_t subSize = core::min (allocationSize,size);
367
+ uint32_t subSize = core::min (allocationSize,size);
344
368
// cannot use `multi_place` because of the extra padding size we could have added
345
369
uint32_t localOffset = StreamingTransientDataBufferMT<>::invalid_value;
346
370
m_defaultUploadBuffer.get ()->multi_allocate (std::chrono::steady_clock::now ()+std::chrono::microseconds (500u ),1u ,&localOffset,&allocationSize,&m_allocationAlignment);
347
371
// copy only the unpadded part
348
372
if (localOffset!=StreamingTransientDataBufferMT<>::invalid_value)
349
- callback (reinterpret_cast <uint8_t *>(m_defaultUploadBuffer->getBufferPointer ())+localOffset,uploadedSize,subSize);
373
+ {
374
+ const uint32_t bytesWritten = callback (reinterpret_cast <uint8_t *>(m_defaultUploadBuffer->getBufferPointer ())+localOffset,uploadedSize,subSize);
375
+ assert (bytesWritten>0 && bytesWritten<=subSize);
376
+ // Highly Experimental, enable at own risk!
377
+ if constexpr (false )
378
+ // Reclaim the unused space if both the used part and the unused part are large enough to be their own independent free blocks in the allocator
379
+ if (const uint32_t unusedSize=subSize-bytesWritten; bytesWritten>=minBlockSize && unusedSize>=minBlockSize)
380
+ {
381
+ const uint32_t unusedOffset = localOffset+bytesWritten;
382
+ m_defaultUploadBuffer.get ()->multi_deallocate (1u ,&unusedOffset,&unusedSize);
383
+ allocationSize = bytesWritten;
384
+ }
385
+ subSize = bytesWritten;
386
+ }
350
387
else
351
388
{
352
389
const auto completed = nextSubmit.getFutureScratchSemaphore ();
@@ -378,7 +415,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
378
415
}
379
416
// overload to make invokers not care about l-value or r-value
380
417
template <typename IntendedSubmitInfo> requires std::is_same_v<std::decay_t <IntendedSubmitInfo>,SIntendedSubmitInfo>
381
- inline bool updateBufferRangeViaStagingBuffer (IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, std::function< data_production_callback_t > && callback)
418
+ inline bool updateBufferRangeViaStagingBuffer (IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, IUpstreamingDataProducer && callback)
382
419
{
383
420
return updateBufferRangeViaStagingBuffer (nextSubmit,bufferRange,callback);
384
421
}
@@ -395,11 +432,26 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
395
432
template <typename IntendedSubmitInfo> requires std::is_same_v<std::decay_t <IntendedSubmitInfo>,SIntendedSubmitInfo>
396
433
inline bool updateBufferRangeViaStagingBuffer (IntendedSubmitInfo&& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, const void * data)
397
434
{
398
- auto callback = [data](void * dst, const size_t offsetInRange, const size_t blockSize)->void
399
- {
400
- memcpy (dst,reinterpret_cast <const uint8_t *>(data)+offsetInRange,blockSize);
401
- };
402
- return updateBufferRangeViaStagingBuffer (nextSubmit,bufferRange,callback);
435
+ // We check the guarantees of our documentation with the asserts while we're at it
436
+ #ifdef _NBL_DEBUG
437
+ size_t prevRangeEnd = 0 ;
438
+ #endif
439
+
440
+ auto retval = updateBufferRangeViaStagingBuffer (nextSubmit,bufferRange,wrapUpstreamingDataProducerLambda (
441
+ [&](void * dst, const size_t offsetInRange, const uint32_t blockSize) -> uint32_t
442
+ {
443
+ #ifdef _NBL_DEBUG
444
+ assert (offsetInRange==prevRangeEnd);
445
+ prevRangeEnd = offsetInRange+blockSize;
446
+ #endif
447
+ memcpy (dst,reinterpret_cast <const uint8_t *>(data)+offsetInRange,blockSize);
448
+ return blockSize;
449
+ }
450
+ ));
451
+ #ifdef _NBL_DEBUG
452
+ assert (prevRangeEnd==bufferRange.size );
453
+ #endif
454
+ return retval;
403
455
}
404
456
405
457
// ! This only needs a valid queue in `submit`
0 commit comments