Skip to content

Commit f4160b6

Browse files
devshkeptsecret
authored and committed
fix the fix in previous commit
1 parent 487238d commit f4160b6

File tree

2 files changed

+9
-12
lines changed

2 files changed

+9
-12
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,8 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
359359
if (manualFlush)
360360
flushRanges.reserve((bufferRange.size-1)/m_defaultUploadBuffer.get()->max_size()+1);
361361

362-
const auto oldScratchStage = nextSubmit.scratchSemaphore.stageMask;
362+
// for the signal to be useful for us to let go of memory, we need to signal after transfer is finished
363+
const auto oldScratchStage = nextSubmit.scratchSemaphore.stageMask|=asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
363364
//
364365
auto* uploadBuffer = m_defaultUploadBuffer.get()->getBuffer();
365366
// no pipeline barriers necessary because the write and optional flush happen before submit, and memory allocation is reclaimed after fence signal
@@ -400,11 +401,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
400401
flushRanges.clear();
401402
}
402403
const auto completed = nextSubmit.getFutureScratchSemaphore();
403-
// for the signal to be useful for us to let go of memory, we need to signal after transfer is finished
404-
nextSubmit.scratchSemaphore.stageMask |= asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
405404
nextSubmit.overflowSubmit(scratch);
406405
// first submit we respect whatever stages the user had (maybe they wanted to be notified of the completion of `nextSubmit.prevCommandBuffers`)
407-
nextSubmit.scratchSemaphore.stageMask = {};
406+
nextSubmit.scratchSemaphore.stageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
408407
// overflowSubmit no longer blocks for the last submit to have completed, so we must do it ourselves here
409408
// TODO: if we cleverly overflowed BEFORE completely running out of memory (better heuristics) then we wouldn't need to do this and some CPU-GPU overlap could be achieved
410409
if (nextSubmit.overflowCallback)
@@ -607,7 +606,8 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
607606
// TODO: Why did we settle on `/4` ? It definitely wasn't about the uint32_t size!
608607
const uint32_t optimalTransferAtom = core::min<uint32_t>(limits.maxResidentInvocations*OptimalCoalescedInvocationXferSize,m_defaultDownloadBuffer->get_total_size()/4);
609608

610-
const auto oldScratchStage = nextSubmit.scratchSemaphore.stageMask;
609+
// for the signal to be useful for us to execute the data consumer callback, the signal must happen after the copy is done
610+
const auto oldScratchStage = nextSubmit.scratchSemaphore.stageMask|=asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
611611
// Basically downloadedSize is downloadRecordedIntoCommandBufferSize :D
612612
for (size_t downloadedSize=0ull; downloadedSize<srcBufferRange.size;)
613613
{
@@ -643,11 +643,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
643643
else // but first submit the already buffered up copies
644644
{
645645
const auto completed = nextSubmit.getFutureScratchSemaphore();
646-
// for the signal to be useful for us to execute the data consumer callback, the signal must happen after the copy is done
647-
nextSubmit.scratchSemaphore.stageMask |= asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
648646
nextSubmit.overflowSubmit(scratch);
649647
// first submit we respect whatever stages the user had (maybe they wanted to be notified of the completion of `nextSubmit.prevCommandBuffers`)
650-
nextSubmit.scratchSemaphore.stageMask = {};
648+
nextSubmit.scratchSemaphore.stageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
651649
// overflowSubmit no longer blocks for the last submit to have completed, so we must do it ourselves here
652650
// TODO: if we cleverly overflowed BEFORE completely running out of memory (better heuristics) then we wouldn't need to do this and some CPU-GPU overlap could be achieved
653651
if (nextSubmit.overflowCallback)

src/nbl/video/utilities/IUtilities.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ bool IUtilities::updateImageViaStagingBuffer(
7979
flushRanges.reserve(maxIterations);
8080

8181
auto* uploadBuffer = m_defaultUploadBuffer.get()->getBuffer();
82-
const auto oldScratchStage = intendedNextSubmit.scratchSemaphore.stageMask;
82+
// for the signal to be useful for us to let go of memory, we need to signal after transfer is finished
83+
const auto oldScratchStage = intendedNextSubmit.scratchSemaphore.stageMask|=asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
8384
while (!regionIterator.isFinished())
8485
{
8586
size_t memoryNeededForRemainingRegions = regionIterator.getMemoryNeededForRemainingRegions();
@@ -108,11 +109,9 @@ bool IUtilities::updateImageViaStagingBuffer(
108109
flushRanges.clear();
109110
}
110111
const auto completed = intendedNextSubmit.getFutureScratchSemaphore();
111-
// for the signal to be useful for us to let go of memory, we need to signal after transfer is finished
112-
intendedNextSubmit.scratchSemaphore.stageMask |= asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
113112
intendedNextSubmit.overflowSubmit(scratch);
114113
// first submit we respect whatever stages the user had (maybe they wanted to be notified of the completion of `intendedNextSubmit.prevCommandBuffers`)
115-
intendedNextSubmit.scratchSemaphore.stageMask = {};
114+
intendedNextSubmit.scratchSemaphore.stageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT;
116115
// overflowSubmit no longer blocks for the last submit to have completed, so we must do it ourselves here
117116
// TODO: if we cleverly overflowed BEFORE completely running out of memory (better heuristics) then we wouldn't need to do this and some CPU-GPU overlap could be achieved
118117
if (intendedNextSubmit.overflowCallback)

0 commit comments

Comments
 (0)