Skip to content

Commit 8ef7676

Browse files
CComputeBlit now builds.
1 parent 4c984cb commit 8ef7676

File tree

5 files changed

+58
-77
lines changed

5 files changed

+58
-77
lines changed

include/nbl/ext/ScreenShot/ScreenShot.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ using namespace nbl::video;
1818
TODO (Mihailo): Add support for downloading a region of a specific subresource
1919
*/
2020

21-
#if 0 // TODO (Mihailo): port
21+
2222
inline core::smart_refctd_ptr<ICPUImageView> createScreenShot(
2323
ILogicalDevice* logicalDevice,
2424
IQueue* queue,
@@ -74,8 +74,7 @@ inline core::smart_refctd_ptr<ICPUImageView> createScreenShot(
7474

7575
IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {};
7676
decltype(info)::image_barrier_t barrier = {};
77-
info.imgBarrierCount = 1u;
78-
info.imgBarriers = &barrier;
77+
info.imgBarriers = { &barrier, &barrier + 1 };
7978

8079
{
8180
barrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS;
@@ -127,9 +126,9 @@ inline core::smart_refctd_ptr<ICPUImageView> createScreenShot(
127126

128127
queue->submit({ &info, &info + 1});
129128

130-
ILogicalDevice::SSemaphoreWaitInfo waitInfo{ signalSemaphore.get(), 1u};
129+
ISemaphore::SWaitInfo waitInfo{ signalSemaphore.get(), 1u};
131130

132-
if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ILogicalDevice::WAIT_RESULT::SUCCESS)
131+
if (logicalDevice->blockForSemaphores({&waitInfo, &waitInfo + 1}) != ISemaphore::WAIT_RESULT::SUCCESS)
133132
return nullptr;
134133

135134
core::smart_refctd_ptr<ICPUImageView> cpuImageView;
@@ -209,7 +208,7 @@ inline bool createScreenShot(
209208
IAssetWriter::SAssetWriteParams writeParams(cpuImageView.get());
210209
return assetManager->writeAsset(filename.string(),writeParams); // TODO: Use std::filesystem::path
211210
}
212-
#endif
211+
213212

214213
} // namespace nbl::ext::ScreenShot
215214

include/nbl/video/utilities/CComputeBlit.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ namespace nbl::video
229229
"}\n";
230230

231231
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), asset::IShader::ESS_COMPUTE, asset::IShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlit::createBlitSpecializedShader");
232-
auto gpuShader = m_device->createShader(std::move(cpuShader));
232+
auto gpuShader = m_device->createShader(std::move(cpuShader.get()));
233233

234234
return gpuShader;
235235
}
@@ -663,17 +663,15 @@ namespace nbl::video
663663
readyForNorm.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
664664
readyForNorm.oldLayout = video::IGPUImage::LAYOUT::GENERAL;
665665
readyForNorm.newLayout = video::IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
666-
readyForNorm.image = normalizationInImage;
666+
readyForNorm.image = normalizationInImage.get();
667667
readyForNorm.subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT;
668668
readyForNorm.subresourceRange.levelCount = 1u;
669669
readyForNorm.subresourceRange.layerCount = normalizationInImage->getCreationParameters().arrayLayers;
670670

671-
depInfo.bufBarrierCount = 1;
672-
depInfo.bufBarriers = &alphaTestBarrier;
673-
depInfo.imgBarrierCount = 1;
674-
depInfo.imgBarriers = &readyForNorm;
671+
depInfo.bufBarriers = { &alphaTestBarrier, &alphaTestBarrier + 1 };
672+
depInfo.imgBarriers = { &readyForNorm, &readyForNorm + 1 };
675673

676-
cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, &depInfo);
674+
cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, depInfo);
677675

678676
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, normalizationPipeline->getLayout(), 0u, 1u, &normalizationDS);
679677
cmdbuf->bindComputePipeline(normalizationPipeline);
@@ -685,26 +683,28 @@ namespace nbl::video
685683
template <typename BlitUtilities, typename... Args>
686684
inline void blit(video::IQueue* computeQueue, Args&&... args)
687685
{
688-
auto cmdPool = m_device->createCommandPool(computeQueue->getFamilyIndex(), video::IGPUCommandPool::ECF_NONE);
686+
auto cmdPool = m_device->createCommandPool(computeQueue->getFamilyIndex(), video::IGPUCommandPool::CREATE_FLAGS::NONE);
689687
core::smart_refctd_ptr<video::IGPUCommandBuffer> cmdbuf;
690-
cmdPool->createCommandBuffers(video::IGPUCommandBuffer::EL_PRIMARY, { &cmdbuf, &cmdbuf + 1 });
688+
cmdPool->createCommandBuffers(video::IGPUCommandPool::BUFFER_LEVEL::PRIMARY, {&cmdbuf, &cmdbuf + 1});
691689

692690
auto semaphore = m_device->createSemaphore(0);
693691

694-
cmdbuf->begin(video::IGPUCommandBuffer::EU_ONE_TIME_SUBMIT_BIT);
692+
cmdbuf->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
695693
blit<BlitUtilities>(cmdbuf.get(), std::forward<Args>(args)...);
696694
cmdbuf->end();
697695

698696
video::IQueue::SSubmitInfo submitInfo;
699697
video::IQueue::SSubmitInfo::SSemaphoreInfo signalInfo;
700-
submitInfo.commandBuffers = { &cmdbuf, &cmdbuf + 1 };
698+
video::IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo;
699+
cmdbufInfo.cmdbuf = cmdbuf.get();
700+
submitInfo.commandBuffers = { &cmdbufInfo, &cmdbufInfo + 1 };
701701
submitInfo.signalSemaphores = { &signalInfo, &signalInfo + 1 };
702702
signalInfo.semaphore = semaphore.get();
703703
signalInfo.value = 1;
704-
signalInfo.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BIT;
704+
signalInfo.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS;
705705
computeQueue->submit({ &submitInfo, &submitInfo + 1 });
706706

707-
video::ILogicalDevice::SSemaphoreWaitInfo waitInfos{ semaphore.get(), 1 };
707+
video::ISemaphore::SWaitInfo waitInfos{ semaphore.get(), 1 };
708708
m_device->blockForSemaphores({ &waitInfos, &waitInfos + 1});
709709
}
710710

include/nbl/video/utilities/IUtilities.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,6 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
538538
// --------------
539539
// updateImageViaStagingBuffer
540540
// --------------
541-
#if 0 // TODO: port
542541
//! Copies `srcBuffer` to stagingBuffer and records the commands needed to copy the image from stagingBuffer to `dstImage`
543542
//! If the allocation from staging memory fails due to large image size or fragmentation, then this function may need to submit the command buffer via the `submissionQueue` and then signal the fence.
544543
//! Returns:
@@ -579,10 +578,20 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
579578
//! * submissionFence must point to a valid IGPUFence
580579
//! * submissionFence must be in `UNSIGNALED` state
581580
//! ** IUtility::getDefaultUpStreamingBuffer()->cull_frees() should be called before resetting the submissionFence and after `submissionFence` is signaled.
582-
[[nodiscard("Use The New IQueue::SubmitInfo")]] IQueue::SSubmitInfo updateImageViaStagingBuffer(
583-
asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout, const core::SRange<const asset::IImage::SBufferCopy>& regions,
584-
IQueue* submissionQueue, IGPUFence* submissionFence, IQueue::SSubmitInfo intendedNextSubmit);
585-
#endif
581+
bool updateImageViaStagingBuffer(
582+
SIntendedSubmitInfo& nextSubmit, asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout,
583+
const core::SRange<const asset::IImage::SBufferCopy>& regions);
584+
585+
inline bool updateImageViaStagingBufferAutoSubmit(
586+
const SIntendedSubmitInfo::SFrontHalf& submit, asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout,
587+
const core::SRange<const asset::IImage::SBufferCopy>& regions)
588+
{
589+
if (!autoSubmitAndBlock(submit, [&](SIntendedSubmitInfo& nextSubmit) { return updateImageViaStagingBuffer(nextSubmit, srcBuffer, srcFormat, dstImage, currentDstImageLayout, regions); }))
590+
{
591+
return false;
592+
}
593+
return true;
594+
}
586595

587596
protected:
588597
// The application must round down the start of the range to the nearest multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize,

src/nbl/video/utilities/CComputeBlit.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
using namespace nbl;
44
using namespace video;
55

6-
#if 0 // TODO: port
7-
core::smart_refctd_ptr<video::IGPUSpecializedShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
6+
7+
core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
88
{
99
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
1010
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
@@ -37,12 +37,11 @@ core::smart_refctd_ptr<video::IGPUSpecializedShader> CComputeBlit::createAlphaTe
3737
"}\n";
3838

3939
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), asset::IShader::ESS_COMPUTE, asset::IShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader");
40-
auto gpuUnspecShader = m_device->createShader(std::move(cpuShader));
4140

42-
return m_device->createSpecializedShader(gpuUnspecShader.get(), { nullptr, nullptr, "main" });
41+
return m_device->createShader(std::move(cpuShader.get()));
4342
}
4443

45-
core::smart_refctd_ptr<video::IGPUSpecializedShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
44+
core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpecializedShader(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
4645
const uint32_t alphaBinCount)
4746
{
4847
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
@@ -89,8 +88,6 @@ core::smart_refctd_ptr<video::IGPUSpecializedShader> CComputeBlit::createNormali
8988
"}\n";
9089

9190
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), asset::IShader::ESS_COMPUTE, asset::IShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader");
92-
auto gpuUnspecShader = m_device->createShader(std::move(cpuShader));
9391

94-
return m_device->createSpecializedShader(gpuUnspecShader.get(), { nullptr, nullptr, "main" });
92+
return m_device->createShader(std::move(cpuShader.get()));
9593
}
96-
#endif

src/nbl/video/utilities/IUtilities.cpp

Lines changed: 21 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,45 +11,39 @@ const char* SIntendedSubmitInfo::ErrorText = R"===(Invalid `IUtilities::SIntende
1111
- one of the `commandBuffer`s' Pool's Queue Family Index doesn't match `queue`'s Family
1212
)===";
1313

14-
#if 0 // TODO: port
15-
IQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
16-
asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, asset::IImage::LAYOUT currentDstImageLayout, const core::SRange<const asset::IImage::SBufferCopy>& regions,
17-
IQueue* submissionQueue, IGPUFence* submissionFence, IQueue::SSubmitInfo intendedNextSubmit)
14+
bool IUtilities::updateImageViaStagingBuffer(
15+
SIntendedSubmitInfo& intendedNextSubmit, asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout,
16+
const core::SRange<const asset::IImage::SBufferCopy>& regions)
1817
{
19-
if(!intendedNextSubmit.isValid() || intendedNextSubmit.commandBufferCount <= 0u)
18+
if(!intendedNextSubmit.valid())
2019
{
21-
m_logger.log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR);
22-
assert(false);
23-
return intendedNextSubmit;
20+
m_logger.log("Invalid `intendedNextSubmit` cannot `updateImageViaStagingBuffer`.", nbl::system::ILogger::ELL_ERROR);
21+
return false;
2422
}
2523

26-
// Use the last command buffer in intendedNextSubmit, it should be in recording state
27-
auto& cmdbuf = intendedNextSubmit.commandBuffers[intendedNextSubmit.commandBufferCount-1];
28-
29-
assert(cmdbuf->getState() == IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable());
30-
assert(cmdbuf->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT));
24+
auto cmdbuf = intendedNextSubmit.frontHalf.getScratchCommandBuffer();
3125

3226
const auto& limits = m_device->getPhysicalDevice()->getLimits();
3327

3428
if (regions.size() == 0)
35-
return intendedNextSubmit;
29+
return false;
3630

37-
if (cmdbuf == nullptr || submissionFence == nullptr || submissionQueue == nullptr || dstImage == nullptr || (srcBuffer == nullptr || srcBuffer->getPointer() == nullptr))
31+
if (dstImage == nullptr || (srcBuffer == nullptr || srcBuffer->getPointer() == nullptr))
3832
{
39-
assert(false);
40-
return intendedNextSubmit;
33+
m_logger.log("Invalid `srcBuffer` or `dstImage` cannot `updateImageViaStagingBuffer`.", nbl::system::ILogger::ELL_ERROR);
34+
return false;
4135
}
4236

43-
auto* cmdpool = cmdbuf->getPool();
44-
assert(cmdpool->getQueueFamilyIndex()==submissionQueue->getFamilyIndex());
37+
4538
if (dstImage->getCreationParameters().samples != asset::IImage::ESCF_1_BIT)
4639
{
4740
_NBL_TODO(); // "Erfan hasn't figured out yet how to copy to multisampled images"
48-
return intendedNextSubmit;
41+
return false;
4942
}
5043

5144
auto texelBlockInfo = asset::TexelBlockInfo(dstImage->getCreationParameters().format);
52-
auto queueFamProps = m_device->getPhysicalDevice()->getQueueFamilyProperties()[submissionQueue->getFamilyIndex()];
45+
assert(intendedNextSubmit.frontHalf.queue);
46+
auto queueFamProps = m_device->getPhysicalDevice()->getQueueFamilyProperties()[intendedNextSubmit.frontHalf.queue->getFamilyIndex()];
5347
auto minImageTransferGranularity = queueFamProps.minImageTransferGranularity;
5448

5549
assert(dstImage->getCreationParameters().format != asset::EF_UNKNOWN);
@@ -71,8 +65,8 @@ IQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
7165
}
7266
if (!regionsValid)
7367
{
74-
assert(false);
75-
return intendedNextSubmit;
68+
m_logger.log("Invalid regions to copy cannot `updateImageViaStagingBuffer`.", nbl::system::ILogger::ELL_ERROR);
69+
return false;
7670
}
7771

7872
ImageRegionIterator regionIterator = ImageRegionIterator(regions, queueFamProps, srcBuffer, srcFormat, dstImage, limits.optimalBufferCopyRowPitchAlignment);
@@ -109,25 +103,8 @@ IQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
109103
// keep trying again
110104
if (failedAllocation)
111105
{
112-
// but first submit the already buffered up copies and whatever previously recorded into the command buffer
113-
cmdbuf->end();
114-
IQueue::SSubmitInfo submit = intendedNextSubmit;
115-
submit.signalSemaphoreCount = 0u;
116-
submit.pSignalSemaphores = nullptr;
117-
assert(submit.isValid());
118-
submissionQueue->submit(1u, &submit, submissionFence);
119-
m_device->blockForFences(1u, &submissionFence);
120-
intendedNextSubmit.commandBufferCount = 1u;
121-
intendedNextSubmit.commandBuffers = &cmdbuf;
122-
intendedNextSubmit.waitSemaphoreCount = 0u;
123-
intendedNextSubmit.pWaitSemaphores = nullptr;
124-
intendedNextSubmit.pWaitDstStageMask = nullptr;
125-
// before resetting we need poll all events in the allocator's deferred free list
106+
intendedNextSubmit.overflowSubmit();
126107
m_defaultUploadBuffer->cull_frees();
127-
// we can reset the fence and commandbuffer because we fully wait for the GPU to finish here
128-
m_device->resetFences(1u, &submissionFence);
129-
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
130-
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
131108
continue;
132109
}
133110
else
@@ -158,17 +135,16 @@ IQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
158135
if (m_defaultUploadBuffer.get()->needsManualFlushOrInvalidate())
159136
{
160137
const auto consumedMemory = allocationSize - availableUploadBufferMemory;
161-
auto flushRange = AlignedMappedMemoryRange(m_defaultUploadBuffer.get()->getBuffer()->getBoundMemory(), localOffset, consumedMemory, limits.nonCoherentAtomSize);
138+
auto flushRange = AlignedMappedMemoryRange(m_defaultUploadBuffer.get()->getBuffer()->getBoundMemory().memory, localOffset, consumedMemory, limits.nonCoherentAtomSize);
162139
m_device->flushMappedMemoryRanges(1u, &flushRange);
163140
}
164141
}
165142

166143
// this doesn't actually free the memory, the memory is queued up to be freed only after the GPU fence/event is signalled
167-
m_defaultUploadBuffer.get()->multi_deallocate(1u, &localOffset, &allocationSize, core::smart_refctd_ptr<IGPUFence>(submissionFence), &cmdbuf); // can queue with a reset but not yet pending fence, just fine
144+
m_defaultUploadBuffer.get()->multi_deallocate(1u, &localOffset, &allocationSize, intendedNextSubmit.getScratchSemaphoreNextWait()); // can queue with a reset but not yet pending fence, just fine
168145
}
169-
return intendedNextSubmit;
146+
return true;
170147
}
171-
#endif
172148

173149
ImageRegionIterator::ImageRegionIterator(
174150
const core::SRange<const asset::IImage::SBufferCopy>& copyRegions,

0 commit comments

Comments
 (0)