Skip to content

Commit cfd08e0

Browse files
committed
cache outcpu image as well
1 parent adb35d6 commit cfd08e0

File tree

2 files changed

+42
-42
lines changed

2 files changed

+42
-42
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,8 @@ class NBL_API ImageRegionIterator
10371037

10381038
// Mock CPU Images used to copy cpu buffer to staging buffer
10391039
std::vector<core::smart_refctd_ptr<asset::ICPUImage>> imageFilterInCPUImages;
1040+
core::smart_refctd_dynamic_array<asset::ICPUImage::SBufferCopy> outCPUImageRegions; // Because this needs to update before each upload
1041+
std::vector<core::smart_refctd_ptr<asset::ICPUImage>> imageFilterOutCPUImages;
10401042

10411043
bool canTransferMipLevelsPartially = false;
10421044
asset::VkExtent3D minImageTransferGranularity = {};

src/nbl/video/utilities/IUtilities.cpp

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ ImageRegionIterator::ImageRegionIterator(
253253
*/
254254

255255
imageFilterInCPUImages.resize(regions.size());
256-
// imageFilterOutCPUImages.resize(regions.size());
256+
imageFilterOutCPUImages.resize(regions.size());
257257
for (uint32_t i = 0; i < copyRegions.size(); ++i)
258258
{
259259
auto& inCPUImage = imageFilterInCPUImages[i];
@@ -291,6 +291,20 @@ ImageRegionIterator::ImageRegionIterator(
291291
inCPUImage->setBufferAndRegions(std::move(inCPUBuffer), inCpuImageRegionsDynArray);
292292
assert(inCPUImage->getBuffer());
293293
assert(inCPUImage->getRegions().size() > 0u);
294+
295+
// outCPUImage is an image matching the params of dstImage, but with the extent and layer count of the current region being copied and a single mip level (mipLevels = 1u)
296+
// the buffer of this image (stagingBufferPointer + stagingBufferOffset) and its region (covering the whole copy region, offset from 0) are not known at initialization time; they are set before each upload in advanceAndCopyToStagingBuffer
297+
298+
auto& outCPUImage = imageFilterOutCPUImages[i];
299+
outCPUImageRegions = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<asset::ICPUImage::SBufferCopy>>(1);
300+
301+
asset::ICPUImage::SCreationParams outCPUImageParams = dstImageParams;
302+
outCPUImageParams.flags = asset::IImage::ECF_NONE; // Because we may want to write to only the first few layers (<6) of a CUBEMAP, but it's not valid to create a Cube ICPUImage with fewer than 6 layers.
303+
outCPUImageParams.extent = region.imageExtent;
304+
outCPUImageParams.arrayLayers = region.imageSubresource.layerCount;
305+
outCPUImageParams.mipLevels = 1u;
306+
outCPUImage = asset::ICPUImage::create(std::move(outCPUImageParams));
307+
assert(outCPUImage);
294308
}
295309
}
296310

@@ -563,48 +577,32 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
563577
// ! Function to create mock cpu images that can go into image filters for copying/converting
564578
auto createMockInOutCPUImagesForFilter = [&](core::smart_refctd_ptr<asset::ICPUImage>& inCPUImage, core::smart_refctd_ptr<asset::ICPUImage>& outCPUImage, const size_t outCPUBufferSize) -> void
565579
{
566-
// this one is cached because we can
580+
// Both images are created once at initialization time and cached per region, so no image is created on each call
567581
inCPUImage = imageFilterInCPUImages[currentRegion];
568-
auto dstImageParams = dstImage->getCreationParameters();
569-
570-
// this one is not cached currently
571-
// because image creation depends on creating it with a buffer pointing to stagingBuffer memory pointer which we do not have access to in initialization time
572-
// [TODO] but maybe we could cache it by tricking the filters into using `stagingBufferOffset` together with outOffsetBaseLayer
573-
// and we know we can because `stagingBufferOffset` is a multiple of block byte size, but range checks may fail?!
574-
575-
// outCPUImage is an image matching the params of dstImage but with the extents and layer count of the current region being copied and mipLevel 1u
576-
// the buffer of this image is set to (stagingBufferPointer + stagingBufferOffset) and the related region is set to cover the whole copy region (offset from 0)
577-
{
578-
auto outCpuImageRegionsDynArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<asset::ICPUImage::SBufferCopy>>(1);
579-
auto& outCpuImageRegion = outCpuImageRegionsDynArray->front();
580-
outCpuImageRegion = {};
581-
outCpuImageRegion.bufferOffset = 0u;
582-
outCpuImageRegion.bufferRowLength = regionToCopyNext.bufferRowLength;
583-
outCpuImageRegion.bufferImageHeight = regionToCopyNext.bufferImageHeight;
584-
outCpuImageRegion.imageSubresource.aspectMask = mainRegion.imageSubresource.aspectMask;
585-
outCpuImageRegion.imageSubresource.mipLevel = 0u;
586-
outCpuImageRegion.imageSubresource.baseArrayLayer = 0u;
587-
outCpuImageRegion.imageOffset.x = 0u;
588-
outCpuImageRegion.imageOffset.y = 0u;
589-
outCpuImageRegion.imageOffset.z = 0u;
590-
outCpuImageRegion.imageExtent.width = regionToCopyNext.imageExtent.width;
591-
outCpuImageRegion.imageExtent.height = regionToCopyNext.imageExtent.height;
592-
outCpuImageRegion.imageExtent.depth = regionToCopyNext.imageExtent.depth;
593-
outCpuImageRegion.imageSubresource.layerCount = core::max(regionToCopyNext.imageSubresource.layerCount, 1u);
594-
595-
asset::ICPUImage::SCreationParams outCPUImageParams = dstImageParams;
596-
uint8_t* outCpuBufferPointer = reinterpret_cast<uint8_t*>(stagingBufferPointer) + stagingBufferOffset;
597-
outCPUImageParams.flags = asset::IImage::ECF_NONE; // Because we may want to write to first few layers of CUBEMAP (<6) but it's not valid to create an Cube ICPUImage with less that 6 layers.
598-
outCPUImageParams.extent = regionToCopyNext.imageExtent;
599-
outCPUImageParams.arrayLayers = regionToCopyNext.imageSubresource.layerCount;
600-
outCPUImageParams.mipLevels = 1u;
601-
outCPUImage = asset::ICPUImage::create(std::move(outCPUImageParams));
602-
assert(outCPUImage);
603-
core::smart_refctd_ptr<asset::ICPUBuffer> outCPUBuffer = core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(outCPUBufferSize, outCpuBufferPointer, core::adopt_memory);
604-
outCPUImage->setBufferAndRegions(std::move(outCPUBuffer), outCpuImageRegionsDynArray);
605-
assert(outCPUImage->getBuffer());
606-
assert(outCPUImage->getRegions().size() > 0u);
607-
}
582+
outCPUImage = imageFilterOutCPUImages[currentRegion];
583+
584+
// But we need to set outCPUImage regions and buffer since that cannot be known at initialization time.
585+
auto& outCpuImageRegion = outCPUImageRegions->front();
586+
outCpuImageRegion = {};
587+
outCpuImageRegion.bufferOffset = 0u;
588+
outCpuImageRegion.bufferRowLength = regionToCopyNext.bufferRowLength;
589+
outCpuImageRegion.bufferImageHeight = regionToCopyNext.bufferImageHeight;
590+
outCpuImageRegion.imageSubresource.aspectMask = mainRegion.imageSubresource.aspectMask;
591+
outCpuImageRegion.imageSubresource.mipLevel = 0u;
592+
outCpuImageRegion.imageSubresource.baseArrayLayer = 0u;
593+
outCpuImageRegion.imageOffset.x = 0u;
594+
outCpuImageRegion.imageOffset.y = 0u;
595+
outCpuImageRegion.imageOffset.z = 0u;
596+
outCpuImageRegion.imageExtent.width = regionToCopyNext.imageExtent.width;
597+
outCpuImageRegion.imageExtent.height = regionToCopyNext.imageExtent.height;
598+
outCpuImageRegion.imageExtent.depth = regionToCopyNext.imageExtent.depth;
599+
outCpuImageRegion.imageSubresource.layerCount = core::max(regionToCopyNext.imageSubresource.layerCount, 1u);
600+
601+
uint8_t* outCpuBufferPointer = reinterpret_cast<uint8_t*>(stagingBufferPointer) + stagingBufferOffset;
602+
core::smart_refctd_ptr<asset::ICPUBuffer> outCPUBuffer = core::make_smart_refctd_ptr<asset::CCustomAllocatorCPUBuffer<core::null_allocator<uint8_t>>>(outCPUBufferSize, outCpuBufferPointer, core::adopt_memory);
603+
outCPUImage->setBufferAndRegions(std::move(outCPUBuffer), outCPUImageRegions);
604+
assert(outCPUImage->getBuffer());
605+
assert(outCPUImage->getRegions().size() > 0u);
608606
};
609607

610608
if(currentBlockInRow == 0 && currentRowInSlice == 0 && currentSliceInLayer == 0 && uploadableArrayLayers > 0)

0 commit comments

Comments
 (0)