@@ -67,7 +67,7 @@ IGPUQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
67
67
return intendedNextSubmit;
68
68
}
69
69
70
- ImageRegionIterator regionIterator = ImageRegionIterator (regions, queueFamProps, srcBuffer, srcFormat, dstImage, 64u /* limits.optimalBufferCopyRowPitchAlignment*/ );
70
+ ImageRegionIterator regionIterator = ImageRegionIterator (regions, queueFamProps, srcBuffer, srcFormat, dstImage, limits.optimalBufferCopyRowPitchAlignment );
71
71
72
72
// Assuming each thread can handle minImageTransferGranularitySize of texelBlocks:
73
73
const uint32_t maxResidentImageTransferSize = limits.maxResidentInvocations * texelBlockInfo.getBlockByteSize () * (minImageTransferGranularity.width * minImageTransferGranularity.height * minImageTransferGranularity.depth );
@@ -86,10 +86,9 @@ IGPUQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
86
86
uint32_t memoryLowerBound = maxResidentImageTransferSize;
87
87
{
88
88
const asset::IImage::SBufferCopy & region = regions[regionIterator.getCurrentRegion ()];
89
- auto imageExtent = core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth );
90
- auto imageExtentInBlocks = texelBlockInfo.convertTexelsToBlocks (imageExtent);
91
- auto imageExtentBlockStridesInBytes = texelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks);
92
- memoryLowerBound = core::max (memoryLowerBound, imageExtentBlockStridesInBytes[1 ]); // rowPitch = imageExtentBlockStridesInBytes[1]
89
+ const auto copyTexelStrides = regionIterator.getOptimalCopyTexelStrides (region.imageExtent );
90
+ const auto byteStrides = texelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides);
91
+ memoryLowerBound = core::max (memoryLowerBound, byteStrides[1 ]); // max of memoryLowerBound and copy rowPitch
93
92
}
94
93
95
94
uint32_t localOffset = video::StreamingTransientDataBufferMT<>::invalid_value;
@@ -218,6 +217,7 @@ ImageRegionIterator::ImageRegionIterator(
218
217
, currentSliceInLayer(0u )
219
218
, currentLayerInRegion(0u )
220
219
, currentRegion(0u )
220
+ , optimalRowPitchAlignment(optimalRowPitchAlignment)
221
221
{
222
222
dstImageFormat = dstImage->getCreationParameters ().format ;
223
223
if (srcImageFormat == asset::EF_UNKNOWN)
@@ -328,15 +328,12 @@ size_t ImageRegionIterator::getMemoryNeededForRemainingRegions() const
328
328
{
329
329
const asset::IImage::SBufferCopy & region = regions[i];
330
330
331
- // auto optimalRegion = region;
332
- // optimalRegion.bufferRowLength = core::alignUp(optimalRegion.bufferRowLength, optimalRowPitchAlignment);
333
- auto imageExtent = core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth );
334
- auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (imageExtent);
335
-
336
- // TODO: This needs to change with optimal rowpitch
337
- auto imageExtentBlockStridesInBytes = dstImageTexelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks);
331
+ auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (region.imageExtent .width , region.imageExtent .height , region.imageExtent .depth ));
332
+
333
+ const auto copyTexelStrides = getOptimalCopyTexelStrides (region.imageExtent );
334
+ const core::vector4du32_SIMD copyByteStrides = dstImageTexelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides);
338
335
339
- if (i == currentRegion)
336
+ if (i == currentRegion)
340
337
{
341
338
auto remainingBlocksInRow = imageExtentInBlocks.x - currentBlockInRow;
342
339
auto remainingRowsInSlice = imageExtentInBlocks.y - currentRowInSlice;
@@ -345,42 +342,42 @@ size_t ImageRegionIterator::getMemoryNeededForRemainingRegions() const
345
342
346
343
if (currentBlockInRow == 0 && currentRowInSlice == 0 && currentSliceInLayer == 0 && remainingLayersInRegion > 0 )
347
344
{
348
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * remainingLayersInRegion);
345
+ incrementMemoryNeeded (copyByteStrides [3 ] * remainingLayersInRegion);
349
346
}
350
347
else if (currentBlockInRow == 0 && currentRowInSlice == 0 && currentSliceInLayer > 0 )
351
348
{
352
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * remainingSlicesInLayer);
349
+ incrementMemoryNeeded (copyByteStrides [2 ] * remainingSlicesInLayer);
353
350
if (remainingLayersInRegion > 1u )
354
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
351
+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
355
352
}
356
353
else if (currentBlockInRow == 0 && currentRowInSlice > 0 )
357
354
{
358
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [1 ] * remainingRowsInSlice);
355
+ incrementMemoryNeeded (copyByteStrides [1 ] * remainingRowsInSlice);
359
356
360
- if (remainingSlicesInLayer > 1u )
361
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * (remainingSlicesInLayer - 1u ));
362
- if (remainingLayersInRegion > 1u )
363
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
357
+ if (remainingSlicesInLayer > 1u )
358
+ incrementMemoryNeeded (copyByteStrides [2 ] * (remainingSlicesInLayer - 1u ));
359
+ if (remainingLayersInRegion > 1u )
360
+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
364
361
}
365
362
else if (currentBlockInRow > 0 )
366
363
{
367
364
// want to first fill the remaining blocks in current row
368
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [0 ] * remainingBlocksInRow);
365
+ incrementMemoryNeeded (copyByteStrides [0 ] * remainingBlocksInRow);
369
366
// then fill the remaining rows in current slice
370
- if (remainingRowsInSlice > 1u )
371
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [1 ] * (remainingRowsInSlice - 1u ));
367
+ if (remainingRowsInSlice > 1u )
368
+ incrementMemoryNeeded (copyByteStrides [1 ] * (remainingRowsInSlice - 1u ));
372
369
// then fill the remaining slices in current layer
373
- if (remainingSlicesInLayer > 1u )
374
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [2 ] * (remainingSlicesInLayer - 1u ));
370
+ if (remainingSlicesInLayer > 1u )
371
+ incrementMemoryNeeded (copyByteStrides [2 ] * (remainingSlicesInLayer - 1u ));
375
372
// then fill the remaining layers in current region
376
- if (remainingLayersInRegion > 1u )
377
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * (remainingLayersInRegion - 1u ));
373
+ if (remainingLayersInRegion > 1u )
374
+ incrementMemoryNeeded (copyByteStrides [3 ] * (remainingLayersInRegion - 1u ));
378
375
}
379
376
}
380
377
else
381
378
{
382
379
// we want to fill the whole layers in the region
383
- incrementMemoryNeeded (imageExtentBlockStridesInBytes [3 ] * region.imageSubresource .layerCount ); // = blockByteSize * imageExtentInBlocks.x * imageExtentInBlocks.y * imageExtentInBlocks.z * region.imageSubresource.layerCount
380
+ incrementMemoryNeeded (copyByteStrides [3 ] * region.imageSubresource .layerCount ); // = blockByteSize * imageExtentInBlocks.x * imageExtentInBlocks.y * imageExtentInBlocks.z * region.imageSubresource.layerCount
384
381
}
385
382
}
386
383
return memoryNeededForRemainingRegions;
@@ -495,11 +492,9 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
495
492
}
496
493
497
494
const asset::TexelBlockInfo dstImageTexelBlockInfo (dstImageFormat);
498
- const asset::TexelBlockInfo srcImageTexelBlockInfo (srcImageFormat);
499
495
500
496
// ! Current Region that may break down into smaller regions (the first smaller region is nextRegionToCopy)
501
497
const asset::IImage::SBufferCopy & mainRegion = regions[currentRegion];
502
- const core::vector4du32_SIMD srcBufferByteStrides = mainRegion.getByteStrides (srcImageTexelBlockInfo);
503
498
504
499
// ! We only need subresourceSize for validations and assertions about minImageTransferGranularity because granularity requirements can be ignored if region fits against the right corner of the subresource (described in more detail below)
505
500
const auto subresourceSize = dstImage->getMipSize (mainRegion.imageSubresource .mipLevel );
@@ -511,9 +506,9 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
511
506
const auto imageOffsetInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (mainRegion.imageOffset .x , mainRegion.imageOffset .y , mainRegion.imageOffset .z ));
512
507
const auto imageExtentInBlocks = dstImageTexelBlockInfo.convertTexelsToBlocks (core::vector3du32_SIMD (mainRegion.imageExtent .width , mainRegion.imageExtent .height , mainRegion.imageExtent .depth ));
513
508
514
- // TODO: This needs to change with optimal rowpitch
515
- const core::vector4du32_SIMD imageExtentBlockStridesInBytes = dstImageTexelBlockInfo.convert3DBlockStridesTo1DByteStrides (imageExtentInBlocks );
516
-
509
+ const auto copyTexelStrides = getOptimalCopyTexelStrides (mainRegion. imageExtent );
510
+ const core::vector4du32_SIMD copyByteStrides = dstImageTexelBlockInfo.convert3DTexelStridesTo1DByteStrides (copyTexelStrides );
511
+
517
512
// region <-> region.imageSubresource.layerCount <-> imageExtentInBlocks.z <-> imageExtentInBlocks.y <-> imageExtentInBlocks.x
518
513
auto updateCurrentOffsets = [&]() -> void
519
514
{
@@ -542,10 +537,10 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
542
537
}
543
538
};
544
539
545
- uint32_t eachBlockNeededMemory = imageExtentBlockStridesInBytes [0 ]; // = blockByteSize
546
- uint32_t eachRowNeededMemory = imageExtentBlockStridesInBytes [1 ]; // = blockByteSize * imageExtentInBlocks .x
547
- uint32_t eachSliceNeededMemory = imageExtentBlockStridesInBytes [2 ]; // = blockByteSize * imageExtentInBlocks .x * imageExtentInBlocks .y
548
- uint32_t eachLayerNeededMemory = imageExtentBlockStridesInBytes [3 ]; // = blockByteSize * imageExtentInBlocks .x * imageExtentInBlocks .y * imageExtentInBlocks .z
540
+ uint32_t eachBlockNeededMemory = copyByteStrides [0 ]; // = blockByteSize
541
+ uint32_t eachRowNeededMemory = copyByteStrides [1 ]; // = blockByteSize * copyBlockStrides .x
542
+ uint32_t eachSliceNeededMemory = copyByteStrides [2 ]; // = blockByteSize * copyBlockStrides .x * copyBlockStrides .y
543
+ uint32_t eachLayerNeededMemory = copyByteStrides [3 ]; // = blockByteSize * copyBlockStrides .x * copyBlockStrides .y * copyBlockStrides .z
549
544
550
545
// There are remaining layers in the region that need copying
551
546
uint32_t uploadableArrayLayers = availableMemory / eachLayerNeededMemory;
@@ -606,8 +601,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
606
601
uint32_t layersToUploadMemorySize = eachLayerNeededMemory * uploadableArrayLayers;
607
602
608
603
regionToCopyNext.bufferOffset = stagingBufferOffset;
609
- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
610
- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
604
+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
605
+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
611
606
regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
612
607
regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
613
608
regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -645,8 +640,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
645
640
uint32_t slicesToUploadMemorySize = eachSliceNeededMemory * uploadableSlices;
646
641
647
642
regionToCopyNext.bufferOffset = stagingBufferOffset;
648
- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
649
- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
643
+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
644
+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
650
645
regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
651
646
regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
652
647
regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -684,8 +679,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
684
679
uint32_t rowsToUploadMemorySize = eachRowNeededMemory * uploadableRows;
685
680
686
681
regionToCopyNext.bufferOffset = stagingBufferOffset;
687
- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
688
- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
682
+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
683
+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
689
684
regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
690
685
regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
691
686
regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
@@ -724,8 +719,8 @@ bool ImageRegionIterator::advanceAndCopyToStagingBuffer(asset::IImage::SBufferCo
724
719
uint32_t blocksToUploadMemorySize = eachBlockNeededMemory * uploadableBlocks;
725
720
726
721
regionToCopyNext.bufferOffset = stagingBufferOffset;
727
- regionToCopyNext.bufferRowLength = imageExtentInBlocks. x * texelBlockDim .x ;
728
- regionToCopyNext.bufferImageHeight = imageExtentInBlocks. y * texelBlockDim .y ;
722
+ regionToCopyNext.bufferRowLength = copyTexelStrides .x ;
723
+ regionToCopyNext.bufferImageHeight = copyTexelStrides .y ;
729
724
regionToCopyNext.imageSubresource .aspectMask = mainRegion.imageSubresource .aspectMask ;
730
725
regionToCopyNext.imageSubresource .mipLevel = mainRegion.imageSubresource .mipLevel ;
731
726
regionToCopyNext.imageSubresource .baseArrayLayer = mainRegion.imageSubresource .baseArrayLayer + currentLayerInRegion;
0 commit comments