@@ -797,62 +797,70 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
797
797
const auto &ZeCommandList = CommandList->first ;
798
798
const auto &WaitList = (*Event)->WaitList ;
799
799
800
+ uint32_t PixelSizeInBytes = getPixelSizeBytes (pImageFormat);
801
+
800
802
if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
803
+ uint32_t SrcRowPitch = hostExtent.width * PixelSizeInBytes;
804
+ uint32_t SrcSlicePitch = SrcRowPitch * hostExtent.height ;
801
805
if (pImageDesc->rowPitch == 0 ) {
802
806
// Copy to Non-USM memory
803
807
ze_image_region_t DstRegion;
804
808
UR_CALL (getImageRegionHelper (ZeImageDesc, &dstOffset, &copyExtent,
805
809
DstRegion));
806
810
auto *UrImage = static_cast <_ur_image *>(pDst);
807
- ZE2UR_CALL (zeCommandListAppendImageCopyFromMemory,
808
- (ZeCommandList, UrImage->ZeImage , pSrc, &DstRegion, ZeEvent,
809
- WaitList.Length , WaitList.ZeEventList ));
811
+ char *SrcPtr = static_cast <char *>(pSrc) + srcOffset.z * SrcSlicePitch +
812
+ srcOffset.y * SrcRowPitch + srcOffset.x * PixelSizeInBytes;
813
+ ZE2UR_CALL (zeCommandListAppendImageCopyFromMemoryExt,
814
+ (ZeCommandList, UrImage->ZeImage , SrcPtr, &DstRegion,
815
+ SrcRowPitch, SrcSlicePitch, ZeEvent, WaitList.Length ,
816
+ WaitList.ZeEventList ));
810
817
} else {
811
818
// Copy to pitched USM memory
812
- uint32_t DstPitch = pImageDesc->rowPitch ;
819
+ uint32_t DstRowPitch = pImageDesc->rowPitch ;
813
820
ze_copy_region_t ZeDstRegion = {
814
821
(uint32_t )dstOffset.x , (uint32_t )dstOffset.y ,
815
- (uint32_t )dstOffset.z , DstPitch ,
822
+ (uint32_t )dstOffset.z , DstRowPitch ,
816
823
(uint32_t )copyExtent.height , (uint32_t )copyExtent.depth };
817
824
uint32_t DstSlicePitch = 0 ;
818
- uint32_t SrcPitch = hostExtent.width * getPixelSizeBytes (pImageFormat);
819
825
ze_copy_region_t ZeSrcRegion = {
820
826
(uint32_t )srcOffset.x , (uint32_t )srcOffset.y ,
821
- (uint32_t )srcOffset.z , SrcPitch ,
827
+ (uint32_t )srcOffset.z , SrcRowPitch ,
822
828
(uint32_t )copyExtent.height , (uint32_t )copyExtent.depth };
823
- uint32_t SrcSlicePitch = 0 ;
824
829
ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
825
- (ZeCommandList, pDst, &ZeDstRegion, DstPitch , DstSlicePitch,
826
- pSrc, &ZeSrcRegion, SrcPitch , SrcSlicePitch, ZeEvent,
830
+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch , DstSlicePitch,
831
+ pSrc, &ZeSrcRegion, SrcRowPitch , SrcSlicePitch, ZeEvent,
827
832
WaitList.Length , WaitList.ZeEventList ));
828
833
}
829
834
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
835
+ uint32_t DstRowPitch = hostExtent.width * PixelSizeInBytes;
836
+ uint32_t DstSlicePitch = DstRowPitch * hostExtent.height ;
830
837
if (pImageDesc->rowPitch == 0 ) {
831
838
// Copy from Non-USM memory to host
832
839
ze_image_region_t SrcRegion;
833
840
UR_CALL (getImageRegionHelper (ZeImageDesc, &srcOffset, &copyExtent,
834
841
SrcRegion));
835
842
auto *UrImage = static_cast <_ur_image *>(pSrc);
836
- ZE2UR_CALL (zeCommandListAppendImageCopyToMemory,
837
- (ZeCommandList, pDst, UrImage->ZeImage , &SrcRegion, ZeEvent,
838
- WaitList.Length , WaitList.ZeEventList ));
843
+ char *DstPtr = static_cast <char *>(pDst) + dstOffset.z * DstSlicePitch +
844
+ dstOffset.y * DstRowPitch + dstOffset.x * PixelSizeInBytes;
845
+ ZE2UR_CALL (zeCommandListAppendImageCopyToMemoryExt,
846
+ (ZeCommandList, DstPtr, UrImage->ZeImage , &SrcRegion,
847
+ DstRowPitch, DstSlicePitch, ZeEvent, WaitList.Length ,
848
+ WaitList.ZeEventList ));
839
849
} else {
840
850
// Copy from pitched USM memory to host
841
- uint32_t DstPitch = copyExtent.width * getPixelSizeBytes (pImageFormat);
842
851
ze_copy_region_t ZeDstRegion = {
843
852
(uint32_t )dstOffset.x , (uint32_t )dstOffset.y ,
844
- (uint32_t )dstOffset.z , DstPitch ,
853
+ (uint32_t )dstOffset.z , DstRowPitch ,
845
854
(uint32_t )copyExtent.height , (uint32_t )copyExtent.depth };
846
- uint32_t DstSlicePitch = 0 ;
847
- uint32_t SrcPitch = pImageDesc->rowPitch ;
855
+ uint32_t SrcRowPitch = pImageDesc->rowPitch ;
848
856
ze_copy_region_t ZeSrcRegion = {
849
857
(uint32_t )srcOffset.x , (uint32_t )srcOffset.y ,
850
- (uint32_t )srcOffset.z , SrcPitch ,
858
+ (uint32_t )srcOffset.z , SrcRowPitch ,
851
859
(uint32_t )copyExtent.height , (uint32_t )copyExtent.depth };
852
860
uint32_t SrcSlicePitch = 0 ;
853
861
ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
854
- (ZeCommandList, pDst, &ZeDstRegion, DstPitch , DstSlicePitch,
855
- pSrc, &ZeSrcRegion, SrcPitch , SrcSlicePitch, ZeEvent,
862
+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch , DstSlicePitch,
863
+ pSrc, &ZeSrcRegion, SrcRowPitch , SrcSlicePitch, ZeEvent,
856
864
WaitList.Length , WaitList.ZeEventList ));
857
865
}
858
866
} else {
0 commit comments