@@ -773,9 +773,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
773773 }
774774 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
775775 CUDA_MEMCPY2D cpy_desc = {};
776- cpy_desc.srcXInBytes = srcOffset.x ;
776+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
777777 cpy_desc.srcY = srcOffset.y ;
778- cpy_desc.dstXInBytes = dstOffset.x ;
778+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
779779 cpy_desc.dstY = dstOffset.y ;
780780 if (pImageDesc->rowPitch == 0 ) {
781781 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -788,21 +788,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
788788 }
789789 cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
790790 cpy_desc.dstHost = pDst;
791+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
791792 cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
792793 cpy_desc.Height = copyExtent.height ;
793794 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
794795 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
795796 CUDA_MEMCPY3D cpy_desc = {};
796- cpy_desc.srcXInBytes = srcOffset.x ;
797+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
797798 cpy_desc.srcY = srcOffset.y ;
798799 cpy_desc.srcZ = srcOffset.z ;
799- cpy_desc.dstXInBytes = dstOffset.x ;
800+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
800801 cpy_desc.dstY = dstOffset.y ;
801802 cpy_desc.dstZ = dstOffset.z ;
802803 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
803804 cpy_desc.srcArray = (CUarray)pSrc;
804805 cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
805806 cpy_desc.dstHost = pDst;
807+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
808+ cpy_desc.dstHeight = hostExtent.height ;
806809 cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
807810 cpy_desc.Height = copyExtent.height ;
808811 cpy_desc.Depth = copyExtent.depth ;
@@ -811,16 +814,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
811814 pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
812815 pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
813816 CUDA_MEMCPY3D cpy_desc = {};
814- cpy_desc.srcXInBytes = srcOffset.x ;
817+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
815818 cpy_desc.srcY = srcOffset.y ;
816819 cpy_desc.srcZ = srcOffset.z ;
817- cpy_desc.dstXInBytes = dstOffset.x ;
820+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
818821 cpy_desc.dstY = dstOffset.y ;
819822 cpy_desc.dstZ = dstOffset.z ;
820823 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
821824 cpy_desc.srcArray = (CUarray)pSrc;
822825 cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
823826 cpy_desc.dstHost = pDst;
827+ cpy_desc.dstPitch = hostExtent.width * PixelSizeBytes;
828+ cpy_desc.dstHeight = hostExtent.height ;
824829 cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
825830 cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
826831 cpy_desc.Depth = pImageDesc->arraySize ;
@@ -834,9 +839,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
834839 // the end
835840 if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
836841 CUDA_MEMCPY2D cpy_desc = {};
837- cpy_desc.srcXInBytes = srcOffset.x ;
842+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
838843 cpy_desc.srcY = 0 ;
839- cpy_desc.dstXInBytes = dstOffset.x ;
844+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
840845 cpy_desc.dstY = 0 ;
841846 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
842847 cpy_desc.srcArray = (CUarray)pSrc;
@@ -847,9 +852,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
847852 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
848853 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
849854 CUDA_MEMCPY2D cpy_desc = {};
850- cpy_desc.srcXInBytes = srcOffset.x ;
855+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
851856 cpy_desc.srcY = srcOffset.y ;
852- cpy_desc.dstXInBytes = dstOffset.x ;
857+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
853858 cpy_desc.dstY = dstOffset.y ;
854859 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
855860 cpy_desc.srcArray = (CUarray)pSrc;
@@ -860,10 +865,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
860865 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
861866 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
862867 CUDA_MEMCPY3D cpy_desc = {};
863- cpy_desc.srcXInBytes = srcOffset.x ;
868+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
864869 cpy_desc.srcY = srcOffset.y ;
865870 cpy_desc.srcZ = srcOffset.z ;
866- cpy_desc.dstXInBytes = dstOffset.x ;
871+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
867872 cpy_desc.dstY = dstOffset.y ;
868873 cpy_desc.dstZ = dstOffset.z ;
869874 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -878,10 +883,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
878883 pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
879884 pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
880885 CUDA_MEMCPY3D cpy_desc = {};
881- cpy_desc.srcXInBytes = srcOffset.x ;
886+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes ;
882887 cpy_desc.srcY = srcOffset.y ;
883888 cpy_desc.srcZ = srcOffset.z ;
884- cpy_desc.dstXInBytes = dstOffset.x ;
889+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes ;
885890 cpy_desc.dstY = dstOffset.y ;
886891 cpy_desc.dstZ = dstOffset.z ;
887892 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
0 commit comments