@@ -583,7 +583,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
583583 const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) {
584584 UR_ASSERT ((imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE ||
585585 imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST ||
586- imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE),
586+ imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE ||
587+ imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST),
587588 UR_RESULT_ERROR_INVALID_VALUE);
588589 UR_ASSERT (pSrcImageFormat->channelOrder == pDstImageFormat->channelOrder ,
589590 UR_RESULT_ERROR_INVALID_ARGUMENT);
@@ -651,6 +652,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
651652 cpy_desc.srcY = pCopyRegion->srcOffset .y ;
652653 cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
653654 cpy_desc.dstY = pCopyRegion->dstOffset .y ;
655+ cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
656+ cpy_desc.Height = pCopyRegion->copyExtent .height ;
654657 cpy_desc.srcPitch = pSrcImageDesc->width * PixelSizeBytes;
655658 if (pDstImageDesc->rowPitch == 0 ) {
656659 cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -661,8 +664,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
661664 cpy_desc.dstDevice = (CUdeviceptr)pDst;
662665 cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
663666 }
664- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
665- cpy_desc.Height = pCopyRegion->copyExtent .height ;
666667 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
667668 } else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
668669 CUDA_MEMCPY3D cpy_desc = {};
@@ -740,22 +741,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
740741 cpy_desc.srcY = pCopyRegion->srcOffset .y ;
741742 cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
742743 cpy_desc.dstY = pCopyRegion->dstOffset .y ;
744+ cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
745+ cpy_desc.Height = pCopyRegion->copyExtent .height ;
746+ cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes;
743747 cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
744748 cpy_desc.dstHost = pDst;
745749 if (pSrcImageDesc->rowPitch == 0 ) {
746750 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
747751 cpy_desc.srcArray = as_CUArray (pSrc);
748752 } else {
749753 // Pitched memory
750- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE;
751754 cpy_desc.srcPitch = pSrcImageDesc->rowPitch ;
755+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_DEVICE;
752756 cpy_desc.srcDevice = (CUdeviceptr)pSrc;
753757 }
754- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
755- cpy_desc.dstHost = pDst;
756- cpy_desc.dstPitch = pDstImageDesc->width * PixelSizeBytes;
757- cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
758- cpy_desc.Height = pCopyRegion->copyExtent .height ;
759758 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
760759 } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
761760 CUDA_MEMCPY3D cpy_desc = {};
@@ -797,7 +796,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
797796 UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
798797 }
799798 } else {
800- // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE
799+ // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE ||
800+ // imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_HOST
801801
802802 // we don't support copying between different image types.
803803 if (pSrcImageDesc->type != pDstImageDesc->type ) {
@@ -810,30 +810,67 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
810810 // synchronous because of the explicit call to cuStreamSynchronize at
811811 // the end
812812 if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
813+ // Check what type of memory pSrc and pDst are to set the correct
814+ // attributes of cpy_desc.
815+ // If cuPointerGetAttribute returns something different from
816+ // CUDA_SUCCESS then we know that the memory type is a CuArray.
817+ // Otherwise, it's USM memory (host or device, see getUSMHostOrDevicePtr).
818+ CUmemorytype memType;
819+ bool isSrcCudaArray =
820+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
821+ (CUdeviceptr)pSrc) != CUDA_SUCCESS;
822+ bool isDstCudaArray =
823+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
824+ (CUdeviceptr)pDst) != CUDA_SUCCESS;
825+
813826 CUDA_MEMCPY2D cpy_desc = {};
814827 cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes;
815828 cpy_desc.srcY = 0 ;
816829 cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
817830 cpy_desc.dstY = 0 ;
818- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
819- cpy_desc.srcArray = as_CUArray (pSrc);
820- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
821- cpy_desc.dstArray = (CUarray)pDst;
822831 cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
823832 cpy_desc.Height = 1 ;
833+ if (isSrcCudaArray) {
834+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
835+ cpy_desc.srcArray = as_CUArray (pSrc);
836+ } else {
837+ getUSMHostOrDevicePtr (pSrc, &cpy_desc.srcMemoryType ,
838+ &cpy_desc.srcDevice , &cpy_desc.srcHost );
839+ }
840+ if (isDstCudaArray) {
841+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
842+ cpy_desc.dstArray = (CUarray)pDst;
843+ } else {
844+ getUSMHostOrDevicePtr (pDst, &cpy_desc.dstMemoryType ,
845+ &cpy_desc.dstDevice , &cpy_desc.dstHost );
846+ }
824847 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
825848 } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
826849 CUDA_MEMCPY2D cpy_desc = {};
827850 cpy_desc.srcXInBytes = pCopyRegion->srcOffset .x * PixelSizeBytes;
828851 cpy_desc.srcY = pCopyRegion->srcOffset .y ;
829852 cpy_desc.dstXInBytes = pCopyRegion->dstOffset .x * PixelSizeBytes;
830853 cpy_desc.dstY = pCopyRegion->dstOffset .y ;
831- cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
832- cpy_desc.srcArray = as_CUArray (pSrc);
833- cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
834- cpy_desc.dstArray = (CUarray)pDst;
835854 cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent .width ;
836855 cpy_desc.Height = pCopyRegion->copyExtent .height ;
856+ if (pSrcImageDesc->rowPitch == 0 ) {
857+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
858+ cpy_desc.srcArray = as_CUArray (pSrc);
859+ } else {
860+ // Pitched memory
861+ cpy_desc.srcPitch = pSrcImageDesc->rowPitch ;
862+ getUSMHostOrDevicePtr (pSrc, &cpy_desc.srcMemoryType ,
863+ &cpy_desc.srcDevice , &cpy_desc.srcHost );
864+ }
865+ if (pDstImageDesc->rowPitch == 0 ) {
866+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
867+ cpy_desc.dstArray = (CUarray)pDst;
868+ } else {
869+ // Pitched memory
870+ cpy_desc.dstPitch = pDstImageDesc->rowPitch ;
871+ getUSMHostOrDevicePtr (pDst, &cpy_desc.dstMemoryType ,
872+ &cpy_desc.dstDevice , &cpy_desc.dstHost );
873+ }
837874 UR_CHECK_ERROR (cuMemcpy2DAsync (&cpy_desc, Stream));
838875 } else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
839876 CUDA_MEMCPY3D cpy_desc = {};
0 commit comments