@@ -867,21 +867,59 @@ ur_result_t bindlessImagesHandleCopyFlags(
867867 return UR_RESULT_SUCCESS;
868868 };
869869 case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
870- ze_image_region_t DstRegion;
871- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
872- &pCopyRegion->copyExtent , DstRegion));
873- ze_image_region_t SrcRegion;
874- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
875- &pCopyRegion->copyExtent , SrcRegion));
876-
877- auto *urImgSrc = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
878- auto *urImgDst = reinterpret_cast <ur_bindless_mem_handle_t *>(pDst);
879-
880- ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
881- (ZeCommandList, urImgDst->getZeImage (), urImgSrc->getZeImage (),
882- &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
883- phWaitEvents));
884-
870+ if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0 ) {
871+ // Copy from pitched USM memory to pitched USM memory
872+ uint32_t SrcRowPitch = pSrcImageDesc->rowPitch ;
873+ uint32_t DstRowPitch = pDstImageDesc->rowPitch ;
874+ ze_copy_region_t ZeDstRegion = {(uint32_t )pCopyRegion->dstOffset .x ,
875+ (uint32_t )pCopyRegion->dstOffset .y ,
876+ (uint32_t )pCopyRegion->dstOffset .z ,
877+ DstRowPitch,
878+ (uint32_t )pCopyRegion->copyExtent .height ,
879+ (uint32_t )pCopyRegion->copyExtent .depth };
880+ uint32_t DstSlicePitch = 0 ;
881+ uint32_t SrcSlicePitch = 0 ;
882+ ze_copy_region_t ZeSrcRegion = {(uint32_t )pCopyRegion->srcOffset .x ,
883+ (uint32_t )pCopyRegion->srcOffset .y ,
884+ (uint32_t )pCopyRegion->srcOffset .z ,
885+ SrcRowPitch,
886+ (uint32_t )pCopyRegion->copyExtent .height ,
887+ (uint32_t )pCopyRegion->copyExtent .depth };
888+ ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
889+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch,
890+ pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent,
891+ numWaitEvents, phWaitEvents));
892+ } else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0 ) {
893+ // Copy from Non-USM memory to Non-USM memory
894+ ze_image_region_t DstRegion;
895+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
896+ &pCopyRegion->copyExtent , DstRegion));
897+ ze_image_region_t SrcRegion;
898+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
899+ &pCopyRegion->copyExtent , SrcRegion));
900+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
901+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
902+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
903+ (ZeCommandList, UrImageDst->getZeImage (),
904+ UrImageSrc->getZeImage (), &DstRegion, &SrcRegion,
905+ zeSignalEvent, numWaitEvents, phWaitEvents));
906+ } else {
907+ // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory
908+ // Note: This might be the same procedure as pitched USM to
909+ // pitched USM. Need further testing.
910+ ze_image_region_t DstRegion;
911+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
912+ &pCopyRegion->copyExtent , DstRegion));
913+ ze_image_region_t SrcRegion;
914+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
915+ &pCopyRegion->copyExtent , SrcRegion));
916+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
917+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
918+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
919+ (ZeCommandList, UrImageDst->getZeImage (),
920+ UrImageSrc->getZeImage (), &DstRegion, &SrcRegion,
921+ zeSignalEvent, numWaitEvents, phWaitEvents));
922+ }
885923 return UR_RESULT_SUCCESS;
886924 };
887925 default :
0 commit comments