@@ -867,21 +867,61 @@ ur_result_t bindlessImagesHandleCopyFlags(
867867 return UR_RESULT_SUCCESS;
868868 };
869869 case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
870- ze_image_region_t DstRegion;
871- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
872- &pCopyRegion->copyExtent , DstRegion));
873- ze_image_region_t SrcRegion;
874- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
875- &pCopyRegion->copyExtent , SrcRegion));
876-
877- auto *urImgSrc = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
878- auto *urImgDst = reinterpret_cast <ur_bindless_mem_handle_t *>(pDst);
879-
880- ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
881- (ZeCommandList, urImgDst->getZeImage (), urImgSrc->getZeImage (),
882- &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
883- phWaitEvents));
870+ if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0 ) {
871+ // Copy from pitched USM memory to pitched USM memory
872+ uint32_t SrcRowPitch = pSrcImageDesc->rowPitch ;
873+ uint32_t DstRowPitch = pDstImageDesc->rowPitch ;
874+ ze_copy_region_t ZeDstRegion = {(uint32_t )pCopyRegion->dstOffset .x ,
875+ (uint32_t )pCopyRegion->dstOffset .y ,
876+ (uint32_t )pCopyRegion->dstOffset .z ,
877+ DstRowPitch,
878+ (uint32_t )pCopyRegion->copyExtent .height ,
879+ (uint32_t )pCopyRegion->copyExtent .depth };
880+ uint32_t DstSlicePitch = 0 ;
881+ uint32_t SrcSlicePitch = 0 ;
882+ ze_copy_region_t ZeSrcRegion = {(uint32_t )pCopyRegion->srcOffset .x ,
883+ (uint32_t )pCopyRegion->srcOffset .y ,
884+ (uint32_t )pCopyRegion->srcOffset .z ,
885+ SrcRowPitch,
886+ (uint32_t )pCopyRegion->copyExtent .height ,
887+ (uint32_t )pCopyRegion->copyExtent .depth };
888+ ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
889+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch,
890+ pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent,
891+ numWaitEvents, phWaitEvents));
892+ } else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0 ) {
893+ // Copy from Non-USM memory to Non-USM memory
894+ ze_image_region_t DstRegion;
895+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
896+ &pCopyRegion->copyExtent , DstRegion));
897+ ze_image_region_t SrcRegion;
898+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
899+ &pCopyRegion->copyExtent , SrcRegion));
900+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
901+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
902+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
903+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
904+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
905+ phWaitEvents));
884906
907+ } else {
908+ // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory
909+ // Note: This might be the same procedure as pitched USM to
910+ // pitched USM. Need further testing.
911+ ze_image_region_t DstRegion;
912+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
913+ &pCopyRegion->copyExtent , DstRegion));
914+ ze_image_region_t SrcRegion;
915+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
916+ &pCopyRegion->copyExtent , SrcRegion));
917+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
918+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
919+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
920+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
921+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
922+ phWaitEvents));
923+ }
924+
885925 return UR_RESULT_SUCCESS;
886926 };
887927 default :
0 commit comments