@@ -865,21 +865,61 @@ ur_result_t bindlessImagesHandleCopyFlags(
865865 return UR_RESULT_SUCCESS;
866866 };
867867 case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
868- ze_image_region_t DstRegion;
869- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
870- &pCopyRegion->copyExtent , DstRegion));
871- ze_image_region_t SrcRegion;
872- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
873- &pCopyRegion->copyExtent , SrcRegion));
874-
875- auto *urImgSrc = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
876- auto *urImgDst = reinterpret_cast <ur_bindless_mem_handle_t *>(pDst);
877-
878- ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
879- (ZeCommandList, urImgDst->getZeImage (), urImgSrc->getZeImage (),
880- &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
881- phWaitEvents));
868+ if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0 ) {
869+ // Copy from pitched USM memory to pitched USM memory
870+ uint32_t SrcRowPitch = pSrcImageDesc->rowPitch ;
871+ uint32_t DstRowPitch = pDstImageDesc->rowPitch ;
872+ ze_copy_region_t ZeDstRegion = {(uint32_t )pCopyRegion->dstOffset .x ,
873+ (uint32_t )pCopyRegion->dstOffset .y ,
874+ (uint32_t )pCopyRegion->dstOffset .z ,
875+ DstRowPitch,
876+ (uint32_t )pCopyRegion->copyExtent .height ,
877+ (uint32_t )pCopyRegion->copyExtent .depth };
878+ uint32_t DstSlicePitch = 0 ;
879+ uint32_t SrcSlicePitch = 0 ;
880+ ze_copy_region_t ZeSrcRegion = {(uint32_t )pCopyRegion->srcOffset .x ,
881+ (uint32_t )pCopyRegion->srcOffset .y ,
882+ (uint32_t )pCopyRegion->srcOffset .z ,
883+ SrcRowPitch,
884+ (uint32_t )pCopyRegion->copyExtent .height ,
885+ (uint32_t )pCopyRegion->copyExtent .depth };
886+ ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
887+ (ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch,
888+ pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent,
889+ numWaitEvents, phWaitEvents));
890+ } else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0 ) {
891+ // Copy from Non-USM memory to Non-USM memory
892+ ze_image_region_t DstRegion;
893+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
894+ &pCopyRegion->copyExtent , DstRegion));
895+ ze_image_region_t SrcRegion;
896+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
897+ &pCopyRegion->copyExtent , SrcRegion));
898+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
899+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
900+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
901+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
902+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
903+ phWaitEvents));
882904
905+ } else {
906+ // Copy from Non-USM/pitched USM memory to pitched USM/Non-USM memory
907+ // Note: This might be the same procedure as pitched USM to
908+ // pitched USM. Need further testing.
909+ ze_image_region_t DstRegion;
910+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
911+ &pCopyRegion->copyExtent , DstRegion));
912+ ze_image_region_t SrcRegion;
913+ UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
914+ &pCopyRegion->copyExtent , SrcRegion));
915+ auto *UrImageDst = static_cast <ur_bindless_mem_handle_t *>(pDst);
916+ auto *UrImageSrc = static_cast <const ur_bindless_mem_handle_t *>(pSrc);
917+ ZE2UR_CALL (zeCommandListAppendImageCopyRegion,
918+ (ZeCommandList, UrImageDst->getZeImage (), UrImageSrc->getZeImage (),
919+ &DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
920+ phWaitEvents));
921+ }
922+
883923 return UR_RESULT_SUCCESS;
884924 };
885925 default :
0 commit comments