@@ -747,20 +747,11 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc,
747747 UR_RESULT_ERROR_INVALID_VALUE);
748748#endif // !NDEBUG
749749
750- // ur_rect_offset_t and ur_rect_region_t describe first component using bytes
751- // ze_image_region_t however uses pixels for it
752-
753- // TODO: this is less efficient than a direct calculation of a pixel size
754- // using ze_image_format_t
755- ur_image_format_t UrImageFormat;
756- UR_CALL (ze2urImageFormat (ZeImageDesc.format , &UrImageFormat));
757- uint32_t PixelSizeBytes = getPixelSizeBytes (&UrImageFormat);
758-
759- uint32_t OriginX = ur_cast<uint32_t >(Origin->x ) / PixelSizeBytes;
750+ uint32_t OriginX = ur_cast<uint32_t >(Origin->x );
760751 uint32_t OriginY = ur_cast<uint32_t >(Origin->y );
761752 uint32_t OriginZ = ur_cast<uint32_t >(Origin->z );
762753
763- uint32_t Width = ur_cast<uint32_t >(Region->width ) / PixelSizeBytes ;
754+ uint32_t Width = ur_cast<uint32_t >(Region->width );
764755 uint32_t Height = (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY)
765756 ? ZeImageDesc.arraylevels
766757 : ur_cast<uint32_t >(Region->height );
@@ -773,6 +764,27 @@ ur_result_t getImageRegionHelper(ze_image_desc_t ZeImageDesc,
773764 return UR_RESULT_SUCCESS;
774765}
775766
767+ // ur_rect_offset_t and ur_rect_region_t describe their first component in
768+ // bytes, whilst ze_image_region_t uses pixels.
769+ //
770+ // However, the getImageRegionHelper above is used for both bindless and regular
771+ // images, and APIs for the latter explicitly document that ur_rect_offset_t and
772+ // ur_rect_region_t are misused and all their components are treated as pixels.
773+ //
774+ // As such, this separate helper calls getImageRegionHelper unchanged and then
775+ // converts ZeRegion.originX and ZeRegion.width by dividing by PixelSizeInBytes.
776+ static ur_result_t getZeImageRegionHelper (ze_image_desc_t ZeImageDesc,
777+ size_t PixelSizeInBytes, // used as a divisor below; NOTE(review): confirm it is never 0
778+ ur_rect_offset_t *Origin,
779+ ur_rect_region_t *Region,
780+ ze_image_region_t &ZeRegion) { // output: region with X origin/width rescaled
781+ UR_CALL (getImageRegionHelper (ZeImageDesc, Origin, Region, ZeRegion)); // fill ZeRegion first, no unit conversion
782+ ZeRegion.originX /= PixelSizeInBytes; // bytes -> pixels (integer division truncates)
783+ ZeRegion.width /= PixelSizeInBytes; // bytes -> pixels (integer division truncates)
784+
785+ return UR_RESULT_SUCCESS;
786+ }
787+
776788ur_result_t bindlessImagesHandleCopyFlags (
777789 const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc,
778790 const ur_image_desc_t *pDstImageDesc,
@@ -785,6 +797,8 @@ ur_result_t bindlessImagesHandleCopyFlags(
785797
786798 ZeStruct<ze_image_desc_t > zeSrcImageDesc;
787799 ur2zeImageDesc (pSrcImageFormat, pSrcImageDesc, zeSrcImageDesc);
800+ uint32_t SrcPixelSizeInBytes = getPixelSizeBytes (pSrcImageFormat);
801+ uint32_t DstPixelSizeInBytes = getPixelSizeBytes (pDstImageFormat);
788802
789803 switch (imageCopyFlags) {
790804 case UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE: {
@@ -794,15 +808,15 @@ ur_result_t bindlessImagesHandleCopyFlags(
794808 // Copy to Non-USM memory
795809
796810 ze_image_region_t DstRegion;
797- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
798- &pCopyRegion->copyExtent , DstRegion));
811+ UR_CALL (getZeImageRegionHelper (zeSrcImageDesc, SrcPixelSizeInBytes,
812+ &pCopyRegion->dstOffset ,
813+ &pCopyRegion->copyExtent , DstRegion));
799814 auto *urDstImg = static_cast <ur_bindless_mem_handle_t *>(pDst);
800815
801- const char *SrcPtr =
802- static_cast <const char *>(pSrc) +
803- pCopyRegion->srcOffset .z * SrcSlicePitch +
804- pCopyRegion->srcOffset .y * SrcRowPitch +
805- pCopyRegion->srcOffset .x * getPixelSizeBytes (pSrcImageFormat);
816+ const char *SrcPtr = static_cast <const char *>(pSrc) +
817+ pCopyRegion->srcOffset .z * SrcSlicePitch +
818+ pCopyRegion->srcOffset .y * SrcRowPitch +
819+ pCopyRegion->srcOffset .x ;
806820
807821 ZE2UR_CALL (zeCommandListAppendImageCopyFromMemoryExt,
808822 (ZeCommandList, urDstImg->getZeImage (), SrcPtr, &DstRegion,
@@ -837,15 +851,15 @@ ur_result_t bindlessImagesHandleCopyFlags(
837851 if (pSrcImageDesc->rowPitch == 0 ) {
838852 // Copy from Non-USM memory to host
839853 ze_image_region_t SrcRegion;
840- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
841- &pCopyRegion->copyExtent , SrcRegion));
854+ UR_CALL (getZeImageRegionHelper (zeSrcImageDesc, SrcPixelSizeInBytes,
855+ &pCopyRegion->srcOffset ,
856+ &pCopyRegion->copyExtent , SrcRegion));
842857
843858 auto *urSrcImg = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
844859
845860 char *DstPtr =
846861 static_cast <char *>(pDst) + pCopyRegion->dstOffset .z * DstSlicePitch +
847- pCopyRegion->dstOffset .y * DstRowPitch +
848- pCopyRegion->dstOffset .x * getPixelSizeBytes (pDstImageFormat);
862+ pCopyRegion->dstOffset .y * DstRowPitch + pCopyRegion->dstOffset .x ;
849863 ZE2UR_CALL (zeCommandListAppendImageCopyToMemoryExt,
850864 (ZeCommandList, DstPtr, urSrcImg->getZeImage (), &SrcRegion,
851865 DstRowPitch, DstSlicePitch, zeSignalEvent, numWaitEvents,
@@ -875,11 +889,13 @@ ur_result_t bindlessImagesHandleCopyFlags(
875889 };
876890 case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
877891 ze_image_region_t DstRegion;
878- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->dstOffset ,
879- &pCopyRegion->copyExtent , DstRegion));
892+ UR_CALL (getZeImageRegionHelper (zeSrcImageDesc, DstPixelSizeInBytes,
893+ &pCopyRegion->dstOffset ,
894+ &pCopyRegion->copyExtent , DstRegion));
880895 ze_image_region_t SrcRegion;
881- UR_CALL (getImageRegionHelper (zeSrcImageDesc, &pCopyRegion->srcOffset ,
882- &pCopyRegion->copyExtent , SrcRegion));
896+ UR_CALL (getZeImageRegionHelper (zeSrcImageDesc, SrcPixelSizeInBytes,
897+ &pCopyRegion->srcOffset ,
898+ &pCopyRegion->copyExtent , SrcRegion));
883899
884900 auto *urImgSrc = reinterpret_cast <const ur_bindless_mem_handle_t *>(pSrc);
885901 auto *urImgDst = reinterpret_cast <ur_bindless_mem_handle_t *>(pDst);
0 commit comments