Skip to content

Commit 046bcb4

Browse files
Resume work, rebased with origin/sycl
Signed-off-by: Zhang, Winston <[email protected]>
1 parent 67822b4 commit 046bcb4

File tree

6 files changed

+66
-20
lines changed

6 files changed

+66
-20
lines changed

sycl/test-e2e/bindless_images/copies/device_to_device_copy.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// REQUIRES: aspect-ext_oneapi_bindless_images
2-
// REQUIRES: cuda
32

3+
// UNSUPPORTED: target-amd
4+
// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d
5+
// copy
46
// RUN: %{build} -o %t.out
57
// RUN: %{run} %t.out
68

sycl/test-e2e/bindless_images/copies/device_to_device_copy_1D_subregion.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// REQUIRES: aspect-ext_oneapi_bindless_images
2-
// REQUIRES: cuda
32

3+
// UNSUPPORTED: target-amd
4+
// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d
5+
// copy
46
// RUN: %{build} -o %t.out
57
// RUN: %{run} %t.out
68

sycl/test-e2e/bindless_images/copies/device_to_device_copy_2D_subregion.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// REQUIRES: aspect-ext_oneapi_bindless_images
2-
// REQUIRES: cuda
32

3+
// UNSUPPORTED: target-amd
4+
// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image d2d
5+
// copy
46
// RUN: %{build} -o %t.out
57
// RUN: %{run} %t.out
68

sycl/test-e2e/bindless_images/copies/device_to_device_pitched.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm
33
// REQUIRES: cuda
44
//
5-
// UNSUPPORTED: cuda
5+
// UNSUPPORTED: target-amd
66
// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17231
77

88
// RUN: %{build} -o %t.out

sycl/test-e2e/bindless_images/copies/host_to_host_pitched.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// REQUIRES: aspect-ext_oneapi_bindless_images
22
// REQUIRES: aspect-ext_oneapi_bindless_images_2d_usm
3-
// REQUIRES: cuda
43

4+
// UNSUPPORTED: target-amd
5+
// UNSUPPORTED-INTENDED: currently not supporting amd for bindless image h2h
6+
// copy
57
// RUN: %{build} -o %t.out
68
// RUN: %{run} %t.out
79

unified-runtime/source/adapters/level_zero/image_common.cpp

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -867,21 +867,59 @@ ur_result_t bindlessImagesHandleCopyFlags(
867867
return UR_RESULT_SUCCESS;
868868
};
869869
case UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE: {
870-
ze_image_region_t DstRegion;
871-
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset,
872-
&pCopyRegion->copyExtent, DstRegion));
873-
ze_image_region_t SrcRegion;
874-
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset,
875-
&pCopyRegion->copyExtent, SrcRegion));
876-
877-
auto *urImgSrc = reinterpret_cast<const ur_bindless_mem_handle_t *>(pSrc);
878-
auto *urImgDst = reinterpret_cast<ur_bindless_mem_handle_t *>(pDst);
879-
880-
ZE2UR_CALL(zeCommandListAppendImageCopyRegion,
881-
(ZeCommandList, urImgDst->getZeImage(), urImgSrc->getZeImage(),
882-
&DstRegion, &SrcRegion, zeSignalEvent, numWaitEvents,
883-
phWaitEvents));
884-
870+
if (pSrcImageDesc->rowPitch != 0 && pDstImageDesc->rowPitch != 0) {
871+
// Copy from pitched USM memory to pitched USM memory
872+
uint32_t SrcRowPitch = pSrcImageDesc->rowPitch;
873+
uint32_t DstRowPitch = pDstImageDesc->rowPitch;
874+
ze_copy_region_t ZeDstRegion = {(uint32_t)pCopyRegion->dstOffset.x,
875+
(uint32_t)pCopyRegion->dstOffset.y,
876+
(uint32_t)pCopyRegion->dstOffset.z,
877+
DstRowPitch,
878+
(uint32_t)pCopyRegion->copyExtent.height,
879+
(uint32_t)pCopyRegion->copyExtent.depth};
880+
uint32_t DstSlicePitch = 0;
881+
uint32_t SrcSlicePitch = 0;
882+
ze_copy_region_t ZeSrcRegion = {(uint32_t)pCopyRegion->srcOffset.x,
883+
(uint32_t)pCopyRegion->srcOffset.y,
884+
(uint32_t)pCopyRegion->srcOffset.z,
885+
SrcRowPitch,
886+
(uint32_t)pCopyRegion->copyExtent.height,
887+
(uint32_t)pCopyRegion->copyExtent.depth};
888+
ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion,
889+
(ZeCommandList, pDst, &ZeDstRegion, DstRowPitch, DstSlicePitch,
890+
pSrc, &ZeSrcRegion, SrcRowPitch, SrcSlicePitch, zeSignalEvent,
891+
numWaitEvents, phWaitEvents));
892+
} else if (pSrcImageDesc->rowPitch == 0 && pDstImageDesc->rowPitch == 0) {
893+
// Copy from Non-USM memory to Non-USM memory
894+
ze_image_region_t DstRegion;
895+
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset,
896+
&pCopyRegion->copyExtent, DstRegion));
897+
ze_image_region_t SrcRegion;
898+
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset,
899+
&pCopyRegion->copyExtent, SrcRegion));
900+
auto *UrImageDst = static_cast<ur_bindless_mem_handle_t *>(pDst);
901+
auto *UrImageSrc = static_cast<const ur_bindless_mem_handle_t *>(pSrc);
902+
ZE2UR_CALL(zeCommandListAppendImageCopyRegion,
903+
(ZeCommandList, UrImageDst->getZeImage(),
904+
UrImageSrc->getZeImage(), &DstRegion, &SrcRegion,
905+
zeSignalEvent, numWaitEvents, phWaitEvents));
906+
} else {
907+
// Mixed copy: exactly one of source/destination is pitched USM memory
908+
// Note: This might be the same procedure as pitched USM to
909+
// pitched USM. Need further testing.
910+
ze_image_region_t DstRegion;
911+
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->dstOffset,
912+
&pCopyRegion->copyExtent, DstRegion));
913+
ze_image_region_t SrcRegion;
914+
UR_CALL(getImageRegionHelper(zeSrcImageDesc, &pCopyRegion->srcOffset,
915+
&pCopyRegion->copyExtent, SrcRegion));
916+
auto *UrImageDst = static_cast<ur_bindless_mem_handle_t *>(pDst);
917+
auto *UrImageSrc = static_cast<const ur_bindless_mem_handle_t *>(pSrc);
918+
ZE2UR_CALL(zeCommandListAppendImageCopyRegion,
919+
(ZeCommandList, UrImageDst->getZeImage(),
920+
UrImageSrc->getZeImage(), &DstRegion, &SrcRegion,
921+
zeSignalEvent, numWaitEvents, phWaitEvents));
922+
}
885923
return UR_RESULT_SUCCESS;
886924
};
887925
default:

0 commit comments

Comments
 (0)