@@ -628,15 +628,37 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
628628 ScopedContext Active (hQueue->getContext ());
629629 CUstream Stream = hQueue->getNextTransferStream ();
630630 enqueueEventsWait (hQueue, Stream, numEventsInWaitList, phEventWaitList);
631+
631632 // We have to use a different copy function for each image dimensionality.
632633
633634 if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
634635 if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
636+ CUmemorytype memType;
637+
638+ // Check what type of memory pDst is. If cuPointerGetAttribute returns
639+ // something different from CUDA_SUCCESS then we know that pDst memory
640+ // type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
641+ bool isCudaArray =
642+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
643+ (CUdeviceptr)pDst) != CUDA_SUCCESS;
644+
635645 size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width ;
636646 char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
637- UR_CHECK_ERROR (
638- cuMemcpyHtoAAsync ((CUarray)pDst, dstOffset.x * PixelSizeBytes,
639- (void *)SrcWithOffset, CopyExtentBytes, Stream));
647+
648+ if (isCudaArray) {
649+ UR_CHECK_ERROR (cuMemcpyHtoAAsync (
650+ (CUarray)pDst, dstOffset.x * PixelSizeBytes,
651+ (void *)SrcWithOffset, CopyExtentBytes, Stream));
652+ } else if (memType == CU_MEMORYTYPE_DEVICE) {
653+ void *DstWithOffset =
654+ (void *)((char *)pDst + (PixelSizeBytes * dstOffset.x ));
655+ UR_CHECK_ERROR (cuMemcpyHtoDAsync ((CUdeviceptr)DstWithOffset,
656+ (void *)SrcWithOffset,
657+ CopyExtentBytes, Stream));
658+ } else {
659+ // This should be unreachable.
660+ return UR_RESULT_ERROR_INVALID_VALUE;
661+ }
640662 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
641663 CUDA_MEMCPY2D cpy_desc = {};
642664 cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -679,13 +701,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
679701 }
680702 } else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
681703 if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
704+ CUmemorytype memType;
705+ // Check what type of memory pSrc is. If cuPointerGetAttribute returns
706+ // something different from CUDA_SUCCESS then we know that pSrc memory
707+ // type is a CuArray. Otherwise, it's CU_MEMORYTYPE_DEVICE.
708+ bool isCudaArray =
709+ cuPointerGetAttribute (&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
710+ (CUdeviceptr)pSrc) != CUDA_SUCCESS;
711+
682712 size_t CopyExtentBytes = PixelSizeBytes * copyExtent.width ;
683- size_t src_offset_bytes = PixelSizeBytes * srcOffset.x ;
684- void *dst_with_offset =
713+ void *DstWithOffset =
685714 (void *)((char *)pDst + (PixelSizeBytes * dstOffset.x ));
686- UR_CHECK_ERROR (cuMemcpyAtoHAsync (dst_with_offset, (CUarray)pSrc,
687- src_offset_bytes, CopyExtentBytes,
688- Stream));
715+
716+ if (isCudaArray) {
717+ UR_CHECK_ERROR (cuMemcpyAtoHAsync (DstWithOffset, (CUarray)pSrc,
718+ PixelSizeBytes * srcOffset.x ,
719+ CopyExtentBytes, Stream));
720+ } else if (memType == CU_MEMORYTYPE_DEVICE) {
721+ char *SrcWithOffset = (char *)pSrc + (srcOffset.x * PixelSizeBytes);
722+ UR_CHECK_ERROR (cuMemcpyDtoHAsync (DstWithOffset,
723+ (CUdeviceptr)SrcWithOffset,
724+ CopyExtentBytes, Stream));
725+ } else {
726+ // This should be unreachable.
727+ return UR_RESULT_ERROR_INVALID_VALUE;
728+ }
689729 } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
690730 CUDA_MEMCPY2D cpy_desc = {};
691731 cpy_desc.srcXInBytes = srcOffset.x ;
0 commit comments