@@ -395,15 +395,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
395395
396396 array_desc.Flags = 0 ; // No flags required
397397 array_desc.Width = pImageDesc->width ;
398- if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
398+ switch (pImageDesc->type ) {
399+ case UR_MEM_TYPE_IMAGE1D:
399400 array_desc.Height = 0 ;
400401 array_desc.Depth = 0 ;
401- } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
402+ break ;
403+ case UR_MEM_TYPE_IMAGE2D:
402404 array_desc.Height = pImageDesc->height ;
403405 array_desc.Depth = 0 ;
404- } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
406+ break ;
407+ case UR_MEM_TYPE_IMAGE3D:
405408 array_desc.Height = pImageDesc->height ;
406409 array_desc.Depth = pImageDesc->depth ;
410+ break ;
411+ case UR_MEM_TYPE_IMAGE1D_ARRAY:
412+ array_desc.Height = 0 ;
413+ array_desc.Depth = pImageDesc->arraySize ;
414+ array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
415+ break ;
416+ case UR_MEM_TYPE_IMAGE2D_ARRAY:
417+ array_desc.Height = pImageDesc->height ;
418+ array_desc.Depth = pImageDesc->arraySize ;
419+ array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
420+ break ;
421+ default :
422+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
407423 }
408424
409425 ScopedContext Active (hDevice->getContext ());
@@ -698,6 +714,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698714 cpy_desc.Height = copyExtent.height ;
699715 cpy_desc.Depth = copyExtent.depth ;
700716 UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
717+ } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
718+ pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
719+ CUDA_MEMCPY3D cpy_desc = {};
720+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
721+ cpy_desc.srcY = srcOffset.y ;
722+ cpy_desc.srcZ = srcOffset.z ;
723+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
724+ cpy_desc.dstY = dstOffset.y ;
725+ cpy_desc.dstZ = dstOffset.z ;
726+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
727+ cpy_desc.srcHost = pSrc;
728+ cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes;
729+ cpy_desc.srcHeight = hostExtent.height ;
730+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731+ cpy_desc.dstArray = (CUarray)pDst;
732+ cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
733+ cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
734+ cpy_desc.Depth = pImageDesc->arraySize ;
735+ UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
701736 }
702737 } else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
703738 if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
@@ -762,6 +797,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
762797 cpy_desc.Height = copyExtent.height ;
763798 cpy_desc.Depth = copyExtent.depth ;
764799 UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
800+ } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
801+ pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
802+ CUDA_MEMCPY3D cpy_desc = {};
803+ cpy_desc.srcXInBytes = srcOffset.x ;
804+ cpy_desc.srcY = srcOffset.y ;
805+ cpy_desc.srcZ = srcOffset.z ;
806+ cpy_desc.dstXInBytes = dstOffset.x ;
807+ cpy_desc.dstY = dstOffset.y ;
808+ cpy_desc.dstZ = dstOffset.z ;
809+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
810+ cpy_desc.srcArray = (CUarray)pSrc;
811+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
812+ cpy_desc.dstHost = pDst;
813+ cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
814+ cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
815+ cpy_desc.Depth = pImageDesc->arraySize ;
816+ UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
765817 }
766818 } else {
767819 // / imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE
0 commit comments