@@ -862,7 +862,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
862
862
}
863
863
}
864
864
865
- // / General ND memory copy operation for images (where N > 1) .
865
+ /// General ND memory copy operation for images.
866
866
/// This function requires the corresponding CUDA context to be at the top of
867
867
/// the context stack.
868
868
/// If the source and/or destination is an array, SrcPtr and/or DstPtr
@@ -877,27 +877,27 @@ static ur_result_t commonEnqueueMemImageNDCopy(
877
877
UR_ASSERT (DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST,
878
878
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
879
879
880
- if (ImgType == UR_MEM_TYPE_IMAGE2D) {
880
+ if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) {
881
881
CUDA_MEMCPY2D CpyDesc;
882
882
memset (&CpyDesc, 0 , sizeof (CpyDesc));
883
883
CpyDesc.srcMemoryType = SrcType;
884
884
if (SrcType == CU_MEMORYTYPE_ARRAY) {
885
885
CpyDesc.srcArray = *static_cast <const CUarray *>(SrcPtr);
886
886
CpyDesc.srcXInBytes = SrcOffset.x ;
887
- CpyDesc.srcY = SrcOffset.y ;
887
+ CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset.y ;
888
888
} else {
889
889
CpyDesc.srcHost = SrcPtr;
890
890
}
891
891
CpyDesc.dstMemoryType = DstType;
892
892
if (DstType == CU_MEMORYTYPE_ARRAY) {
893
893
CpyDesc.dstArray = *static_cast <CUarray *>(DstPtr);
894
894
CpyDesc.dstXInBytes = DstOffset.x ;
895
- CpyDesc.dstY = DstOffset.y ;
895
+ CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset.y ;
896
896
} else {
897
897
CpyDesc.dstHost = DstPtr;
898
898
}
899
899
CpyDesc.WidthInBytes = Region.width ;
900
- CpyDesc.Height = Region.height ;
900
+ CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region.height ;
901
901
UR_CHECK_ERROR (cuMemcpy2DAsync (&CpyDesc, CuStream));
902
902
return UR_RESULT_SUCCESS;
903
903
}
@@ -1124,21 +1124,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
1124
1124
}
1125
1125
1126
1126
ur_mem_type_t ImgType = std::get<SurfaceMem>(hImageSrc->Mem ).getImageType ();
1127
- if (ImgType == UR_MEM_TYPE_IMAGE1D) {
1128
- UR_CHECK_ERROR (cuMemcpyAtoA (DstArray, DstByteOffsetX, SrcArray,
1129
- SrcByteOffsetX, BytesToCopy));
1130
- } else {
1131
- ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height ,
1132
- region.depth };
1133
- ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y , srcOrigin.z };
1134
- ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y , dstOrigin.z };
1135
1127
1136
- Result = commonEnqueueMemImageNDCopy (
1137
- CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1138
- SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1139
- if (Result != UR_RESULT_SUCCESS) {
1140
- return Result;
1141
- }
1128
+ ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height ,
1129
+ region.depth };
1130
+ ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y , srcOrigin.z };
1131
+ ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y , dstOrigin.z };
1132
+
1133
+ Result = commonEnqueueMemImageNDCopy (
1134
+ CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1135
+ SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1136
+ if (Result != UR_RESULT_SUCCESS) {
1137
+ return Result;
1142
1138
}
1143
1139
1144
1140
if (phEvent) {
0 commit comments