Skip to content

Commit edb281f

Browse files
authored
Merge pull request #1104 from fabiomestre/fabio/fix_cuda_intermittent
[CUDA] Fix synchronization issue in urEnqueueMemImageCopy
2 parents 8695b5d + ffe9a51 commit edb281f

File tree

1 file changed

+15
-19
lines changed

1 file changed

+15
-19
lines changed

source/adapters/cuda/enqueue.cpp

Lines changed: 15 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -862,7 +862,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
862862
}
863863
}
864864

865-
/// General ND memory copy operation for images (where N > 1).
865+
/// General ND memory copy operation for images.
866866
/// This function requires the corresponding CUDA context to be at the top of
867867
/// the context stack
868868
/// If the source and/or destination is an array, SrcPtr and/or DstPtr
@@ -877,27 +877,27 @@ static ur_result_t commonEnqueueMemImageNDCopy(
877877
UR_ASSERT(DstType == CU_MEMORYTYPE_ARRAY || DstType == CU_MEMORYTYPE_HOST,
878878
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
879879

880-
if (ImgType == UR_MEM_TYPE_IMAGE2D) {
880+
if (ImgType == UR_MEM_TYPE_IMAGE1D || ImgType == UR_MEM_TYPE_IMAGE2D) {
881881
CUDA_MEMCPY2D CpyDesc;
882882
memset(&CpyDesc, 0, sizeof(CpyDesc));
883883
CpyDesc.srcMemoryType = SrcType;
884884
if (SrcType == CU_MEMORYTYPE_ARRAY) {
885885
CpyDesc.srcArray = *static_cast<const CUarray *>(SrcPtr);
886886
CpyDesc.srcXInBytes = SrcOffset.x;
887-
CpyDesc.srcY = SrcOffset.y;
887+
CpyDesc.srcY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : SrcOffset.y;
888888
} else {
889889
CpyDesc.srcHost = SrcPtr;
890890
}
891891
CpyDesc.dstMemoryType = DstType;
892892
if (DstType == CU_MEMORYTYPE_ARRAY) {
893893
CpyDesc.dstArray = *static_cast<CUarray *>(DstPtr);
894894
CpyDesc.dstXInBytes = DstOffset.x;
895-
CpyDesc.dstY = DstOffset.y;
895+
CpyDesc.dstY = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 0 : DstOffset.y;
896896
} else {
897897
CpyDesc.dstHost = DstPtr;
898898
}
899899
CpyDesc.WidthInBytes = Region.width;
900-
CpyDesc.Height = Region.height;
900+
CpyDesc.Height = (ImgType == UR_MEM_TYPE_IMAGE1D) ? 1 : Region.height;
901901
UR_CHECK_ERROR(cuMemcpy2DAsync(&CpyDesc, CuStream));
902902
return UR_RESULT_SUCCESS;
903903
}
@@ -1124,21 +1124,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
11241124
}
11251125

11261126
ur_mem_type_t ImgType = std::get<SurfaceMem>(hImageSrc->Mem).getImageType();
1127-
if (ImgType == UR_MEM_TYPE_IMAGE1D) {
1128-
UR_CHECK_ERROR(cuMemcpyAtoA(DstArray, DstByteOffsetX, SrcArray,
1129-
SrcByteOffsetX, BytesToCopy));
1130-
} else {
1131-
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
1132-
region.depth};
1133-
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
1134-
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
11351127

1136-
Result = commonEnqueueMemImageNDCopy(
1137-
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1138-
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1139-
if (Result != UR_RESULT_SUCCESS) {
1140-
return Result;
1141-
}
1128+
ur_rect_region_t AdjustedRegion = {BytesToCopy, region.height,
1129+
region.depth};
1130+
ur_rect_offset_t SrcOffset = {SrcByteOffsetX, srcOrigin.y, srcOrigin.z};
1131+
ur_rect_offset_t DstOffset = {DstByteOffsetX, dstOrigin.y, dstOrigin.z};
1132+
1133+
Result = commonEnqueueMemImageNDCopy(
1134+
CuStream, ImgType, AdjustedRegion, &SrcArray, CU_MEMORYTYPE_ARRAY,
1135+
SrcOffset, &DstArray, CU_MEMORYTYPE_ARRAY, DstOffset);
1136+
if (Result != UR_RESULT_SUCCESS) {
1137+
return Result;
11421138
}
11431139

11441140
if (phEvent) {

0 commit comments

Comments
 (0)