Skip to content

Commit 7837acf

Browse files
committed
Fix CUDA & HIP (untested locally)
1 parent 880fd69 commit 7837acf

File tree

2 files changed

+94
-104
lines changed

2 files changed

+94
-104
lines changed

unified-runtime/source/adapters/cuda/image.cpp

Lines changed: 47 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
645645
};
646646

647647
unsigned int NumChannels = 0;
648-
size_t PixelSizeBytes = 0;
648+
[[maybe_unused]] size_t PixelSizeBytes = 0;
649649

650650
UR_CALL(urCalculateNumChannels(pSrcImageFormat->channelOrder, &NumChannels));
651651

@@ -673,19 +673,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
673673
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
674674
(CUdeviceptr)pDst) != CUDA_SUCCESS;
675675

676-
size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
677-
const char *SrcWithOffset = static_cast<const char *>(pSrc) +
678-
(pCopyRegion->srcOffset.x * PixelSizeBytes);
676+
size_t CopyExtentBytes = pCopyRegion->copyExtent.width;
677+
const char *SrcWithOffset =
678+
static_cast<const char *>(pSrc) + pCopyRegion->srcOffset.x;
679679

680680
if (isCudaArray) {
681-
UR_CHECK_ERROR(cuMemcpyHtoAAsync(
682-
(CUarray)pDst, pCopyRegion->dstOffset.x * PixelSizeBytes,
683-
static_cast<const void *>(SrcWithOffset), CopyExtentBytes,
684-
Stream));
681+
UR_CHECK_ERROR(
682+
cuMemcpyHtoAAsync((CUarray)pDst, pCopyRegion->dstOffset.x,
683+
static_cast<const void *>(SrcWithOffset),
684+
CopyExtentBytes, Stream));
685685
} else if (memType == CU_MEMORYTYPE_DEVICE) {
686-
void *DstWithOffset =
687-
static_cast<void *>(static_cast<char *>(pDst) +
688-
(PixelSizeBytes * pCopyRegion->dstOffset.x));
686+
void *DstWithOffset = static_cast<void *>(static_cast<char *>(pDst) +
687+
pCopyRegion->dstOffset.x);
689688
UR_CHECK_ERROR(
690689
cuMemcpyHtoDAsync((CUdeviceptr)DstWithOffset,
691690
static_cast<const void *>(SrcWithOffset),
@@ -698,11 +697,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698697
CUDA_MEMCPY2D cpy_desc = {};
699698
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
700699
cpy_desc.srcHost = pSrc;
701-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
700+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
702701
cpy_desc.srcY = pCopyRegion->srcOffset.y;
703-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
702+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
704703
cpy_desc.dstY = pCopyRegion->dstOffset.y;
705-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
704+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
706705
cpy_desc.Height = pCopyRegion->copyExtent.height;
707706
cpy_desc.srcPitch = pSrcImageDesc->rowPitch;
708707
if (pDstImageDesc->rowPitch == 0) {
@@ -717,10 +716,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
717716
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
718717
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
719718
CUDA_MEMCPY3D cpy_desc = {};
720-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
719+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
721720
cpy_desc.srcY = pCopyRegion->srcOffset.y;
722721
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
723-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
722+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
724723
cpy_desc.dstY = pCopyRegion->dstOffset.y;
725724
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
726725
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -729,18 +728,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
729728
cpy_desc.srcHeight = pSrcImageDesc->height;
730729
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731730
cpy_desc.dstArray = (CUarray)pDst;
732-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
731+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
733732
cpy_desc.Height = pCopyRegion->copyExtent.height;
734733
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
735734
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
736735
} else if (pDstImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
737736
pDstImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
738737
pDstImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
739738
CUDA_MEMCPY3D cpy_desc = {};
740-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
739+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
741740
cpy_desc.srcY = pCopyRegion->srcOffset.y;
742741
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
743-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
742+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
744743
cpy_desc.dstY = pCopyRegion->dstOffset.y;
745744
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
746745
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -749,7 +748,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
749748
cpy_desc.srcHeight = std::max(uint64_t{1}, pSrcImageDesc->height);
750749
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
751750
cpy_desc.dstArray = (CUarray)pDst;
752-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
751+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
753752
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
754753
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
755754
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
@@ -764,20 +763,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
764763
cuPointerGetAttribute(&memType, CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
765764
(CUdeviceptr)pSrc) != CUDA_SUCCESS;
766765

767-
size_t CopyExtentBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
768-
void *DstWithOffset =
769-
static_cast<void *>(static_cast<char *>(pDst) +
770-
(PixelSizeBytes * pCopyRegion->dstOffset.x));
766+
size_t CopyExtentBytes = CopyRegion->copyExtent.width;
767+
void *DstWithOffset = static_cast<void *>(static_cast<char *>(pDst) +
768+
pCopyRegion->dstOffset.x);
771769

772770
if (isCudaArray) {
773-
UR_CHECK_ERROR(
774-
cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc),
775-
PixelSizeBytes * pCopyRegion->srcOffset.x,
776-
CopyExtentBytes, Stream));
771+
UR_CHECK_ERROR(cuMemcpyAtoHAsync(DstWithOffset, as_CUArray(pSrc),
772+
pCopyRegion->srcOffset.x,
773+
CopyExtentBytes, Stream));
777774
} else if (memType == CU_MEMORYTYPE_DEVICE) {
778775
const char *SrcWithOffset =
779-
static_cast<const char *>(pSrc) +
780-
(pCopyRegion->srcOffset.x * PixelSizeBytes);
776+
static_cast<const char *>(pSrc) + pCopyRegion->srcOffset.x;
781777
UR_CHECK_ERROR(cuMemcpyDtoHAsync(DstWithOffset,
782778
(CUdeviceptr)SrcWithOffset,
783779
CopyExtentBytes, Stream));
@@ -787,11 +783,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
787783
}
788784
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
789785
CUDA_MEMCPY2D cpy_desc = {};
790-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
786+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
791787
cpy_desc.srcY = pCopyRegion->srcOffset.y;
792-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
788+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
793789
cpy_desc.dstY = pCopyRegion->dstOffset.y;
794-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
790+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
795791
cpy_desc.Height = pCopyRegion->copyExtent.height;
796792
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
797793
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
@@ -808,10 +804,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
808804
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
809805
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
810806
CUDA_MEMCPY3D cpy_desc = {};
811-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
807+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
812808
cpy_desc.srcY = pCopyRegion->srcOffset.y;
813809
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
814-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
810+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
815811
cpy_desc.dstY = pCopyRegion->dstOffset.y;
816812
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
817813
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -820,18 +816,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
820816
cpy_desc.dstHost = pDst;
821817
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
822818
cpy_desc.dstHeight = pDstImageDesc->height;
823-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
819+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
824820
cpy_desc.Height = pCopyRegion->copyExtent.height;
825821
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
826822
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
827823
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
828824
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
829825
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
830826
CUDA_MEMCPY3D cpy_desc = {};
831-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
827+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
832828
cpy_desc.srcY = pCopyRegion->srcOffset.y;
833829
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
834-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
830+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
835831
cpy_desc.dstY = pCopyRegion->dstOffset.y;
836832
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
837833
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -840,7 +836,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
840836
cpy_desc.dstHost = pDst;
841837
cpy_desc.dstPitch = pDstImageDesc->rowPitch;
842838
cpy_desc.dstHeight = std::max(uint64_t{1}, pDstImageDesc->height);
843-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
839+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
844840
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
845841
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
846842
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
@@ -874,11 +870,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
874870
(CUdeviceptr)pDst) != CUDA_SUCCESS;
875871

876872
CUDA_MEMCPY2D cpy_desc = {};
877-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
873+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
878874
cpy_desc.srcY = 0;
879-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
875+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
880876
cpy_desc.dstY = 0;
881-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
877+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
882878
cpy_desc.Height = 1;
883879
if (isSrcCudaArray) {
884880
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -897,11 +893,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
897893
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
898894
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
899895
CUDA_MEMCPY2D cpy_desc = {};
900-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
896+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
901897
cpy_desc.srcY = pCopyRegion->srcOffset.y;
902-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
898+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
903899
cpy_desc.dstY = pCopyRegion->dstOffset.y;
904-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
900+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
905901
cpy_desc.Height = pCopyRegion->copyExtent.height;
906902
if (pSrcImageDesc->rowPitch == 0) {
907903
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
@@ -924,35 +920,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
924920
UR_CHECK_ERROR(cuMemcpy2DAsync(&cpy_desc, Stream));
925921
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
926922
CUDA_MEMCPY3D cpy_desc = {};
927-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
923+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
928924
cpy_desc.srcY = pCopyRegion->srcOffset.y;
929925
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
930-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
926+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
931927
cpy_desc.dstY = pCopyRegion->dstOffset.y;
932928
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
933929
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
934930
cpy_desc.srcArray = as_CUArray(pSrc);
935931
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
936932
cpy_desc.dstArray = (CUarray)pDst;
937-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
933+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
938934
cpy_desc.Height = pCopyRegion->copyExtent.height;
939935
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
940936
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
941937
} else if (pSrcImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
942938
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY ||
943939
pSrcImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) {
944940
CUDA_MEMCPY3D cpy_desc = {};
945-
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x * PixelSizeBytes;
941+
cpy_desc.srcXInBytes = pCopyRegion->srcOffset.x;
946942
cpy_desc.srcY = pCopyRegion->srcOffset.y;
947943
cpy_desc.srcZ = pCopyRegion->srcOffset.z;
948-
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x * PixelSizeBytes;
944+
cpy_desc.dstXInBytes = pCopyRegion->dstOffset.x;
949945
cpy_desc.dstY = pCopyRegion->dstOffset.y;
950946
cpy_desc.dstZ = pCopyRegion->dstOffset.z;
951947
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
952948
cpy_desc.srcArray = as_CUArray(pSrc);
953949
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
954950
cpy_desc.dstArray = (CUarray)pDst;
955-
cpy_desc.WidthInBytes = PixelSizeBytes * pCopyRegion->copyExtent.width;
951+
cpy_desc.WidthInBytes = pCopyRegion->copyExtent.width;
956952
cpy_desc.Height = std::max(uint64_t{1}, pCopyRegion->copyExtent.height);
957953
cpy_desc.Depth = pCopyRegion->copyExtent.depth;
958954
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));

0 commit comments

Comments
 (0)