diff --git a/flang-rt/lib/cuda/memory.cpp b/flang-rt/lib/cuda/memory.cpp index 1ebe5059b9411..adc24ff223729 100644 --- a/flang-rt/lib/cuda/memory.cpp +++ b/flang-rt/lib/cuda/memory.cpp @@ -8,6 +8,7 @@ #include "flang/Runtime/CUDA/memory.h" #include "flang-rt/runtime/assign-impl.h" +#include "flang-rt/runtime/descriptor.h" #include "flang-rt/runtime/terminator.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" @@ -98,8 +99,21 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, } else { terminator.Crash("host to host copy not supported"); } - Fortran::runtime::Assign( - *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); + if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) { + // Special case when rhs is bigger than lhs and both are contiguous arrays. + // In this case we do a simple ptr to ptr transfer with the size of lhs. + // This is allowed in the reference compiler and it avoids the error + // triggered in the Assign runtime function used for the main case below. + if (!srcDesc->IsContiguous() || !dstDesc->IsContiguous()) + terminator.Crash("Unsupported data transfer: mismatching element counts " + "with non-contiguous arrays"); + RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr, + srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(), + mode, sourceFile, sourceLine); + } else { + Fortran::runtime::Assign( + *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct); + } } void RTDECL(CUFDataTransferCstDesc)(Descriptor *dstDesc, Descriptor *srcDesc,