From 3796fd784a3bb533b9f0b91c4b37338e7cd30d00 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 15 Oct 2025 22:39:07 -0700 Subject: [PATCH] [flang][cuda] Move function result assignment to managed variable on host --- flang/include/flang/Evaluate/tools.h | 10 ++++++---- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 20 ++++++++++++++++++++ flang/test/Lower/CUDA/cuda-managed.cuf | 20 ++++++++------------ 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 20a091918dc56..7f64d230f7348 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1342,10 +1342,12 @@ inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) { int rhsNbManagedSymbols = {GetNbOfCUDAManagedOrUnifiedSymbols(rhs)}; int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)}; - // Special case where only managed or unifed symbols are involved. This is - // performed on the host. - if (lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 && - rhsNbSymbols == 1) { + // Special cases performed on the host: + // - Only managed or unified symbols are involved on RHS and LHS. + // - LHS is managed or unified and the RHS is host only. 
+ if ((lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 && + rhsNbSymbols == 1) || + (lhsNbManagedSymbols == 1 && rhsNbSymbols == 0)) { return false; } return HasCUDADeviceAttrs(lhs) || rhsNbSymbols > 0; diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index aef926b09a1ed..d1c8ecca3b019 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -15,6 +15,13 @@ module mod1 real(kind=8), device, allocatable, dimension(:) :: p + interface + function __sum(a_d) result(res_h) + integer(4), managed, intent(in) :: a_d(:,:,:,:) + integer(4), allocatable, managed :: res_h(:,:,:) + end function + end interface + contains function dev1(a) integer, device :: a(:) @@ -522,3 +529,16 @@ end subroutine ! CHECK: hlfir.yield_element %[[CONV]] : f32 ! CHECK: } ! CHECKL: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref> + +subroutine sub28(N1,N2,N3,N4) + use mod1 + integer(4), managed :: a(N1,N2,N3,N4) + integer(4), managed :: bres(N1,N2,N3) + bres = __sum(a) +end subroutine + +! CHECK-LABEL: func.func @_QPsub28 +! CHECK: fir.call @_QP__sum +! CHECK-NOT: cuf.data_transfer +! CHECK: hlfir.assign +! CHECK-NOT: cuf.data_transfer diff --git a/flang/test/Lower/CUDA/cuda-managed.cuf b/flang/test/Lower/CUDA/cuda-managed.cuf index e14bd849670b1..69c9ecfd355f7 100644 --- a/flang/test/Lower/CUDA/cuda-managed.cuf +++ b/flang/test/Lower/CUDA/cuda-managed.cuf @@ -1,18 +1,14 @@ ! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s +! 
Check for implicit data transfer of managed variable + subroutine testr2(N1,N2) real(4), managed :: ai4(N1,N2) real(4), allocatable :: bRefi4(:) integer :: i1, i2 - do i2 = 1, N2 - do i1 = 1, N1 - ai4(i1,i2) = i1 + N1*(i2-1) - enddo - enddo - - allocate(bRefi4 (N1)) + allocate(bRefi4(N1)) do i1 = 1, N1 bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2 enddo @@ -20,8 +16,8 @@ subroutine testr2(N1,N2) end subroutine -!CHECK-LABEL: func.func @_QPtestr2 -!CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.array, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda, uniq_name = "_QFtestr2Eai4"} -> !fir.ref> -!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFtestr2Eai4"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) -!CHECK: %[[DEST:.*]] = hlfir.designate %[[DECLARE]]#0 (%{{.*}}, %{{.*}}) : (!fir.box>, i64, i64) -> !fir.ref -!CHECK: cuf.data_transfer %{{.*}}#0 to %[[DEST]] {transfer_kind = #cuf.cuda_transfer} : !fir.ref, !fir.ref +! CHECK-LABEL: func.func @_QPtestr2 +! CHECK: %[[MANAGED:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda, uniq_name = "_QFtestr2Eai4"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array, %{{.*}}, %{{.*}} {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<2>) -> (!fir.box>, !fir.heap>) +! CHECK: cuf.data_transfer %[[MANAGED]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer} : !fir.ref>, !fir.box>