Skip to content

Commit 3796fd7

Browse files
committed
[flang][cuda] Move function result assignment to managed variable on host
1 parent 20fdd53 commit 3796fd7

File tree

3 files changed

+34
-16
lines changed

3 files changed

+34
-16
lines changed

flang/include/flang/Evaluate/tools.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,10 +1342,12 @@ inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) {
13421342
int rhsNbManagedSymbols = {GetNbOfCUDAManagedOrUnifiedSymbols(rhs)};
13431343
int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)};
13441344

1345-
// Special case where only managed or unifed symbols are involved. This is
1346-
// performed on the host.
1347-
if (lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
1348-
rhsNbSymbols == 1) {
1345+
// Special cases performed on the host:
1346+
// - Only managed or unified symbols are involved on RHS and LHS.
1347+
// - LHS is managed or unified and the RHS is host only.
1348+
if ((lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
1349+
rhsNbSymbols == 1) ||
1350+
(lhsNbManagedSymbols == 1 && rhsNbSymbols == 0)) {
13491351
return false;
13501352
}
13511353
return HasCUDADeviceAttrs(lhs) || rhsNbSymbols > 0;

flang/test/Lower/CUDA/cuda-data-transfer.cuf

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ module mod1
1515

1616
real(kind=8), device, allocatable, dimension(:) :: p
1717

18+
interface
19+
function __sum(a_d) result(res_h)
20+
integer(4), managed, intent(in) :: a_d(:,:,:,:)
21+
integer(4), allocatable, managed :: res_h(:,:,:)
22+
end function
23+
end interface
24+
1825
contains
1926
function dev1(a)
2027
integer, device :: a(:)
@@ -522,3 +529,16 @@ end subroutine
522529
! CHECK: hlfir.yield_element %[[CONV]] : f32
523530
! CHECK: }
524531
! CHECKL: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
532+
533+
subroutine sub28(N1,N2,N3,N4)
534+
use mod1
535+
integer(4), managed :: a(N1,N2,N3,N4)
536+
integer(4), managed :: bres(N1,N2,N3)
537+
bres = __sum(a)
538+
end subroutine
539+
540+
! CHECK-LABEL: func.func @_QPsub28
541+
! CHECK: fir.call @_QP__sum
542+
! CHECK-NOT: cuf.data_transfer
543+
! CHECK: hlfir.assign
544+
! CHECK-NOT: cuf.data_transfer
Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,23 @@
11
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
22

3+
! Check for implicit data transfer of managed variable
4+
35
subroutine testr2(N1,N2)
46
real(4), managed :: ai4(N1,N2)
57
real(4), allocatable :: bRefi4(:)
68

79
integer :: i1, i2
810

9-
do i2 = 1, N2
10-
do i1 = 1, N1
11-
ai4(i1,i2) = i1 + N1*(i2-1)
12-
enddo
13-
enddo
14-
15-
allocate(bRefi4 (N1))
11+
allocate(bRefi4(N1))
1612
do i1 = 1, N1
1713
bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2
1814
enddo
1915
deallocate(bRefi4)
2016

2117
end subroutine
2218

23-
!CHECK-LABEL: func.func @_QPtestr2
24-
!CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.array<?x?xf32>, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} -> !fir.ref<!fir.array<?x?xf32>>
25-
!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
26-
!CHECK: %[[DEST:.*]] = hlfir.designate %[[DECLARE]]#0 (%{{.*}}, %{{.*}}) : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
27-
!CHECK: cuf.data_transfer %{{.*}}#0 to %[[DEST]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<f32>, !fir.ref<f32>
19+
! CHECK-LABEL: func.func @_QPtestr2
20+
! CHECK: %[[MANAGED:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
21+
! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?x?xf32>, %{{.*}}, %{{.*}} {bindc_name = ".tmp", uniq_name = ""}
22+
! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.heap<!fir.array<?x?xf32>>)
23+
! CHECK: cuf.data_transfer %[[MANAGED]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>

0 commit comments

Comments
 (0)