Skip to content

Conversation

@clementval
Copy link
Contributor

  • Update data transfer detection so that an assignment with a host RHS and a managed LHS is performed on the host. This helps when the RHS is a function result.
  • Fix the test cuda-managed.cuf introduced in eef4b5a. The test was not checking for the implicit transfer but for the explicit transfer that was part of the first loop.

@clementval clementval requested a review from wangzpgi October 16, 2025 05:42
@llvmbot llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir flang:semantics labels Oct 16, 2025
@llvmbot
Copy link
Member

llvmbot commented Oct 16, 2025

@llvm/pr-subscribers-flang-semantics

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes
  • Update data transfer detection so that an assignment with a host RHS and a managed LHS is performed on the host. This helps when the RHS is a function result.
  • Fix the test cuda-managed.cuf introduced in eef4b5a. The test was not checking for the implicit transfer but for the explicit transfer that was part of the first loop.

Full diff: https://github.com/llvm/llvm-project/pull/163705.diff

3 Files Affected:

  • (modified) flang/include/flang/Evaluate/tools.h (+6-4)
  • (modified) flang/test/Lower/CUDA/cuda-data-transfer.cuf (+20)
  • (modified) flang/test/Lower/CUDA/cuda-managed.cuf (+8-12)
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 20a091918dc56..7f64d230f7348 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1342,10 +1342,12 @@ inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) {
   int rhsNbManagedSymbols = {GetNbOfCUDAManagedOrUnifiedSymbols(rhs)};
   int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)};
 
-  // Special case where only managed or unifed symbols are involved. This is
-  // performed on the host.
-  if (lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
-      rhsNbSymbols == 1) {
+  // Special cases performed on the host:
+  // - Only managed or unified symbols are involved on RHS and LHS.
+  // - LHS is managed or unified and the RHS is host only.
+  if ((lhsNbManagedSymbols == 1 && rhsNbManagedSymbols == 1 &&
+          rhsNbSymbols == 1) ||
+      (lhsNbManagedSymbols == 1 && rhsNbSymbols == 0)) {
     return false;
   }
   return HasCUDADeviceAttrs(lhs) || rhsNbSymbols > 0;
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index aef926b09a1ed..d1c8ecca3b019 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -15,6 +15,13 @@ module mod1
 
   real(kind=8), device, allocatable, dimension(:) :: p
 
+  interface
+    function __sum(a_d) result(res_h)
+      integer(4), managed, intent(in) :: a_d(:,:,:,:)
+      integer(4), allocatable, managed :: res_h(:,:,:)
+    end function
+  end interface
+
 contains
   function dev1(a)
     integer, device :: a(:)
@@ -522,3 +529,16 @@ end subroutine
 ! CHECK: hlfir.yield_element %[[CONV]] : f32
 ! CHECK: }
 ! CHECKL: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
+
+subroutine sub28(N1,N2,N3,N4)
+  use mod1
+  integer(4), managed :: a(N1,N2,N3,N4) 
+  integer(4), managed :: bres(N1,N2,N3)
+  bres = __sum(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub28
+! CHECK: fir.call @_QP__sum
+! CHECK-NOT: cuf.data_transfer
+! CHECK: hlfir.assign
+! CHECK-NOT: cuf.data_transfer
diff --git a/flang/test/Lower/CUDA/cuda-managed.cuf b/flang/test/Lower/CUDA/cuda-managed.cuf
index e14bd849670b1..69c9ecfd355f7 100644
--- a/flang/test/Lower/CUDA/cuda-managed.cuf
+++ b/flang/test/Lower/CUDA/cuda-managed.cuf
@@ -1,18 +1,14 @@
 ! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
 
+! Check for implicit data transfer of managed variable
+
 subroutine testr2(N1,N2)
   real(4), managed :: ai4(N1,N2)
   real(4), allocatable :: bRefi4(:)
 
   integer :: i1, i2
 
-  do i2 = 1, N2
-    do i1 = 1, N1
-      ai4(i1,i2) = i1 + N1*(i2-1)
-    enddo
-  enddo
-
-  allocate(bRefi4 (N1))
+  allocate(bRefi4(N1))
   do i1 = 1, N1
     bRefi4(i1) = (ai4(i1,1)+ai4(i1,N2))*N2/2
   enddo
@@ -20,8 +16,8 @@ subroutine testr2(N1,N2)
 
 end subroutine
 
-!CHECK-LABEL: func.func @_QPtestr2
-!CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.array<?x?xf32>, %{{.*}}, %{{.*}} : index, index {bindc_name = "ai4", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} -> !fir.ref<!fir.array<?x?xf32>>
-!CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOC]](%{{.*}}) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
-!CHECK: %[[DEST:.*]] = hlfir.designate %[[DECLARE]]#0 (%{{.*}}, %{{.*}}) : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
-!CHECK: cuf.data_transfer %{{.*}}#0 to %[[DEST]] {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<f32>, !fir.ref<f32>
+! CHECK-LABEL: func.func @_QPtestr2
+! CHECK:  %[[MANAGED:.*]]:2 = hlfir.declare %22(%23) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtestr2Eai4"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?x?xf32>, %16, %21 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?xf32>>, !fir.shape<2>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.heap<!fir.array<?x?xf32>>)
+! CHECK: cuf.data_transfer %[[MANAGED]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>

@clementval clementval merged commit 8b06ef3 into llvm:main Oct 16, 2025
14 checks passed
@clementval clementval deleted the cuf_managed_host branch October 16, 2025 17:01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir flang:semantics flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants