Conversation

clementval (Contributor)

No description provided.

@clementval clementval requested a review from wangzpgi October 16, 2025 19:45
@llvmbot llvmbot added the flang (Flang issues not falling into any other category) and flang:fir-hlfir labels Oct 16, 2025
llvmbot (Member) commented Oct 16, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/163852.diff

4 Files Affected:

  • (modified) flang/include/flang/Lower/CUDA.h (+7-1)
  • (modified) flang/lib/Lower/Bridge.cpp (+1-4)
  • (modified) flang/lib/Lower/CUDA.cpp (+21-6)
  • (modified) flang/test/Lower/CUDA/cuda-data-transfer.cuf (+17)
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index ab9dde8ad5198..971527cba1de8 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -27,6 +27,10 @@ class Location;
 class MLIRContext;
 } // namespace mlir
 
+namespace hlfir {
+class ElementalOp;
+} // namespace hlfir
+
 namespace Fortran::lower {
 
 class AbstractConverter;
@@ -58,7 +62,9 @@ cuf::DataAttributeAttr
 translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
                                 const Fortran::semantics::Symbol &sym);
 
-bool isTransferWithConversion(mlir::Value rhs);
+/// Check if the rhs has an implicit conversion. Return the elemental op if
+/// there is a conversion. Return null otherwise.
+hlfir::ElementalOp isTransferWithConversion(mlir::Value rhs);
 
 } // end namespace Fortran::lower
 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 68adf346fe8c0..525fb0e9997b7 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4987,11 +4987,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
     // host = device
     if (!lhsIsDevice && rhsIsDevice) {
-      if (Fortran::lower::isTransferWithConversion(rhs)) {
+      if (auto elementalOp = Fortran::lower::isTransferWithConversion(rhs)) {
         mlir::OpBuilder::InsertionGuard insertionGuard(builder);
-        auto elementalOp =
-            mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp());
-        assert(elementalOp && "expect elemental op");
         auto designateOp =
             *elementalOp.getBody()->getOps<hlfir::DesignateOp>().begin();
         builder.setInsertionPoint(elementalOp);
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index bb4bdee78f97d..9501b0ec60002 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -68,11 +68,26 @@ cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
   return cuf::getDataAttribute(mlirContext, cudaAttr);
 }
 
-bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+  auto isConversionElementalOp = [](hlfir::ElementalOp elOp) {
+    return llvm::hasSingleElement(
+               elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
+           llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
+           llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) ==
+               1;
+  };
+  if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(rhs.getDefiningOp())) {
+    if (!declOp.getMemref().getDefiningOp())
+      return {};
+    if (auto associateOp = mlir::dyn_cast<hlfir::AssociateOp>(
+            declOp.getMemref().getDefiningOp()))
+      if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(
+              associateOp.getSource().getDefiningOp()))
+        if (isConversionElementalOp(elOp))
+          return elOp;
+  }
   if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp()))
-    if (llvm::hasSingleElement(elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
-        llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
-        llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) == 1)
-      return true;
-  return false;
+    if (isConversionElementalOp(elOp))
+      return elOp;
+  return {};
 }
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index aef926b09a1ed..fa579031a09d4 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -522,3 +522,20 @@ end subroutine
 ! CHECK: hlfir.yield_element %[[CONV]] : f32
 ! CHECK: }
! CHECK: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
+
+! Data transfer with conversion with a more complex elemental
+! Check that the data transfer is placed before the elemental
+subroutine sub28()
+  real(2), device, allocatable :: a(:)
+  real(4), allocatable :: ha(:)
+  allocate(a(10))
+  allocate(ha(10))
+  ha = a
+  deallocate(a)
+end subroutine
+
+! CHECK-LABEL:  func.func @_QPsub28()
+! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?xf16>, %24#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TMP_BUFFER:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf16>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf16>>, !fir.heap<!fir.array<?xf16>>)
+! CHECK: cuf.data_transfer %{{.*}} to %[[TMP_BUFFER]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.heap<!fir.array<?xf16>>>, !fir.box<!fir.array<?xf16>>
+! CHECK: hlfir.elemental 
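
For readers skimming the diff: the interface change is that isTransferWithConversion now returns the matched hlfir::ElementalOp (a null op handle when no conversion is found) instead of a bool, so the caller in Bridge.cpp can test and reuse the match in one step rather than repeating the dyn_cast and asserting. Below is a minimal sketch of that idiom, not the PR's code; the helper name matchElemental and the example function are hypothetical, and the snippet assumes the usual flang/MLIR headers are on the include path.

    #include "flang/Optimizer/HLFIR/HLFIROps.h"
    #include "mlir/IR/Operation.h"
    #include "mlir/IR/Value.h"

    // Hypothetical helper illustrating the idiom: return the matched op, not a bool.
    static hlfir::ElementalOp matchElemental(mlir::Value rhs) {
      // A default-constructed op handle is null and converts to false, so callers
      // can write `if (auto elemental = matchElemental(rhs))`.
      if (mlir::Operation *def = rhs.getDefiningOp())
        if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(def))
          return elOp;
      return {};
    }

    void useMatch(mlir::Value rhs) {
      if (auto elemental = matchElemental(rhs)) {
        // The matched op is available directly; no second dyn_cast or assert,
        // which is what the Bridge.cpp hunk above relies on.
        (void)elemental;
      }
    }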

@clementval clementval force-pushed the cuf_data_tx_with_conv2 branch from d76c3d6 to 383325f October 16, 2025 19:46
@clementval clementval force-pushed the cuf_data_tx_with_conv2 branch from 67d130b to f4b56af October 16, 2025 20:09
@clementval clementval enabled auto-merge (squash) October 16, 2025 20:14
@clementval clementval merged commit e55071b into llvm:main Oct 16, 2025
10 checks passed
@clementval clementval deleted the cuf_data_tx_with_conv2 branch October 16, 2025 21:04