[flang][cuda] Extend detection of data transfer with conversion #163852
Merged
Conversation
@llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/163852.diff
4 Files Affected:
- flang/include/flang/Lower/CUDA.h
- flang/lib/Lower/Bridge.cpp
- flang/lib/Lower/CUDA.cpp
- flang/test/Lower/CUDA/cuda-data-transfer.cuf
diff --git a/flang/include/flang/Lower/CUDA.h b/flang/include/flang/Lower/CUDA.h
index ab9dde8ad5198..971527cba1de8 100644
--- a/flang/include/flang/Lower/CUDA.h
+++ b/flang/include/flang/Lower/CUDA.h
@@ -27,6 +27,10 @@ class Location;
class MLIRContext;
} // namespace mlir
+namespace hlfir {
+class ElementalOp;
+} // namespace hlfir
+
namespace Fortran::lower {
class AbstractConverter;
@@ -58,7 +62,9 @@ cuf::DataAttributeAttr
translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext,
const Fortran::semantics::Symbol &sym);
-bool isTransferWithConversion(mlir::Value rhs);
+/// Check if the rhs has an implicit conversion. Return the elemental op if
+/// there is a conversion, and null otherwise.
+hlfir::ElementalOp isTransferWithConversion(mlir::Value rhs);
} // end namespace Fortran::lower
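For reference, here is a minimal sketch of the shape the helper matches when the rhs is the elemental itself (schematic HLFIR; shapes and types are abbreviated and the value names are hypothetical, not taken from the patch):

  %e = hlfir.elemental %shape : (!fir.shape<1>) -> !hlfir.expr<?xf32> {
  ^bb0(%i: index):
    %ref = hlfir.designate %device_array (%i)  // exactly one hlfir.designate
    %val = fir.load %ref                       // exactly one fir.load
    %cvt = fir.convert %val : (f16) -> f32     // exactly one fir.convert
    hlfir.yield_element %cvt : f32
  }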
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 68adf346fe8c0..525fb0e9997b7 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4987,11 +4987,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     // host = device
     if (!lhsIsDevice && rhsIsDevice) {
-      if (Fortran::lower::isTransferWithConversion(rhs)) {
+      if (auto elementalOp = Fortran::lower::isTransferWithConversion(rhs)) {
         mlir::OpBuilder::InsertionGuard insertionGuard(builder);
-        auto elementalOp =
-            mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp());
-        assert(elementalOp && "expect elemental op");
         auto designateOp =
             *elementalOp.getBody()->getOps<hlfir::DesignateOp>().begin();
         builder.setInsertionPoint(elementalOp);
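With the elemental op returned directly, the caller drops the dyn_cast/assert pair and can use the op right away: it moves the insertion point in front of the elemental and copies the designated device data into a host temporary first, so the conversion then runs on host memory. Schematically, the lowered IR looks like this (a sketch mirroring the CHECK lines of the new test below; names hypothetical, types elided):

  %tmp = fir.allocmem !fir.array<?xf16>  // host temporary with the device element type
  cuf.data_transfer %dev to %tmp_decl#0 {transfer_kind = #cuf.cuda_transfer<device_host>}
  %e = hlfir.elemental ...               // conversion now reads the host copy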
diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp
index bb4bdee78f97d..9501b0ec60002 100644
--- a/flang/lib/Lower/CUDA.cpp
+++ b/flang/lib/Lower/CUDA.cpp
@@ -68,11 +68,26 @@ cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute(
return cuf::getDataAttribute(mlirContext, cudaAttr);
}
-bool Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+hlfir::ElementalOp Fortran::lower::isTransferWithConversion(mlir::Value rhs) {
+  auto isConversionElementalOp = [](hlfir::ElementalOp elOp) {
+    return llvm::hasSingleElement(
+               elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
+           llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) &&
+           llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>());
+  };
+  if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(rhs.getDefiningOp())) {
+    if (!declOp.getMemref().getDefiningOp())
+      return {};
+    if (auto associateOp = mlir::dyn_cast<hlfir::AssociateOp>(
+            declOp.getMemref().getDefiningOp()))
+      if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(
+              associateOp.getSource().getDefiningOp()))
+        if (isConversionElementalOp(elOp))
+          return elOp;
+  }
   if (auto elOp = mlir::dyn_cast<hlfir::ElementalOp>(rhs.getDefiningOp()))
-    if (llvm::hasSingleElement(elOp.getBody()->getOps<hlfir::DesignateOp>()) &&
-        llvm::hasSingleElement(elOp.getBody()->getOps<fir::LoadOp>()) == 1 &&
-        llvm::hasSingleElement(elOp.getBody()->getOps<fir::ConvertOp>()) == 1)
-      return true;
-  return false;
+    if (isConversionElementalOp(elOp))
+      return elOp;
+  return {};
 }
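The new block at the top extends the detection to the case where the conversion elemental has already been buffered, i.e. the rhs is an hlfir.declare of an hlfir.associate of the elemental. A minimal sketch of that wrapped form (operands, result arities, and types elided; names hypothetical):

  %e = hlfir.elemental ...       // same single designate + load + convert body as above
  %a = hlfir.associate %e ...    // elemental materialized into a temporary
  %rhs = hlfir.declare %a ...    // what the assignment lowering actually sees

Both forms yield the underlying hlfir::ElementalOp; anything else returns a null op.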
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index aef926b09a1ed..fa579031a09d4 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -522,3 +522,20 @@ end subroutine
! CHECK: hlfir.yield_element %[[CONV]] : f32
! CHECK: }
! CHECK: hlfir.assign %[[ELE]] to %[[HD]]#0 : !hlfir.expr<10x20x30xf32>, !fir.ref<!fir.array<10x20x30xf32>>
+
+! Data transfer with conversion and a more complex elemental.
+! Check that the data transfer is placed before the elemental op.
+subroutine sub28()
+  real(2), device, allocatable :: a(:)
+  real(4), allocatable :: ha(:)
+  allocate(a(10))
+  allocate(ha(10))
+  ha = a
+  deallocate(a)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub28()
+! CHECK: %[[TMP:.*]] = fir.allocmem !fir.array<?xf16>, %{{.*}}#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[TMP_BUFFER:.*]]:2 = hlfir.declare %[[TMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf16>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf16>>, !fir.heap<!fir.array<?xf16>>)
+! CHECK: cuf.data_transfer %{{.*}} to %[[TMP_BUFFER]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.heap<!fir.array<?xf16>>>, !fir.box<!fir.array<?xf16>>
+! CHECK: hlfir.elemental
Force-pushed from d76c3d6 to 383325f
wangzpgi reviewed Oct 16, 2025
clementval commented Oct 16, 2025
wangzpgi reviewed Oct 16, 2025
wangzpgi approved these changes Oct 16, 2025
Force-pushed from 67d130b to f4b56af