Skip to content

Conversation

@clementval
Copy link
Contributor

The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime for the allocate statement, where it should instead generate a cuf.allocate operation.

@llvmbot llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir flang:semantics labels Aug 4, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 4, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime for the allocate statement, where it should instead generate a cuf.allocate operation.


Full diff: https://github.com/llvm/llvm-project/pull/152041.diff

5 Files Affected:

  • (modified) flang/include/flang/Semantics/tools.h (+2)
  • (modified) flang/lib/Lower/Allocatable.cpp (+5-3)
  • (modified) flang/lib/Lower/ConvertVariable.cpp (+4-2)
  • (modified) flang/lib/Semantics/tools.cpp (+15)
  • (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+13)
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &sym);
+
 inline bool IsCUDAShared(const Symbol &sym) {
   if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
     if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
 
   void genSimpleAllocation(const Allocation &alloc,
                            const fir::MutableBoxValue &box) {
-    bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+    bool isCudaAllocate =
+        Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+        Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
     bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
     bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
                             !alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
     unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
 
     if (inlineAllocation &&
-        ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+        ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
     genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
-    if (!isCudaSymbol) {
+    if (!isCudaAllocate) {
       stat = genRuntimeAllocate(builder, loc, box, errorManager);
       setPinnedToFalse();
     } else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy))
-        TODO(loc, "array of derived-type with device component");
+      if (mlir::isa<fir::SequenceType>(baseTy) &&
+          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+           fir::isPointerType(fir::getBase(exv).getType())))
+        return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &symbol) {
+  if (const auto *details{symbol.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{
+        type ? type->AsDerived() : nullptr};
+    if (derived) {
+      if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 UltimateComponentIterator::const_iterator
 FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
   UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
   real, device, allocatable :: a_device(:)
   real, managed, allocatable :: a_managed(:)
   real, pinned, allocatable :: a_pinned(:)
+  type :: t1
+    integer :: a
+    real, dimension(:), allocatable, device :: b
+  end type
 end module
 
 ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
 ! CHECK: %[[FALSE:.*]] = arith.constant false
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+  use globals
+  type(t1), pointer, dimension(:) :: d
+  allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate

@llvmbot
Copy link
Member

llvmbot commented Aug 4, 2025

@llvm/pr-subscribers-flang-semantics

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime for the allocate statement, where it should instead generate a cuf.allocate operation.


Full diff: https://github.com/llvm/llvm-project/pull/152041.diff

5 Files Affected:

  • (modified) flang/include/flang/Semantics/tools.h (+2)
  • (modified) flang/lib/Lower/Allocatable.cpp (+5-3)
  • (modified) flang/lib/Lower/ConvertVariable.cpp (+4-2)
  • (modified) flang/lib/Semantics/tools.cpp (+15)
  • (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+13)
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &sym);
+
 inline bool IsCUDAShared(const Symbol &sym) {
   if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
     if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
 
   void genSimpleAllocation(const Allocation &alloc,
                            const fir::MutableBoxValue &box) {
-    bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+    bool isCudaAllocate =
+        Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+        Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
     bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
     bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
                             !alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
     unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
 
     if (inlineAllocation &&
-        ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+        ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
     genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
-    if (!isCudaSymbol) {
+    if (!isCudaAllocate) {
       stat = genRuntimeAllocate(builder, loc, box, errorManager);
       setPinnedToFalse();
     } else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy))
-        TODO(loc, "array of derived-type with device component");
+      if (mlir::isa<fir::SequenceType>(baseTy) &&
+          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+           fir::isPointerType(fir::getBase(exv).getType())))
+        return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &symbol) {
+  if (const auto *details{symbol.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{
+        type ? type->AsDerived() : nullptr};
+    if (derived) {
+      if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 UltimateComponentIterator::const_iterator
 FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
   UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
   real, device, allocatable :: a_device(:)
   real, managed, allocatable :: a_managed(:)
   real, pinned, allocatable :: a_pinned(:)
+  type :: t1
+    integer :: a
+    real, dimension(:), allocatable, device :: b
+  end type
 end module
 
 ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
 ! CHECK: %[[FALSE:.*]] = arith.constant false
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+  use globals
+  type(t1), pointer, dimension(:) :: d
+  allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate

Copy link
Contributor

@razvanlupusoru razvanlupusoru left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you!

@clementval clementval merged commit 9b195dc into llvm:main Aug 4, 2025
13 checks passed
@clementval clementval deleted the cuf_allocate_components branch August 4, 2025 23:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir flang:semantics flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants