Skip to content

Conversation

@clementval
Copy link
Contributor

Similar to the double descriptor information added in #170901, we need to carry the pointer information on the op itself until the op can be converted. Detecting it from the defining `fir.declare` would fail if the op is converted late.

@llvmbot llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir labels Dec 5, 2025
@llvmbot
Copy link
Member

llvmbot commented Dec 5, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Similar to the double descriptor information added in #170901, we need to carry the pointer information on the op itself until the op can be converted. Detecting it from the defining `fir.declare` would fail if the op is converted late.


Full diff: https://github.com/llvm/llvm-project/pull/170937.diff

5 Files Affected:

  • (modified) flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td (+2-2)
  • (modified) flang/lib/Lower/Allocatable.cpp (+4-2)
  • (modified) flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp (+1-9)
  • (modified) flang/test/Fir/CUDA/cuda-allocate.fir (+4-4)
  • (modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+8)
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 766a0d6bb8ee0..636879f28a2fb 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -101,7 +101,7 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
       Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
       cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
-      UnitAttr:$hasDoubleDescriptor);
+      UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
 
   let results = (outs AnyIntegerType:$stat);
 
@@ -129,7 +129,7 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
   let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
       Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
       cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
-      UnitAttr:$hasDoubleDescriptor);
+      UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
 
   let results = (outs AnyIntegerType:$stat);
 
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 2ae13e2bd73fb..c9a9d935bd615 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -803,7 +803,8 @@ class AllocateStmtHelper {
                builder, loc, retTy, box.getAddr(), errmsg, stream, pinned,
                source, cudaAttr,
                errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
-               doubleDescriptors ? builder.getUnitAttr() : nullptr)
+               doubleDescriptors ? builder.getUnitAttr() : nullptr,
+               box.isPointer() ? builder.getUnitAttr() : nullptr)
         .getResult();
   }
 
@@ -873,7 +874,8 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
   return cuf::DeallocateOp::create(
              builder, loc, retTy, box.getAddr(), errmsg, cudaAttr,
              errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
-             doubleDescriptors ? builder.getUnitAttr() : nullptr)
+             doubleDescriptors ? builder.getUnitAttr() : nullptr,
+             box.isPointer() ? builder.getUnitAttr() : nullptr)
       .getResult();
 }
 
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
index 2c40991580c2e..6579c2362cd87 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp
@@ -322,15 +322,7 @@ struct CUFAllocateOpConversion
     fir::FirOpBuilder builder(rewriter, mod);
     mlir::Location loc = op.getLoc();
 
-    bool isPointer = false;
-
-    if (auto declareOp =
-            mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
-      if (declareOp.getFortranAttrs() &&
-          bitEnumContainsAny(*declareOp.getFortranAttrs(),
-                             fir::FortranVariableFlagsEnum::pointer))
-        isPointer = true;
-
+    bool isPointer = op.getPointer();
     if (op.getHasDoubleDescriptor()) {
       // Allocation for module variable are done with custom runtime entry point
       // so the descriptors can be synchronized.
diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir
index eb2816145c77a..9d0d181609ada 100644
--- a/flang/test/Fir/CUDA/cuda-allocate.fir
+++ b/flang/test/Fir/CUDA/cuda-allocate.fir
@@ -183,7 +183,7 @@ func.func @_QQallocate_stream() {
 func.func @_QPp_alloc() {
   %0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
   %4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
-  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
+  %9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>, pointer} -> i32
   return
 }
 
@@ -201,7 +201,7 @@ func.func @_QPpointer_source() {
   %5 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a_d", data_attr = #cuf.cuda<device>, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %7 = fir.declare %5 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %8 = fir.load %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
-  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
+  %22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, pointer} -> i32
   return
 }
 
@@ -226,7 +226,7 @@ func.func @_QQpointer_sync() attributes {fir.bindc_name = "test"} {
   %3 = fir.convert %c1 : (index) -> i64
   %4 = fir.convert %c10_i32 : (i32) -> i64
   fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
-  %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+  %6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
   return
 }
 
@@ -246,7 +246,7 @@ func.func @_QMmod1Ppointer_source_global() {
   %2 = fir.alloca !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"}
   %6 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
   %7 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
-  %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
+  %21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
   return
 }
 
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 393faff6046bc..43e716532ecca 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -227,6 +227,14 @@ end
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
 
+subroutine devicepointer()
+  integer, device, pointer :: i(:)
+  allocate(i(10))
+end
+
+! CHECK-LABEL: func.func @_QPdevicepointer()
+! CHECK: cuf.allocate{{.*}}pointer
+
 subroutine cuda_component()
   use globals
   type(t1), pointer, dimension(:) :: d

@clementval clementval enabled auto-merge (squash) December 5, 2025 22:36
@clementval clementval merged commit 7ba7101 into llvm:main Dec 5, 2025
13 checks passed
@clementval clementval deleted the cuf_pointer_op branch December 5, 2025 23:30
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Dec 9, 2025
…170937)

Similar to the double descriptor information added in
llvm#170901, we need to carry the pointer information on the op
itself until the op can be converted. Detecting it from the defining
`fir.declare` would fail if the op is converted late.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants