Conversation

@clementval (Contributor)
Allocatable or pointer module variables with the CUDA managed attribute are defined with a double descriptor: one on the host and one on the device. Only the data pointed to by the descriptors is allocated in managed memory.
Allow such managed allocatable or pointer module variables to be registered just like device or constant ones.
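
For context, a minimal CUDA Fortran sketch (illustrative only, not taken from this PR; the module and variable names are chosen to mirror the test) of the kind of declaration whose registration this change enables. The test global @_QMmEa00 in the diff below corresponds to a declaration of this shape:

  module m
    ! A managed allocatable module variable: the compiler emits a double
    ! descriptor (one on the host, one on the device); only the array
    ! payload itself is placed in managed memory.
    real(8), allocatable, managed :: a00(:,:,:,:,:)
  end module m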

@clementval requested a review from wangzpgi on April 4, 2025 at 20:34
@llvmbot added the flang (Flang issues not falling into any other category) and flang:fir-hlfir labels on Apr 4, 2025
@llvmbot (Member) commented Apr 4, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Full diff: https://github.com/llvm/llvm-project/pull/134444.diff

2 Files Affected:

  • (modified) flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp (+6-3)
  • (modified) flang/test/Fir/CUDA/cuda-constructor-2.f90 (+18)
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index ad39640235e91..064f0f363f699 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -105,10 +105,15 @@ struct CUFAddConstructor
         if (!attr)
           continue;
 
+        if (attr.getValue() == cuf::DataAttribute::Managed &&
+            !mlir::isa<fir::BaseBoxType>(globalOp.getType()))
+          TODO(loc, "registration of non-allocatable managed variables");
+
         mlir::func::FuncOp func;
         switch (attr.getValue()) {
         case cuf::DataAttribute::Device:
-        case cuf::DataAttribute::Constant: {
+        case cuf::DataAttribute::Constant:
+        case cuf::DataAttribute::Managed: {
           func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
               loc, builder);
           auto fTy = func.getFunctionType();
@@ -141,8 +146,6 @@ struct CUFAddConstructor
               builder, loc, fTy, registeredMod, addr, gblName, sizeVal)};
           builder.create<fir::CallOp>(loc, func, args);
         } break;
-        case cuf::DataAttribute::Managed:
-          TODO(loc, "registration of managed variables");
         default:
           break;
         }
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
index 89fc99b736f4f..62118bb2eed2e 100644
--- a/flang/test/Fir/CUDA/cuda-constructor-2.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -60,3 +60,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i
     }
   }
 }
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git 3372303188df0f7f8ac26e7ab610cf8b0f716d42)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {  
+  fir.global @_QMmEa00 {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?x?x?x?x?xf64>>
+    %1 = fir.shape %c0, %c0, %c0, %c0, %c0 : (index, index, index, index, index) -> !fir.shape<5>
+    %2 = fir.embox %0(%1) {allocator_idx = 3 : i32} : (!fir.heap<!fir.array<?x?x?x?x?xf64>>, !fir.shape<5>) -> !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?x?x?x?x?xf64>>>
+  }
+  gpu.module @cuda_device_mod {
+  }
+}
+
+// CHECK: llvm.func internal @__cudaFortranConstructor()
+// CHECK: fir.address_of(@_QMmEa00)
+// CHECK: fir.call @_FortranACUFRegisterVariable

@clementval merged commit 18ff8df into llvm:main on April 4, 2025
14 checks passed
@clementval deleted the cuf_register_managed branch on April 4, 2025 at 21:38