Skip to content

Conversation

@clementval
Copy link
Contributor

Implement `this_cluster` like `this_group` by lowering it directly like an intrinsic function. Use the NVVM operation to get the rank and size information and populate the derived type.

@clementval clementval requested a review from wangzpgi November 24, 2025 21:57
@llvmbot llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir labels Nov 24, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 24, 2025

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

Changes

Implement this_cluster like this_group by lowering it directly like an intrinsic function. Use the NVVM operation to get the rank and size information and populate the derived type.


Full diff: https://github.com/llvm/llvm-project/pull/169414.diff

3 Files Affected:

  • (modified) flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h (+1)
  • (modified) flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp (+42)
  • (modified) flang/module/cooperative_groups.f90 (+13)
diff --git a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
index ae7d566920656..027bd3b79a1df 100644
--- a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
@@ -60,6 +60,7 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
   mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
   void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
+  mlir::Value genThisCluster(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
   mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index f67129dfa6730..c560c53033780 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -457,6 +457,10 @@ static constexpr IntrinsicHandler cudaHandlers[]{
      static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(&CI::genSyncWarp),
      {},
      /*isElemental=*/false},
+    {"this_cluster",
+     static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisCluster),
+     {},
+     /*isElemental=*/false},
     {"this_grid",
      static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisGrid),
      {},
@@ -1122,6 +1126,44 @@ void CUDAIntrinsicLibrary::genSyncWarp(
   mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
 }
 
+// THIS_CLUSTER
+mlir::Value
+CUDAIntrinsicLibrary::genThisCluster(mlir::Type resultType,
+                                     llvm::ArrayRef<mlir::Value> args) {
+  assert(args.size() == 0);
+  auto recTy = mlir::cast<fir::RecordType>(resultType);
+  assert(recTy && "RecordType expected");
+  mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+  mlir::Type i32Ty = builder.getI32Type();
+
+  // SIZE
+  mlir::Value size = mlir::NVVM::ClusterDim::create(builder, loc, i32Ty);
+  auto sizeFieldName = recTy.getTypeList()[1].first;
+  mlir::Type sizeFieldTy = recTy.getTypeList()[1].second;
+  mlir::Type fieldIndexType = fir::FieldType::get(resultType.getContext());
+  mlir::Value sizeFieldIndex = fir::FieldIndexOp::create(
+      builder, loc, fieldIndexType, sizeFieldName, recTy,
+      /*typeParams=*/mlir::ValueRange{});
+  mlir::Value sizeCoord = fir::CoordinateOp::create(
+      builder, loc, builder.getRefType(sizeFieldTy), res, sizeFieldIndex);
+  fir::StoreOp::create(builder, loc, size, sizeCoord);
+
+  // RANK
+  mlir::Value rank = mlir::NVVM::ClusterId::create(builder, loc, i32Ty);
+  mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
+  rank = mlir::arith::AddIOp::create(builder, loc, rank, one);
+  auto rankFieldName = recTy.getTypeList()[2].first;
+  mlir::Type rankFieldTy = recTy.getTypeList()[2].second;
+  mlir::Value rankFieldIndex = fir::FieldIndexOp::create(
+      builder, loc, fieldIndexType, rankFieldName, recTy,
+      /*typeParams=*/mlir::ValueRange{});
+  mlir::Value rankCoord = fir::CoordinateOp::create(
+      builder, loc, builder.getRefType(rankFieldTy), res, rankFieldIndex);
+  fir::StoreOp::create(builder, loc, rank, rankCoord);
+
+  return res;
+}
+
 // THIS_GRID
 mlir::Value
 CUDAIntrinsicLibrary::genThisGrid(mlir::Type resultType,
diff --git a/flang/module/cooperative_groups.f90 b/flang/module/cooperative_groups.f90
index b8875f72f8079..1c89866f9c84a 100644
--- a/flang/module/cooperative_groups.f90
+++ b/flang/module/cooperative_groups.f90
@@ -14,6 +14,12 @@ module cooperative_groups
 
 implicit none
 
+type :: cluster_group
+  type(c_devptr), private :: handle
+  integer(4) :: size
+  integer(4) :: rank
+end type cluster_group
+
 type :: grid_group
   type(c_devptr), private :: handle
   integer(4) :: size
@@ -32,6 +38,13 @@ module cooperative_groups
   integer(4) :: rank
 end type thread_group
 
+interface
+  attributes(device) function this_cluster()
+    import
+    type(cluster_group) :: this_cluster
+  end function
+end interface
+
 interface
   attributes(device) function this_grid()
     import

@clementval clementval merged commit ab5ae9a into llvm:main Nov 24, 2025
13 checks passed
@clementval clementval deleted the cuf_this_cluster branch November 24, 2025 22:28
aadeshps-mcw pushed a commit to aadeshps-mcw/llvm-project that referenced this pull request Nov 26, 2025

Implement `this_cluster` like `this_group` by lowering it directly like
an intrinsic function. Use the NVVM operation to get the rank and size
information and populate the derived type.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

flang:fir-hlfir flang Flang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants