Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genThisCluster(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
Expand Down
42 changes: 42 additions & 0 deletions flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,10 @@ static constexpr IntrinsicHandler cudaHandlers[]{
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(&CI::genSyncWarp),
{},
/*isElemental=*/false},
{"this_cluster",
static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisCluster),
{},
/*isElemental=*/false},
{"this_grid",
static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisGrid),
{},
Expand Down Expand Up @@ -1122,6 +1126,44 @@ void CUDAIntrinsicLibrary::genSyncWarp(
mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
}

// THIS_CLUSTER
mlir::Value
CUDAIntrinsicLibrary::genThisCluster(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 0);
auto recTy = mlir::cast<fir::RecordType>(resultType);
assert(recTy && "RecordType expepected");
mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
mlir::Type i32Ty = builder.getI32Type();

// SIZE
mlir::Value size = mlir::NVVM::ClusterDim::create(builder, loc, i32Ty);
auto sizeFieldName = recTy.getTypeList()[1].first;
mlir::Type sizeFieldTy = recTy.getTypeList()[1].second;
mlir::Type fieldIndexType = fir::FieldType::get(resultType.getContext());
mlir::Value sizeFieldIndex = fir::FieldIndexOp::create(
builder, loc, fieldIndexType, sizeFieldName, recTy,
/*typeParams=*/mlir::ValueRange{});
mlir::Value sizeCoord = fir::CoordinateOp::create(
builder, loc, builder.getRefType(sizeFieldTy), res, sizeFieldIndex);
fir::StoreOp::create(builder, loc, size, sizeCoord);

// RANK
mlir::Value rank = mlir::NVVM::ClusterId::create(builder, loc, i32Ty);
mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
rank = mlir::arith::AddIOp::create(builder, loc, rank, one);
auto rankFieldName = recTy.getTypeList()[2].first;
mlir::Type rankFieldTy = recTy.getTypeList()[2].second;
mlir::Value rankFieldIndex = fir::FieldIndexOp::create(
builder, loc, fieldIndexType, rankFieldName, recTy,
/*typeParams=*/mlir::ValueRange{});
mlir::Value rankCoord = fir::CoordinateOp::create(
builder, loc, builder.getRefType(rankFieldTy), res, rankFieldIndex);
fir::StoreOp::create(builder, loc, rank, rankCoord);

return res;
}

// THIS_GRID
mlir::Value
CUDAIntrinsicLibrary::genThisGrid(mlir::Type resultType,
Expand Down
13 changes: 13 additions & 0 deletions flang/module/cooperative_groups.f90
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ module cooperative_groups

implicit none

type :: cluster_group
type(c_devptr), private :: handle
integer(4) :: size
integer(4) :: rank
end type cluster_group

type :: grid_group
type(c_devptr), private :: handle
integer(4) :: size
Expand All @@ -32,6 +38,13 @@ module cooperative_groups
integer(4) :: rank
end type thread_group

interface
attributes(device) function this_cluster()
import
type(cluster_group) :: this_cluster
end function
end interface

interface
attributes(device) function this_grid()
import
Expand Down