Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2224,4 +2224,34 @@ def WorkdistributeOp : OpenMP_Op<"workdistribute"> {
let assemblyFormat = "$region attr-dict";
}

//===----------------------------------------------------------------------===//
// [6.0] groupprivate Directive
//===----------------------------------------------------------------------===//

def GroupprivateOp : OpenMP_Op<"groupprivate",
    [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
  let summary = "groupprivate directive";
  let description = [{
    The groupprivate directive specifies that variables are replicated, with
    each group having its own copy.

    This operation takes the address of a symbol that represents the original
    variable and an optional `device_type` attribute, and returns the address
    of the groupprivate copy of that variable. All occurrences of groupprivate
    variables in a parallel region should use the groupprivate copy returned
    by this operation.

    The `sym_addr` operand refers to the address of the symbol, which is a
    pointer to the original variable. The result `gp_addr` has the same type
    as `sym_addr`.

    The optional `device_type` attribute (`host`, `nohost` or `any`) selects
    the kind of device for which a groupprivate copy of the variable is
    created.

    Example:

    ```mlir
    %addr = llvm.mlir.addressof @var : !llvm.ptr
    %gp = omp.groupprivate %addr : !llvm.ptr, device_type(any) -> !llvm.ptr
    ```
  }];

  let arguments = (ins
    OpenMP_PointerLikeType:$sym_addr,
    OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
  );
  let results = (outs OpenMP_PointerLikeType:$gp_addr);
  let assemblyFormat = [{
    $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
  }];
}

#endif // OPENMP_OPS
113 changes: 104 additions & 9 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3564,6 +3564,26 @@ convertOmpCancellationPoint(omp::CancellationPointOp op,
return success();
}

/// Resolves the `llvm.mlir.global` referenced by an address-producing
/// operation.
///
/// \p symOp is expected to be an `llvm.mlir.addressof`, optionally wrapped in
/// one `llvm.addrspacecast`. It may be null, which happens when the address is
/// not produced by an operation (e.g. it is a block argument).
///
/// \returns the referenced global, or a null `GlobalOp` if it cannot be
/// resolved. On failure a diagnostic is emitted on \p opInst when \p opInst is
/// non-null, so callers can simply propagate `failure()`.
static LLVM::GlobalOp
getGlobalFromSymbol(Operation *symOp,
                    LLVM::ModuleTranslation &moduleTranslation,
                    Operation *opInst) {
  // Look through a potential address space cast. The null-tolerant cast is
  // required because Value::getDefiningOp() returns null for block arguments.
  if (auto asCast = dyn_cast_if_present<LLVM::AddrSpaceCastOp>(symOp))
    symOp = asCast.getOperand().getDefiningOp();

  // The (possibly unwrapped) producer must be an AddressOfOp.
  auto addressOfOp = dyn_cast_if_present<LLVM::AddressOfOp>(symOp);
  if (!addressOfOp) {
    if (opInst)
      opInst->emitError("Addressing symbol not found");
    return nullptr;
  }

  // getGlobal() yields null when the symbol does not name a global; report
  // that here so that callers do not fail without any diagnostic.
  LLVM::GlobalOp global =
      addressOfOp.getGlobal(moduleTranslation.symbolTable());
  if (!global && opInst)
    opInst->emitError("Addressed symbol is not an LLVM global");
  return global;
}

/// Converts an OpenMP Threadprivate operation into LLVM IR using
/// OpenMPIRBuilder.
static LogicalResult
Expand All @@ -3579,15 +3599,10 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder,
Value symAddr = threadprivateOp.getSymAddr();
auto *symOp = symAddr.getDefiningOp();

if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
symOp = asCast.getOperand().getDefiningOp();

if (!isa<LLVM::AddressOfOp>(symOp))
return opInst.emitError("Addressing symbol not found");
LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);

LLVM::GlobalOp global =
addressOfOp.getGlobal(moduleTranslation.symbolTable());
getGlobalFromSymbol(symOp, moduleTranslation, &opInst);
if (!global)
return failure();
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);

if (!ompBuilder->Config.isTargetDevice()) {
Expand Down Expand Up @@ -6013,7 +6028,7 @@ static bool isTargetDeviceOp(Operation *op) {
// by taking it in as an operand, so we must always lower these in
// some manner or result in an ICE (whether they end up in a no-op
// or otherwise).
if (mlir::isa<omp::ThreadprivateOp>(op))
if (mlir::isa<omp::ThreadprivateOp, omp::GroupprivateOp>(op))
return true;

if (mlir::isa<omp::TargetAllocMemOp>(op) ||
Expand Down Expand Up @@ -6111,6 +6126,83 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
return success();
}

/// Converts an OpenMP Groupprivate operation into LLVM IR.
///
/// The result of the operation is mapped to:
///  - a global variable in the shared address space (3) when compiling for an
///    AMDGCN or NVPTX target device and the `device_type` attribute is absent,
///    `any` or `nohost`;
///  - the original global otherwise (host compilation, or `device_type(host)`
///    on a target device).
///
/// All groupprivate operations in a module that refer to the same symbol
/// share a single shared-memory global, so that every use observes the same
/// per-group copy.
///
/// Emits an error and fails if the symbol cannot be resolved to a global or
/// if a device copy is requested for an unsupported target triple.
static LogicalResult
convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
  auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);

  if (failed(checkImplementationStatus(opInst)))
    return failure();

  const bool isTargetDevice = ompBuilder->Config.isTargetDevice();

  // Decide from device_type whether this compilation gets its own copy. A
  // missing attribute behaves like `any`.
  bool shouldAllocate = true;
  auto deviceType = groupprivateOp.getDeviceType();
  if (deviceType.has_value()) {
    switch (*deviceType) {
    case mlir::omp::DeclareTargetDeviceType::host:
      // Only allocate on host.
      shouldAllocate = !isTargetDevice;
      break;
    case mlir::omp::DeclareTargetDeviceType::nohost:
      // Only allocate on device.
      shouldAllocate = isTargetDevice;
      break;
    case mlir::omp::DeclareTargetDeviceType::any:
      // Allocate on both.
      shouldAllocate = true;
      break;
    }
  }

  Value symAddr = groupprivateOp.getSymAddr();
  LLVM::GlobalOp global = getGlobalFromSymbol(symAddr.getDefiningOp(),
                                              moduleTranslation, &opInst);
  if (!global)
    return failure();

  llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
  if (!globalValue)
    return opInst.emitError()
           << "LLVM global for groupprivate symbol not found";

  // Default: use the original global address. This covers both the case where
  // no group-private storage is allocated and the host case.
  // TODO: Add support for allocating group-private storage on host device.
  llvm::Value *resultPtr = globalValue;

  if (shouldAllocate && isTargetDevice) {
    llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
    llvm::Triple targetTriple = llvm::Triple(llvmModule->getTargetTriple());
    if (!targetTriple.isAMDGCN() && !targetTriple.isNVPTX())
      return opInst.emitError()
             << "Groupprivate operation is not supported for this target: "
             << targetTriple.str();

    // Shared address space is 3 for amdgpu and nvptx targets.
    constexpr unsigned sharedAddressSpace = 3;
    llvm::Type *varType = globalValue->getValueType();

    // Use a name derived from the original symbol so that the shared-memory
    // copy can be found again: every groupprivate op on the same symbol must
    // resolve to the same storage rather than to a fresh global each time.
    const std::string sharedName =
        (llvm::Twine(globalValue->getName()) + ".groupprivate").str();
    llvm::GlobalVariable *sharedVar = llvmModule->getNamedGlobal(sharedName);
    const bool reusable = sharedVar &&
                          sharedVar->getAddressSpace() == sharedAddressSpace &&
                          sharedVar->getValueType() == varType;
    if (!reusable) {
      // The module takes ownership of the newly created global.
      sharedVar = new llvm::GlobalVariable(
          *llvmModule, varType, /*isConstant=*/false,
          llvm::GlobalValue::InternalLinkage, llvm::PoisonValue::get(varType),
          sharedName, /*InsertBefore=*/nullptr,
          llvm::GlobalValue::NotThreadLocal, sharedAddressSpace,
          /*isExternallyInitialized=*/false);
    }
    resultPtr = sharedVar;
  }

  moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
  return success();
}

/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
/// OpenMP runtime calls).
static LogicalResult
Expand Down Expand Up @@ -6294,6 +6386,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
.Case([&](omp::TargetFreeMemOp) {
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
})
.Case([&](omp::GroupprivateOp) {
return convertOmpGroupprivate(*op, builder, moduleTranslation);
})
.Default([&](Operation *inst) {
return inst->emitError()
<< "not yet implemented: " << inst->getName();
Expand Down
36 changes: 36 additions & 0 deletions mlir/test/Dialect/OpenMP/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3367,3 +3367,39 @@ func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () {

return
}

// Exercises the custom assembly of omp.groupprivate: once without a
// device_type and once for each of the any, host and nohost device types.
// CHECK-LABEL: func.func @omp_groupprivate_device_type
func.func @omp_groupprivate_device_type() {
%0 = arith.constant 1 : i32
%1 = arith.constant 2 : i32
// Addresses of the globals declared after this function; each one is the
// sym_addr operand of one omp.groupprivate below.
// CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
%gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
// CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
%any_addr = llvm.mlir.addressof @any : !llvm.ptr
// CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
%host_addr = llvm.mlir.addressof @host : !llvm.ptr
// CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
%nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr

// Form without the optional device_type attribute.
// CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
%group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr

// Forms with an explicit device_type; the returned address is used as the
// destination of a store.
// CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
%group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
llvm.store %1, %group_private_any : i32, !llvm.ptr

// CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
%group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
llvm.store %1, %group_private_host : i32, !llvm.ptr

// CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
%group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
llvm.store %1, %group_private_nohost : i32, !llvm.ptr

return
}

// Globals referenced by @omp_groupprivate_device_type.
llvm.mlir.global internal @gp() : i32
llvm.mlir.global internal @any() : i32
llvm.mlir.global internal @host() : i32
llvm.mlir.global internal @nohost() : i32
45 changes: 45 additions & 0 deletions mlir/test/Target/LLVMIR/omptarget-groupprivate.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Target-device module (amdgcn triple) with an omp.target region that uses
// omp.groupprivate with each of the any, host and nohost device types.
module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {

// @global_a is mapped into the target region; its value is the one stored
// through every groupprivate address below.
%ga = llvm.mlir.addressof @global_a : !llvm.ptr
%map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
%loaded = llvm.load %arg1 : !llvm.ptr -> i32

// device_type(any): expected to store to an addrspace(3) pointer.
%any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
%any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
llvm.store %loaded, %any_gp : i32, !llvm.ptr

// device_type(host): expected to store to the original @global_host.
%host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
%host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
llvm.store %loaded, %host_gp : i32, !llvm.ptr

// device_type(nohost): expected to store to an addrspace(3) pointer.
%nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
%nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
llvm.store %loaded, %nohost_gp : i32, !llvm.ptr

omp.terminator
}
llvm.return
}
// Original variables referenced from @_QQmain.
llvm.mlir.global internal @global_a() : i32
llvm.mlir.global internal @global_any() : i32
llvm.mlir.global internal @global_host() : i32
llvm.mlir.global internal @global_nohost() : i32
}

// CHECK-DAG: @global_a = internal global i32 undef
// CHECK-DAG: @global_any = internal global i32 undef
// CHECK-DAG: @global_host = internal global i32 undef
// CHECK-DAG: @global_nohost = internal global i32 undef
// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
// CHECK-LABEL: omp.target:
// CHECK: %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
// CHECK-NEXT: store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
// CHECK-NEXT: store i32 %[[LOAD]], ptr @global_host, align 4
// CHECK-NEXT: store i32 %[[LOAD]], ptr addrspace(3) {{.*}}, align 4
68 changes: 68 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3449,3 +3449,71 @@ llvm.func @nested_task_with_deps() {

// CHECK: ret void
// CHECK: }

// -----

// Target-device translation (nvptx64 triple) of omp.groupprivate for each
// device_type value. The expectations after the module require the any and
// nohost stores to use an addrspace(3) pointer and the host store to use the
// original @host global.
module attributes {omp.is_target_device = true, llvm.target_triple = "nvptx64-nvidia-cuda"} {
llvm.mlir.global internal @any() : i32
llvm.mlir.global internal @host() : i32
llvm.mlir.global internal @nohost() : i32
llvm.func @omp_groupprivate_device() {
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.mlir.addressof @any : !llvm.ptr
%2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
llvm.store %0, %2 : i32, !llvm.ptr

%3 = llvm.mlir.addressof @host : !llvm.ptr
%4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
llvm.store %0, %4 : i32, !llvm.ptr

%5 = llvm.mlir.addressof @nohost : !llvm.ptr
%6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
}

// CHECK-DAG: @any = internal global i32 undef
// CHECK-DAG: @host = internal global i32 undef
// CHECK-DAG: @nohost = internal global i32 undef
// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
// CHECK-DAG: {{.*}} = internal addrspace(3) global i32 poison
// CHECK-LABEL: define void @omp_groupprivate_device()
// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
// CHECK: store i32 1, ptr @host, align 4
// CHECK: store i32 1, ptr addrspace(3) {{.*}}, align 4
// CHECK: ret void

// -----

// Host translation of omp.groupprivate for each device_type value. The
// expectations after the module require every store to use the original
// global, whatever the device_type.
module attributes {omp.is_target_device = false} {
llvm.mlir.global internal @any1() : i32
llvm.mlir.global internal @host1() : i32
llvm.mlir.global internal @nohost1() : i32
llvm.func @omp_groupprivate_host() {
%0 = llvm.mlir.constant(1 : i32) : i32
%1 = llvm.mlir.addressof @any1 : !llvm.ptr
%2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
llvm.store %0, %2 : i32, !llvm.ptr

%3 = llvm.mlir.addressof @host1 : !llvm.ptr
%4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
llvm.store %0, %4 : i32, !llvm.ptr

%5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
%6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
llvm.store %0, %6 : i32, !llvm.ptr
llvm.return
}
}

// CHECK-DAG: @any1 = internal global i32 undef
// CHECK-DAG: @host1 = internal global i32 undef
// CHECK-DAG: @nohost1 = internal global i32 undef
// CHECK-LABEL: define void @omp_groupprivate_host()
// CHECK: store i32 1, ptr @any1, align 4
// CHECK: store i32 1, ptr @host1, align 4
// CHECK: store i32 1, ptr @nohost1, align 4
// CHECK: ret void

// -----