Skip to content

Commit 6336f7d

Browse files
committed
[OpenMP][mlir] Add Groupprivate op in omp dialect.
1 parent 168db5e commit 6336f7d

File tree

5 files changed

+280
-1
lines changed

5 files changed

+280
-1
lines changed

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2224,4 +2224,34 @@ def WorkdistributeOp : OpenMP_Op<"workdistribute"> {
22242224
let assemblyFormat = "$region attr-dict";
22252225
}
22262226

2227+
//===----------------------------------------------------------------------===//
2228+
// [6.0] groupprivate Directive
2229+
//===----------------------------------------------------------------------===//
2230+
2231+
def GroupprivateOp : OpenMP_Op<"groupprivate",
                               [AllTypesMatch<["sym_addr", "gp_addr"]>]> {
  let summary = "groupprivate directive";
  let description = [{
    The groupprivate directive specifies that variables are replicated, with
    each group having its own copy.

    This operation takes in the address of a symbol that represents the
    original variable and an optional `device_type` attribute, and returns the
    address of its groupprivate copy. All occurrences of groupprivate variables
    in a parallel region should use the groupprivate copy returned by this
    operation.

    The `sym_addr` refers to the address of the symbol, which is a pointer to
    the original variable. The result `gp_addr` always has the same type as
    `sym_addr`.

    The optional `device_type` attribute (`host`, `nohost` or `any`) selects
    the compilations for which a groupprivate copy is created. In the LLVM IR
    translation, `host` creates a copy only when compiling for the host,
    `nohost` only when compiling for a target device, and `any` (or an absent
    attribute) in both cases. Where no copy is created, the result refers to
    the original variable.

    Example:

    ```mlir
    %addr = llvm.mlir.addressof @gp : !llvm.ptr
    %copy = omp.groupprivate %addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
    ```
  }];

  let arguments = (ins
    OpenMP_PointerLikeType:$sym_addr,
    OptionalAttr<DeclareTargetDeviceTypeAttr>:$device_type
  );
  let results = (outs OpenMP_PointerLikeType:$gp_addr);
  let assemblyFormat = [{
    $sym_addr `:` type($sym_addr) ( `,` `device_type` $device_type^ )? `->` type($gp_addr) attr-dict
  }];
}
2256+
22272257
#endif // OPENMP_OPS

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6030,7 +6030,7 @@ static bool isTargetDeviceOp(Operation *op) {
60306030
// by taking it in as an operand, so we must always lower these in
60316031
// some manner or result in an ICE (whether they end up in a no-op
60326032
// or otherwise).
6033-
if (mlir::isa<omp::ThreadprivateOp>(op))
6033+
if (mlir::isa<omp::ThreadprivateOp, omp::GroupprivateOp>(op))
60346034
return true;
60356035

60366036
if (mlir::isa<omp::TargetAllocMemOp>(op) ||
@@ -6128,6 +6128,74 @@ convertTargetFreeMemOp(Operation &opInst, llvm::IRBuilderBase &builder,
61286128
return success();
61296129
}
61306130

6131+
/// Converts an OpenMP Groupprivate operation into LLVM IR.
6132+
static LogicalResult
6133+
convertOmpGroupprivate(Operation &opInst, llvm::IRBuilderBase &builder,
6134+
LLVM::ModuleTranslation &moduleTranslation) {
6135+
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
6136+
auto groupprivateOp = cast<omp::GroupprivateOp>(opInst);
6137+
6138+
if (failed(checkImplementationStatus(opInst)))
6139+
return failure();
6140+
6141+
bool isTargetDevice = ompBuilder->Config.isTargetDevice();
6142+
auto deviceType = groupprivateOp.getDeviceType();
6143+
6144+
// skip allocation based on device_type
6145+
bool shouldAllocate = true;
6146+
if (deviceType.has_value()) {
6147+
switch (*deviceType) {
6148+
case mlir::omp::DeclareTargetDeviceType::host:
6149+
// Only allocate on host
6150+
shouldAllocate = !isTargetDevice;
6151+
break;
6152+
case mlir::omp::DeclareTargetDeviceType::nohost:
6153+
// Only allocate on device
6154+
shouldAllocate = isTargetDevice;
6155+
break;
6156+
case mlir::omp::DeclareTargetDeviceType::any:
6157+
// Allocate on both
6158+
shouldAllocate = true;
6159+
break;
6160+
}
6161+
}
6162+
6163+
Value symAddr = groupprivateOp.getSymAddr();
6164+
auto *symOp = symAddr.getDefiningOp();
6165+
6166+
if (auto asCast = dyn_cast<LLVM::AddrSpaceCastOp>(symOp))
6167+
symOp = asCast.getOperand().getDefiningOp();
6168+
6169+
if (!isa<LLVM::AddressOfOp>(symOp))
6170+
return opInst.emitError("Addressing symbol not found");
6171+
LLVM::AddressOfOp addressOfOp = dyn_cast<LLVM::AddressOfOp>(symOp);
6172+
6173+
LLVM::GlobalOp global =
6174+
addressOfOp.getGlobal(moduleTranslation.symbolTable());
6175+
llvm::GlobalValue *globalValue = moduleTranslation.lookupGlobal(global);
6176+
llvm::Value *resultPtr;
6177+
6178+
if (shouldAllocate) {
6179+
// Get the size of the variable
6180+
llvm::Type *varType = globalValue->getValueType();
6181+
llvm::Module *llvmModule = moduleTranslation.getLLVMModule();
6182+
llvm::DataLayout DL = llvmModule->getDataLayout();
6183+
uint64_t typeSize = DL.getTypeAllocSize(varType);
6184+
// Call omp_alloc_shared to allocate memory for groupprivate variable.
6185+
llvm::FunctionCallee allocSharedFn = ompBuilder->getOrCreateRuntimeFunction(
6186+
*llvmModule, llvm::omp::OMPRTL___kmpc_alloc_shared);
6187+
// Call runtime to allocate shared memory for this group
6188+
resultPtr = builder.CreateCall(allocSharedFn, {builder.getInt64(typeSize)});
6189+
} else {
6190+
// Use original global address when not allocating group-private storage
6191+
resultPtr = moduleTranslation.lookupValue(symAddr);
6192+
if (!resultPtr)
6193+
resultPtr = globalValue;
6194+
}
6195+
moduleTranslation.mapValue(opInst.getResult(0), resultPtr);
6196+
return success();
6197+
}
6198+
61316199
/// Given an OpenMP MLIR operation, create the corresponding LLVM IR (including
61326200
/// OpenMP runtime calls).
61336201
static LogicalResult
@@ -6311,6 +6379,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
63116379
.Case([&](omp::TargetFreeMemOp) {
63126380
return convertTargetFreeMemOp(*op, builder, moduleTranslation);
63136381
})
6382+
.Case([&](omp::GroupprivateOp) {
6383+
return convertOmpGroupprivate(*op, builder, moduleTranslation);
6384+
})
63146385
.Default([&](Operation *inst) {
63156386
return inst->emitError()
63166387
<< "not yet implemented: " << inst->getName();

mlir/test/Dialect/OpenMP/ops.mlir

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3367,3 +3367,39 @@ func.func @omp_target_map_clause_type_test(%arg0 : memref<?xi32>) -> () {
33673367

33683368
return
33693369
}
3370+
3371+
// Round-trip test for omp.groupprivate, with and without the optional
// device_type attribute.
// CHECK-LABEL: func.func @omp_groupprivate_device_type
func.func @omp_groupprivate_device_type() {
  %c2 = arith.constant 2 : i32
  // CHECK: [[ARG0:%.*]] = llvm.mlir.addressof @gp : !llvm.ptr
  %gp_addr = llvm.mlir.addressof @gp : !llvm.ptr
  // CHECK: [[ARG1:%.*]] = llvm.mlir.addressof @any : !llvm.ptr
  %any_addr = llvm.mlir.addressof @any : !llvm.ptr
  // CHECK: [[ARG2:%.*]] = llvm.mlir.addressof @host : !llvm.ptr
  %host_addr = llvm.mlir.addressof @host : !llvm.ptr
  // CHECK: [[ARG3:%.*]] = llvm.mlir.addressof @nohost : !llvm.ptr
  %nohost_addr = llvm.mlir.addressof @nohost : !llvm.ptr

  // CHECK: {{.*}} = omp.groupprivate [[ARG0]] : !llvm.ptr -> !llvm.ptr
  %group_private_addr = omp.groupprivate %gp_addr : !llvm.ptr -> !llvm.ptr

  // CHECK: {{.*}} = omp.groupprivate [[ARG1]] : !llvm.ptr, device_type (any) -> !llvm.ptr
  %group_private_any = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
  llvm.store %c2, %group_private_any : i32, !llvm.ptr

  // CHECK: {{.*}} = omp.groupprivate [[ARG2]] : !llvm.ptr, device_type (host) -> !llvm.ptr
  %group_private_host = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
  llvm.store %c2, %group_private_host : i32, !llvm.ptr

  // CHECK: {{.*}} = omp.groupprivate [[ARG3]] : !llvm.ptr, device_type (nohost) -> !llvm.ptr
  %group_private_nohost = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
  llvm.store %c2, %group_private_nohost : i32, !llvm.ptr

  return
}

// Globals whose addresses are passed to omp.groupprivate above.
llvm.mlir.global internal @gp() : i32
llvm.mlir.global internal @any() : i32
llvm.mlir.global internal @host() : i32
llvm.mlir.global internal @nohost() : i32
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Translation test for omp.groupprivate inside an omp.target region when
// compiling for a target device: `any` and `nohost` are expected to allocate
// through __kmpc_alloc_shared, while `host` stores to the original global.

module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa",
                   dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>} {
  llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {

    %ga = llvm.mlir.addressof @global_a : !llvm.ptr
    %map_a = omp.map.info var_ptr(%ga : !llvm.ptr, i32) map_clauses(tofrom) capture(ByCopy) -> !llvm.ptr {name = "i"}
    omp.target map_entries(%map_a -> %arg1 : !llvm.ptr) {
      %loaded = llvm.load %arg1 : !llvm.ptr -> i32

      %any_addr = llvm.mlir.addressof @global_any : !llvm.ptr
      %any_gp = omp.groupprivate %any_addr : !llvm.ptr, device_type(any) -> !llvm.ptr
      llvm.store %loaded, %any_gp : i32, !llvm.ptr

      %host_addr = llvm.mlir.addressof @global_host : !llvm.ptr
      %host_gp = omp.groupprivate %host_addr : !llvm.ptr, device_type(host) -> !llvm.ptr
      llvm.store %loaded, %host_gp : i32, !llvm.ptr

      %nohost_addr = llvm.mlir.addressof @global_nohost : !llvm.ptr
      %nohost_gp = omp.groupprivate %nohost_addr : !llvm.ptr, device_type(nohost) -> !llvm.ptr
      llvm.store %loaded, %nohost_gp : i32, !llvm.ptr

      omp.terminator
    }
    llvm.return
  }
  llvm.mlir.global internal @global_a() : i32
  llvm.mlir.global internal @global_any() : i32
  llvm.mlir.global internal @global_host() : i32
  llvm.mlir.global internal @global_nohost() : i32
}

// CHECK: define {{.*}} amdgpu_kernel void @__omp_offloading_{{.*}}_{{.*}}__QQmain_{{.*}}(ptr %{{.*}}, ptr %{{.*}}) #{{[0-9]+}} {
// CHECK-LABEL: omp.target:
// CHECK: %[[LOAD:.*]] = load i32, ptr %{{.*}}, align 4
// CHECK: %[[ALLOC_ANY:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
// CHECK: store i32 %[[LOAD]], ptr %[[ALLOC_ANY]], align 4
// CHECK: store i32 %[[LOAD]], ptr @global_host, align 4
// CHECK: %[[ALLOC_NOHOST:.*]] = call ptr @__kmpc_alloc_shared(i64 4)
// CHECK: store i32 %[[LOAD]], ptr %[[ALLOC_NOHOST]], align 4

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3449,3 +3449,104 @@ llvm.func @nested_task_with_deps() {
34493449

34503450
// CHECK: ret void
34513451
// CHECK: }
3452+
3453+
// -----
3454+
3455+
// Host compilation (omp.is_target_device = false). The CHECK lines that follow
// this module expect `any` and `host` to store through a __kmpc_alloc_shared
// allocation, and `nohost` to store directly to the original global.
module attributes {omp.is_target_device = false} {
3456+
llvm.mlir.global internal @any() : i32
3457+
llvm.mlir.global internal @host() : i32
3458+
llvm.mlir.global internal @nohost() : i32
3459+
llvm.func @omp_groupprivate_host() {
3460+
%0 = llvm.mlir.constant(1 : i32) : i32
3461+
%1 = llvm.mlir.addressof @any : !llvm.ptr
3462+
%2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
3463+
llvm.store %0, %2 : i32, !llvm.ptr
3464+
3465+
%3 = llvm.mlir.addressof @host : !llvm.ptr
3466+
%4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
3467+
llvm.store %0, %4 : i32, !llvm.ptr
3468+
3469+
%5 = llvm.mlir.addressof @nohost : !llvm.ptr
3470+
%6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
3471+
llvm.store %0, %6 : i32, !llvm.ptr
3472+
llvm.return
3473+
}
3474+
}
3475+
3476+
// CHECK: @any = internal global i32 undef
3477+
// CHECK: @host = internal global i32 undef
3478+
// CHECK: @nohost = internal global i32 undef
3479+
// CHECK-LABEL: @omp_groupprivate_host
3480+
// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3481+
// CHECK: store i32 1, ptr [[TMP1]], align 4
3482+
// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3483+
// CHECK: store i32 1, ptr [[TMP2]], align 4
3484+
// CHECK: store i32 1, ptr @nohost, align 4
3485+
3486+
// -----
3487+
3488+
// Target-device compilation (omp.is_target_device = true). The CHECK lines
// that follow this module expect `any` and `nohost` to store through a
// __kmpc_alloc_shared allocation, and `host` to store directly to the original
// global.
module attributes {omp.is_target_device = true} {
3489+
llvm.mlir.global internal @any() : i32
3490+
llvm.mlir.global internal @host() : i32
3491+
llvm.mlir.global internal @nohost() : i32
3492+
llvm.func @omp_groupprivate_device() {
3493+
%0 = llvm.mlir.constant(1 : i32) : i32
3494+
%1 = llvm.mlir.addressof @any : !llvm.ptr
3495+
%2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
3496+
llvm.store %0, %2 : i32, !llvm.ptr
3497+
3498+
%3 = llvm.mlir.addressof @host : !llvm.ptr
3499+
%4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
3500+
llvm.store %0, %4 : i32, !llvm.ptr
3501+
3502+
%5 = llvm.mlir.addressof @nohost : !llvm.ptr
3503+
%6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
3504+
llvm.store %0, %6 : i32, !llvm.ptr
3505+
llvm.return
3506+
}
3507+
}
3508+
3509+
// CHECK: @any = internal global i32 undef
3510+
// CHECK: @host = internal global i32 undef
3511+
// CHECK: @nohost = internal global i32 undef
3512+
// CHECK-LABEL: @omp_groupprivate_device
3513+
// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3514+
// CHECK: store i32 1, ptr [[TMP1]], align 4
3515+
// CHECK: store i32 1, ptr @host, align 4
3516+
// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3517+
// CHECK: store i32 1, ptr [[TMP2]], align 4
3518+
3519+
// -----
3520+
3521+
// Host compilation (omp.is_target_device = false) using the globals @any1,
// @host1 and @nohost1. The CHECK lines that follow expect `any` and `host` to
// store through a __kmpc_alloc_shared allocation and `nohost` to store to the
// original global.
// NOTE(review): apart from the renamed globals this looks identical to the
// earlier @omp_groupprivate_host host case - confirm both are needed.
module attributes {omp.is_target_device = false} {
3522+
llvm.mlir.global internal @any1() : i32
3523+
llvm.mlir.global internal @host1() : i32
3524+
llvm.mlir.global internal @nohost1() : i32
3525+
llvm.func @omp_groupprivate_host() {
3526+
%0 = llvm.mlir.constant(1 : i32) : i32
3527+
%1 = llvm.mlir.addressof @any1 : !llvm.ptr
3528+
%2 = omp.groupprivate %1 : !llvm.ptr, device_type(any) -> !llvm.ptr
3529+
llvm.store %0, %2 : i32, !llvm.ptr
3530+
3531+
%3 = llvm.mlir.addressof @host1 : !llvm.ptr
3532+
%4 = omp.groupprivate %3 : !llvm.ptr, device_type(host) -> !llvm.ptr
3533+
llvm.store %0, %4 : i32, !llvm.ptr
3534+
3535+
%5 = llvm.mlir.addressof @nohost1 : !llvm.ptr
3536+
%6 = omp.groupprivate %5 : !llvm.ptr, device_type(nohost) -> !llvm.ptr
3537+
llvm.store %0, %6 : i32, !llvm.ptr
3538+
llvm.return
3539+
}
3540+
}
3541+
3542+
// CHECK: @any1 = internal global i32 undef
3543+
// CHECK: @host1 = internal global i32 undef
3544+
// CHECK: @nohost1 = internal global i32 undef
3545+
// CHECK-LABEL: @omp_groupprivate_host
3546+
// CHECK: [[TMP1:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3547+
// CHECK: store i32 1, ptr [[TMP1]], align 4
3548+
// CHECK: [[TMP2:%.*]] = call ptr @__kmpc_alloc_shared(i64 4)
3549+
// CHECK: store i32 1, ptr [[TMP2]], align 4
3550+
// CHECK: store i32 1, ptr @nohost1, align 4
3551+
3552+
// -----

0 commit comments

Comments
 (0)