Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 87 additions & 23 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1356,6 +1356,38 @@ class OmpParallelOpConversionManager {
unsigned privateArgEndIdx;
};

/// Looks up the `omp.private` op referred to by `symbolName`, starting the
/// symbol lookup from the operation `from`. The privatizer is expected to
/// exist: a failed lookup trips the assertion below.
static omp::PrivateClauseOp findPrivatizer(Operation *from,
                                           SymbolRefAttr symbolName) {
  omp::PrivateClauseOp privatizer =
      SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(from,
                                                                 symbolName);
  assert(privatizer && "privatizer not found in the symbol table");
  return privatizer;
}

/// Clones `privatizer`, inserting the clone immediately before the original
/// op, and renames the clone to a symbol name for which a lookup starting at
/// `fromOperation` finds no existing symbol. Returns the renamed clone; the
/// original privatizer is left untouched.
static omp::PrivateClauseOp
clonePrivatizer(LLVM::ModuleTranslation &moduleTranslation,
                omp::PrivateClauseOp privatizer, Operation *fromOperation) {
  MLIRContext &context = moduleTranslation.getContext();
  mlir::IRRewriter opCloner(&context);
  opCloner.setInsertionPoint(privatizer);
  auto clone =
      llvm::cast<mlir::omp::PrivateClauseOp>(opCloner.clone(*privatizer));

  // Unique the clone name to avoid clashes in the symbol table. A candidate
  // name is rejected as long as it still resolves to some symbol when looked
  // up from `fromOperation`.
  unsigned counter = 0;
  SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
      privatizer.getSymName(),
      [&](llvm::StringRef candidate) {
        return SymbolTable::lookupNearestSymbolFrom(
                   fromOperation, StringAttr::get(&context, candidate)) !=
               nullptr;
      },
      counter);

  clone.setSymName(cloneName);
  return clone;
}

/// Converts the OpenMP parallel operation to LLVM IR.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
Expand Down Expand Up @@ -1611,34 +1643,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
continue;

SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(mlirPrivatizerAttr);
omp::PrivateClauseOp privatizer =
SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(
opInst, privSym);
omp::PrivateClauseOp privatizer = findPrivatizer(opInst, privSym);

// Clone the privatizer in case it is used by more than one parallel
// region. The privatizer is processed in-place (see below) before it
// gets inlined in the parallel region and therefore processing the
// original op is dangerous.

MLIRContext &context = moduleTranslation.getContext();
mlir::IRRewriter opCloner(&context);
opCloner.setInsertionPoint(privatizer);
auto clone = llvm::cast<mlir::omp::PrivateClauseOp>(
opCloner.clone(*privatizer));

// Unique the clone name to avoid clashes in the symbol table.
unsigned counter = 0;
SmallString<256> cloneName = SymbolTable::generateSymbolName<256>(
privatizer.getSymName(),
[&](llvm::StringRef candidate) {
return SymbolTable::lookupNearestSymbolFrom(
opInst, StringAttr::get(&context, candidate)) !=
nullptr;
},
counter);

clone.setSymName(cloneName);
return {mlirPrivVar, clone};
return {mlirPrivVar,
clonePrivatizer(moduleTranslation, privatizer, opInst)};
}
}

Expand Down Expand Up @@ -3435,6 +3447,58 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
const auto &arg = targetRegion.front().getArgument(argIndex);
moduleTranslation.mapValue(arg, mapOpValue);
}

// Do privatization after moduleTranslation has already recorded
// mapped values.
if (!targetOp.getPrivateVars().empty()) {
auto oldIP = builder.saveIP();
builder.restoreIP(allocaIP);

OperandRange privateVars = targetOp.getPrivateVars();
std::optional<ArrayAttr> privateSyms = targetOp.getPrivateSyms();
unsigned numMapVars = targetOp.getMapVars().size();
Block &firstTargetBlock = targetRegion.front();
auto *blockArgsStart = firstTargetBlock.getArguments().begin();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only use `auto` when the type is already spelled out on the RHS or is too complex to write out.

auto *privArgsStart = blockArgsStart + numMapVars;
auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
MutableArrayRef privateBlockArgs(privArgsStart, privArgsEnd);

for (auto [privVar, privatizerNameAttr, privBlockArg] :
llvm::zip_equal(privateVars, *privateSyms, privateBlockArgs)) {

SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
omp::PrivateClauseOp privatizer = findPrivatizer(&opInst, privSym);
if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate ||
!privatizer.getDeallocRegion().empty()) {
opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
"failed because translation of firstprivate and "
" private allocatables is not supported yet");
bodyGenStatus = failure();
} else {
omp::PrivateClauseOp clonedPrivatizer =
clonePrivatizer(moduleTranslation, privatizer, &opInst);
Region &allocRegion = clonedPrivatizer.getAllocRegion();
BlockArgument allocRegionArg = allocRegion.getArgument(0);
replaceAllUsesInRegionWith(allocRegionArg, privVar, allocRegion);
SmallVector<llvm::Value *, 1> yieldedValues;
if (failed(inlineConvertOmpRegions(
allocRegion, "omp.targetop.privatizer", builder,
moduleTranslation, &yieldedValues))) {
opInst.emitError(
"failed to inline `alloc` region of an `omp.private` "
"op in the target region");
bodyGenStatus = failure();
} else {
assert(yieldedValues.size() == 1);
moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
}
clonedPrivatizer.erase();
builder.restoreIP(oldIP);
}
}
}
builder.restoreIP(codeGenIP);
llvm::BasicBlock *exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
builder.SetInsertPoint(exitBlock);
Expand Down
71 changes: 71 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-target-private.mlir
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Please ensure that the IR is properly formatted and is stripped to the absolute minimum to test this feature.

Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

// Privatizer of data-sharing type `private` for a `!llvm.ptr` value. Its
// alloc region allocates storage for a single i32 and yields the pointer to
// it; the region argument %arg0 is not used inside the region.
omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
// Test case: an omp.target op with one map entry ("a") and one private
// clause entry that uses @simple_var.privatizer. Inside the target region the
// mapped value is loaded through %arg0, 10 is added to it, and the sum is
// stored through the private block argument %arg1.
llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
%2 = llvm.mlir.constant(1 : i64) : i64
%3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
%4 = llvm.mlir.constant(2 : i32) : i32
llvm.store %4, %3 : i32, !llvm.ptr
%5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
%6 = llvm.mlir.constant(10 : i32) : i32
%7 = llvm.load %arg0 : !llvm.ptr -> i32
%8 = llvm.add %7, %6 : i32
llvm.store %8, %arg1 : i32, !llvm.ptr
omp.terminator
}
llvm.return
}
// CHECK: define internal void @__omp_offloading_fd00
// CHECK-NOT: define {{.*}}
// CHECK: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[ADD:.*]] = add i32 {{.*}}, 10
// CHECK: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4

// Privatizer of data-sharing type `private` for a `!llvm.ptr` value. Its
// alloc region allocates storage for a single f32 and yields the pointer to
// it; the region argument %arg0 is not used inside the region.
omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
^bb0(%arg0: !llvm.ptr):
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
omp.yield(%1 : !llvm.ptr)
}
// Test case: an omp.target op with one map entry ("a") and two private clause
// entries, using @simple_var.privatizer (i32) and @n.privatizer (f32). Inside
// the target region the mapped value plus 10 is stored through %arg1, then
// reloaded from %arg1, converted to f32, added to 11.0 and stored through
// %arg2.
llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
%3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
%5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
%6 = llvm.mlir.constant(2 : i32) : i32
llvm.store %6, %5 : i32, !llvm.ptr
%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
%8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
%9 = llvm.mlir.constant(10 : i32) : i32
%10 = llvm.load %arg0 : !llvm.ptr -> i32
%11 = llvm.add %10, %9 : i32
llvm.store %11, %arg1 : i32, !llvm.ptr
%12 = llvm.load %arg1 : !llvm.ptr -> i32
%13 = llvm.sitofp %12 : i32 to f32
%14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
llvm.store %14, %arg2 : f32, !llvm.ptr
omp.terminator
}
llvm.return
}

// CHECK: define internal void @__omp_offloading_fd00
// CHECK: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
// CHECK: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
// CHECK: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
// CHECK: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %[[PRIV_I32_ALLOC]], align 4
// CHECK: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
// CHECK: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
// CHECK: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4