Skip to content

Commit 92b347c

Browse files
committed
Allocate simple private vars inside of the task
There's no need to put the variable in the context structure if it can be initialized without reading from the mold variable. This leads to much simpler code generation for nested tasks which only privatize simple scalar variables and do not use firstprivate. This works around the failure in 0226_0013 (Fujitsu test).
1 parent ba908aa commit 92b347c

File tree

3 files changed

+87
-20
lines changed

3 files changed

+87
-20
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1767,24 +1767,42 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
17671767
}
17681768
}
17691769

1770+
static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
1771+
if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
1772+
return true;
1773+
1774+
Region &initRegion = priv.getInitRegion();
1775+
if (initRegion.empty())
1776+
return false;
1777+
1778+
BlockArgument sourceVariable = priv.getInitMoldArg();
1779+
if (!sourceVariable)
1780+
return false;
1781+
return !sourceVariable.use_empty();
1782+
}
1783+
17701784
namespace {
17711785
/// TaskContextStructManager takes care of creating and freeing a structure
17721786
/// containing information needed by the task body to execute.
17731787
class TaskContextStructManager {
17741788
public:
17751789
TaskContextStructManager(llvm::IRBuilderBase &builder,
1776-
LLVM::ModuleTranslation &moduleTranslation)
1777-
: builder{builder}, moduleTranslation{moduleTranslation} {}
1790+
LLVM::ModuleTranslation &moduleTranslation,
1791+
MutableArrayRef<omp::PrivateClauseOp> privateDecls)
1792+
: builder{builder}, moduleTranslation{moduleTranslation},
1793+
privateDecls{privateDecls} {}
17781794

17791795
/// Creates a heap allocated struct containing space for each private
17801796
/// variable. Returns nullptr if there are is no struct needed. Invariant:
17811797
/// privateVarTypes, privateDecls, and the elements of the structure should
1782-
/// all have the same order.
1783-
void
1784-
generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
1798+
/// all have the same order (although privateDecls which do not read from the
1799+
/// mold argument are skipped).
1800+
void generateTaskContextStruct();
17851801

17861802
/// Create GEPs to access each member of the structure representing a private
1787-
/// variable, adding them to llvmPrivateVars.
1803+
/// variable, adding them to llvmPrivateVars. Null values are added where
1804+
/// private decls were skipped so that the ordering continues to match the
1805+
/// private decls.
17881806
void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
17891807

17901808
/// De-allocate the task context structure.
@@ -1795,6 +1813,7 @@ class TaskContextStructManager {
17951813
private:
17961814
llvm::IRBuilderBase &builder;
17971815
LLVM::ModuleTranslation &moduleTranslation;
1816+
MutableArrayRef<omp::PrivateClauseOp> privateDecls;
17981817

17991818
/// The type of each member of the structure, in order.
18001819
SmallVector<llvm::Type *> privateVarTypes;
@@ -1806,13 +1825,16 @@ class TaskContextStructManager {
18061825
};
18071826
} // namespace
18081827

1809-
void TaskContextStructManager::generateTaskContextStruct(
1810-
MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
1828+
void TaskContextStructManager::generateTaskContextStruct() {
18111829
if (privateDecls.empty())
18121830
return;
18131831
privateVarTypes.reserve(privateDecls.size());
18141832

18151833
for (omp::PrivateClauseOp &privOp : privateDecls) {
1834+
// Skip private variables which can safely be allocated and initialised
1835+
// inside of the task
1836+
if (!privatizerReadsSourceVariable(privOp))
1837+
continue;
18161838
Type mlirType = privOp.getType();
18171839
privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
18181840
}
@@ -1841,10 +1863,17 @@ void TaskContextStructManager::createGEPsToPrivateVars(
18411863
// Create GEPs for each struct member and initialize llvmPrivateVars to point
18421864
llvmPrivateVars.reserve(privateVarTypes.size());
18431865
llvm::Value *zero = builder.getInt32(0);
1844-
for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
1866+
unsigned i = 0;
1867+
for (auto privDecl : privateDecls) {
1868+
if (!privatizerReadsSourceVariable(privDecl)) {
1869+
// Handle this inside of the task so we don't pass unnessecary vars in
1870+
llvmPrivateVars.push_back(nullptr);
1871+
continue;
1872+
}
18451873
llvm::Value *iVal = builder.getInt32(i);
18461874
llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
18471875
llvmPrivateVars.push_back(gep);
1876+
i += 1;
18481877
}
18491878
}
18501879

@@ -1872,10 +1901,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
18721901
SmallVector<mlir::Value> mlirPrivateVars;
18731902
SmallVector<llvm::Value *> llvmPrivateVars;
18741903
SmallVector<omp::PrivateClauseOp> privateDecls;
1875-
TaskContextStructManager taskStructMgr{builder, moduleTranslation};
18761904
mlirPrivateVars.reserve(privateBlockArgs.size());
18771905
llvmPrivateVars.reserve(privateBlockArgs.size());
18781906
collectPrivatizationDecls(taskOp, privateDecls);
1907+
TaskContextStructManager taskStructMgr{builder, moduleTranslation,
1908+
privateDecls};
18791909
for (mlir::Value privateVar : taskOp.getPrivateVars())
18801910
mlirPrivateVars.push_back(privateVar);
18811911

@@ -1927,7 +1957,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19271957

19281958
// Create task variable structure
19291959
llvm::SmallVector<llvm::Value *> privateVarAllocations;
1930-
taskStructMgr.generateTaskContextStruct(privateDecls);
1960+
taskStructMgr.generateTaskContextStruct();
19311961
// GEPs so that we can initialize the variables. Don't use these GEPs inside
19321962
// of the body otherwise it will be the GEP not the struct which is fowarded
19331963
// to the outlined function. GEPs forwarded in this way are passed in a
@@ -1939,6 +1969,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19391969
for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
19401970
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
19411971
privateVarAllocations)) {
1972+
if (!llvmPrivateVarAlloc)
1973+
// to be handled inside the task
1974+
continue;
1975+
19421976
llvm::Expected<llvm::Value *> privateVarOrErr =
19431977
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
19441978
blockArg, llvmPrivateVarAlloc, initBlock);
@@ -1975,12 +2009,45 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19752009

19762010
auto bodyCB = [&](InsertPointTy allocaIP,
19772011
InsertPointTy codegenIP) -> llvm::Error {
2012+
// Save the alloca insertion point on ModuleTranslation stack for use in
2013+
// nested regions.
2014+
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
2015+
moduleTranslation, allocaIP);
2016+
2017+
// translate the body of the task:
19782018
builder.restoreIP(codegenIP);
2019+
2020+
llvm::BasicBlock *privInitBlock = nullptr;
2021+
for (auto [blockArg, privDecl, mlirPrivVar] :
2022+
llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
2023+
if (privatizerReadsSourceVariable(privDecl))
2024+
// This is handled before the task executes
2025+
continue;
2026+
2027+
auto codegenInsertionPt = builder.saveIP();
2028+
llvm::Type *llvmAllocType =
2029+
moduleTranslation.convertType(privDecl.getType());
2030+
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2031+
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2032+
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2033+
2034+
llvm::Expected<llvm::Value *> privateVarOrError =
2035+
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2036+
blockArg, llvmPrivateVar, privInitBlock);
2037+
if (auto err = privateVarOrError.takeError())
2038+
return err;
2039+
moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2040+
builder.restoreIP(codegenInsertionPt);
2041+
}
2042+
19792043
// Find and map the addresses of each variable within the task context
19802044
// structure
19812045
taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
19822046
for (auto [blockArg, llvmPrivateVar] :
19832047
llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
2048+
if (!llvmPrivateVar)
2049+
// This was handled above
2050+
continue;
19842051
// Fix broken pass-by-value case for Fortran character boxes
19852052
if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
19862053
llvmPrivateVar = builder.CreateLoad(

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
27952795
// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
27962796
// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
27972797
// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
2798-
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
2798+
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
27992799
// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
2800-
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
2800+
// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8
2801+
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]],
28012802
// CHECK: define internal void @[[parallel_outlined_fn]]
28022803
// -----
28032804

mlir/test/Target/LLVMIR/openmp-task-privatization.mlir

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,11 @@ llvm.func @task_privatization_test() {
3636
// CHECK: br label %omp.private.init
3737
// CHECK: omp.private.init:
3838
// CHECK: %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
39-
// CHECK: %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
40-
// CHECK: %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
39+
// CHECK: %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0
4140
// CHECK: br label %omp.private.copy
4241
// CHECK: omp.private.copy:
4342
// CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
44-
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
43+
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4
4544
// CHECK: br label %omp.task.start
4645
// CHECK: omp.task.start:
4746
// CHECK: br label %codeRepl
@@ -63,14 +62,14 @@ llvm.func @task_privatization_test() {
6362
// CHECK: %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
6463
// CHECK: %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
6564
// CHECK: %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
65+
// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
6666
// CHECK: br label %[[VAL_18:.*]]
6767
// CHECK: task.body: ; preds = %[[VAL_19:.*]]
68-
// CHECK: %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
69-
// CHECK: %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
68+
// CHECK: %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
7069
// CHECK: br label %[[VAL_23:.*]]
7170
// CHECK: omp.task.region: ; preds = %[[VAL_18]]
72-
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
73-
// CHECK: store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
71+
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4
72+
// CHECK: store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
7473
// CHECK: br label %[[VAL_25:.*]]
7574
// CHECK: omp.region.cont: ; preds = %[[VAL_23]]
7675
// CHECK: tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])

0 commit comments

Comments
 (0)