Skip to content

Commit 9a4fe6a

Browse files
committed
Allocate simple private vars inside of the task
There's no need to put the variable in the context structure if it can be initialized without reading from the mold variable. This leads to much simpler code generation for nested tasks which only privatize simple scalar variables and do not use firstprivate. This works around the failure in 0226_0013 (Fujitsu test).
1 parent df47461 commit 9a4fe6a

File tree

3 files changed

+87
-20
lines changed

3 files changed

+87
-20
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,24 +1755,42 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
17551755
}
17561756
}
17571757

1758+
static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
1759+
if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
1760+
return true;
1761+
1762+
Region &initRegion = priv.getInitRegion();
1763+
if (initRegion.empty())
1764+
return false;
1765+
1766+
BlockArgument sourceVariable = priv.getInitMoldArg();
1767+
if (!sourceVariable)
1768+
return false;
1769+
return !sourceVariable.use_empty();
1770+
}
1771+
17581772
namespace {
17591773
/// TaskContextStructManager takes care of creating and freeing a structure
17601774
/// containing information needed by the task body to execute.
17611775
class TaskContextStructManager {
17621776
public:
17631777
TaskContextStructManager(llvm::IRBuilderBase &builder,
1764-
LLVM::ModuleTranslation &moduleTranslation)
1765-
: builder{builder}, moduleTranslation{moduleTranslation} {}
1778+
LLVM::ModuleTranslation &moduleTranslation,
1779+
MutableArrayRef<omp::PrivateClauseOp> privateDecls)
1780+
: builder{builder}, moduleTranslation{moduleTranslation},
1781+
privateDecls{privateDecls} {}
17661782

17671783
/// Creates a heap allocated struct containing space for each private
17681784
/// variable. Returns nullptr if there are is no struct needed. Invariant:
17691785
/// privateVarTypes, privateDecls, and the elements of the structure should
1770-
/// all have the same order.
1771-
void
1772-
generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
1786+
/// all have the same order (although privateDecls which do not read from the
1787+
/// mold argument are skipped).
1788+
void generateTaskContextStruct();
17731789

17741790
/// Create GEPs to access each member of the structure representing a private
1775-
/// variable, adding them to llvmPrivateVars.
1791+
/// variable, adding them to llvmPrivateVars. Null values are added where
1792+
/// private decls were skipped so that the ordering continues to match the
1793+
/// private decls.
17761794
void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
17771795

17781796
/// De-allocate the task context structure.
@@ -1783,6 +1801,7 @@ class TaskContextStructManager {
17831801
private:
17841802
llvm::IRBuilderBase &builder;
17851803
LLVM::ModuleTranslation &moduleTranslation;
1804+
MutableArrayRef<omp::PrivateClauseOp> privateDecls;
17861805

17871806
/// The type of each member of the structure, in order.
17881807
SmallVector<llvm::Type *> privateVarTypes;
@@ -1794,13 +1813,16 @@ class TaskContextStructManager {
17941813
};
17951814
} // namespace
17961815

1797-
void TaskContextStructManager::generateTaskContextStruct(
1798-
MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
1816+
void TaskContextStructManager::generateTaskContextStruct() {
17991817
if (privateDecls.empty())
18001818
return;
18011819
privateVarTypes.reserve(privateDecls.size());
18021820

18031821
for (omp::PrivateClauseOp &privOp : privateDecls) {
1822+
// Skip private variables which can safely be allocated and initialised
1823+
// inside of the task
1824+
if (!privatizerReadsSourceVariable(privOp))
1825+
continue;
18041826
Type mlirType = privOp.getType();
18051827
privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
18061828
}
@@ -1829,10 +1851,17 @@ void TaskContextStructManager::createGEPsToPrivateVars(
18291851
// Create GEPs for each struct member and initialize llvmPrivateVars to point
18301852
llvmPrivateVars.reserve(privateVarTypes.size());
18311853
llvm::Value *zero = builder.getInt32(0);
1832-
for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
1854+
unsigned i = 0;
1855+
for (auto privDecl : privateDecls) {
1856+
if (!privatizerReadsSourceVariable(privDecl)) {
1857+
// Handle this inside of the task so we don't pass unnessecary vars in
1858+
llvmPrivateVars.push_back(nullptr);
1859+
continue;
1860+
}
18331861
llvm::Value *iVal = builder.getInt32(i);
18341862
llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
18351863
llvmPrivateVars.push_back(gep);
1864+
i += 1;
18361865
}
18371866
}
18381867

@@ -1860,10 +1889,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
18601889
SmallVector<mlir::Value> mlirPrivateVars;
18611890
SmallVector<llvm::Value *> llvmPrivateVars;
18621891
SmallVector<omp::PrivateClauseOp> privateDecls;
1863-
TaskContextStructManager taskStructMgr{builder, moduleTranslation};
18641892
mlirPrivateVars.reserve(privateBlockArgs.size());
18651893
llvmPrivateVars.reserve(privateBlockArgs.size());
18661894
collectPrivatizationDecls(taskOp, privateDecls);
1895+
TaskContextStructManager taskStructMgr{builder, moduleTranslation,
1896+
privateDecls};
18671897
for (mlir::Value privateVar : taskOp.getPrivateVars())
18681898
mlirPrivateVars.push_back(privateVar);
18691899

@@ -1915,7 +1945,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19151945

19161946
// Create task variable structure
19171947
llvm::SmallVector<llvm::Value *> privateVarAllocations;
1918-
taskStructMgr.generateTaskContextStruct(privateDecls);
1948+
taskStructMgr.generateTaskContextStruct();
19191949
// GEPs so that we can initialize the variables. Don't use these GEPs inside
19201950
// of the body otherwise it will be the GEP not the struct which is fowarded
19211951
// to the outlined function. GEPs forwarded in this way are passed in a
@@ -1927,6 +1957,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19271957
for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
19281958
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
19291959
privateVarAllocations)) {
1960+
if (!llvmPrivateVarAlloc)
1961+
// to be handled inside the task
1962+
continue;
1963+
19301964
llvm::Expected<llvm::Value *> privateVarOrErr =
19311965
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
19321966
blockArg, llvmPrivateVarAlloc, initBlock);
@@ -1963,12 +1997,45 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19631997

19641998
auto bodyCB = [&](InsertPointTy allocaIP,
19651999
InsertPointTy codegenIP) -> llvm::Error {
2000+
// Save the alloca insertion point on ModuleTranslation stack for use in
2001+
// nested regions.
2002+
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
2003+
moduleTranslation, allocaIP);
2004+
2005+
// translate the body of the task:
19662006
builder.restoreIP(codegenIP);
2007+
2008+
llvm::BasicBlock *privInitBlock = nullptr;
2009+
for (auto [blockArg, privDecl, mlirPrivVar] :
2010+
llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
2011+
if (privatizerReadsSourceVariable(privDecl))
2012+
// This is handled before the task executes
2013+
continue;
2014+
2015+
auto codegenInsertionPt = builder.saveIP();
2016+
llvm::Type *llvmAllocType =
2017+
moduleTranslation.convertType(privDecl.getType());
2018+
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
2019+
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
2020+
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
2021+
2022+
llvm::Expected<llvm::Value *> privateVarOrError =
2023+
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2024+
blockArg, llvmPrivateVar, privInitBlock);
2025+
if (auto err = privateVarOrError.takeError())
2026+
return err;
2027+
moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2028+
builder.restoreIP(codegenInsertionPt);
2029+
}
2030+
19672031
// Find and map the addresses of each variable within the task context
19682032
// structure
19692033
taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
19702034
for (auto [blockArg, llvmPrivateVar] :
19712035
llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
2036+
if (!llvmPrivateVar)
2037+
// This was handled above
2038+
continue;
19722039
// Fix broken pass-by-value case for Fortran character boxes
19732040
if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
19742041
llvmPrivateVar = builder.CreateLoad(

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
27952795
// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
27962796
// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
27972797
// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
2798-
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
2798+
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
27992799
// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
2800-
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
2800+
// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8
2801+
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]],
28012802
// CHECK: define internal void @[[parallel_outlined_fn]]
28022803
// -----
28032804

mlir/test/Target/LLVMIR/openmp-task-privatization.mlir

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,11 @@ llvm.func @task_privatization_test() {
3636
// CHECK: br label %omp.private.init
3737
// CHECK: omp.private.init:
3838
// CHECK: %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
39-
// CHECK: %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
40-
// CHECK: %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
39+
// CHECK: %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0
4140
// CHECK: br label %omp.private.copy
4241
// CHECK: omp.private.copy:
4342
// CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
44-
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
43+
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4
4544
// CHECK: br label %omp.task.start
4645
// CHECK: omp.task.start:
4746
// CHECK: br label %codeRepl
@@ -63,14 +62,14 @@ llvm.func @task_privatization_test() {
6362
// CHECK: %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
6463
// CHECK: %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
6564
// CHECK: %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
65+
// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
6666
// CHECK: br label %[[VAL_18:.*]]
6767
// CHECK: task.body: ; preds = %[[VAL_19:.*]]
68-
// CHECK: %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
69-
// CHECK: %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
68+
// CHECK: %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
7069
// CHECK: br label %[[VAL_23:.*]]
7170
// CHECK: omp.task.region: ; preds = %[[VAL_18]]
72-
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
73-
// CHECK: store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
71+
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4
72+
// CHECK: store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
7473
// CHECK: br label %[[VAL_25:.*]]
7574
// CHECK: omp.region.cont: ; preds = %[[VAL_23]]
7675
// CHECK: tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])

0 commit comments

Comments
 (0)