Skip to content

Commit d6d50cd

Browse files
committed
Allocate simple private vars inside of the task
There's no need to put the variable in the context structure if it can be initialized without reading from the mold variable. This leads to much simpler code generation for nested tasks which only privatize simple scalar variables and do not use firstprivate. This works around the failure in 0226_0013 (Fujitsu test).
1 parent f8a0b71 commit d6d50cd

File tree

3 files changed

+85
-20
lines changed

3 files changed

+85
-20
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 76 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,24 +1730,42 @@ buildDependData(std::optional<ArrayAttr> dependKinds, OperandRange dependVars,
17301730
}
17311731
}
17321732

1733+
static bool privatizerReadsSourceVariable(omp::PrivateClauseOp &priv) {
1734+
if (priv.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate)
1735+
return true;
1736+
1737+
Region &initRegion = priv.getInitRegion();
1738+
if (initRegion.empty())
1739+
return false;
1740+
1741+
BlockArgument sourceVariable = priv.getInitMoldArg();
1742+
if (!sourceVariable)
1743+
return false;
1744+
return !sourceVariable.use_empty();
1745+
}
1746+
17331747
namespace {
17341748
/// TaskContextStructManager takes care of creating and freeing a structure
17351749
/// containing information needed by the task body to execute.
17361750
class TaskContextStructManager {
17371751
public:
17381752
TaskContextStructManager(llvm::IRBuilderBase &builder,
1739-
LLVM::ModuleTranslation &moduleTranslation)
1740-
: builder{builder}, moduleTranslation{moduleTranslation} {}
1753+
LLVM::ModuleTranslation &moduleTranslation,
1754+
MutableArrayRef<omp::PrivateClauseOp> privateDecls)
1755+
: builder{builder}, moduleTranslation{moduleTranslation},
1756+
privateDecls{privateDecls} {}
17411757

17421758
/// Creates a heap allocated struct containing space for each private
17431759
/// variable. Returns nullptr if there are is no struct needed. Invariant:
17441760
/// privateVarTypes, privateDecls, and the elements of the structure should
1745-
/// all have the same order.
1746-
void
1747-
generateTaskContextStruct(MutableArrayRef<omp::PrivateClauseOp> privateDecls);
1761+
/// all have the same order (although privateDecls which do not read from the
1762+
/// mold argument are skipped).
1763+
void generateTaskContextStruct();
17481764

17491765
/// Create GEPs to access each member of the structure representing a private
1750-
/// variable, adding them to llvmPrivateVars.
1766+
/// variable, adding them to llvmPrivateVars. Null values are added where
1767+
/// private decls were skipped so that the ordering continues to match the
1768+
/// private decls.
17511769
void createGEPsToPrivateVars(SmallVectorImpl<llvm::Value *> &llvmPrivateVars);
17521770

17531771
/// De-allocate the task context structure.
@@ -1758,6 +1776,7 @@ class TaskContextStructManager {
17581776
private:
17591777
llvm::IRBuilderBase &builder;
17601778
LLVM::ModuleTranslation &moduleTranslation;
1779+
MutableArrayRef<omp::PrivateClauseOp> privateDecls;
17611780

17621781
/// The type of each member of the structure, in order.
17631782
SmallVector<llvm::Type *> privateVarTypes;
@@ -1769,13 +1788,16 @@ class TaskContextStructManager {
17691788
};
17701789
} // namespace
17711790

1772-
void TaskContextStructManager::generateTaskContextStruct(
1773-
MutableArrayRef<omp::PrivateClauseOp> privateDecls) {
1791+
void TaskContextStructManager::generateTaskContextStruct() {
17741792
if (privateDecls.empty())
17751793
return;
17761794
privateVarTypes.reserve(privateDecls.size());
17771795

17781796
for (omp::PrivateClauseOp &privOp : privateDecls) {
1797+
// Skip private variables which can safely be allocated and initialised
1798+
// inside of the task
1799+
if (!privatizerReadsSourceVariable(privOp))
1800+
continue;
17791801
Type mlirType = privOp.getType();
17801802
privateVarTypes.push_back(moduleTranslation.convertType(mlirType));
17811803
}
@@ -1804,10 +1826,17 @@ void TaskContextStructManager::createGEPsToPrivateVars(
18041826
// Create GEPs for each struct member and initialize llvmPrivateVars to point
18051827
llvmPrivateVars.reserve(privateVarTypes.size());
18061828
llvm::Value *zero = builder.getInt32(0);
1807-
for (auto [i, eleTy] : llvm::enumerate(privateVarTypes)) {
1829+
unsigned i = 0;
1830+
for (auto privDecl : privateDecls) {
1831+
if (!privatizerReadsSourceVariable(privDecl)) {
1832+
// Handle this inside of the task so we don't pass unnessecary vars in
1833+
llvmPrivateVars.push_back(nullptr);
1834+
continue;
1835+
}
18081836
llvm::Value *iVal = builder.getInt32(i);
18091837
llvm::Value *gep = builder.CreateGEP(structTy, structPtr, {zero, iVal});
18101838
llvmPrivateVars.push_back(gep);
1839+
i += 1;
18111840
}
18121841
}
18131842

@@ -1835,10 +1864,11 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
18351864
SmallVector<mlir::Value> mlirPrivateVars;
18361865
SmallVector<llvm::Value *> llvmPrivateVars;
18371866
SmallVector<omp::PrivateClauseOp> privateDecls;
1838-
TaskContextStructManager taskStructMgr{builder, moduleTranslation};
18391867
mlirPrivateVars.reserve(privateBlockArgs.size());
18401868
llvmPrivateVars.reserve(privateBlockArgs.size());
18411869
collectPrivatizationDecls(taskOp, privateDecls);
1870+
TaskContextStructManager taskStructMgr{builder, moduleTranslation,
1871+
privateDecls};
18421872
for (mlir::Value privateVar : taskOp.getPrivateVars())
18431873
mlirPrivateVars.push_back(privateVar);
18441874

@@ -1891,7 +1921,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
18911921

18921922
// Create task variable structure
18931923
llvm::SmallVector<llvm::Value *> privateVarAllocations;
1894-
taskStructMgr.generateTaskContextStruct(privateDecls);
1924+
taskStructMgr.generateTaskContextStruct();
18951925
// GEPs so that we can initialize the variables. Don't use these GEPs inside
18961926
// of the body otherwise it will be the GEP not the struct which is fowarded
18971927
// to the outlined function. GEPs forwarded in this way are passed in a
@@ -1903,6 +1933,10 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19031933
for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
19041934
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs,
19051935
privateVarAllocations)) {
1936+
if (!llvmPrivateVarAlloc)
1937+
// to be handled inside the task
1938+
continue;
1939+
19061940
llvm::Expected<llvm::Value *> privateVarOrErr =
19071941
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
19081942
blockArg, llvmPrivateVarAlloc, initBlock);
@@ -1939,14 +1973,45 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
19391973

19401974
auto bodyCB = [&](InsertPointTy allocaIP,
19411975
InsertPointTy codegenIP) -> llvm::Error {
1976+
// Save the alloca insertion point on ModuleTranslation stack for use in
1977+
// nested regions.
1978+
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1979+
moduleTranslation, allocaIP);
1980+
19421981
// translate the body of the task:
19431982
builder.restoreIP(codegenIP);
19441983

1984+
llvm::BasicBlock *privInitBlock = nullptr;
1985+
for (auto [blockArg, privDecl, mlirPrivVar] :
1986+
llvm::zip_equal(privateBlockArgs, privateDecls, mlirPrivateVars)) {
1987+
if (privatizerReadsSourceVariable(privDecl))
1988+
// This is handled before the task executes
1989+
continue;
1990+
1991+
auto codegenInsertionPt = builder.saveIP();
1992+
llvm::Type *llvmAllocType =
1993+
moduleTranslation.convertType(privDecl.getType());
1994+
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1995+
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1996+
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1997+
1998+
llvm::Expected<llvm::Value *> privateVarOrError =
1999+
initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
2000+
blockArg, llvmPrivateVar, privInitBlock);
2001+
if (auto err = privateVarOrError.takeError())
2002+
return err;
2003+
moduleTranslation.mapValue(blockArg, privateVarOrError.get());
2004+
builder.restoreIP(codegenInsertionPt);
2005+
}
2006+
19452007
// Find and map the addresses of each variable within the task context
19462008
// structure
19472009
taskStructMgr.createGEPsToPrivateVars(llvmPrivateVars);
19482010
for (auto [blockArg, llvmPrivateVar] :
19492011
llvm::zip_equal(privateBlockArgs, llvmPrivateVars)) {
2012+
if (!llvmPrivateVar)
2013+
// This was handled above
2014+
continue;
19502015
// Fix broken pass-by-value case for Fortran character boxes
19512016
if (!mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
19522017
llvmPrivateVar = builder.CreateLoad(

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,9 +2795,10 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
27952795
// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
27962796
// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
27972797
// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
2798-
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
2798+
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
27992799
// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
2800-
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
2800+
// CHEKC: %[[NEW_STRUCTARG:.*]] = alloca { ptr }, align 8
2801+
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]],
28012802
// CHECK: define internal void @[[parallel_outlined_fn]]
28022803
// -----
28032804

mlir/test/Target/LLVMIR/openmp-task-privatization.mlir

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,11 @@ llvm.func @task_privatization_test() {
3636
// CHECK: br label %omp.private.init
3737
// CHECK: omp.private.init:
3838
// CHECK: %[[VAL_5:.*]] = tail call ptr @malloc(i64 ptrtoint (ptr getelementptr ([[STRUCT_KMP_PRIVATES_T:.*]], ptr null, i32 1) to i64))
39-
// CHECK: %[[VAL_7:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 0
40-
// CHECK: %[[VAL_8:.*]] = getelementptr { i32, i32 }, ptr %[[VAL_5]], i32 0, i32 1
39+
// CHECK: %[[VAL_7:.*]] = getelementptr { i32 }, ptr %[[VAL_5]], i32 0, i32 0
4140
// CHECK: br label %omp.private.copy1
4241
// CHECK: omp.private.copy1:
4342
// CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_1]], align 4
44-
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_8]], align 4
43+
// CHECK: store i32 %[[VAL_10]], ptr %[[VAL_7]], align 4
4544
// CHECK: br label %omp.private.copy
4645
// CHECK: omp.private.copy:
4746
// CHECK: br label %omp.task.start
@@ -65,14 +64,14 @@ llvm.func @task_privatization_test() {
6564
// CHECK: %[[OMP_TASK_CONEXT_PTR_PTR_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR_PTR]], align 8
6665
// CHECK: %[[OMP_TASK_CONTEXT_PTR_PTR:.*]] = getelementptr { ptr }, ptr %[[OMP_TASK_CONTEXT_PTR_PTR_PTR:.*]], i32 0, i32 0
6766
// CHECK: %[[OMP_TASK_CONTEXT_PTR:.*]] = load ptr, ptr %[[OMP_TASK_CONTEXT_PTR_PTR:.*]], align 8
67+
// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
6868
// CHECK: br label %[[VAL_18:.*]]
6969
// CHECK: task.body: ; preds = %[[VAL_19:.*]]
70-
// CHECK: %[[VAL_20:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
71-
// CHECK: %[[VAL_22:.*]] = getelementptr { i32, i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 1
70+
// CHECK: %[[VAL_20:.*]] = getelementptr { i32 }, ptr %[[OMP_TASK_CONTEXT_PTR]], i32 0, i32 0
7271
// CHECK: br label %[[VAL_23:.*]]
7372
// CHECK: omp.task.region: ; preds = %[[VAL_18]]
74-
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_22]], align 4
75-
// CHECK: store i32 %[[VAL_24]], ptr %[[VAL_20]], align 4
73+
// CHECK: %[[VAL_24:.*]] = load i32, ptr %[[VAL_20]], align 4
74+
// CHECK: store i32 %[[VAL_24]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
7675
// CHECK: br label %[[VAL_25:.*]]
7776
// CHECK: omp.region.cont: ; preds = %[[VAL_23]]
7877
// CHECK: tail call void @free(ptr %[[OMP_TASK_CONTEXT_PTR]])

0 commit comments

Comments
 (0)