Skip to content

Commit a05af19

Browse files
author
Chandra Ghale
committed
Codegen for Reduction over private variables with reduction clause
1 parent 387f3e8 commit a05af19

File tree

4 files changed

+406
-3
lines changed

4 files changed

+406
-3
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4899,6 +4899,150 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
48994899
}
49004900
}
49014901

4902+
/// Emits a reduction over privatized variables through one shared accumulator.
///
/// Emitted sequence: barrier; the thread whose id compares equal to 0 stores
/// the initial value into the accumulator; barrier; every thread atomically
/// combines its private value into the accumulator; barrier; every thread
/// loads the accumulator and stores it into each LHS variable; barrier.
///
/// NOTE(review): a single accumulator, typed and initialized from
/// Privates[0], is shared by every reduction item. With more than one item
/// all of them are folded into (and overwritten from) that one variable.
/// Confirm callers only pass a single item, or split this per item.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Source location used for the runtime calls and the loads.
/// \param Privates Private copies of the reduction items; only the first
/// entry selects the accumulator type and its initial value.
/// \param LHSExprs Per-thread values that are combined and afterwards
/// overwritten with the accumulated result.
/// \param RHSExprs Not read by this function.
/// \param ReductionOps Combiner expressions; only top-level assignments
/// (BO_Assign) are emitted, every other form is skipped.
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {

  // Nothing to emit without operands, and the index-wise pairing below
  // requires the three lists to have the same length.
  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
    return;

  if (LHSExprs.size() != Privates.size() ||
      LHSExprs.size() != ReductionOps.size())
    return;

  const Expr *Private = Privates[0];
  QualType PrivateType = Private->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);

  // The accumulator starts from the private copy's constant integer
  // initializer when there is one, and from zero otherwise.
  llvm::Constant *ZeroVal = llvm::Constant::getNullValue(LLVMType);
  llvm::Constant *InitVal = ZeroVal;

  if (const auto *DRE = dyn_cast<DeclRefExpr>(Private)) {
    if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
      const Expr *Init = VD->getInit();
      Expr::EvalResult Result;
      if (Init && Init->isConstantInitializer(CGF.getContext(), false) &&
          Init->EvaluateAsRValue(Result, CGF.getContext()) &&
          Result.Val.isInt() && LLVMType->isIntegerTy()) {
        // The evaluated value carries the width of the AST type, which can
        // differ from the in-memory type; resize it so the constant's width
        // matches LLVMType.
        InitVal = llvm::ConstantInt::get(
            LLVMType,
            Result.Val.getInt().extOrTrunc(LLVMType->getIntegerBitWidth()));
      }
    }
  }

  // Create an internal shared variable. A global with common linkage must
  // have a zero initializer, so the real initial value is not baked into the
  // global; it is stored at run time in the "init" block below.
  std::string SharedName = getName({"internal_private_var"});
  auto *SharedVar = new llvm::GlobalVariable(
      CGM.getModule(), LLVMType, /*isConstant=*/false,
      llvm::GlobalValue::CommonLinkage, ZeroVal,
      ".omp.reduction." + SharedName, /*InsertBefore=*/nullptr,
      llvm::GlobalVariable::NotThreadLocal);

  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));

  Address SharedResult(SharedVar, SharedVar->getValueType(),
                       CGF.getContext().getTypeAlignInChars(PrivateType));

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  // All four synchronization points use the same __kmpc_barrier call.
  auto EmitBarrier = [&]() {
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_barrier),
                        BarrierArgs);
  };

  // Wait for every thread before the accumulator is (re)initialized.
  EmitBarrier();

  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");

  // Only the thread whose id equals 0 writes the initial value.
  llvm::Value *IsInitThread = CGF.Builder.CreateICmpEQ(
      ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
  CGF.Builder.CreateCondBr(IsInitThread, InitBB, InitEndBB);

  CGF.EmitBlock(InitBB);
  CGF.Builder.CreateStore(InitVal, SharedResult);
  CGF.Builder.CreateBr(InitEndBB);

  CGF.EmitBlock(InitEndBB);

  // The initial value must be in place before any thread combines into it.
  EmitBarrier();

  LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
    const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[I]);
    if (!BinOp || BinOp->getOpcode() != BO_Assign)
      continue;

    const Expr *RHSExpr = BinOp->getRHS();
    if (!RHSExpr)
      continue;

    // The combiner is the operator on the right-hand side of the assignment;
    // BO_Comma stands for "no binary operator found".
    BinaryOperatorKind BO = BO_Comma;
    if (const auto *BORHS =
            dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
      BO = BORHS->getOpcode();
    }

    LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
    RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
    auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
      if (BO == BO_Mul) {
        llvm::Value *OldScalar = OldVal.getScalarVal();
        llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
        // Integer 'mul' is not valid on floating-point operands.
        llvm::Value *Product =
            OldScalar->getType()->isFPOrFPVectorTy()
                ? CGF.Builder.CreateFMul(OldScalar, PrivateScalar)
                : CGF.Builder.CreateMul(OldScalar, PrivateScalar);
        return RValue::get(Product);
      } else {
        // NOTE(review): OVE is created right here, so nothing inside
        // BinOp->getRHS() can refer to it; the mapping therefore does not
        // feed OldVal into the expression emitted below. Confirm whether the
        // LHS variable should be remapped to OldVal instead.
        OpaqueValueExpr OVE(BinOp->getLHS()->getExprLoc(),
                            BinOp->getLHS()->getType(),
                            ExprValueKind::VK_PRValue);
        CodeGenFunction::OpaqueValueMapping OldValMapping(CGF, &OVE, OldVal);
        return CGF.EmitAnyExpr(BinOp->getRHS());
      }
    };

    (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
        SharedLV, PrivateRV, BO, true,
        llvm::AtomicOrdering::SequentiallyConsistent, Loc, UpdateOp);
  }

  // Final barrier: every contribution must have landed before the read-back.
  EmitBarrier();

  // Broadcast final result
  llvm::Value *FinalResult = CGF.Builder.CreateLoad(SharedResult);

  // Update private variables with final result (the three lists have equal
  // length, so iterating LHSExprs visits one entry per private).
  for (const Expr *LHS : LHSExprs) {
    LValue LHSLV = CGF.EmitLValue(LHS);
    CGF.Builder.CreateStore(FinalResult, LHSLV.getAddress());
  }

  // Final synchronization: no thread may start re-initializing the
  // accumulator while another one is still reading it.
  EmitBarrier();
}
5045+
49025046
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
49035047
ArrayRef<const Expr *> Privates,
49045048
ArrayRef<const Expr *> LHSExprs,
@@ -5201,6 +5345,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
52015345

52025346
CGF.EmitBranch(DefaultBB);
52035347
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5348+
if (Options.IsPrivateVarReduction) {
5349+
emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps);
5350+
}
52045351
}
52055352

52065353
/// Generates unique name for artificial threadprivate variables.

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,8 +1201,22 @@ class CGOpenMPRuntime {
12011201
/// Options passed to emitReduction. Callers fill this struct with a
/// positional braced initializer, so the order of the fields is part of the
/// interface.
struct ReductionOptionsTy {
12021202
/// Selects the nowait form of the reduction. The caller in
/// EmitOMPReductionClauseFinal documents it as set when a nowait clause is
/// present or the directive is a parallel directive; the scan-based callers
/// pass true.
bool WithNowait;
12031203
/// EmitOMPReductionClauseFinal sets this when the directive kind is
/// OMPD_simd; the scan-based callers pass true.
bool SimpleReduction;
1204+
/// True when at least one reduction item is flagged as a private-variable
/// reduction; emitReduction then additionally calls emitPrivateReduction.
bool IsPrivateVarReduction;
12041205
/// Kind of the directive the reduction is emitted for (the scan-based
/// helper passes OMPD_unknown).
OpenMPDirectiveKind ReductionKind;
12051206
};
1207+
1208+
/// Emits code for private variable reduction: the per-thread values are
/// combined into a shared variable between barriers, and the combined value
/// is then stored back into every LHS variable.
1209+
/// \param CGF Function the code is emitted into.
/// \param Loc Source location used for the emitted runtime calls.
/// \param Privates List of private copies for original reduction arguments.
/// The definition derives the shared variable's type and initial value from
/// the first entry only.
1210+
/// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
1211+
/// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
/// Currently not read by the definition.
1212+
/// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
1213+
/// or 'operator binop(LHS, RHS)'.
/// NOTE(review): the definition only emits entries that are plain
/// assignments (BO_Assign) and skips every other form -- confirm whether the
/// 'operator binop(LHS, RHS)' form is meant to be supported here.
1214+
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc,
1215+
ArrayRef<const Expr *> Privates,
1216+
ArrayRef<const Expr *> LHSExprs,
1217+
ArrayRef<const Expr *> RHSExprs,
1218+
ArrayRef<const Expr *> ReductionOps);
1219+
12061220
/// Emit a code for reduction clause. Next code should be emitted for
12071221
/// reduction:
12081222
/// \code

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,6 +1470,7 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
14701470
llvm::SmallVector<const Expr *, 8> LHSExprs;
14711471
llvm::SmallVector<const Expr *, 8> RHSExprs;
14721472
llvm::SmallVector<const Expr *, 8> ReductionOps;
1473+
llvm::SmallVector<bool, 8> IsPrivate;
14731474
bool HasAtLeastOneReduction = false;
14741475
bool IsReductionWithTaskMod = false;
14751476
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
@@ -1480,6 +1481,8 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
14801481
Privates.append(C->privates().begin(), C->privates().end());
14811482
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
14821483
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1484+
IsPrivate.append(C->private_var_reduction_flags().begin(),
1485+
C->private_var_reduction_flags().end());
14831486
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
14841487
IsReductionWithTaskMod =
14851488
IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
@@ -1499,9 +1502,11 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
14991502
bool SimpleReduction = ReductionKind == OMPD_simd;
15001503
// Emit nowait reduction if nowait clause is present or directive is a
15011504
// parallel directive (it always has implicit barrier).
1505+
bool IsPrivateVarReduction =
1506+
llvm::any_of(IsPrivate, [](bool IsPriv) { return IsPriv; });
15021507
CGM.getOpenMPRuntime().emitReduction(
15031508
*this, D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1504-
{WithNowait, SimpleReduction, ReductionKind});
1509+
{WithNowait, SimpleReduction, IsPrivateVarReduction, ReductionKind});
15051510
}
15061511
}
15071512

@@ -3943,7 +3948,8 @@ static void emitScanBasedDirective(
39433948
PrivScope.Privatize();
39443949
CGF.CGM.getOpenMPRuntime().emitReduction(
39453950
CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3946-
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3951+
{/*WithNowait=*/true, /*SimpleReduction=*/true,
3952+
/*IsPrivateVarReduction */ false, OMPD_unknown});
39473953
}
39483954
llvm::Value *NextIVal =
39493955
CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
@@ -5747,7 +5753,7 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
57475753
}
57485754
CGM.getOpenMPRuntime().emitReduction(
57495755
*this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5750-
{/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5756+
{/*WithNowait=*/true, /*SimpleReduction=*/true, false, OMPD_simd});
57515757
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
57525758
const Expr *PrivateExpr = Privates[I];
57535759
LValue DestLVal;

0 commit comments

Comments
 (0)