@@ -4899,6 +4899,150 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
48994899 }
49004900}
49014901
4902+ void CGOpenMPRuntime::emitPrivateReduction(
4903+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
4904+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
4905+ ArrayRef<const Expr *> ReductionOps) {
4906+
4907+ if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
4908+ return;
4909+
4910+ if (LHSExprs.size() != Privates.size() ||
4911+ LHSExprs.size() != ReductionOps.size())
4912+ return;
4913+
4914+ QualType PrivateType = Privates[0]->getType();
4915+ llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4916+
4917+ BinaryOperatorKind MainBO = BO_Comma;
4918+ if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[0])) {
4919+ if (const auto *RHSExpr = BinOp->getRHS()) {
4920+ if (const auto *BORHS =
4921+ dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
4922+ MainBO = BORHS->getOpcode();
4923+ }
4924+ }
4925+ }
4926+
4927+ llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
4928+ const Expr *Private = Privates[0];
4929+
4930+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Private)) {
4931+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
4932+ if (const Expr *Init = VD->getInit()) {
4933+ if (Init->isConstantInitializer(CGF.getContext(), false)) {
4934+ Expr::EvalResult Result;
4935+ if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
4936+ APValue &InitValue = Result.Val;
4937+ if (InitValue.isInt()) {
4938+ InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
4939+ }
4940+ }
4941+ }
4942+ }
4943+ }
4944+ }
4945+
4946+ // Create an internal shared variable
4947+ std::string SharedName = getName({"internal_private_var"});
4948+ llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
4949+ CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
4950+ InitVal, ".omp.reduction." + SharedName, nullptr,
4951+ llvm::GlobalVariable::NotThreadLocal);
4952+
4953+ SharedVar->setAlignment(
4954+ llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4955+
4956+ Address SharedResult(SharedVar, SharedVar->getValueType(),
4957+ CGF.getContext().getTypeAlignInChars(PrivateType));
4958+
4959+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
4960+ llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4961+ llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4962+
4963+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4964+ CGM.getModule(), OMPRTL___kmpc_barrier),
4965+ BarrierArgs);
4966+
4967+ llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4968+ llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4969+
4970+ llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4971+ ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4972+ CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
4973+
4974+ CGF.EmitBlock(InitBB);
4975+ CGF.Builder.CreateStore(InitVal, SharedResult);
4976+ CGF.Builder.CreateBr(InitEndBB);
4977+
4978+ CGF.EmitBlock(InitEndBB);
4979+
4980+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4981+ CGM.getModule(), OMPRTL___kmpc_barrier),
4982+ BarrierArgs);
4983+
4984+ for (unsigned I = 0; I < ReductionOps.size(); ++I) {
4985+ if (I >= LHSExprs.size()) {
4986+ break;
4987+ }
4988+
4989+ const auto *BinOp = dyn_cast<BinaryOperator>(ReductionOps[I]);
4990+ if (!BinOp || BinOp->getOpcode() != BO_Assign)
4991+ continue;
4992+
4993+ const Expr *RHSExpr = BinOp->getRHS();
4994+ if (!RHSExpr)
4995+ continue;
4996+
4997+ BinaryOperatorKind BO = BO_Comma;
4998+ if (const auto *BORHS =
4999+ dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5000+ BO = BORHS->getOpcode();
5001+ }
5002+
5003+ LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5004+ LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
5005+ RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
5006+ auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
5007+ if (BO == BO_Mul) {
5008+ llvm::Value *OldScalar = OldVal.getScalarVal();
5009+ llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
5010+ llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
5011+ return RValue::get(Result);
5012+ } else {
5013+ OpaqueValueExpr OVE(BinOp->getLHS()->getExprLoc(),
5014+ BinOp->getLHS()->getType(),
5015+ ExprValueKind::VK_PRValue);
5016+ CodeGenFunction::OpaqueValueMapping OldValMapping(CGF, &OVE, OldVal);
5017+ return CGF.EmitAnyExpr(BinOp->getRHS());
5018+ }
5019+ };
5020+
5021+ (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
5022+ SharedLV, PrivateRV, BO, true,
5023+ llvm::AtomicOrdering::SequentiallyConsistent, Loc, UpdateOp);
5024+ }
5025+
5026+ // Final barrier
5027+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5028+ CGM.getModule(), OMPRTL___kmpc_barrier),
5029+ BarrierArgs);
5030+
5031+ // Broadcast final result
5032+ llvm::Value *FinalResult = CGF.Builder.CreateLoad(SharedResult);
5033+
5034+ // Update private variables with final result
5035+ for (unsigned I = 0; I < Privates.size(); ++I) {
5036+ LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
5037+ CGF.Builder.CreateStore(FinalResult, LHSLV.getAddress());
5038+ }
5039+
5040+ // Final synchronization
5041+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5042+ CGM.getModule(), OMPRTL___kmpc_barrier),
5043+ BarrierArgs);
5044+ }
5045+
49025046void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
49035047 ArrayRef<const Expr *> Privates,
49045048 ArrayRef<const Expr *> LHSExprs,
@@ -5201,6 +5345,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
52015345
52025346 CGF.EmitBranch(DefaultBB);
52035347 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5348+ if (Options.IsPrivateVarReduction) {
5349+ emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps);
5350+ }
52045351}
52055352
52065353/// Generates unique name for artificial threadprivate variables.
0 commit comments