Skip to content

Commit 1462e63

Browse files
committed
[OPENMP]PR53344: Emit code for final update of the inscan reduction vars in worksharing loops.
The final update of the inscan reduction variables must be emitted. For worksharing loops, the reduction values are stored in a temporary array, so the last element of that array needs to be copied to the original variable at the end of the construct. Differential Revision: https://reviews.llvm.org/D121156
1 parent 17f3a92 commit 1462e63

File tree

2 files changed

+84
-20
lines changed

2 files changed

+84
-20
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 77 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3515,6 +3515,57 @@ static void emitScanBasedDirectiveDecls(
35153515
}
35163516
}
35173517

3518+
/// Copies final inscan reductions values to the original variables.
3519+
/// The code is the following:
3520+
/// \code
3521+
/// <orig_var> = buffer[num_iters-1];
3522+
/// \endcode
3523+
static void emitScanBasedDirectiveFinals(
3524+
CodeGenFunction &CGF, const OMPLoopDirective &S,
3525+
llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3526+
llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3527+
NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3528+
SmallVector<const Expr *, 4> Shareds;
3529+
SmallVector<const Expr *, 4> LHSs;
3530+
SmallVector<const Expr *, 4> RHSs;
3531+
SmallVector<const Expr *, 4> Privates;
3532+
SmallVector<const Expr *, 4> CopyOps;
3533+
SmallVector<const Expr *, 4> CopyArrayElems;
3534+
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3535+
assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3536+
"Only inscan reductions are expected.");
3537+
Shareds.append(C->varlist_begin(), C->varlist_end());
3538+
LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3539+
RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3540+
Privates.append(C->privates().begin(), C->privates().end());
3541+
CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3542+
CopyArrayElems.append(C->copy_array_elems().begin(),
3543+
C->copy_array_elems().end());
3544+
}
3545+
// Create temp var and copy LHS value to this temp value.
3546+
// LHS = TMP[LastIter];
3547+
llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3548+
OMPScanNumIterations,
3549+
llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
3550+
for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3551+
const Expr *PrivateExpr = Privates[I];
3552+
const Expr *OrigExpr = Shareds[I];
3553+
const Expr *CopyArrayElem = CopyArrayElems[I];
3554+
CodeGenFunction::OpaqueValueMapping IdxMapping(
3555+
CGF,
3556+
cast<OpaqueValueExpr>(
3557+
cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3558+
RValue::get(OMPLast));
3559+
LValue DestLVal = CGF.EmitLValue(OrigExpr);
3560+
LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
3561+
CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
3562+
SrcLVal.getAddress(CGF),
3563+
cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
3564+
cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
3565+
CopyOps[I]);
3566+
}
3567+
}
3568+
35183569
/// Emits the code for the directive with inscan reductions.
35193570
/// The code is the following:
35203571
/// \code
@@ -3709,6 +3760,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
37093760
if (!isOpenMPParallelDirective(S.getDirectiveKind()))
37103761
emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
37113762
emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3763+
if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3764+
emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
37123765
} else {
37133766
CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
37143767
HasCancel);
@@ -4282,23 +4335,25 @@ void CodeGenFunction::EmitOMPParallelForDirective(
42824335
(void)emitWorksharingDirective(CGF, S, S.hasCancel());
42834336
};
42844337
{
4285-
if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4338+
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4339+
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4340+
CGCapturedStmtInfo CGSI(CR_OpenMP);
4341+
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4342+
OMPLoopScope LoopScope(CGF, S);
4343+
return CGF.EmitScalarExpr(S.getNumIterations());
4344+
};
4345+
bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
42864346
[](const OMPReductionClause *C) {
42874347
return C->getModifier() == OMPC_REDUCTION_inscan;
4288-
})) {
4289-
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4290-
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4291-
CGCapturedStmtInfo CGSI(CR_OpenMP);
4292-
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4293-
OMPLoopScope LoopScope(CGF, S);
4294-
return CGF.EmitScalarExpr(S.getNumIterations());
4295-
};
4348+
});
4349+
if (IsInscan)
42964350
emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4297-
}
42984351
auto LPCRegion =
42994352
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
43004353
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
43014354
emitEmptyBoundParameters);
4355+
if (IsInscan)
4356+
emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
43024357
}
43034358
// Check for outer lastprivate conditional update.
43044359
checkForLastprivateConditionalUpdate(*this, S);
@@ -4313,23 +4368,25 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
43134368
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
43144369
};
43154370
{
4316-
if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4371+
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4372+
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4373+
CGCapturedStmtInfo CGSI(CR_OpenMP);
4374+
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4375+
OMPLoopScope LoopScope(CGF, S);
4376+
return CGF.EmitScalarExpr(S.getNumIterations());
4377+
};
4378+
bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
43174379
[](const OMPReductionClause *C) {
43184380
return C->getModifier() == OMPC_REDUCTION_inscan;
4319-
})) {
4320-
const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4321-
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4322-
CGCapturedStmtInfo CGSI(CR_OpenMP);
4323-
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4324-
OMPLoopScope LoopScope(CGF, S);
4325-
return CGF.EmitScalarExpr(S.getNumIterations());
4326-
};
4381+
});
4382+
if (IsInscan)
43274383
emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4328-
}
43294384
auto LPCRegion =
43304385
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
43314386
emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
43324387
emitEmptyBoundParameters);
4388+
if (IsInscan)
4389+
emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
43334390
}
43344391
// Check for outer lastprivate conditional update.
43354392
checkForLastprivateConditionalUpdate(*this, S);

clang/test/OpenMP/parallel_for_scan_codegen.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ void baz(int n) {
2727
// CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
2828

2929
// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
30+
// CHECK: [[LAST:%.+]] = mul nsw i64 9, %
31+
// CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
32+
// CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
33+
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
34+
// CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
35+
// CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
36+
// CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
3037

3138
// CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
3239

0 commit comments

Comments
 (0)