Skip to content

[flang][mlir] Add support for implicit linearization in omp.simd #150386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2760,9 +2760,17 @@ genStandaloneSimd(lower::AbstractConverter &converter, lower::SymMap &symTable,
simdArgs.priv.vars = simdClauseOps.privateVars;
simdArgs.reduction.syms = simdReductionSyms;
simdArgs.reduction.vars = simdClauseOps.reductionVars;

for (auto &sym : simdArgs.priv.syms) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we iterating over the symbols for the loop nest iteration variables here?
If so, should we iterate over iv instead? Also, make sure we use the proper loopSteps[...] (we always use the step from the first loop in the nest below)?

if (sym->test(Fortran::semantics::Symbol::Flag::OmpLinear)) {
const mlir::Value variable = converter.getSymbolAddress(*sym);
simdClauseOps.linearVars.push_back(variable);
simdClauseOps.linearStepVars.push_back(loopNestClauseOps.loopSteps[0]);
}
}

auto simdOp =
genWrapperOp<mlir::omp::SimdOp>(converter, loc, simdClauseOps, simdArgs);

genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
loopNestClauseOps, iv, {{simdOp, simdArgs}},
llvm::omp::Directive::OMPD_simd, dsp);
Expand Down
5 changes: 3 additions & 2 deletions flang/lib/Semantics/resolve-directives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1979,7 +1979,7 @@ std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses(
// parallel do, taskloop, or distribute construct is (are) private.
// - The loop iteration variable in the associated do-loop of a simd construct
// with just one associated do-loop is linear with a linear-step that is the
// increment of the associated do-loop.
// increment of the associated do-loop (only for OpenMP versions <= 4.5)
// - The loop iteration variables in the associated do-loops of a simd
// construct with multiple associated do-loops are lastprivate.
void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
Expand All @@ -1993,7 +1993,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
ivDSA = Symbol::Flag::OmpPrivate;
} else if (level == 1) {
ivDSA = Symbol::Flag::OmpLinear;
if (version <= 45)
ivDSA = Symbol::Flag::OmpLinear;
} else {
ivDSA = Symbol::Flag::OmpLastPrivate;
}
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/parallel-private-clause.f90
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ subroutine simd_loop_1
! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32
! FIRDialect: %[[STEP:.*]] = arith.constant 1 : i32

! FIRDialect: omp.simd private({{.*}}) {
! FIRDialect: omp.simd linear({{.*}} = %[[STEP]] : !fir.ref<i32>) private({{.*}}) {
! FIRDialect-NEXT: omp.loop_nest (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) {
!$OMP SIMD PRIVATE(r)
do i=1, 9
Expand Down
46 changes: 39 additions & 7 deletions flang/test/Lower/OpenMP/simd-linear.f90
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
! This test checks lowering of OpenMP DO Directive (Worksharing)
! with linear clause
! This test checks lowering of OpenMP SIMD with linear clause

! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - 2>&1 | FileCheck %s
! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=45 %s -o - 2>&1 | FileCheck %s
! RUN: %flang_fc1 -fopenmp -emit-hlfir -fopenmp-version=50 %s -o - 2>&1 | FileCheck %s --check-prefix=NOIMPLICIT

!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_linearEi"}
!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFsimple_linearEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[const:.*]] = arith.constant 1 : i32
!CHECK: %{{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_step:.*]] = arith.constant 1 : i32

!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_linearEx"}
!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFsimple_linearEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!NOIMPLICIT: %[[const:.*]] = arith.constant 1 : i32
subroutine simple_linear
implicit none
integer :: x, y, i
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}
!NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
Expand All @@ -19,14 +28,23 @@ subroutine simple_linear
end do
end subroutine


!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_stepEi"}
!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_stepEx"}
!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_stepEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
subroutine linear_step
implicit none
integer :: x, y, i
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!CHECK: %{{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}

!NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[const]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:4)
!CHECK: %[[LOAD:.*]] = fir.load %[[X]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 2 : i32
Expand All @@ -38,15 +56,29 @@ subroutine linear_step

!CHECK: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
!CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[IV_alloca:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlinear_exprEi"}
!CHECK: %[[IV:.*]]:2 = hlfir.declare %[[IV_alloca]] {uniq_name = "_QFlinear_exprEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!CHECK: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
!CHECK: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

!NOIMPLICIT: %[[A_alloca:.*]] = fir.alloca i32 {bindc_name = "a", uniq_name = "_QFlinear_exprEa"}
!NOIMPLICIT: %[[A:.*]]:2 = hlfir.declare %[[A_alloca]] {uniq_name = "_QFlinear_exprEa"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!NOIMPLICIT: %[[X_alloca:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFlinear_exprEx"}
!NOIMPLICIT: %[[X:.*]]:2 = hlfir.declare %[[X_alloca]] {uniq_name = "_QFlinear_exprEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
!NOIMPLICIT: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
!NOIMPLICIT: %[[const:.*]] = arith.constant 4 : i32
!NOIMPLICIT: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
subroutine linear_expr
implicit none
integer :: x, y, i, a
!CHECK: %[[LOAD_A:.*]] = fir.load %[[A]]#0 : !fir.ref<i32>
!CHECK: %[[const:.*]] = arith.constant 4 : i32
!CHECK: %[[LINEAR_EXPR:.*]] = arith.addi %[[LOAD_A]], %[[const]] : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
!CHECK: %{{.*}} = arith.constant 1 : i32
!CHECK: %[[IV_step:.*]] = arith.constant 1 : i32
!CHECK: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>, %[[IV]]#0 = %[[IV_step]] : !fir.ref<i32>) {{.*}}

!NOIMPLICIT: omp.simd linear(%[[X]]#0 = %[[LINEAR_EXPR]] : !fir.ref<i32>) {{.*}}
!$omp simd linear(x:a+4)
do i = 1, 10
y = x + 2
Expand Down
60 changes: 48 additions & 12 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,27 @@ class LinearClauseProcessor {
llvm::BasicBlock *linearLastIterExitBB;

public:
// Allocate space for linear variabes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Merge conflict? The only difference between this an the following function seems to be mlir::Value &linearVar vs mlir::Value *linearVar

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is actually llvm::Value *linearVar and mlir::Value &linearVar. These functions are overloaded because in case of implicit linearization, the FIR lowering has already performed privatization. From lines 2893 - 2910, if a linear variable also exists as a private variable, we directly linearize a llvm::Value *. If not, then we have a mlir::Value which to process.

But now that I detail this, I realise that maybe in the second case, I could invoke ModuleTranslation to lookup the llvm::Value for the mlir::Value and use the same function.

LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::Value *linearVar, Operation &op) {
if (llvm::AllocaInst *linearVarAlloca =
dyn_cast<llvm::AllocaInst>(linearVar)) {
linearPreconditionVars.push_back(builder.CreateAlloca(
linearVarAlloca->getAllocatedType(), nullptr, ".linear_var"));
llvm::Value *linearLoopBodyTemp = builder.CreateAlloca(
linearVarAlloca->getAllocatedType(), nullptr, ".linear_result");
linearOrigVal.push_back(linearVar);
linearLoopBodyTemps.push_back(linearLoopBodyTemp);
linearOrigVars.push_back(linearVarAlloca);
return success();
}

else
return op.emitError() << "not yet implemented: linear clause support"
<< " for non alloca linear variables";
}

// Allocate space for linear variabes
LogicalResult createLinearVar(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
Expand Down Expand Up @@ -265,7 +286,8 @@ class LinearClauseProcessor {
users.push_back(user);
for (auto *user : users) {
if (auto *userInst = dyn_cast<llvm::Instruction>(user)) {
if (userInst->getParent()->getName().str() == BBName)
if (userInst->getParent()->getName().str().find(BBName) !=
std::string::npos)
user->replaceUsesOfWith(linearOrigVal[varIndex],
linearLoopBodyTemps[varIndex]);
}
Expand Down Expand Up @@ -2849,17 +2871,6 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

// Create linear variables and initialize linear step
LinearClauseProcessor linearClauseProcessor;

for (mlir::Value linearVar : simdOp.getLinearVars()) {
if (failed(linearClauseProcessor.createLinearVar(builder, moduleTranslation,
linearVar, opInst)))
return failure();
}
for (mlir::Value linearStep : simdOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);

llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
builder, moduleTranslation, privateVarsInfo, allocaIP);
if (handleError(afterAllocas, opInst).failed())
Expand All @@ -2876,6 +2887,31 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
.failed())
return failure();

LinearClauseProcessor linearClauseProcessor;

// Create linear variables and initialize linear step
for (mlir::Value linearVar : simdOp.getLinearVars()) {
bool isImplicit = false;
for (auto [mlirPrivVar, llvmPrivateVar] :
llvm::zip_equal(privateVarsInfo.mlirVars, privateVarsInfo.llvmVars)) {
if (linearVar == mlirPrivVar) {
isImplicit = true;
if (failed(linearClauseProcessor.createLinearVar(
builder, moduleTranslation, llvmPrivateVar, opInst)))
return failure();
break;
}
}
if (!isImplicit) {
if (failed(linearClauseProcessor.createLinearVar(
builder, moduleTranslation, linearVar, opInst)))
return failure();
}
}

for (mlir::Value linearStep : simdOp.getLinearStepVars())
linearClauseProcessor.initLinearStep(moduleTranslation, linearStep);

// No call to copyFirstPrivateVars because FIRSTPRIVATE is not allowed for
// SIMD.

Expand Down
Loading