-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Flang] Hoist concurrent-limit and concurrent-step expressions outside the outer most do concurrent loop #111665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2012,6 +2012,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
| IncrementLoopNestInfo incrementLoopNestInfo; | ||
| const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; | ||
| bool infiniteLoop = !loopControl.has_value(); | ||
| bool isConcurrent = false; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks like the existing |
||
| if (infiniteLoop) { | ||
| assert(unstructuredContext && "infinite loop must be unstructured"); | ||
| startBlock(headerBlock); | ||
|
|
@@ -2042,6 +2043,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
| std::get_if<Fortran::parser::LoopControl::Concurrent>( | ||
| &loopControl->u); | ||
| assert(concurrent && "invalid DO loop variant"); | ||
| isConcurrent = true; | ||
| incrementLoopNestInfo = getConcurrentControl( | ||
| std::get<Fortran::parser::ConcurrentHeader>(concurrent->t), | ||
| std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent->t)); | ||
|
|
@@ -2070,7 +2072,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
|
|
||
| // Increment loop begin code. (Infinite/while code was already generated.) | ||
| if (!infiniteLoop && !whileCondition) | ||
| genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); | ||
| genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs, | ||
| isConcurrent); | ||
|
|
||
| // Loop body code. | ||
| auto iter = eval.getNestedEvaluations().begin(); | ||
|
|
@@ -2128,12 +2131,26 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
| /// Generate FIR to begin a structured or unstructured increment loop nest. | ||
| void genFIRIncrementLoopBegin( | ||
| IncrementLoopNestInfo &incrementLoopNestInfo, | ||
| llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) { | ||
| llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs, | ||
| bool isConcurrent) { | ||
| assert(!incrementLoopNestInfo.empty() && "empty loop nest"); | ||
| mlir::Location loc = toLocation(); | ||
| Fortran::lower::pft::Evaluation &eval = getEval(); | ||
| Fortran::lower::pft::Evaluation *outermostEval = nullptr; | ||
| if (isConcurrent) { | ||
| outermostEval = &eval; | ||
| while (outermostEval->parentConstruct) { | ||
| outermostEval = outermostEval->parentConstruct; | ||
| } | ||
| } | ||
| mlir::OpBuilder::InsertPoint insertPt; | ||
| for (IncrementLoopInfo &info : incrementLoopNestInfo) { | ||
| info.loopVariable = | ||
| genLoopVariableAddress(loc, *info.loopVariableSym, info.isUnordered); | ||
| if (outermostEval && outermostEval->op) { | ||
| insertPt = builder->saveInsertionPoint(); | ||
| builder->setInsertionPoint(outermostEval->op); | ||
| } | ||
| mlir::Value lowerValue = genControlValue(info.lowerExpr, info); | ||
| mlir::Value upperValue = genControlValue(info.upperExpr, info); | ||
| bool isConst = true; | ||
|
|
@@ -2144,7 +2161,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
| info.stepVariable = builder->createTemporary(loc, stepValue.getType()); | ||
| builder->create<fir::StoreOp>(loc, stepValue, info.stepVariable); | ||
| } | ||
|
|
||
| if (outermostEval && outermostEval->op) | ||
| builder->restoreInsertionPoint(insertPt); | ||
| // Structured loop - generate fir.do_loop. | ||
| if (info.isStructured()) { | ||
| mlir::Type loopVarType = info.getLoopVariableType(); | ||
|
|
@@ -2179,6 +2197,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { | |
| builder->setInsertionPointToStart(info.doLoop.getBody()); | ||
| loopValue = info.doLoop.getRegionIterArgs()[0]; | ||
| } | ||
| eval.op = info.doLoop; | ||
| // Update the loop variable value in case it has non-index references. | ||
| builder->create<fir::StoreOp>(loc, loopValue, info.loopVariable); | ||
| if (info.maskExpr) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| ! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s | ||
|
|
||
| ! Simple tests for structured concurrent loops with loop-control. | ||
|
|
||
| pure function bar(n, m) | ||
| implicit none | ||
| integer, intent(in) :: n, m | ||
| integer :: bar | ||
| bar = n + m | ||
| end function | ||
|
|
||
| subroutine sub1(n) | ||
| implicit none | ||
| integer :: n, m, i, j | ||
| integer, dimension(n) :: a | ||
| !CHECK: %[[LB1:.*]] = arith.constant 1 : i32 | ||
| !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index | ||
| !CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32> | ||
| !CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index | ||
| !CHECK: %[[LB2:.*]] = arith.constant 1 : i32 | ||
| !CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index | ||
| !CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32 | ||
| !CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index | ||
| !CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered | ||
| !CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered | ||
| do concurrent(i=1:n, j=1:bar(n*m, n/m)) | ||
| a(i) = n | ||
| end do | ||
| end subroutine | ||
|
|
||
| subroutine sub2(n) | ||
| implicit none | ||
| integer :: n, m, i, j | ||
| integer, dimension(n) :: a | ||
| !CHECK: %[[LB1:.*]] = arith.constant 1 : i32 | ||
| !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index | ||
| !CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref<i32> | ||
| !CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index | ||
| !CHECK: %[[LB2:.*]] = arith.constant 1 : i32 | ||
| !CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index | ||
| !CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>, !fir.ref<i32>) -> i32 | ||
| !CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index | ||
| !CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered | ||
| !CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered | ||
| do concurrent(i=1:n) | ||
| do concurrent(j=1:bar(n*m, n/m)) | ||
| a(i) = n | ||
| end do | ||
| end do | ||
| end subroutine | ||
|
Comment on lines
+31
to
+50
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This case is different than sub1 since 11.1.7.4.2 does not say that the concurrent limit and steps of all nested do concurrent construct must be performed before the outter constructs. You can have: This is a valid program, and with your current patch it will compile to invalid code that will most likely segfault at runtime. |
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If something like this is needed, it probably belongs in
struct IncrementLoopInfonear the top ofBridge.cpp. But it probably isn't needed - see other comments.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1, there is no one-to-one mapping between a pft::Evaluation and an operation, so it looks weird to me to set this field here in a very generic data structure for a limited use case.