Skip to content

Commit e573c6b

Browse files
authored
[flang] Add nsw to DO loop parameters (#113854)
The `nsw` (no signed wrap) overflow flag is added to the integer arithmetic generated for DO loop parameters (initial, terminal, and increment parameters), unless integer wrap-around is enabled in the lowering options. This can help vectorization in some cases, such as #110609. See also the discussion at https://discourse.llvm.org/t/rfc-add-nsw-flags-to-arithmetic-integer-operations-using-the-option-fno-wrapv/77584/20.
1 parent 21af99a commit e573c6b

File tree

4 files changed

+107
-4
lines changed

4 files changed

+107
-4
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,6 +2137,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
21372137
assert(!incrementLoopNestInfo.empty() && "empty loop nest");
21382138
mlir::Location loc = toLocation();
21392139
mlir::Operation *boundsAndStepIP = nullptr;
2140+
mlir::arith::IntegerOverflowFlags iofBackup{};
21402141

21412142
for (IncrementLoopInfo &info : incrementLoopNestInfo) {
21422143
mlir::Value lowerValue;
@@ -2153,11 +2154,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
21532154

21542155
info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym,
21552156
info.isUnordered);
2157+
if (!getLoweringOptions().getIntegerWrapAround()) {
2158+
iofBackup = builder->getIntegerOverflowFlags();
2159+
builder->setIntegerOverflowFlags(
2160+
mlir::arith::IntegerOverflowFlags::nsw);
2161+
}
21562162
lowerValue = genControlValue(info.lowerExpr, info);
21572163
upperValue = genControlValue(info.upperExpr, info);
21582164
bool isConst = true;
21592165
stepValue = genControlValue(info.stepExpr, info,
21602166
info.isStructured() ? nullptr : &isConst);
2167+
if (!getLoweringOptions().getIntegerWrapAround())
2168+
builder->setIntegerOverflowFlags(iofBackup);
21612169
boundsAndStepIP = stepValue.getDefiningOp();
21622170

21632171
// Use a temp variable for unstructured loops with non-const step.

flang/test/Lower/HLFIR/goto-do-body.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ subroutine sub2()
8383

8484
do i = 1, 2, 3 * j - 8
8585
! CHECK: %[[TMP2:.*]] = fir.load %[[J]]#0 : !fir.ref<i32>
86-
! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] : i32
87-
! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] : i32
86+
! CHECK: %[[TMP3:.*]] = arith.muli %[[TMP2]], %[[C3]] overflow<nsw> : i32
87+
! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8]] overflow<nsw> : i32
8888
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
8989
! CHECK: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[C_7]] : i32
9090
! CHECK: %[[TMP5:.*]] = arith.divsi %[[TMP4]], %[[STEP]] : i32

flang/test/Lower/goto-do-body.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ subroutine sub2()
9090
! CHECK: %[[C2_2:.*]] = arith.constant 2 : i32
9191
! CHECK: %[[C3_2:.*]] = arith.constant 3 : i32
9292
! CHECK: %[[TMP2:.*]] = fir.load %[[J]] : !fir.ref<i32>
93-
! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] : i32
93+
! CHECK: %[[TMP3:.*]] = arith.muli %[[C3_2]], %[[TMP2]] overflow<nsw> : i32
9494
! CHECK: %[[C8_1:.*]] = arith.constant 8 : i32
95-
! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] : i32
95+
! CHECK: %[[STEP:.*]] = arith.subi %[[TMP3]], %[[C8_1]] overflow<nsw> : i32
9696
! CHECK: fir.store %[[STEP]] to %[[STEP_VAR:.*]] : !fir.ref<i32>
9797
! CHECK: %[[TMP4:.*]] = arith.subi %[[C2_2]], %[[C1_1]] : i32
9898
! CHECK: %[[TMP5:.*]] = arith.addi %[[TMP4]], %[[STEP]] : i32

flang/test/Lower/nsw.f90

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,98 @@ subroutine bitwise_comparison(a, b)
5959
! CHECK-LABEL: func.func @_QPbitwise_comparison(
6060
! CHECK-NOT: overflow<nsw>
6161
! CHECK: return
62+
63+
subroutine loop_params(a,lb,ub,st)
64+
integer :: i, lb, ub, st
65+
integer :: a(lb:ub)
66+
do i = lb+1, ub-1, st*2
67+
a(i) = i
68+
end do
69+
end subroutine
70+
! CHECK-LABEL: func.func @_QPloop_params(
71+
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
72+
! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
73+
! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
74+
! CHECK: %[[VAL_10:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
75+
! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
76+
! CHECK: %[[VAL_13:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
77+
! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
78+
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
79+
! CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] overflow<nsw> : i32
80+
! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i32) -> index
81+
! CHECK: %[[VAL_27:.*]] = arith.subi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
82+
! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
83+
! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_13]] : !fir.ref<i32>
84+
! CHECK: %[[VAL_30:.*]] = arith.muli %[[VAL_29]], %[[VAL_4]] overflow<nsw> : i32
85+
! CHECK: %[[VAL_31:.*]] = fir.convert %[[VAL_30]] : (i32) -> index
86+
! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
87+
! CHECK: %[[VAL_33:.*]]:2 = fir.do_loop %[[VAL_34:.*]] = %[[VAL_26]] to %[[VAL_28]] step %[[VAL_31]] iter_args(%[[VAL_35:.*]] = %[[VAL_32]]) -> (index, i32) {
88+
89+
subroutine loop_params2(a,lb,ub,st)
90+
integer :: i, lb, ub, st
91+
integer :: a(lb:ub)
92+
real :: ii
93+
do ii = lb+1, ub-1, st*2
94+
i = ii
95+
a(i) = i
96+
end do
97+
end subroutine
98+
! CHECK-LABEL: func.func @_QPloop_params2(
99+
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
100+
! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
101+
! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
102+
! CHECK: %[[VAL_8:.*]] = fir.alloca index
103+
! CHECK: %[[VAL_9:.*]] = fir.alloca f32
104+
! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
105+
! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
106+
! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
107+
! CHECK: %[[VAL_16:.*]] = fir.declare %{{.*}}ii"} : (!fir.ref<f32>) -> !fir.ref<f32>
108+
! CHECK: %[[VAL_17:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
109+
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
110+
! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
111+
! CHECK: %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
112+
! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> f32
113+
! CHECK: %[[VAL_31:.*]] = arith.subi %[[VAL_20]], %[[VAL_5]] overflow<nsw> : i32
114+
! CHECK: %[[VAL_32:.*]] = fir.convert %[[VAL_31]] : (i32) -> f32
115+
! CHECK: %[[VAL_33:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
116+
! CHECK: %[[VAL_34:.*]] = arith.muli %[[VAL_33]], %[[VAL_4]] overflow<nsw> : i32
117+
! CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_34]] : (i32) -> f32
118+
! CHECK: fir.store %[[VAL_35]] to %[[VAL_9]] : !fir.ref<f32>
119+
! CHECK: %[[VAL_36:.*]] = arith.subf %[[VAL_32]], %[[VAL_30]] fastmath<contract> : f32
120+
! CHECK: %[[VAL_37:.*]] = arith.addf %[[VAL_36]], %[[VAL_35]] fastmath<contract> : f32
121+
! CHECK: %[[VAL_38:.*]] = arith.divf %[[VAL_37]], %[[VAL_35]] fastmath<contract> : f32
122+
! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (f32) -> index
123+
! CHECK: fir.store %[[VAL_39]] to %[[VAL_8]] : !fir.ref<index>
124+
! CHECK: fir.store %[[VAL_30]] to %[[VAL_16]] : !fir.ref<f32>
125+
! CHECK: cf.br ^bb1
126+
! CHECK: ^bb1:
127+
! CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_8]] : !fir.ref<index>
128+
! CHECK: %[[VAL_41:.*]] = arith.cmpi sgt, %[[VAL_40]], %[[VAL_6]] : index
129+
! CHECK: cf.cond_br %[[VAL_41]], ^bb2, ^bb3
130+
! CHECK: ^bb2:
131+
132+
subroutine loop_params3(a,lb,ub,st)
133+
integer :: i, lb, ub, st
134+
integer :: a(lb:ub)
135+
do concurrent (i=lb+1:ub-1:st*2)
136+
a(i) = i
137+
end do
138+
end subroutine
139+
! CHECK-LABEL: func.func @_QPloop_params3(
140+
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
141+
! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32
142+
! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
143+
! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
144+
! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
145+
! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref<i32>) -> !fir.ref<i32>
146+
! CHECK: %[[VAL_15:.*]] = fir.declare %{{.*}}st"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
147+
! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_11]] : !fir.ref<i32>
148+
! CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_12]] : !fir.ref<i32>
149+
! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] overflow<nsw> : i32
150+
! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
151+
! CHECK: %[[VAL_29:.*]] = arith.subi %[[VAL_18]], %[[VAL_5]] overflow<nsw> : i32
152+
! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_29]] : (i32) -> index
153+
! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
154+
! CHECK: %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow<nsw> : i32
155+
! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index
156+
! CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered {

0 commit comments

Comments
 (0)