Skip to content

Commit 2f0a69c

Browse files
Meinersbur authored and tstellar committed
[OpenMP] Fix partial unrolling off-by-one.
Even though the comment description is ".unroll_inner.iv < NumIterations", the code emitted a BO_LE ('<=') operator for the inner loop that is to be unrolled. This led to one additional copy of the body code in a partially unrolled loop. It only manifests when the unrolled loop is consumed by another loop-associated construct. Fix by using the BO_LT operator instead. The condition for the outer loop and the corresponding code for tiling correctly used BO_LT already. Fixes #55236
1 parent 79147e4 commit 2f0a69c

9 files changed

+22
-22
lines changed

clang/lib/Sema/SemaOpenMP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13314,11 +13314,11 @@ StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
1331413314
if (!EndOfTile.isUsable())
1331513315
return StmtError();
1331613316
ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
13317-
BO_LE, MakeInnerRef(), EndOfTile.get());
13317+
BO_LT, MakeInnerRef(), EndOfTile.get());
1331813318
if (!InnerCond1.isUsable())
1331913319
return StmtError();
1332013320
ExprResult InnerCond2 =
13321-
BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LE, MakeInnerRef(),
13321+
BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(),
1332213322
MakeNumIterations());
1332313323
if (!InnerCond2.isUsable())
1332413324
return StmtError();

clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,12 @@
106106
// CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
107107
// CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
108108
// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4
109-
// CHECK-NEXT: %[[CMP22:.+]] = icmp sle i32 %[[TMP15]], %[[ADD21]]
109+
// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]]
110110
// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
111111
// CHECK-EMPTY:
112112
// CHECK-NEXT: [[LAND_RHS]]:
113113
// CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
114-
// CHECK-NEXT: %[[CMP24:.+]] = icmp sle i32 %[[TMP17]], 8
114+
// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8
115115
// CHECK-NEXT: br label %[[LAND_END]]
116116
// CHECK-EMPTY:
117117
// CHECK-NEXT: [[LAND_END]]:

clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,11 @@ void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d,
121121
// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4
122122
// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTUNROLLED_IV_J7]], align 4
123123
// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP16]], 2
124-
// CHECK-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP15]], [[ADD21]]
124+
// CHECK-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP15]], [[ADD21]]
125125
// CHECK-NEXT: br i1 [[CMP22]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]]
126126
// CHECK: land.rhs:
127127
// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTUNROLL_INNER_IV_J]], align 4
128-
// CHECK-NEXT: [[CMP24:%.*]] = icmp sle i32 [[TMP17]], 8
128+
// CHECK-NEXT: [[CMP24:%.*]] = icmp slt i32 [[TMP17]], 8
129129
// CHECK-NEXT: br label [[LAND_END]]
130130
// CHECK: land.end:
131131
// CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP24]], [[LAND_RHS]] ]

clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,14 @@ extern "C" void body(...) {}
176176
// IR-NEXT: %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
177177
// IR-NEXT: %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
178178
// IR-NEXT: %[[ADD50:.+]] = add i32 %[[TMP40]], 2
179-
// IR-NEXT: %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]]
179+
// IR-NEXT: %[[CMP51:.+]] = icmp ult i32 %[[TMP39]], %[[ADD50]]
180180
// IR-NEXT: br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
181181
// IR-EMPTY:
182182
// IR-NEXT: [[LAND_RHS]]:
183183
// IR-NEXT: %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
184184
// IR-NEXT: %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
185185
// IR-NEXT: %[[ADD52:.+]] = add i32 %[[TMP42]], 1
186-
// IR-NEXT: %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]]
186+
// IR-NEXT: %[[CMP53:.+]] = icmp ult i32 %[[TMP41]], %[[ADD52]]
187187
// IR-NEXT: br label %[[LAND_END]]
188188
// IR-EMPTY:
189189
// IR-NEXT: [[LAND_END]]:

clang/test/OpenMP/unroll_codegen_for_partial.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ extern "C" void body(...) {}
114114
// IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
115115
// IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
116116
// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP22]], 2
117-
// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]]
117+
// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP21]], %[[ADD17]]
118118
// IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
119119
// IR-EMPTY:
120120
// IR-NEXT: [[LAND_RHS]]:
121121
// IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
122122
// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
123123
// IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP24]], 1
124-
// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]]
124+
// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP23]], %[[ADD19]]
125125
// IR-NEXT: br label %[[LAND_END]]
126126
// IR-EMPTY:
127127
// IR-NEXT: [[LAND_END]]:

clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,14 +143,14 @@ extern "C" void func(int start, int end, int step) {
143143
// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
144144
// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
145145
// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP27]], 7
146-
// IR-NEXT: %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]]
146+
// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]]
147147
// IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
148148
// IR-EMPTY:
149149
// IR-NEXT: [[LAND_RHS]]:
150150
// IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
151151
// IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
152152
// IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP29]], 1
153-
// IR-NEXT: %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]]
153+
// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP28]], %[[ADD19]]
154154
// IR-NEXT: br label %[[LAND_END]]
155155
// IR-EMPTY:
156156
// IR-NEXT: [[LAND_END]]:

clang/test/OpenMP/unroll_codegen_tile_for.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,14 @@ extern "C" void body(...) {}
162162
// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
163163
// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
164164
// IR-NEXT: %[[ADD36:.+]] = add i32 %[[TMP32]], 2
165-
// IR-NEXT: %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]]
165+
// IR-NEXT: %[[CMP37:.+]] = icmp ult i32 %[[TMP31]], %[[ADD36]]
166166
// IR-NEXT: br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
167167
// IR-EMPTY:
168168
// IR-NEXT: [[LAND_RHS]]:
169169
// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
170170
// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
171171
// IR-NEXT: %[[ADD38:.+]] = add i32 %[[TMP34]], 1
172-
// IR-NEXT: %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]]
172+
// IR-NEXT: %[[CMP39:.+]] = icmp ult i32 %[[TMP33]], %[[ADD38]]
173173
// IR-NEXT: br label %[[LAND_END]]
174174
// IR-EMPTY:
175175
// IR-NEXT: [[LAND_END]]:

clang/test/OpenMP/unroll_codegen_unroll_for.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,14 @@ extern "C" void body(...) {}
129129
// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
130130
// IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
131131
// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2
132-
// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
132+
// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]]
133133
// IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
134134
// IR-EMPTY:
135135
// IR-NEXT: [[LAND_RHS]]:
136136
// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
137137
// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
138138
// IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1
139-
// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
139+
// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]]
140140
// IR-NEXT: br label %[[LAND_END]]
141141
// IR-EMPTY:
142142
// IR-NEXT: [[LAND_END]]:
@@ -156,14 +156,14 @@ extern "C" void body(...) {}
156156
// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
157157
// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
158158
// IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2
159-
// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
159+
// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]]
160160
// IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
161161
// IR-EMPTY:
162162
// IR-NEXT: [[LAND_RHS32]]:
163163
// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
164164
// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
165165
// IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1
166-
// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
166+
// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]]
167167
// IR-NEXT: br label %[[LAND_END35]]
168168
// IR-EMPTY:
169169
// IR-NEXT: [[LAND_END35]]:

clang/test/OpenMP/unroll_codegen_unroll_for_attr.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,14 @@ extern "C" void body(...) {}
129129
// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
130130
// IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
131131
// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP25]], 2
132-
// IR-NEXT: %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
132+
// IR-NEXT: %[[CMP24:.+]] = icmp ult i32 %[[TMP24]], %[[ADD23]]
133133
// IR-NEXT: br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
134134
// IR-EMPTY:
135135
// IR-NEXT: [[LAND_RHS]]:
136136
// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
137137
// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
138138
// IR-NEXT: %[[ADD25:.+]] = add i32 %[[TMP27]], 1
139-
// IR-NEXT: %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
139+
// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP26]], %[[ADD25]]
140140
// IR-NEXT: br label %[[LAND_END]]
141141
// IR-EMPTY:
142142
// IR-NEXT: [[LAND_END]]:
@@ -156,14 +156,14 @@ extern "C" void body(...) {}
156156
// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
157157
// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
158158
// IR-NEXT: %[[ADD30:.+]] = add i32 %[[TMP32]], 2
159-
// IR-NEXT: %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
159+
// IR-NEXT: %[[CMP31:.+]] = icmp ult i32 %[[TMP31]], %[[ADD30]]
160160
// IR-NEXT: br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
161161
// IR-EMPTY:
162162
// IR-NEXT: [[LAND_RHS32]]:
163163
// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
164164
// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
165165
// IR-NEXT: %[[ADD33:.+]] = add i32 %[[TMP34]], 1
166-
// IR-NEXT: %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
166+
// IR-NEXT: %[[CMP34:.+]] = icmp ult i32 %[[TMP33]], %[[ADD33]]
167167
// IR-NEXT: br label %[[LAND_END35]]
168168
// IR-EMPTY:
169169
// IR-NEXT: [[LAND_END35]]:

0 commit comments

Comments
 (0)