Skip to content

Commit 6dc21da

Browse files
committed
[OpenMPIRBuilder] Fix tripcount not a multiple of tile size
1 parent c9bb3bd commit 6dc21da

6 files changed

+28
-27
lines changed

clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
7979
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8080
// CHECK: omp_floor0.body:
8181
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
82-
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
82+
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
8383
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13
8484
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
8585
// CHECK: omp_tile0.preheader:
@@ -152,11 +152,11 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
152152
// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
153153
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
154154
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
155-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
155+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
156156
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
157157
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4
158158
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
159-
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
159+
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]]
160160
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
161161
// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
162162
// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4
@@ -178,7 +178,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
178178
// CHECK-NEXT: br label [[COND_END]]
179179
// CHECK: cond.end:
180180
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
181-
// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
181+
// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
182182
// CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4
183183
// CHECK-NEXT: ret void
184184
//
@@ -198,7 +198,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
198198
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
199199
// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]]
200200
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
201-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
201+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
202202
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
203203
// CHECK-NEXT: ret void
204204
//

clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
8585
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8686
// CHECK: omp_floor0.body:
8787
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
88-
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
88+
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
8989
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4
9090
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
9191
// CHECK: omp_tile0.preheader:
@@ -176,7 +176,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
176176
// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
177177
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
178178
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
179-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
179+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
180180
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
181181
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4
182182
// CHECK-NEXT: store i32 128, ptr [[DOTSTOP]], align 4
@@ -199,7 +199,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
199199
// CHECK-NEXT: br label [[COND_END]]
200200
// CHECK: cond.end:
201201
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
202-
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
202+
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
203203
// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4
204204
// CHECK-NEXT: ret void
205205
//
@@ -219,7 +219,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
219219
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
220220
// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]]
221221
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
222-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
222+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
223223
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
224224
// CHECK-NEXT: ret void
225225
//

clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
8787
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8888
// CHECK: omp_floor0.body:
8989
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
90-
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
90+
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
9191
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4
9292
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
9393
// CHECK: omp_tile0.preheader:
@@ -178,11 +178,11 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
178178
// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
179179
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
180180
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
181-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
181+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
182182
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
183183
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4
184184
// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
185-
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
185+
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]]
186186
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
187187
// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4
188188
// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4
@@ -204,7 +204,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
204204
// CHECK-NEXT: br label [[COND_END]]
205205
// CHECK: cond.end:
206206
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
207-
// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
207+
// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
208208
// CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4
209209
// CHECK-NEXT: ret void
210210
//
@@ -224,7 +224,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
224224
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
225225
// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]]
226226
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
227-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
227+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
228228
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
229229
// CHECK-NEXT: ret void
230230
//

clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
7575
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
7676
// CHECK: omp_floor0.body:
7777
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
78-
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
78+
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
7979
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2
8080
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
8181
// CHECK: omp_tile0.preheader:
@@ -148,7 +148,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
148148
// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
149149
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
150150
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
151-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
151+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
152152
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
153153
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4
154154
// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4
@@ -171,7 +171,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
171171
// CHECK-NEXT: br label [[COND_END]]
172172
// CHECK: cond.end:
173173
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
174-
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
174+
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
175175
// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4
176176
// CHECK-NEXT: ret void
177177
//
@@ -191,7 +191,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
191191
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
192192
// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]]
193193
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
194-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
194+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
195195
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
196196
// CHECK-NEXT: ret void
197197
//

clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
5959
// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
6060
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
6161
// CHECK: omp_floor0.body:
62-
// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
62+
// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]]
6363
// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8
6464
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
6565
// CHECK: omp_tile0.preheader:
@@ -129,7 +129,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
129129
// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
130130
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
131131
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
132-
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
132+
// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
133133
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
134134
// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4
135135
// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4
@@ -152,7 +152,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
152152
// CHECK-NEXT: br label [[COND_END]]
153153
// CHECK: cond.end:
154154
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ]
155-
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8
155+
// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
156156
// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4
157157
// CHECK-NEXT: ret void
158158
//
@@ -172,7 +172,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
172172
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4
173173
// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]]
174174
// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]]
175-
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8
175+
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
176176
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
177177
// CHECK-NEXT: ret void
178178
//

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5582,13 +5582,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
55825582
// Compute the trip counts of the floor loops.
55835583
Builder.SetCurrentDebugLocation(DL);
55845584
Builder.restoreIP(OutermostLoop->getPreheaderIP());
5585-
SmallVector<Value *, 4> FloorCount, FloorRems;
5585+
SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
55865586
for (int i = 0; i < NumLoops; ++i) {
55875587
Value *TileSize = TileSizes[i];
55885588
Value *OrigTripCount = OrigTripCounts[i];
55895589
Type *IVType = OrigTripCount->getType();
55905590

5591-
Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
5591+
Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
55925592
Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
55935593

55945594
// 0 if the tile size evenly divides the trip count, 1 otherwise.
@@ -5602,11 +5602,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
56025602
Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
56035603

56045604
FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5605-
FloorTripCount =
5606-
Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
5605+
Value *FloorTripCount =
5606+
Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
56075607
"omp_floor" + Twine(i) + ".tripcount", true);
56085608

56095609
// Remember some values for later use.
5610+
FloorCompleteCount.push_back(FloorCompleteTripCount);
56105611
FloorCount.push_back(FloorTripCount);
56115612
FloorRems.push_back(FloorTripRem);
56125613
}
@@ -5661,7 +5662,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
56615662
Value *TileSize = TileSizes[i];
56625663

56635664
Value *FloorIsEpilogue =
5664-
Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
5665+
Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
56655666
Value *TileTripCount =
56665667
Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
56675668

0 commit comments

Comments
 (0)