Skip to content

Commit e9a27a3

Browse files
authored
[OpenMPIRBuilder] Fix tripcount not a multiple of tile size (llvm#154999)
The emitted code tests whether the current tile should execute the remainder iterations by checking whether the logical iteration number is the one after the floor iterations that execute the non-remainder iterations. There are two counts of how many iterations there are: the count of non-remainder iterations (simply the rounded-down division of the tripcount by the tile size), and the count that includes an additional floor iteration for the remainder iterations. The code used the wrong one, which caused the condition to never match.
1 parent 5c852fc commit e9a27a3

6 files changed

+11
-10
lines changed

clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d)
7979
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8080
// CHECK: omp_floor0.body:
8181
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
82-
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
82+
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
8383
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13
8484
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
8585
// CHECK: omp_tile0.preheader:

clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float *
8585
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8686
// CHECK: omp_floor0.body:
8787
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
88-
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
88+
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
8989
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4
9090
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
9191
// CHECK: omp_tile0.preheader:

clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f
8787
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
8888
// CHECK: omp_floor0.body:
8989
// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]]
90-
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]]
90+
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]]
9191
// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4
9292
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
9393
// CHECK: omp_tile0.preheader:

clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) {
7575
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
7676
// CHECK: omp_floor0.body:
7777
// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]]
78-
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]]
78+
// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]]
7979
// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2
8080
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
8181
// CHECK: omp_tile0.preheader:

clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) {
5959
// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
6060
// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]]
6161
// CHECK: omp_floor0.body:
62-
// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]]
62+
// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]]
6363
// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8
6464
// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]]
6565
// CHECK: omp_tile0.preheader:

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5581,13 +5581,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
55815581
// Compute the trip counts of the floor loops.
55825582
Builder.SetCurrentDebugLocation(DL);
55835583
Builder.restoreIP(OutermostLoop->getPreheaderIP());
5584-
SmallVector<Value *, 4> FloorCount, FloorRems;
5584+
SmallVector<Value *, 4> FloorCompleteCount, FloorCount, FloorRems;
55855585
for (int i = 0; i < NumLoops; ++i) {
55865586
Value *TileSize = TileSizes[i];
55875587
Value *OrigTripCount = OrigTripCounts[i];
55885588
Type *IVType = OrigTripCount->getType();
55895589

5590-
Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
5590+
Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
55915591
Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
55925592

55935593
// 0 if the tile size evenly divides the tripcount, 1 otherwise.
@@ -5601,11 +5601,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
56015601
Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
56025602

56035603
FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5604-
FloorTripCount =
5605-
Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
5604+
Value *FloorTripCount =
5605+
Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
56065606
"omp_floor" + Twine(i) + ".tripcount", true);
56075607

56085608
// Remember some values for later use.
5609+
FloorCompleteCount.push_back(FloorCompleteTripCount);
56095610
FloorCount.push_back(FloorTripCount);
56105611
FloorRems.push_back(FloorTripRem);
56115612
}
@@ -5660,7 +5661,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
56605661
Value *TileSize = TileSizes[i];
56615662

56625663
Value *FloorIsEpilogue =
5663-
Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
5664+
Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
56645665
Value *TileTripCount =
56655666
Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
56665667

0 commit comments

Comments
 (0)