From 6dc21da545dc46323eea681f640a95204a4d39f7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 22 Aug 2025 19:54:03 +0200 Subject: [PATCH 1/2] [OpenMPIRBuilder] Fix tripcount not a multiple of tile size --- .../test/OpenMP/irbuilder_unroll_partial_factor_for.c | 10 +++++----- .../irbuilder_unroll_partial_heuristic_constant_for.c | 8 ++++---- .../irbuilder_unroll_partial_heuristic_runtime_for.c | 10 +++++----- .../OpenMP/irbuilder_unroll_unroll_partial_factor.c | 8 ++++---- .../irbuilder_unroll_unroll_partial_heuristic.c | 8 ++++---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 11 ++++++----- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c index 8780d51de8a36..79d1c6bb524c9 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -79,7 +79,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]] // CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: @@ -152,11 +152,11 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 @@ -178,7 +178,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 // CHECK-NEXT: ret void // @@ -198,7 +198,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c index 728f67ccf2843..3183aa7bbca74 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -85,7 +85,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]] // CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: @@ -176,7 +176,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 128, ptr [[DOTSTOP]], align 4 @@ -199,7 +199,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -219,7 +219,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c index f41f1fe5ce719..91e56bfb08894 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -87,7 +87,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] -// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[TMP4]] // CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: @@ -178,11 +178,11 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 @@ -204,7 +204,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 // CHECK-NEXT: ret void // @@ -224,7 +224,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c index 3c2407851e5a5..0855ac3c6c1f4 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -75,7 +75,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: // CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] -// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[TMP3]] // CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: @@ -148,7 +148,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 @@ -171,7 +171,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -191,7 +191,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c index a03bd47ca8b8f..5e6123259c917 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -59,7 +59,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] // CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] // CHECK: omp_floor0.body: -// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[TMP3]] // CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8 // CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] // CHECK: omp_tile0.preheader: @@ -129,7 +129,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 @@ -152,7 +152,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META7]], !align [[META8]] // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -172,7 +172,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]] // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 989bcf45e0006..606f2e03821a3 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5582,13 +5582,13 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef Loops, // Compute the trip counts of the floor loops. Builder.SetCurrentDebugLocation(DL); Builder.restoreIP(OutermostLoop->getPreheaderIP()); - SmallVector FloorCount, FloorRems; + SmallVector FloorCompleteCount, FloorCount, FloorRems; for (int i = 0; i < NumLoops; ++i) { Value *TileSize = TileSizes[i]; Value *OrigTripCount = OrigTripCounts[i]; Type *IVType = OrigTripCount->getType(); - Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); + Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount, TileSize); Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize); // 0 if tripcount divides the tilesize, 1 otherwise. @@ -5602,11 +5602,12 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef Loops, Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0)); FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType); - FloorTripCount = - Builder.CreateAdd(FloorTripCount, FloorTripOverflow, + Value *FloorTripCount = + Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow, "omp_floor" + Twine(i) + ".tripcount", true); // Remember some values for later use. + FloorCompleteCount.push_back(FloorCompleteTripCount); FloorCount.push_back(FloorTripCount); FloorRems.push_back(FloorTripRem); } @@ -5661,7 +5662,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef Loops, Value *TileSize = TileSizes[i]; Value *FloorIsEpilogue = - Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]); + Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]); Value *TileTripCount = Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize); From 7e6de456dd28e934f62de4e7c95a30df600a4dfc Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 22 Aug 2025 19:58:53 +0200 Subject: [PATCH 2/2] Reduce change noise --- clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c | 8 ++++---- .../irbuilder_unroll_partial_heuristic_constant_for.c | 6 +++--- .../irbuilder_unroll_partial_heuristic_runtime_for.c | 8 ++++---- .../test/OpenMP/irbuilder_unroll_unroll_partial_factor.c | 6 +++--- .../OpenMP/irbuilder_unroll_unroll_partial_heuristic.c | 6 +++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c index 79d1c6bb524c9..a9514e1e7d145 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -152,11 +152,11 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 @@ -178,7 +178,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 // CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 // CHECK-NEXT: ret void // @@ -198,7 +198,7 @@ void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c index 3183aa7bbca74..8ca000a05792f 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -176,7 +176,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 128, ptr [[DOTSTOP]], align 4 @@ -199,7 +199,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -219,7 +219,7 @@ void unroll_partial_heuristic_constant_for(float *a, float *b, float *c, float * // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c index 91e56bfb08894..5fbcf8f2d030c 100644 --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -178,11 +178,11 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 @@ -204,7 +204,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 // CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 // CHECK-NEXT: ret void // @@ -224,7 +224,7 @@ void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, f // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c index 0855ac3c6c1f4..9a28c0c1bf713 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -148,7 +148,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 @@ -171,7 +171,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -191,7 +191,7 @@ void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META6]], !align [[META7]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c index 5e6123259c917..24d42d265d6a6 100644 --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -129,7 +129,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 // CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 @@ -152,7 +152,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: // CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 // CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 // CHECK-NEXT: ret void // @@ -172,7 +172,7 @@ void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 // CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void //