Skip to content

Commit d233f7f

Browse files
nikic authored and memfrob committed
[SCEVExpander] Simplify pointer overflow check
This is a follow-up to D104662 to generate slightly nicer code for pointer overflow checks. Bypass expandAddToGEP and instead explicitly generate i8 GEPs. This saves some bitcasts and negates the value in a more obvious way. In particular, this prevents SCEV from looking through the umul.with.overflow, same as in the integer case.

The wrapping-pointer-ni.ll test deserves a comment: Previously, this generated a typed GEP which used the umulo argument rather than the multiplication result. This results in more compact IR in that case, but effectively does the multiplication twice; the second one is just hidden in the GEP. Reusing the umulo result seems pretty reasonable to me.

Differential Revision: https://reviews.llvm.org/D109093
1 parent 15cb4c4 commit d233f7f

File tree

4 files changed

+87
-108
lines changed

4 files changed

+87
-108
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2490,12 +2490,11 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
24902490
// Start - |Step| * Backedge > Start
24912491
Value *Add = nullptr, *Sub = nullptr;
24922492
if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
2493-
const SCEV *MulS = SE.getSCEV(MulV);
2494-
const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
2495-
Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
2496-
ARPtrTy);
2497-
Sub = Builder.CreateBitCast(
2498-
expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
2493+
StartValue = InsertNoopCastOfTo(
2494+
StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
2495+
Value *NegMulV = Builder.CreateNeg(MulV);
2496+
Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
2497+
Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
24992498
} else {
25002499
Add = Builder.CreateAdd(StartValue, MulV);
25012500
Sub = Builder.CreateSub(StartValue, MulV);

llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,11 @@ define void @"japi1_align!_9477"(%jl_value_t addrspace(10)** %arg) {
1717
; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
1818
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
1919
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
20-
; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
21-
; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
22-
; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
23-
; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
20+
; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]]
21+
; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]]
22+
; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]]
23+
; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]]
24+
; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]]
2425
; LV-NOT: inttoptr
2526
; LV-NOT: ptrtoint
2627
top:

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -29,18 +29,15 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
2929
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
3030
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
3131
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
32-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
33-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
34-
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
35-
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 8, [[TMP12]]
36-
; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
37-
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i32*
38-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i32* [[TMP14]], [[A]]
39-
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i32* [[TMP11]], [[A]]
40-
; CHECK-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
41-
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
42-
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
43-
; CHECK-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
32+
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
33+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
34+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
35+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
36+
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
37+
; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
38+
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
39+
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
40+
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
4441
; CHECK: for.body.ph.lver.orig:
4542
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
4643
; CHECK: for.body.lver.orig:
@@ -101,10 +98,10 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %
10198
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
10299
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
103100
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
104-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
101+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
105102
; CHECK: for.end.loopexit:
106103
; CHECK-NEXT: br label [[FOR_END:%.*]]
107-
; CHECK: for.end.loopexit7:
104+
; CHECK: for.end.loopexit6:
108105
; CHECK-NEXT: br label [[FOR_END]]
109106
; CHECK: for.end:
110107
; CHECK-NEXT: ret void
@@ -181,18 +178,15 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
181178
; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
182179
; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
183180
; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
184-
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
185-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to [8192 x i32]*
186-
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
187-
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 8, [[TMP12]]
188-
; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP13]]
189-
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP5]] to [8192 x i32]*
190-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt [8192 x i32]* [[TMP14]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
191-
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult [8192 x i32]* [[TMP11]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
192-
; CHECK-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
193-
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
194-
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
195-
; CHECK-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
181+
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
182+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
183+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
184+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
185+
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
186+
; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
187+
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
188+
; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
189+
; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
196190
; CHECK: for.body.ph.lver.orig:
197191
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
198192
; CHECK: for.body.lver.orig:
@@ -253,10 +247,10 @@ define void @f_with_offset(i32* noalias %b, i32* noalias %c, i32* noalias %d, i3
253247
; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
254248
; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
255249
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
256-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
250+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
257251
; CHECK: for.end.loopexit:
258252
; CHECK-NEXT: br label [[FOR_END:%.*]]
259-
; CHECK: for.end.loopexit6:
253+
; CHECK: for.end.loopexit5:
260254
; CHECK-NEXT: br label [[FOR_END]]
261255
; CHECK: for.end:
262256
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)