Skip to content

Commit 90ddc85

Browse files
committed
[InstCombine] Add limit for expansion of gep chains
When converting gep subtraction / comparison to offset subtraction / comparison, avoid expanding very long multi-use gep chains. Another improvement we could make is to expand one-use followed by multi-use gep differently, by rewriting the multi-use gep to include the one-use offsets. But I think we want to have some kind of complexity cut-off in any case.
1 parent a216702 commit 90ddc85

File tree

5 files changed

+167
-2
lines changed

5 files changed

+167
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2146,13 +2146,35 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
21462146
return Base;
21472147
}
21482148

2149+
bool CommonPointerBase::isExpensive() const {
2150+
bool SeenConst = false;
2151+
unsigned NumGEPs = 0;
2152+
auto ProcessGEPs = [&SeenConst, &NumGEPs](ArrayRef<GEPOperator *> GEPs) {
2153+
bool SeenMultiUse = false;
2154+
for (GEPOperator *GEP : GEPs) {
2155+
// Only count GEPs after the first multi-use GEP. For the first one,
2156+
// we will directly reuse the offset.
2157+
if (SeenMultiUse) {
2158+
bool IsConst = GEP->hasAllConstantIndices();
2159+
SeenConst |= IsConst;
2160+
NumGEPs += !IsConst;
2161+
}
2162+
SeenMultiUse |= !GEP->hasOneUse();
2163+
}
2164+
};
2165+
ProcessGEPs(LHSGEPs);
2166+
ProcessGEPs(RHSGEPs);
2167+
NumGEPs += SeenConst;
2168+
return NumGEPs > 2;
2169+
}
2170+
21492171
/// Optimize pointer differences into the same array into a size. Consider:
21502172
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
21512173
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
21522174
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
21532175
Type *Ty, bool IsNUW) {
21542176
CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
2155-
if (!Base.Ptr)
2177+
if (!Base.Ptr || Base.isExpensive())
21562178
return nullptr;
21572179

21582180
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
712712
};
713713

714714
CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
715-
if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
715+
if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
716716
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
717717
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
718718
Value *Offset =

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,9 @@ struct CommonPointerBase {
910910
GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
911911

912912
static CommonPointerBase compute(Value *LHS, Value *RHS);
913+
914+
/// Whether expanding the GEP chains is expensive.
915+
bool isExpensive() const;
913916
};
914917

915918
} // end namespace llvm

llvm/test/Transforms/InstCombine/icmp-gep.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,3 +849,91 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
849849
%cmp = icmp ugt ptr %gep2, %base
850850
ret i1 %cmp
851851
}
852+
853+
; Chain of three variable-index GEPs off %base in which only the outermost
; (%gep3) has an extra user (the call to @use). The CHECK lines expect the
; pointer comparison against %base to be rewritten as a comparison of offsets.
define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
854+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
855+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
856+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
857+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
858+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
859+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
860+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
861+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
862+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
863+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX]]
864+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
865+
; CHECK-NEXT: ret i1 [[CMP]]
866+
;
867+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
868+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
869+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
870+
call void @use(ptr %gep3)
871+
%cmp = icmp eq ptr %gep3, %base
872+
ret i1 %cmp
873+
}
874+
875+
; Four variable-index GEPs: %gep3 has an extra user (the call to @use) and the
; single-use %gep4 is stacked on top of it. The CHECK lines expect the pointer
; comparison against %base to still be rewritten as a comparison of offsets.
define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
876+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep(
877+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
878+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
879+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
880+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
881+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
882+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
883+
; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
884+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
885+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
886+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
887+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
888+
; CHECK-NEXT: ret i1 [[CMP]]
889+
;
890+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
891+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
892+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
893+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
894+
call void @use(ptr %gep3)
895+
%cmp = icmp eq ptr %gep4, %base
896+
ret i1 %cmp
897+
}
898+
899+
; Chain alternating constant-index GEPs (%gep1, %gep3) with variable-index GEPs
; (%gep2, %gep4); the outermost %gep4 has an extra user. The CHECK lines expect
; the comparison to be rewritten on offsets, with the two constant offsets
; appearing as a single combined 32.
define i1 @gep_multiple_multi_use_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
900+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_consts(
901+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
902+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
903+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
904+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
905+
; CHECK-NEXT: [[GEP4_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
906+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
907+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
908+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP2_IDX]], 32
909+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP4_IDX]]
910+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
911+
; CHECK-NEXT: ret i1 [[CMP]]
912+
;
913+
%gep1 = getelementptr i32, ptr %base, i64 4
914+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx1
915+
%gep3 = getelementptr i32, ptr %gep2, i64 4
916+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx2
917+
call void @use(ptr %gep4)
918+
%cmp = icmp eq ptr %gep4, %base
919+
ret i1 %cmp
920+
}
921+
922+
; Negative test: four variable-index GEPs with an extra user on the outermost
; %gep4. The CHECK lines expect the GEPs and the pointer icmp to be left
; unchanged, i.e. no rewrite to an offset comparison.
define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
923+
; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
924+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
925+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX2:%.*]]
926+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[GEP2]], i64 [[IDX3:%.*]]
927+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX4:%.*]]
928+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
929+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
930+
; CHECK-NEXT: ret i1 [[CMP]]
931+
;
932+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
933+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
934+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
935+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
936+
call void @use(ptr %gep4)
937+
%cmp = icmp eq ptr %gep4, %base
938+
ret i1 %cmp
939+
}

llvm/test/Transforms/InstCombine/sub-gep.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,3 +1172,55 @@ define i64 @nuw_ptrdiff_mul_nsw_nneg_scale_multiuse(ptr %base, i64 %idx) {
11721172
%diff = sub nuw i64 %lhs, %rhs
11731173
ret i64 %diff
11741174
}
1175+
1176+
; Pointer difference where each side is a two-GEP chain off %base and the outer
; GEP of each chain (%p2, %p4) has an extra user. The CHECK lines expect the
; ptrtoint/sub to be replaced by a subtraction of the summed indices.
define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
1177+
; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
1178+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
1179+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
1180+
; CHECK-NEXT: call void @use(ptr [[P2]])
1181+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
1182+
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
1183+
; CHECK-NEXT: call void @use(ptr [[P4]])
1184+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IDX1]], [[IDX2]]
1185+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
1186+
; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
1187+
; CHECK-NEXT: ret i64 [[GEPDIFF]]
1188+
;
1189+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1190+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1191+
call void @use(ptr %p2)
1192+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1193+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1194+
call void @use(ptr %p4)
1195+
%i1 = ptrtoint ptr %p4 to i64
1196+
%i2 = ptrtoint ptr %p2 to i64
1197+
%d = sub i64 %i2, %i1
1198+
ret i64 %d
1199+
}
1200+
1201+
; Negative test: one side is a two-GEP chain ending in multi-use %p2, the other
; a three-GEP chain ending in multi-use %p5. The CHECK lines expect the
; ptrtoint/sub sequence to be left unchanged.
define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
1202+
; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
1203+
; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
1204+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX2:%.*]]
1205+
; CHECK-NEXT: call void @use(ptr [[P2]])
1206+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 [[IDX3:%.*]]
1207+
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
1208+
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P4]], i64 [[IDX5:%.*]]
1209+
; CHECK-NEXT: call void @use(ptr [[P5]])
1210+
; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[P5]] to i64
1211+
; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[P2]] to i64
1212+
; CHECK-NEXT: [[D:%.*]] = sub i64 [[I2]], [[I1]]
1213+
; CHECK-NEXT: ret i64 [[D]]
1214+
;
1215+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1216+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1217+
call void @use(ptr %p2)
1218+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1219+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1220+
%p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
1221+
call void @use(ptr %p5)
1222+
%i1 = ptrtoint ptr %p5 to i64
1223+
%i2 = ptrtoint ptr %p2 to i64
1224+
%d = sub i64 %i2, %i1
1225+
ret i64 %d
1226+
}

0 commit comments

Comments
 (0)