Skip to content

Commit 1e24b53

Browse files
authored
[InstCombine] Add limit for expansion of gep chains (#147065)
When converting gep subtraction / comparison to offset subtraction / comparison, avoid expanding very long multi-use gep chains.
1 parent 073460a commit 1e24b53

File tree

5 files changed

+219
-2
lines changed

5 files changed

+219
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2146,13 +2146,33 @@ CommonPointerBase CommonPointerBase::compute(Value *LHS, Value *RHS) {
21462146
return Base;
21472147
}
21482148

2149+
/// Reports whether expanding the GEP chains recorded in LHSGEPs and RHSGEPs
/// is considered too expensive.
///
/// Each chain is scanned on its own. Within a chain, every GEP that has more
/// than one use is counted, except the first such GEP encountered. The result
/// is true when the combined count over both chains is greater than 2.
///
/// \returns true if more than two counted multi-use GEPs were found.
bool CommonPointerBase::isExpensive() const {
2150+
unsigned NumGEPs = 0; // Running total shared by both chain scans below.
2151+
auto ProcessGEPs = [&NumGEPs](ArrayRef<GEPOperator *> GEPs) {
2152+
bool SeenMultiUse = false; // Local to one call, so it resets per chain.
2153+
for (GEPOperator *GEP : GEPs) {
2154+
// Only count multi-use GEPs, excluding the first one. For the first one,
2155+
// we will directly reuse the offset. For one-use GEPs, their offset will
2156+
// be folded into a multi-use GEP.
2157+
if (!GEP->hasOneUse()) {
2158+
if (SeenMultiUse)
2159+
++NumGEPs;
2160+
SeenMultiUse = true;
2161+
}
2162+
}
2163+
};
2164+
ProcessGEPs(LHSGEPs);
2165+
ProcessGEPs(RHSGEPs);
2166+
return NumGEPs > 2; // Hard-coded limit: at most two counted GEPs are allowed.
2167+
}
2168+
21492169
/// Optimize pointer differences into the same array into a size. Consider:
21502170
/// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
21512171
/// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
21522172
Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
21532173
Type *Ty, bool IsNUW) {
21542174
CommonPointerBase Base = CommonPointerBase::compute(LHS, RHS);
2155-
if (!Base.Ptr)
2175+
if (!Base.Ptr || Base.isExpensive())
21562176
return nullptr;
21572177

21582178
// To avoid duplicating the offset arithmetic, rewrite the GEP to use the

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
712712
};
713713

714714
CommonPointerBase Base = CommonPointerBase::compute(GEPLHS, RHS);
715-
if (Base.Ptr == RHS && CanFold(Base.LHSNW)) {
715+
if (Base.Ptr == RHS && CanFold(Base.LHSNW) && !Base.isExpensive()) {
716716
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
717717
Type *IdxTy = DL.getIndexType(GEPLHS->getType());
718718
Value *Offset =

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,9 @@ struct CommonPointerBase {
910910
GEPNoWrapFlags RHSNW = GEPNoWrapFlags::all();
911911

912912
static CommonPointerBase compute(Value *LHS, Value *RHS);
913+
914+
/// Whether expanding the GEP chains is expensive.
915+
bool isExpensive() const;
913916
};
914917

915918
} // end namespace llvm

llvm/test/Transforms/InstCombine/icmp-gep.ll

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,3 +849,135 @@ define i1 @gep_mugtiple_ugt_inbounds_nusw(ptr %base, i64 %idx, i64 %idx2) {
849849
%cmp = icmp ugt ptr %gep2, %base
850850
ret i1 %cmp
851851
}
852+
853+
define i1 @gep_multiple_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3) {
854+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit(
855+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
856+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2:%.*]], i64 [[GEP3_IDX]]
857+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
858+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
859+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP2_IDX]]
860+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
861+
; CHECK-NEXT: [[GEP3_IDX1:%.*]] = shl i64 [[IDX4:%.*]], 2
862+
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr i8, ptr [[GEP4]], i64 [[GEP3_IDX1]]
863+
; CHECK-NEXT: call void @use(ptr [[GEP5]])
864+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP3_IDX]], [[GEP2_IDX]]
865+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[GEP3_IDX1]]
866+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]]
867+
; CHECK-NEXT: ret i1 [[CMP]]
868+
;
869+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
870+
call void @use(ptr %gep1)
871+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
872+
call void @use(ptr %gep2)
873+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
874+
call void @use(ptr %gep3)
875+
%cmp = icmp eq ptr %gep3, %base
876+
ret i1 %cmp
877+
}
878+
879+
define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep1(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
880+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep1(
881+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[IDX1:%.*]], 2
882+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP1_IDX]]
883+
; CHECK-NEXT: call void @use(ptr [[GEP1]])
884+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = shl i64 [[IDX2:%.*]], 2
885+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[GEP1]], i64 [[GEP2_IDX]]
886+
; CHECK-NEXT: call void @use(ptr [[GEP2]])
887+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
888+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
889+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
890+
; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = mul i64 [[IDX4:%.*]], -4
891+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[GEP1_IDX]], [[GEP2_IDX]]
892+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
893+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
894+
; CHECK-NEXT: ret i1 [[CMP]]
895+
;
896+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
897+
call void @use(ptr %gep1)
898+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
899+
call void @use(ptr %gep2)
900+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
901+
call void @use(ptr %gep3)
902+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
903+
%cmp = icmp eq ptr %gep4, %base
904+
ret i1 %cmp
905+
}
906+
907+
define i1 @gep_multiple_multi_use_below_limit_extra_one_use_gep2(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
908+
; CHECK-LABEL: @gep_multiple_multi_use_below_limit_extra_one_use_gep2(
909+
; CHECK-NEXT: [[GEP1_IDX1:%.*]] = add i64 [[IDX1:%.*]], [[IDX2:%.*]]
910+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[GEP1_IDX1]], 2
911+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]]
912+
; CHECK-NEXT: call void @use(ptr [[GEP2]])
913+
; CHECK-NEXT: [[GEP3_IDX:%.*]] = shl i64 [[IDX3:%.*]], 2
914+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 [[GEP3_IDX]]
915+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
916+
; CHECK-NEXT: [[GEP4_IDX:%.*]] = shl i64 [[IDX4:%.*]], 2
917+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[GEP3]], i64 [[GEP4_IDX]]
918+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
919+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[GEP3_IDX]]
920+
; CHECK-NEXT: [[GEP4_IDX_NEG:%.*]] = sub i64 0, [[GEP4_IDX]]
921+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TMP2]], [[GEP4_IDX_NEG]]
922+
; CHECK-NEXT: ret i1 [[CMP]]
923+
;
924+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
925+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
926+
call void @use(ptr %gep2)
927+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
928+
call void @use(ptr %gep3)
929+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
930+
call void @use(ptr %gep4)
931+
%cmp = icmp eq ptr %gep4, %base
932+
ret i1 %cmp
933+
}
934+
935+
define i1 @gep_multiple_multi_above_below_limit_consts(ptr %base, i64 %idx1, i64 %idx2) {
936+
; CHECK-LABEL: @gep_multiple_multi_above_below_limit_consts(
937+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 16
938+
; CHECK-NEXT: call void @use(ptr [[GEP1]])
939+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i32, ptr [[GEP1]], i64 [[IDX1:%.*]]
940+
; CHECK-NEXT: call void @use(ptr [[GEP2]])
941+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[GEP2]], i64 16
942+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
943+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX2:%.*]]
944+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
945+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP4]], [[BASE]]
946+
; CHECK-NEXT: ret i1 [[CMP]]
947+
;
948+
%gep1 = getelementptr i32, ptr %base, i64 4
949+
call void @use(ptr %gep1)
950+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx1
951+
call void @use(ptr %gep2)
952+
%gep3 = getelementptr i32, ptr %gep2, i64 4
953+
call void @use(ptr %gep3)
954+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx2
955+
call void @use(ptr %gep4)
956+
%cmp = icmp eq ptr %gep4, %base
957+
ret i1 %cmp
958+
}
959+
960+
define i1 @gep_multiple_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
961+
; CHECK-LABEL: @gep_multiple_multi_use_above_limit(
962+
; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX1:%.*]]
963+
; CHECK-NEXT: call void @use(ptr [[GEP4]])
964+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i32, ptr [[GEP4]], i64 [[IDX2:%.*]]
965+
; CHECK-NEXT: call void @use(ptr [[GEP3]])
966+
; CHECK-NEXT: [[GEP5:%.*]] = getelementptr i32, ptr [[GEP3]], i64 [[IDX3:%.*]]
967+
; CHECK-NEXT: call void @use(ptr [[GEP5]])
968+
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr i32, ptr [[GEP5]], i64 [[IDX4:%.*]]
969+
; CHECK-NEXT: call void @use(ptr [[GEP6]])
970+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP6]], [[BASE]]
971+
; CHECK-NEXT: ret i1 [[CMP]]
972+
;
973+
%gep1 = getelementptr i32, ptr %base, i64 %idx1
974+
call void @use(ptr %gep1)
975+
%gep2 = getelementptr i32, ptr %gep1, i64 %idx2
976+
call void @use(ptr %gep2)
977+
%gep3 = getelementptr i32, ptr %gep2, i64 %idx3
978+
call void @use(ptr %gep3)
979+
%gep4 = getelementptr i32, ptr %gep3, i64 %idx4
980+
call void @use(ptr %gep4)
981+
%cmp = icmp eq ptr %gep4, %base
982+
ret i1 %cmp
983+
}

llvm/test/Transforms/InstCombine/sub-gep.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,3 +1172,65 @@ define i64 @nuw_ptrdiff_mul_nsw_nneg_scale_multiuse(ptr %base, i64 %idx) {
11721172
%diff = sub nuw i64 %lhs, %rhs
11731173
ret i64 %diff
11741174
}
1175+
1176+
define i64 @multiple_geps_multi_use_below_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4) {
1177+
; CHECK-LABEL: @multiple_geps_multi_use_below_limit(
1178+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 [[IDX2:%.*]]
1179+
; CHECK-NEXT: call void @use(ptr [[P2]])
1180+
; CHECK-NEXT: [[P4:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IDX5:%.*]]
1181+
; CHECK-NEXT: call void @use(ptr [[P4]])
1182+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[IDX3:%.*]]
1183+
; CHECK-NEXT: call void @use(ptr [[P3]])
1184+
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[IDX4:%.*]]
1185+
; CHECK-NEXT: call void @use(ptr [[P5]])
1186+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[IDX2]], [[IDX5]]
1187+
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[IDX3]], [[IDX4]]
1188+
; CHECK-NEXT: [[GEPDIFF:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
1189+
; CHECK-NEXT: ret i64 [[GEPDIFF]]
1190+
;
1191+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1192+
call void @use(ptr %p1)
1193+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1194+
call void @use(ptr %p2)
1195+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1196+
call void @use(ptr %p3)
1197+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1198+
call void @use(ptr %p4)
1199+
%i1 = ptrtoint ptr %p4 to i64
1200+
%i2 = ptrtoint ptr %p2 to i64
1201+
%d = sub i64 %i2, %i1
1202+
ret i64 %d
1203+
}
1204+
1205+
define i64 @multiple_geps_multi_use_above_limit(ptr %base, i64 %idx1, i64 %idx2, i64 %idx3, i64 %idx4, i64 %idx5) {
1206+
; CHECK-LABEL: @multiple_geps_multi_use_above_limit(
1207+
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[P1:%.*]], i64 [[IDX2:%.*]]
1208+
; CHECK-NEXT: call void @use(ptr [[P2]])
1209+
; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[IDX6:%.*]]
1210+
; CHECK-NEXT: call void @use(ptr [[P3]])
1211+
; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[TMP3:%.*]]
1212+
; CHECK-NEXT: call void @use(ptr [[P5]])
1213+
; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds nuw i8, ptr [[P5]], i64 [[IDX7:%.*]]
1214+
; CHECK-NEXT: call void @use(ptr [[P6]])
1215+
; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds nuw i8, ptr [[P6]], i64 [[IDX5:%.*]]
1216+
; CHECK-NEXT: call void @use(ptr [[P7]])
1217+
; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[P7]] to i64
1218+
; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[P3]] to i64
1219+
; CHECK-NEXT: [[D:%.*]] = sub i64 [[I2]], [[I1]]
1220+
; CHECK-NEXT: ret i64 [[D]]
1221+
;
1222+
%p1 = getelementptr inbounds nuw i8, ptr %base, i64 %idx1
1223+
call void @use(ptr %p1)
1224+
%p2 = getelementptr inbounds nuw i8, ptr %p1, i64 %idx2
1225+
call void @use(ptr %p2)
1226+
%p3 = getelementptr inbounds nuw i8, ptr %base, i64 %idx3
1227+
call void @use(ptr %p3)
1228+
%p4 = getelementptr inbounds nuw i8, ptr %p3, i64 %idx4
1229+
call void @use(ptr %p4)
1230+
%p5 = getelementptr inbounds nuw i8, ptr %p4, i64 %idx5
1231+
call void @use(ptr %p5)
1232+
%i1 = ptrtoint ptr %p5 to i64
1233+
%i2 = ptrtoint ptr %p2 to i64
1234+
%d = sub i64 %i2, %i1
1235+
ret i64 %d
1236+
}

0 commit comments

Comments
 (0)