Skip to content

Commit 2632680

Browse files
committed
[InstCombine] Canonicalize (gep <not i8> p, (div exact X, C))
If `C % sizeof(gep_element_type)` is zero, we can canonicalize the GEP to an `i8` element type via: `(gep i8 p, (div exact X, C / sizeof(gep_element_type)))`. Closes #96898.
1 parent c03d3a8 commit 2632680

File tree

3 files changed

+77
-38
lines changed

3 files changed

+77
-38
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,18 +2939,57 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
29392939
});
29402940
return Changed ? &GEP : nullptr;
29412941
}
2942-
} else {
2942+
} else if (auto *ExactIns =
2943+
dyn_cast<PossiblyExactOperator>(GEP.getOperand(1))) {
29432944
// Canonicalize (gep T* X, V / sizeof(T)) to (gep i8* X, V)
29442945
Value *V;
2945-
if ((has_single_bit(TyAllocSize) &&
2946-
match(GEP.getOperand(1),
2947-
m_Exact(m_Shr(m_Value(V),
2948-
m_SpecificInt(countr_zero(TyAllocSize)))))) ||
2949-
match(GEP.getOperand(1),
2950-
m_Exact(m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize))))) {
2951-
return GetElementPtrInst::Create(Builder.getInt8Ty(),
2952-
GEP.getPointerOperand(), V,
2953-
GEP.getNoWrapFlags());
2946+
if (ExactIns->isExact()) {
2947+
if ((has_single_bit(TyAllocSize) &&
2948+
match(GEP.getOperand(1),
2949+
m_Shr(m_Value(V),
2950+
m_SpecificInt(countr_zero(TyAllocSize))))) ||
2951+
match(GEP.getOperand(1),
2952+
m_IDiv(m_Value(V), m_SpecificInt(TyAllocSize)))) {
2953+
return GetElementPtrInst::Create(Builder.getInt8Ty(),
2954+
GEP.getPointerOperand(), V,
2955+
GEP.getNoWrapFlags());
2956+
}
2957+
}
2958+
if (ExactIns->isExact() && ExactIns->hasOneUse()) {
2959+
// Try to canonicalize non-i8 element type to i8 if the index is an
2960+
// exact instruction. If the index is an exact instruction (div/shr)
2961+
// with a constant RHS, we can fold the non-i8 element scale into the
2962+
// div/shr (similar to the mul case, just inverted).
2963+
const APInt *C;
2964+
std::optional<APInt> NewC;
2965+
if (has_single_bit(TyAllocSize) &&
2966+
match(ExactIns, m_Shr(m_Value(V), m_APInt(C))) &&
2967+
C->uge(countr_zero(TyAllocSize)))
2968+
NewC = *C - countr_zero(TyAllocSize);
2969+
else if (match(ExactIns, m_UDiv(m_Value(V), m_APInt(C)))) {
2970+
APInt Quot;
2971+
uint64_t Rem;
2972+
APInt::udivrem(*C, TyAllocSize, Quot, Rem);
2973+
if (Rem == 0)
2974+
NewC = Quot;
2975+
} else if (match(ExactIns, m_SDiv(m_Value(V), m_APInt(C)))) {
2976+
APInt Quot;
2977+
int64_t Rem;
2978+
APInt::sdivrem(*C, TyAllocSize, Quot, Rem);
2979+
// For sdiv we need to make sure we aren't creating INT_MIN / -1.
2980+
if (!Quot.isAllOnes() && Rem == 0)
2981+
NewC = Quot;
2982+
}
2983+
2984+
if (NewC.has_value()) {
2985+
Value *NewOp = Builder.CreateBinOp(
2986+
static_cast<Instruction::BinaryOps>(ExactIns->getOpcode()), V,
2987+
ConstantInt::get(V->getType(), *NewC));
2988+
cast<BinaryOperator>(NewOp)->setIsExact();
2989+
return GetElementPtrInst::Create(Builder.getInt8Ty(),
2990+
GEP.getPointerOperand(), NewOp,
2991+
GEP.getNoWrapFlags());
2992+
}
29542993
}
29552994
}
29562995
}

llvm/test/Transforms/InstCombine/getelementptr.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1793,8 +1793,8 @@ define ptr @gep_sel_const_nuw(i1 %c) {
17931793

17941794
define ptr @gep_of_udiv(ptr %p, i64 %x) {
17951795
; CHECK-LABEL: @gep_of_udiv(
1796-
; CHECK-NEXT: [[IDX:%.*]] = udiv exact i64 [[X:%.*]], 12
1797-
; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
1796+
; CHECK-NEXT: [[TMP1:%.*]] = udiv exact i64 [[X:%.*]], 3
1797+
; CHECK-NEXT: [[R:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP1]]
17981798
; CHECK-NEXT: ret ptr [[R]]
17991799
;
18001800
%idx = udiv exact i64 %x, 12
@@ -1816,8 +1816,8 @@ define ptr @gep_of_udiv_fail_not_divisible(ptr %p, i64 %x) {
18161816

18171817
define ptr @gep_of_sdiv(ptr %p, i64 %x) {
18181818
; CHECK-LABEL: @gep_of_sdiv(
1819-
; CHECK-NEXT: [[IDX:%.*]] = sdiv exact i64 [[X:%.*]], -36
1820-
; CHECK-NEXT: [[R:%.*]] = getelementptr nusw nuw i32, ptr [[P:%.*]], i64 [[IDX]]
1819+
; CHECK-NEXT: [[TMP1:%.*]] = sdiv exact i64 [[X:%.*]], -9
1820+
; CHECK-NEXT: [[R:%.*]] = getelementptr nusw nuw i8, ptr [[P:%.*]], i64 [[TMP1]]
18211821
; CHECK-NEXT: ret ptr [[R]]
18221822
;
18231823
%idx = sdiv exact i64 %x, -36
@@ -1850,8 +1850,8 @@ define ptr @gep_of_sdiv_fail_ub(ptr %p, i64 %x) {
18501850

18511851
define ptr @gep_of_lshr(ptr %p, i64 %x) {
18521852
; CHECK-LABEL: @gep_of_lshr(
1853-
; CHECK-NEXT: [[IDX:%.*]] = lshr exact i64 [[X:%.*]], 3
1854-
; CHECK-NEXT: [[R:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IDX]]
1853+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[X:%.*]], 1
1854+
; CHECK-NEXT: [[R:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP1]]
18551855
; CHECK-NEXT: ret ptr [[R]]
18561856
;
18571857
%idx = lshr exact i64 %x, 3
@@ -1861,8 +1861,8 @@ define ptr @gep_of_lshr(ptr %p, i64 %x) {
18611861

18621862
define ptr @gep_of_ashr(ptr %p, i64 %x) {
18631863
; CHECK-LABEL: @gep_of_ashr(
1864-
; CHECK-NEXT: [[IDX:%.*]] = ashr exact i64 [[X:%.*]], 3
1865-
; CHECK-NEXT: [[R:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[IDX]]
1864+
; CHECK-NEXT: [[TMP1:%.*]] = ashr exact i64 [[X:%.*]], 1
1865+
; CHECK-NEXT: [[R:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[TMP1]]
18661866
; CHECK-NEXT: ret ptr [[R]]
18671867
;
18681868
%idx = ashr exact i64 %x, 3

llvm/test/Transforms/LoopVectorize/induction.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4466,8 +4466,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
44664466
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
44674467
; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
44684468
; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4469-
; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
4470-
; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
4469+
; IND-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
4470+
; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
44714471
; IND-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
44724472
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
44734473
; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
@@ -4483,8 +4483,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
44834483
; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
44844484
; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
44854485
; IND-NEXT: [[SEXT1:%.*]] = shl i64 [[INDVARS_IV]], 32
4486-
; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 32
4487-
; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]]
4486+
; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT1]], 30
4487+
; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
44884488
; IND-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
44894489
; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
44904490
; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4507,8 +4507,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
45074507
; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
45084508
; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
45094509
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4510-
; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
4511-
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
4510+
; UNROLL-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
4511+
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
45124512
; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
45134513
; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
45144514
; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4526,8 +4526,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
45264526
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
45274527
; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
45284528
; UNROLL-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
4529-
; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
4530-
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
4529+
; UNROLL-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
4530+
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
45314531
; UNROLL-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
45324532
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
45334533
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -4599,8 +4599,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
45994599
; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
46004600
; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
46014601
; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
4602-
; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 32
4603-
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
4602+
; INTERLEAVE-NEXT: [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
4603+
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
46044604
; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
46054605
; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
46064606
; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
@@ -4618,8 +4618,8 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
46184618
; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
46194619
; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
46204620
; INTERLEAVE-NEXT: [[SEXT2:%.*]] = shl i64 [[INDVARS_IV]], 32
4621-
; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 32
4622-
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
4621+
; INTERLEAVE-NEXT: [[TMP4:%.*]] = ashr exact i64 [[SEXT2]], 30
4622+
; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
46234623
; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], ptr [[ARRAYIDX]], align 4
46244624
; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
46254625
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
@@ -6009,8 +6009,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
60096009
; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
60106010
; IND-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
60116011
; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6012-
; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 32
6013-
; IND-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP3]]
6012+
; IND-NEXT: [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 30
6013+
; IND-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP3]]
60146014
; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]]
60156015
; IND-NEXT: store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
60166016
; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -6044,8 +6044,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
60446044
; UNROLL-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
60456045
; UNROLL-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
60466046
; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6047-
; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6048-
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
6047+
; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
6048+
; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
60496049
; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP3]]
60506050
; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP4]]
60516051
; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 8
@@ -6139,8 +6139,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
61396139
; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP0]]
61406140
; INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT4]], [[TMP1]]
61416141
; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6142-
; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6143-
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[TMP5]]
6142+
; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 30
6143+
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP5]]
61446144
; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP3]]
61456145
; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD]], [[TMP4]]
61466146
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
@@ -6166,8 +6166,8 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
61666166
; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
61676167
; INTERLEAVE-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32
61686168
; INTERLEAVE-NEXT: [[SEXT5:%.*]] = shl i64 [[IV]], 32
6169-
; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 32
6170-
; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP11]]
6169+
; INTERLEAVE-NEXT: [[TMP11:%.*]] = ashr exact i64 [[SEXT5]], 30
6170+
; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
61716171
; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
61726172
; INTERLEAVE-NEXT: store i32 [[ADD]], ptr [[DST_GEP]], align 4
61736173
; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100

0 commit comments

Comments
 (0)