Skip to content

Commit 305cf0e

Browse files
authored
[InstCombine] Make foldCmpLoadFromIndexedGlobal() GEP-type independent (#157089)
foldCmpLoadFromIndexedGlobal() currently checks that the global type, the GEP type and the load type match in certain ways. Replace this with generic logic based on offsets. This is a reboot of #67093. This PR is less ambitious by requiring that the constant offset is smaller than the stride, which avoids the additional complexity of that PR.
1 parent 5341e26 commit 305cf0e

File tree

3 files changed

+216
-75
lines changed

3 files changed

+216
-75
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 32 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -112,73 +112,42 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
112112
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
113113
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
114114
ConstantInt *AndCst) {
115-
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
116-
!GV->getValueType()->isArrayTy() || !GV->isConstant() ||
117-
!GV->hasDefinitiveInitializer())
118-
return nullptr;
119-
120-
Type *GEPSrcEltTy = GEP->getSourceElementType();
121-
if (GEPSrcEltTy->isArrayTy())
122-
GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
123-
if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
115+
if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
124116
return nullptr;
125117

126118
Constant *Init = GV->getInitializer();
127-
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
119+
TypeSize GlobalSize = DL.getTypeAllocSize(Init->getType());
120+
Type *EltTy = LI->getType();
121+
TypeSize EltSize = DL.getTypeStoreSize(EltTy);
122+
if (EltSize.isScalable())
128123
return nullptr;
129124

130-
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
131-
// Don't blow up on huge arrays.
132-
if (ArrayElementCount > MaxArraySizeForCombine)
125+
unsigned IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
126+
SmallMapVector<Value *, APInt, 4> VarOffsets;
127+
APInt ConstOffset(IndexBW, 0);
128+
if (!GEP->collectOffset(DL, IndexBW, VarOffsets, ConstOffset) ||
129+
VarOffsets.size() != 1 || IndexBW > 64)
133130
return nullptr;
134131

135-
// There are many forms of this optimization we can handle, for now, just do
136-
// the simple index into a single-dimensional array or elements of equal size.
137-
//
138-
// Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
139-
// Or: GEP i8 GV, Idx
140-
141-
unsigned GEPIdxOp = 1;
142-
if (GEP->getSourceElementType()->isArrayTy()) {
143-
GEPIdxOp = 2;
144-
if (!match(GEP->getOperand(1), m_ZeroInt()))
145-
return nullptr;
146-
}
147-
if (GEP->getNumOperands() < GEPIdxOp + 1 ||
148-
isa<Constant>(GEP->getOperand(GEPIdxOp)))
132+
Value *Idx = VarOffsets.front().first;
133+
const APInt &Stride = VarOffsets.front().second;
134+
// If the index type is non-canonical, wait for it to be canonicalized.
135+
if (Idx->getType()->getScalarSizeInBits() != IndexBW)
149136
return nullptr;
150137

151-
// Check that indices after the variable are constants and in-range for the
152-
// type they index. Collect the indices. This is typically for arrays of
153-
// structs.
154-
SmallVector<unsigned, 4> LaterIndices;
155-
156-
Type *EltTy = Init->getType()->getArrayElementType();
157-
for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
158-
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
159-
if (!Idx)
160-
return nullptr; // Variable index.
161-
162-
uint64_t IdxVal = Idx->getZExtValue();
163-
if ((unsigned)IdxVal != IdxVal)
164-
return nullptr; // Too large array index.
165-
166-
if (StructType *STy = dyn_cast<StructType>(EltTy))
167-
EltTy = STy->getElementType(IdxVal);
168-
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
169-
if (IdxVal >= ATy->getNumElements())
170-
return nullptr;
171-
EltTy = ATy->getElementType();
172-
} else {
173-
return nullptr; // Unknown type.
174-
}
138+
// Allow an additional context offset, but only within the stride.
139+
if (!ConstOffset.ult(Stride))
140+
return nullptr;
175141

176-
LaterIndices.push_back(IdxVal);
177-
}
142+
// Don't handle overlapping loads for now.
143+
if (!Stride.uge(EltSize.getFixedValue()))
144+
return nullptr;
178145

179-
Value *Idx = GEP->getOperand(GEPIdxOp);
180-
// If the index type is non-canonical, wait for it to be canonicalized.
181-
if (Idx->getType() != DL.getIndexType(GEP->getType()))
146+
// Don't blow up on huge arrays.
147+
uint64_t ArrayElementCount =
148+
divideCeil((GlobalSize.getFixedValue() - ConstOffset.getZExtValue()),
149+
Stride.getZExtValue());
150+
if (ArrayElementCount > MaxArraySizeForCombine)
182151
return nullptr;
183152

184153
enum { Overdefined = -3, Undefined = -2 };
@@ -211,18 +180,12 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
211180

212181
// Scan the array and see if one of our patterns matches.
213182
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
214-
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
215-
Constant *Elt = Init->getAggregateElement(i);
183+
APInt Offset = ConstOffset;
184+
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i, Offset += Stride) {
185+
Constant *Elt = ConstantFoldLoadFromConst(Init, EltTy, Offset, DL);
216186
if (!Elt)
217187
return nullptr;
218188

219-
// If this is indexing an array of structures, get the structure element.
220-
if (!LaterIndices.empty()) {
221-
Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
222-
if (!Elt)
223-
return nullptr;
224-
}
225-
226189
// If the element is masked, handle it.
227190
if (AndCst) {
228191
Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
@@ -309,19 +272,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
309272
// Now that we've scanned the entire array, emit our new comparison(s). We
310273
// order the state machines in complexity of the generated code.
311274

312-
// If inbounds keyword is not present, Idx * ElementSize can overflow.
313-
// Let's assume that ElementSize is 2 and the wanted value is at offset 0.
275+
// If inbounds keyword is not present, Idx * Stride can overflow.
276+
// Let's assume that Stride is 2 and the wanted value is at offset 0.
314277
// Then, there are two possible values for Idx to match offset 0:
315278
// 0x00..00, 0x80..00.
316279
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
317280
// comparison is false if Idx was 0x80..00.
318281
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
319-
unsigned ElementSize =
320-
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
321282
auto MaskIdx = [&](Value *Idx) {
322-
if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
283+
if (!GEP->isInBounds() && Stride.countr_zero() != 0) {
323284
Value *Mask = Constant::getAllOnesValue(Idx->getType());
324-
Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
285+
Mask = Builder.CreateLShr(Mask, Stride.countr_zero());
325286
Idx = Builder.CreateAnd(Idx, Mask);
326287
}
327288
return Idx;

llvm/test/Transforms/InstCombine/load-cmp.ll

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,3 +371,186 @@ define i1 @pr93017(i64 %idx) {
371371
%cmp = icmp ne ptr %v, null
372372
ret i1 %cmp
373373
}
374+
375+
@g_i32_lo = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
376+
377+
; Mask is 0b10101010
378+
define i1 @load_vs_array_type_mismatch1(i32 %idx) {
379+
; CHECK-LABEL: @load_vs_array_type_mismatch1(
380+
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
381+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 170
382+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
383+
; CHECK-NEXT: ret i1 [[CMP]]
384+
;
385+
%gep = getelementptr inbounds i16, ptr @g_i32_lo, i32 %idx
386+
%load = load i16, ptr %gep
387+
%cmp = icmp eq i16 %load, 0
388+
ret i1 %cmp
389+
}
390+
391+
@g_i32_hi = internal constant [4 x i32] [i32 u0x00010000, i32 u0x00020000, i32 u0x00030000, i32 u0x00040000]
392+
393+
; Mask is 0b01010101
394+
define i1 @load_vs_array_type_mismatch2(i32 %idx) {
395+
; CHECK-LABEL: @load_vs_array_type_mismatch2(
396+
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
397+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 85
398+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
399+
; CHECK-NEXT: ret i1 [[CMP]]
400+
;
401+
%gep = getelementptr inbounds i16, ptr @g_i32_hi, i32 %idx
402+
%load = load i16, ptr %gep
403+
%cmp = icmp eq i16 %load, 0
404+
ret i1 %cmp
405+
}
406+
407+
@g_i16_1 = internal constant [8 x i16] [i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1, i16 0]
408+
409+
; idx == 1 || idx == 3
410+
define i1 @load_vs_array_type_mismatch_offset1(i32 %idx) {
411+
; CHECK-LABEL: @load_vs_array_type_mismatch_offset1(
412+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
413+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
414+
; CHECK-NEXT: ret i1 [[CMP]]
415+
;
416+
%gep = getelementptr inbounds {i16, i16}, ptr @g_i16_1, i32 %idx, i32 1
417+
%load = load i16, ptr %gep
418+
%cmp = icmp eq i16 %load, 0
419+
ret i1 %cmp
420+
}
421+
422+
@g_i16_2 = internal constant [8 x i16] [i16 1, i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1]
423+
424+
; idx == 0 || idx == 2
425+
define i1 @load_vs_array_type_mismatch_offset2(i32 %idx) {
426+
; CHECK-LABEL: @load_vs_array_type_mismatch_offset2(
427+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
428+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
429+
; CHECK-NEXT: ret i1 [[CMP]]
430+
;
431+
%gep = getelementptr inbounds {i16, i16}, ptr @g_i16_2, i32 %idx, i32 1
432+
%load = load i16, ptr %gep
433+
%cmp = icmp eq i16 %load, 0
434+
ret i1 %cmp
435+
}
436+
437+
define i1 @offset_larger_than_stride(i32 %idx) {
438+
; CHECK-LABEL: @offset_larger_than_stride(
439+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr @g_i16_1, i32 1, i32 [[TMP1:%.*]]
440+
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
441+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
442+
; CHECK-NEXT: ret i1 [[CMP]]
443+
;
444+
%gep = getelementptr [2 x i16], ptr @g_i16_1, i64 1, i32 %idx
445+
%load = load i16, ptr %gep
446+
%cmp = icmp eq i16 %load, 0
447+
ret i1 %cmp
448+
}
449+
450+
define i1 @load_size_larger_stride(i32 %idx) {
451+
; CHECK-LABEL: @load_size_larger_stride(
452+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g_i16_1, i32 [[IDX:%.*]]
453+
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
454+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
455+
; CHECK-NEXT: ret i1 [[CMP]]
456+
;
457+
%gep = getelementptr i8, ptr @g_i16_1, i32 %idx
458+
%load = load i16, ptr %gep
459+
%cmp = icmp eq i16 %load, 0
460+
ret i1 %cmp
461+
}
462+
463+
@CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]
464+
465+
define i1 @cmp_load_constant_array_messy(i32 %x){
466+
; CHECK-LABEL: @cmp_load_constant_array_messy(
467+
; CHECK-NEXT: entry:
468+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0:%.*]], 1073741823
469+
; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1]]
470+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 373
471+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP3]], 0
472+
; CHECK-NEXT: ret i1 [[COND]]
473+
;
474+
475+
entry:
476+
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
477+
%isOK = load i32, ptr %isOK_ptr
478+
%cond = icmp slt i32 %isOK, 5
479+
ret i1 %cond
480+
}
481+
482+
define i1 @cmp_diff_load_constant_array_messy0(i32 %x){
483+
; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
484+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1:%.*]], 1073741823
485+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 1, [[TMP2]]
486+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 373
487+
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP4]], 0
488+
; CHECK-NEXT: ret i1 [[COND]]
489+
;
490+
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
491+
%isOK = load i16, ptr %isOK_ptr
492+
%cond = icmp slt i16 %isOK, 5
493+
ret i1 %cond
494+
}
495+
496+
; Load size larger than store size currently not supported.
497+
define i1 @cmp_diff_load_constant_array_messy1(i32 %x){
498+
; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
499+
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1:%.*]]
500+
; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
501+
; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[ISOK]], 5
502+
; CHECK-NEXT: ret i1 [[COND]]
503+
;
504+
%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i32 %x
505+
%isOK = load i16, ptr %isOK_ptr
506+
%cond = icmp slt i16 %isOK, 5
507+
ret i1 %cond
508+
}
509+
510+
define i1 @cmp_load_constant_array_variable_icmp(i32 %x, i32 %y) {
511+
; CHECK-LABEL: @cmp_load_constant_array_variable_icmp(
512+
; CHECK-NEXT: entry:
513+
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG_MESSY, i32 [[X:%.*]]
514+
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
515+
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
516+
; CHECK-NEXT: ret i1 [[COND]]
517+
;
518+
entry:
519+
%isOK_ptr = getelementptr inbounds i32, ptr @CG_MESSY, i32 %x
520+
%isOK = load i32, ptr %isOK_ptr
521+
%cond = icmp ult i32 %isOK, %y
522+
ret i1 %cond
523+
}
524+
525+
@CG_CLEAR = constant [10 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9]
526+
527+
; Offsets not supported if negative or larger than stride.
528+
define i1 @cmp_load_constant_additional_positive_offset(i32 %x) {
529+
; CHECK-LABEL: @cmp_load_constant_additional_positive_offset(
530+
; CHECK-NEXT: entry:
531+
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 [[X:%.*]]
532+
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
533+
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
534+
; CHECK-NEXT: ret i1 [[COND]]
535+
;
536+
entry:
537+
%isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 %x
538+
%isOK = load i32, ptr %isOK_ptr
539+
%cond = icmp ult i32 %isOK, 5
540+
ret i1 %cond
541+
}
542+
543+
define i1 @cmp_load_constant_additional_negative_offset(i32 %x) {
544+
; CHECK-LABEL: @cmp_load_constant_additional_negative_offset(
545+
; CHECK-NEXT: entry:
546+
; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 [[X:%.*]], i32 -5
547+
; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
548+
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
549+
; CHECK-NEXT: ret i1 [[COND]]
550+
;
551+
entry:
552+
%isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 %x, i32 -5
553+
%isOK = load i32, ptr %isOK_ptr
554+
%cond = icmp ult i32 %isOK, 5
555+
ret i1 %cond
556+
}

llvm/test/Transforms/InstCombine/opaque-ptr.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -543,10 +543,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
543543

544544
define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
545545
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
546-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr @ary, i64 [[IDX:%.*]]
547-
; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
548-
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
549-
; CHECK-NEXT: ret i1 [[CMP]]
546+
; CHECK-NEXT: ret i1 false
550547
;
551548
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
552549
%load = load i16, ptr %gep

0 commit comments

Comments
 (0)