
Commit cb6d6dc

[AggressiveCombine] Refactor foldLoadsRecursive to use m_ShlOrSelf
Parent: 8b732b0

2 files changed: +18 -50 lines
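
For context on the matcher driving this diff: as used in the patch, m_ShlOrSelf matches either shl X, C with a constant amount (binding C into a uint64_t) or X itself (binding 0), so the previously separate shifted and unshifted patterns collapse into one and the bound amount no longer needs an APInt detour. Below is a minimal sketch assuming that behavior; the helper name matchShiftedZExtLoad is hypothetical and not part of the patch.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper (not in the patch): match a possibly-shifted zext,
// i.e. (zext(I) << ShAmt) or plain zext(I) with ShAmt bound to 0, and
// return I if it is a load.
static LoadInst *matchShiftedZExtLoad(Value *V, uint64_t &ShAmt) {
  Instruction *I = nullptr;
  if (!match(V, m_OneUse(m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(I))),
                                     ShAmt))))
    return nullptr;
  return dyn_cast<LoadInst>(I);
}

Binding the shift amount directly as a uint64_t is also what lets LoadOps::Shift drop the const APInt * indirection in the first hunks below.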

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 15 additions & 27 deletions
@@ -83,8 +83,8 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
     // == (ShVal0 << ShAmt) | (ShVal1 >> (Width -ShAmt))
     if (match(V, m_OneUse(m_c_Or(
                      m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
-                     m_LShr(m_Value(ShVal1),
-                            m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
+                     m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
+                                                   m_Deferred(ShAmt))))))) {
       return Intrinsic::fshl;
     }

@@ -617,7 +617,7 @@ struct LoadOps {
   LoadInst *RootInsert = nullptr;
   bool FoundRoot = false;
   uint64_t LoadSize = 0;
-  const APInt *Shift = nullptr;
+  uint64_t Shift = 0;
   Type *ZextType;
   AAMDNodes AATags;
 };
@@ -627,17 +627,15 @@ struct LoadOps {
 // (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
 static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
                                AliasAnalysis &AA) {
-  const APInt *ShAmt2 = nullptr;
+  uint64_t ShAmt2;
   Value *X;
   Instruction *L1, *L2;

   // Go to the last node with loads.
-  if (match(V, m_OneUse(m_c_Or(
-                   m_Value(X),
-                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
-                                  m_APInt(ShAmt2)))))) ||
-      match(V, m_OneUse(m_Or(m_Value(X),
-                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
+  if (match(V,
+            m_OneUse(m_c_Or(m_Value(X), m_OneUse(m_ShlOrSelf(
+                                            m_OneUse(m_ZExt(m_Instruction(L2))),
+                                            ShAmt2)))))) {
     if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
       // Avoid Partial chain merge.
       return false;
@@ -646,11 +644,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,

   // Check if the pattern has loads
   LoadInst *LI1 = LOps.Root;
-  const APInt *ShAmt1 = LOps.Shift;
+  uint64_t ShAmt1 = LOps.Shift;
   if (LOps.FoundRoot == false &&
-      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
-       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
-                               m_APInt(ShAmt1)))))) {
+      match(X, m_OneUse(
+                   m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
     LI1 = dyn_cast<LoadInst>(L1);
   }
   LoadInst *LI2 = dyn_cast<LoadInst>(L2);
@@ -726,13 +723,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
   if (IsBigEndian)
     std::swap(ShAmt1, ShAmt2);

-  // Find Shifts values.
-  uint64_t Shift1 = 0, Shift2 = 0;
-  if (ShAmt1)
-    Shift1 = ShAmt1->getZExtValue();
-  if (ShAmt2)
-    Shift2 = ShAmt2->getZExtValue();
-
   // First load is always LI1. This is where we put the new load.
   // Use the merged load size available from LI1 for forward loads.
   if (LOps.FoundRoot) {
@@ -747,7 +737,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
   uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
   uint64_t PrevSize =
       DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
-  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+  if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
     return false;

   // Update LOps
@@ -824,7 +814,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
   // Check if shift needed. We need to shift with the amount of load1
   // shift if not zero.
   if (LOps.Shift)
-    NewOp = Builder.CreateShl(NewOp, ConstantInt::get(I.getContext(), *LOps.Shift));
+    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
   I.replaceAllUsesWith(NewOp);

   return true;
@@ -860,11 +850,9 @@ static std::optional<PartStore> matchPartStore(Instruction &I,
     return std::nullopt;

   uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
-  uint64_t ValOffset = 0;
+  uint64_t ValOffset;
   Value *Val;
-  if (!match(StoredVal, m_CombineOr(m_Trunc(m_LShr(m_Value(Val),
-                                                   m_ConstantInt(ValOffset))),
-                                    m_Trunc(m_Value(Val)))))
+  if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
     return std::nullopt;

   Value *Ptr = Store->getPointerOperand();
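
The matchPartStore hunk above is the store-side mirror of the same cleanup: m_LShrOrSelf (again assuming it binds 0 when no lshr is present, which is what the patch relies on) replaces the m_CombineOr of the shifted and plain trunc patterns, which is also why ValOffset no longer needs a zero initializer. A sketch with a hypothetical helper name, not part of the patch:

#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper (not in the patch): match trunc(lshr(Val, C)) or
// plain trunc(Val), binding C into ValOffset (0 in the unshifted case).
static bool matchTruncatedPart(Value *StoredVal, Value *&Val,
                               uint64_t &ValOffset) {
  return match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset)));
}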

llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll

Lines changed: 3 additions & 23 deletions
@@ -103,18 +103,7 @@ define i32 @loadCombine_4consecutive(ptr %p) {

 define i32 @loadCombine_4consecutive_commuted(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_commuted(
-; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
-; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; LE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
-; LE-NEXT:    [[L2:%.*]] = load i16, ptr [[P1]], align 1
-; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[L2]] to i32
-; LE-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 8
-; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
-; LE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
-; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; LE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; LE-NEXT:    [[O2:%.*]] = or i32 [[S4]], [[TMP2]]
-; LE-NEXT:    [[O3:%.*]] = or i32 [[E1]], [[O2]]
+; LE-NEXT:    [[O3:%.*]] = load i32, ptr [[P:%.*]], align 1
 ; LE-NEXT:    ret i32 [[O3]]
 ;
 ; BE-LABEL: @loadCombine_4consecutive_commuted(
@@ -162,19 +151,10 @@ define i32 @loadCombine_4consecutive_commuted(ptr %p) {

 define i32 @loadCombine_4consecutive_multiuse(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_multiuse(
-; LE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
-; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; LE-NEXT:    [[L1:%.*]] = load i16, ptr [[P]], align 1
-; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[L1]] to i32
-; LE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; LE-NEXT:    [[O3:%.*]] = load i32, ptr [[P]], align 1
 ; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
 ; LE-NEXT:    call void @use(i8 [[L4]])
-; LE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
-; LE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
-; LE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
-; LE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
-; LE-NEXT:    [[O2:%.*]] = or i32 [[TMP1]], [[S3]]
-; LE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
 ; LE-NEXT:    ret i32 [[O3]]
 ;
 ; BE-LABEL: @loadCombine_4consecutive_multiuse(
