From 76d4bdab54655ccaafd40575d12e4d12da81b824 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Sat, 28 Sep 2024 01:24:29 +0200
Subject: [PATCH 01/11] [GlobalISel] Remove inaccurate input vector restriction

In the buildShuffleVector method, there is a restriction that the input
vectors must be larger than the mask. However, this does not match how
G_SHUFFLE_VECTOR is actually used in our test suite, where the mask is
regularly larger than the combined inputs. For example:

shuffle_concat_1 in combine_shuffle_vector.mir: 4xs8 -> 16xs8
v3s8_crash in legalize_insert_vector_elt: 3xs8 -> 12xs8
shuffle_vector_to_concat_vector_45670123 in prelegalizercombiner-shuffle-vector: 4xs32 -> 12xs32
---
 llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 59f2fc633f5de..1ddecefa17383 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -772,8 +772,6 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
   LLT DstTy = Res.getLLTTy(*getMRI());
   LLT Src1Ty = Src1.getLLTTy(*getMRI());
   LLT Src2Ty = Src2.getLLTTy(*getMRI());
-  assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
-         Mask.size());
   assert(DstTy.getElementType() == Src1Ty.getElementType() &&
          DstTy.getElementType() == Src2Ty.getElementType());
   (void)DstTy;

From 0d5d582b453be6d5f2023b6db2ded874971a8900 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Sat, 5 Oct 2024 17:38:02 +0200
Subject: [PATCH 02/11] [GISel] Factor out the mask matching code from the shufflevector combiner

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  | 11 +++
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 82 +++++++++++--------
 2 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 76d51ab819f44..b503a53dd98ca 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -249,6 +249,17 @@ class CombinerHelper {
   /// or an implicit_def if \p Ops is empty.
   void applyCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops);
 
+  /// Check if an instruction whose operations can be represented
+  /// by a vector mask can be replaced by a concat_vectors.
+  /// \p Ops will contain the operands to produce the flattened
+  /// concat_vectors.
+  /// \p Mask is an array of numbers that represent the order in which
+  /// the elements of \p SrcRegs will be put into \p DstReg.
+  bool matchVectorMaskSequence(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
+                               const Register DstReg,
+                               const std::pair<Register, Register> SrcRegs,
+                               ArrayRef<int> Mask);
+
   /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
   /// Returns true if MI changed.
   ///

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c279289f9161b..d523af15486c9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include
 
 #define DEBUG_TYPE "gi-combiner"
 
@@ -472,39 +473,16 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   return false;
 }
 
-bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
-                                               SmallVectorImpl<Register> &Ops) {
-  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
-         "Invalid instruction kind");
-  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
-  Register Src1 = MI.getOperand(1).getReg();
-  LLT SrcType = MRI.getType(Src1);
-  // As bizarre as it may look, shuffle vector can actually produce
-  // scalar! This is because at the IR level a <1 x ty> shuffle
-  // vector is perfectly valid.
-  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
-  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
-
-  // If the resulting vector is smaller than the size of the source
-  // vectors being concatenated, we won't be able to replace the
-  // shuffle vector with a concat_vectors.
-  //
-  // Note: We may still be able to produce a concat_vectors fed by
-  // extract_vector_elt and so on. It is less clear that would
-  // be better though, so don't bother for now.
-  //
-  // If the destination is a scalar, the size of the sources doesn't
-  // matter. We will lower the shuffle to a plain copy. This will
-  // work only if the source and destination have the same size. But
-  // that's covered by the next condition.
-  //
-  // TODO: If the sizes of the source and destination don't match,
-  // we could still emit an extract vector element in that case.
-  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
-    return false;
+bool CombinerHelper::matchVectorMaskSequence(
+    MachineInstr &MI, SmallVectorImpl<Register> &Ops, const Register DstReg,
+    const std::pair<Register, Register> SrcRegs, ArrayRef<int> Mask) {
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcRegs.first);
 
   // Check that the shuffle mask can be broken evenly between the
   // different sources.
+  const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+  const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
   if (DstNumElts % SrcNumElts != 0)
     return false;
 
@@ -513,7 +491,6 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
   // vectors.
   unsigned NumConcat = DstNumElts / SrcNumElts;
   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
-  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
   for (unsigned i = 0; i != DstNumElts; ++i) {
     int Idx = Mask[i];
     // Undef value.
@@ -532,21 +509,56 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
 
   // The shuffle is concatenating multiple vectors together.
   // Collect the different operands for that.
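  // For example (hypothetical values), with two <2 x s32> sources and
  // Mask = (2, 3, 0, 1), ConcatSrcs ends up as (1, 0): Ops becomes
  // {Src2, Src1} and the shuffle is equivalent to
  // G_CONCAT_VECTORS %Src2, %Src1. A chunk whose mask entries are all
  // undef (-1) contributes an implicit_def operand instead.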
   Register UndefReg;
-  Register Src2 = MI.getOperand(2).getReg();
   for (auto Src : ConcatSrcs) {
     if (Src < 0) {
       if (!UndefReg) {
         Builder.setInsertPt(*MI.getParent(), MI);
-        UndefReg = Builder.buildUndef(SrcType).getReg(0);
+        UndefReg = Builder.buildUndef(SrcTy).getReg(0);
       }
       Ops.push_back(UndefReg);
     } else if (Src == 0)
-      Ops.push_back(Src1);
+      Ops.push_back(SrcRegs.first);
     else
-      Ops.push_back(Src2);
+      Ops.push_back(SrcRegs.second);
   }
   return true;
 }
+bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
+                                               SmallVectorImpl<Register> &Ops) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+         "Invalid instruction kind");
+  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+  Register Src1 = MI.getOperand(1).getReg();
+  LLT SrcType = MRI.getType(Src1);
+  // As bizarre as it may look, shuffle vector can actually produce
+  // scalar! This is because at the IR level a <1 x ty> shuffle
+  // vector is perfectly valid.
+  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
+  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
+
+  // If the resulting vector is smaller than the size of the source
+  // vectors being concatenated, we won't be able to replace the
+  // shuffle vector with a concat_vectors.
+  //
+  // Note: We may still be able to produce a concat_vectors fed by
+  // extract_vector_elt and so on. It is less clear that would
+  // be better though, so don't bother for now.
+  //
+  // If the destination is a scalar, the size of the sources doesn't
+  // matter. We will lower the shuffle to a plain copy. This will
+  // work only if the source and destination have the same size. But
+  // that's covered by the next condition.
+  //
+  // TODO: If the sizes of the source and destination don't match,
+  // we could still emit an extract vector element in that case.
+  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
+    return false;
+
+  return matchVectorMaskSequence(
+      MI, Ops, MI.getOperand(0).getReg(),
+      std::make_pair(MI.getOperand(1).getReg(), MI.getOperand(2).getReg()),
+      MI.getOperand(3).getShuffleMask());
+}
 
 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                                const ArrayRef<Register> Ops) {

From 4f2fdcdf861303ada109a15bac3f603582b0721f Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Tue, 22 Oct 2024 23:52:09 +0200
Subject: [PATCH 03/11] Fixup! Rename function to a clearer name

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h   | 13 +++++++------
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp |  4 ++--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b503a53dd98ca..e6dd1da1fb1dc 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -250,15 +250,16 @@ class CombinerHelper {
   void applyCombineShuffleConcat(MachineInstr &MI, SmallVector<Register> &Ops);
 
   /// Check if an instruction whose operations can be represented
-  /// by a vector mask can be replaced by a concat_vectors.
+  /// by a mapping from one index to another in a vector can be replaced
+  /// by another operation.
   /// \p Ops will contain the operands to produce the flattened
-  /// concat_vectors.
+  /// operation.
   /// \p Mask is an array of numbers that represent the order in which
   /// the elements of \p SrcRegs will be put into \p DstReg.
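   /// For example, a Mask of (0, 1, 2, 3) over two two-element sources lays
   /// out all of SrcRegs.first followed by all of SrcRegs.second, i.e. a
   /// plain concatenation.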
-  bool matchVectorMaskSequence(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
-                               const Register DstReg,
-                               const std::pair<Register, Register> SrcRegs,
-                               ArrayRef<int> Mask);
+  bool analysePatternVectorMask(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
+                                const Register DstReg,
+                                const std::pair<Register, Register> SrcRegs,
+                                ArrayRef<int> Mask);
 
   /// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
   /// Returns true if MI changed.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d523af15486c9..ddeab453d40a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -473,7 +473,7 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   return false;
 }
 
-bool CombinerHelper::matchVectorMaskSequence(
+bool CombinerHelper::analysePatternVectorMask(
     MachineInstr &MI, SmallVectorImpl<Register> &Ops, const Register DstReg,
     const std::pair<Register, Register> SrcRegs, ArrayRef<int> Mask) {
   const LLT DstTy = MRI.getType(DstReg);
   const LLT SrcTy = MRI.getType(SrcRegs.first);
@@ -554,7 +554,7 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
     return false;
 
-  return matchVectorMaskSequence(
+  return analysePatternVectorMask(
       MI, Ops, MI.getOperand(0).getReg(),
       std::make_pair(MI.getOperand(1).getReg(), MI.getOperand(2).getReg()),
       MI.getOperand(3).getShuffleMask());

From 633b8fcf5249bc8e0a0c685a87056eb73ab4c70d Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Tue, 22 Oct 2024 15:42:44 +0200
Subject: [PATCH 04/11] Fixup! Add comment to function

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ddeab453d40a0..63e6695a54739 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -473,6 +473,11 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
   return false;
 }
 
+/// Helper function for instruction sequences that can be represented by a
+/// mapping from one index to another, e.g. shufflevectors and insert/extract.
+///
+/// Checks whether the input \p Mask matches a known sequence and is a
+/// valid target to be replaced. Currently supports only concatenation.
 bool CombinerHelper::analysePatternVectorMask(
@@ -523,6 +528,7 @@ bool CombinerHelper::analysePatternVectorMask(
   }
   return true;
 }
+
 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,

From f464323111f2be39e199256d383b8469b840bdc0 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Sat, 28 Sep 2024 02:05:49 +0200
Subject: [PATCH 05/11] [AArch64][GlobalISel] Combine G_EXTRACT_VECTOR_ELT and G_BUILD_VECTOR sequences into G_SHUFFLE_VECTOR

This combine tries to find all the build vectors whose source elements
all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
One example where this may happen is for AI chips where there are a lot
of matrix multiplications. Typically these vectors are dissected and
then rearranged to fit the transformation. E.g.
%donor1(<2 x s32>) = COPY $d0
%donor2(<2 x s32>) = COPY $d1
%ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
%ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
%ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
%ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
%vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
==>
replace with:
%vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   9 +
 .../include/llvm/Target/GlobalISel/Combine.td |  13 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  92 ++++++
 .../GlobalISel/combine-build-vector.mir       | 302 ++++++++++++++++++
 llvm/test/CodeGen/AArch64/arm64-neon-copy.ll  | 104 +-----
 llvm/test/CodeGen/AArch64/arm64-rev.ll        |   9 +-
 6 files changed, 437 insertions(+), 92 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e6dd1da1fb1dc..007fbef8eeb84 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -592,6 +592,15 @@ class CombinerHelper {
   bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
   void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg);
 
+  /// Combine a G_BUILD_VECTOR whose elements are all extracted from (at
+  /// most) two source vectors into a shuffle vector.
+  bool matchCombineExtractToShuffle(
+      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+      std::pair<Register, Register> &VectorRegisters);
+  void applyCombineExtractToShuffle(
+      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+      std::pair<Register, Register> &VectorRegisters);
+
   bool matchExtractAllEltsFromBuildVector(
       MachineInstr &MI,
       SmallVectorImpl<std::pair<Register, MachineInstr *>> &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f838c6e62a2ce..0525bfe1b0ddb 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,6 +905,16 @@ def extract_vec_elt_build_vec : GICombineRule<
          [{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
 
+def extract_vector_register_to_id_mapping_matchinfo :
+    GIDefMatchData<"SmallVector<std::pair<Register, int>>">;
+def vector_reg_pair_matchinfo :
+    GIDefMatchData<"std::pair<Register, Register>">;
+def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule<
+  (defs root:$root, extract_vector_register_to_id_mapping_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+         [{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]),
+  (apply [{ Helper.applyCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }])>;
+
 // Fold away full elt extracts from a build_vector.
 def extract_all_elts_from_build_vector_matchinfo :
   GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr *>>">;
@@ -916,7 +926,8 @@ def extract_all_elts_from_build_vector : GICombineRule<
 
 def extract_vec_elt_combines : GICombineGroup<[
   extract_vec_elt_build_vec,
-  extract_all_elts_from_build_vector]>;
+  extract_all_elts_from_build_vector,
+  extract_vector_element_build_vector_to_shuffle_vector]>;
 
 def funnel_shift_from_or_shift : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 63e6695a54739..437e59ed0a36e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 #include
+#include
 #include
 #include
 #include
@@ -4223,6 +4224,97 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
   replaceSingleDefInstWithReg(MI, Reg);
 }
 
+bool CombinerHelper::matchCombineExtractToShuffle(
+    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
+    std::pair<Register, Register> &VectorRegisters) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  const GBuildVector *Build = cast<GBuildVector>(&MI);
+  // This combine tries to find all the build vectors whose source elements
+  // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
+  // One example where this may happen is for AI chips where there are a lot
+  // of matrix multiplications. Typically these vectors are dissected and
+  // then rearranged to fit the transformation.
+  // E.g.
+  // %donor1(<2 x s32>) = COPY $d0
+  // %donor2(<2 x s32>) = COPY $d1
+  // %ext1 = G_EXTRACT_VECTOR_ELT %donor1, 0
+  // %ext2 = G_EXTRACT_VECTOR_ELT %donor1, 1
+  // %ext3 = G_EXTRACT_VECTOR_ELT %donor2, 0
+  // %ext4 = G_EXTRACT_VECTOR_ELT %donor2, 1
+  // %vector = G_BUILD_VECTOR %ext1, %ext2, %ext3, %ext4
+  // ==>
+  // replace with:
+  // %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
+  SmallSetVector<Register, 2> RegisterVector;
+  const unsigned NumElements = Build->getNumSources();
+  for (unsigned Index = 0; Index < NumElements; Index++) {
+    Register SrcReg = peekThroughBitcast(Build->getSourceReg(Index), MRI);
+    auto *ExtractInstr = getOpcodeDef<GExtractVectorElt>(SrcReg, MRI);
+    if (!ExtractInstr)
+      return false;
+
+    // For shufflemasks we need to know exactly what index to place each
+    // element, so if this build vector doesn't use exclusively constants
+    // then we can't replace it with a shufflevector.
+    auto Cst = getIConstantVRegVal(ExtractInstr->getIndexReg(), MRI);
+    if (!Cst)
+      return false;
+    unsigned Idx = Cst->getZExtValue();
+
+    Register VectorReg = ExtractInstr->getVectorReg();
+    RegisterVector.insert(VectorReg);
+    VecIndexPair.emplace_back(std::make_pair(VectorReg, Idx));
+  }
+
+  // Create a pair so that we don't need to look for them later. This code is
+  // incorrect if we have more than two vectors in the set. Since we can only
+  // put two vectors in a shuffle, we reject any solution with more than two
+  // anyway.
+  VectorRegisters =
+      std::make_pair(RegisterVector.front(), RegisterVector.back());
+
+  // We check that they're the same type before running. We can also grow the
+  // smaller one to the target size, but there isn't an elegant way to do that
+  // until we have a good lowering for G_EXTRACT_SUBVECTOR.
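+  // E.g. donors of <2 x s32> and <4 x s32> are rejected here, even though
+  // the smaller donor could in principle be widened to make the types
+  // match.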
+  if (MRI.getType(VectorRegisters.first) != MRI.getType(VectorRegisters.second))
+    return false;
+
+  return RegisterVector.size() <= 2;
+}
+
+void CombinerHelper::applyCombineExtractToShuffle(
+    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
+    std::pair<Register, Register> &VectorRegisters) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+
+  const Register FirstRegister = VectorRegisters.first;
+  const LLT FirstRegisterType = MRI.getType(FirstRegister);
+  const unsigned VectorSize = FirstRegisterType.getNumElements();
+  SmallVector<int> ShuffleMask;
+  for (auto &Pair : MatchInfo) {
+    const Register VectorReg = Pair.first;
+    int Idx = Pair.second;
+
+    if (VectorReg != VectorRegisters.first) {
+      Idx += VectorSize;
+    }
+    ShuffleMask.emplace_back(Idx);
+  }
+
+  // We could reuse the same vector register and shuffle them both together
+  // but it is nicer for later optimizations to explicitely make it undef.
+  const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
+  Register SecondRegister = VectorRegisters.second;
+  if (FirstRegister == SecondRegister) {
+    SecondRegister = MRI.createGenericVirtualRegister(FirstRegisterType);
+    Builder.buildUndef(SecondRegister);
+  }
+
+  Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
+                             SecondRegister, ShuffleMask);
+  MI.eraseFromParent();
+}
+
 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
     MachineInstr &MI,
     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 93f6051c3bd3b..3cc836b971829 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -151,3 +151,305 @@ body: |
     RET_ReallyLR implicit $x0
 
 ...
+---
+name: reverse_concat_buildvector_shuffle
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 7, 6, 5, 4)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64)
+    %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64)
+    %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64)
+    %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64)
+    %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_
+    RET_ReallyLR implicit %18
+...
+--- +name: reverse_interweave_buildvector_shuffle +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_interweave_buildvector_shuffle + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4, 7, 2, 5, 0) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ + RET_ReallyLR implicit %18 +... + +--- +name: reverse_interweave_same_size_as_dest_buildvector_shuffle +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_interweave_same_size_as_dest_buildvector_shuffle + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %14:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_ + RET_ReallyLR implicit %14 +... +--- +name: reverse_interweave_half_size_as_dest_buildvector_shuffle +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_interweave_half_size_as_dest_buildvector_shuffle + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<2 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_ + RET_ReallyLR implicit %12 +... 
+--- +name: reverse_concat_single_buildvector_shuffle +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_concat_single_buildvector_shuffle + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(3, 1, 0, 2) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(s64) = G_CONSTANT i64 0 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(s64) = G_CONSTANT i64 2 + %4:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_ + RET_ReallyLR implicit %18 +... +--- +name: reverse_concat_double_buildvector_shuffle +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_concat_double_buildvector_shuffle + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 6, 4, 5, 7, 1, 0, 2, 0, 5, 4, 1, 7) + ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<16 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) + %18:_(<16 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %12:_, %13:_, %11:_, %13:_, %16:_, %15:_, %12:_, %17:_ + RET_ReallyLR implicit %18 +... 
+--- +name: reverse_concat_buildvector_shuffle_three_sources +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1, $q2 + ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_three_sources + ; CHECK: liveins: $q0, $q1, $q2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC1]](s32), [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = COPY $q2 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %4:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %3:_(s64) + %18:_(<8 x s32>) = G_BUILD_VECTOR %12:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %12:_ + RET_ReallyLR implicit %18 +... 
+--- +name: reverse_concat_buildvector_shuffle_different_element_size +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $d0 + ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_element_size + ; CHECK: liveins: $q0, $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[DEF]](s32), [[DEF]](s32), [[EVEC1]](s32), [[EVEC2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<2 x s32>) = COPY $d0 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64) + %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %11:_, %12:_ + RET_ReallyLR implicit %18 +... 
+--- +name: reverse_concat_buildvector_shuffle_different_type +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_type + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<8 x s16>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %3:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64) + %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ + RET_ReallyLR implicit %18 +... +--- +name: reverse_concat_buildvector_shuffle_non_constant_id +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0, $q0, $q1 + ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_non_constant_id + ; CHECK: liveins: $d0, $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[COPY2]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[COPY2]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s32>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = COPY $d0 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_ + RET_ReallyLR implicit %12 +... 
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index c0d91c1e0c836..07cb5379a075c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1351,30 +1351,10 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { } define <8 x i8> @getl(<16 x i8> %x) #0 { -; CHECK-SD-LABEL: getl: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: getl: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov b2, v0.b[1] -; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: mov v1.b[1], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[3] -; CHECK-GI-NEXT: mov v1.b[2], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[4] -; CHECK-GI-NEXT: mov v1.b[3], v2.b[0] -; CHECK-GI-NEXT: mov b2, v0.b[5] -; CHECK-GI-NEXT: mov v1.b[4], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v1.b[5], v2.b[0] -; CHECK-GI-NEXT: mov v1.b[6], v3.b[0] -; CHECK-GI-NEXT: mov v1.b[7], v0.b[0] -; CHECK-GI-NEXT: fmov d0, d1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: getl: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: ret %vecext = extractelement <16 x i8> %x, i32 0 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 %vecext1 = extractelement <16 x i8> %x, i32 1 @@ -1923,49 +1903,12 @@ entry: } define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { -; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov b3, v0.b[1] -; CHECK-GI-NEXT: mov v2.b[0], v0.b[0] -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov b4, v0.b[2] -; CHECK-GI-NEXT: mov v2.b[1], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[3] -; CHECK-GI-NEXT: mov v2.b[2], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[4] -; CHECK-GI-NEXT: mov v2.b[3], v3.b[0] -; CHECK-GI-NEXT: mov b3, v0.b[5] -; CHECK-GI-NEXT: mov v2.b[4], v4.b[0] -; CHECK-GI-NEXT: mov b4, v0.b[6] -; CHECK-GI-NEXT: mov b0, v0.b[7] -; CHECK-GI-NEXT: mov v2.b[5], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[2] -; CHECK-GI-NEXT: mov v2.b[6], v4.b[0] -; CHECK-GI-NEXT: mov v2.b[7], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[1] -; CHECK-GI-NEXT: mov v2.b[8], v1.b[0] -; CHECK-GI-NEXT: mov v2.b[9], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[3] -; CHECK-GI-NEXT: mov v2.b[10], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[4] -; CHECK-GI-NEXT: mov v2.b[11], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[5] -; CHECK-GI-NEXT: mov v2.b[12], v3.b[0] -; CHECK-GI-NEXT: mov b3, v1.b[6] -; CHECK-GI-NEXT: mov v2.b[13], v0.b[0] -; CHECK-GI-NEXT: mov b0, v1.b[7] -; CHECK-GI-NEXT: mov v2.b[14], v3.b[0] -; CHECK-GI-NEXT: mov v2.b[15], v0.b[0] -; CHECK-GI-NEXT: mov v0.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret entry: %vecext = extractelement <8 x i8> %x, i32 0 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 @@ -2094,27 +2037,12 @@ entry: } define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x 
i16> %x, <4 x i16> %y) #0 {
-; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
-; CHECK-GI-NEXT:    mov v2.h[4], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[5], v1.h[1]
-; CHECK-GI-NEXT:    mov v2.h[6], v1.h[2]
-; CHECK-GI-NEXT:    mov v2.h[7], v1.h[3]
-; CHECK-GI-NEXT:    mov v0.16b, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ret
 entry:
   %vecext = extractelement <4 x i16> %x, i32 0
   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index f548a0e01feee..a728836fb0558 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -397,10 +397,13 @@ define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp
 ;
 ; CHECK-GI-LABEL: test_vrev64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI27_0
 ; CHECK-GI-NEXT:    ldr q0, [x0]
-; CHECK-GI-NEXT:    add x8, x1, #2
-; CHECK-GI-NEXT:    st1.h { v0 }[6], [x1]
-; CHECK-GI-NEXT:    st1.h { v0 }[5], [x8]
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    mov h1, v0[1]
+; CHECK-GI-NEXT:    str h0, [x1]
+; CHECK-GI-NEXT:    str h1, [x1, #2]
 ; CHECK-GI-NEXT:    ret
 entry:
   %tmp2 = load <8 x i16>, ptr %source, align 4

From 6cb6d388f56a8fcfc16f2b9e6c669df97419838e Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Tue, 1 Oct 2024 15:36:21 +0200
Subject: [PATCH 06/11] Fixup! Remove redundant G_BUILD_VECTOR assert

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 437e59ed0a36e..f2e5c54d18cbe 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4227,7 +4227,6 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
 bool CombinerHelper::matchCombineExtractToShuffle(
     MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
     std::pair<Register, Register> &VectorRegisters) {
-  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
   const GBuildVector *Build = cast<GBuildVector>(&MI);
   // This combine tries to find all the build vectors whose source elements
   // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.

From f93503cc035399eaa36fbe77a66f4ab0fa6014f2 Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Tue, 1 Oct 2024 21:44:59 +0200
Subject: [PATCH 07/11] Fixup! Add additional tests
---
 .../GlobalISel/combine-build-vector.mir | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 3cc836b971829..3abb334bb5b81 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -453,3 +453,77 @@ body: |
     %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_
     RET_ReallyLR implicit %12
 ...
+---
+name: reverse_concat_buildvector_shuffle_other_sources
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_other_sources
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C2]](s32), [[EVEC2]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %2:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CONSTANT i64 1
+    %4:_(s64) = G_CONSTANT i64 2
+    %5:_(s64) = G_CONSTANT i64 3
+    %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64)
+    %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64)
+    %12:_(s32) = G_CONSTANT i32 42
+    %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64)
+    %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_
+    RET_ReallyLR implicit %18
+...
+--- +name: reverse_concat_buildvector_shuffle_trunc +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0, $q1 + ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_trunc + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR_TRUNC]](<8 x s16>) + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(s64) = G_CONSTANT i64 0 + %3:_(s64) = G_CONSTANT i64 1 + %4:_(s64) = G_CONSTANT i64 2 + %5:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %18:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ + RET_ReallyLR implicit %18 +... From 35500c64e1c852a003c82492ee7457b44e812600 Mon Sep 17 00:00:00 2001 From: Valentijn van de Beek Date: Tue, 1 Oct 2024 21:46:25 +0200 Subject: [PATCH 08/11] Fixup! Create undef using buildUndef --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index f2e5c54d18cbe..d972852e0b776 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4301,12 +4301,11 @@ void CombinerHelper::applyCombineExtractToShuffle( } // We could reuse the same vector register and shuffle them both together - // but it is nicer for later optimizations to explicitely make it undef. + // but it is nicer for later optimizations to explicitly make it undef. 
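   // E.g. a build vector fed by a single donor becomes
   // G_SHUFFLE_VECTOR %donor, %undef, shufflemask(...) rather than passing
   // %donor as both shuffle operands.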
   const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
   Register SecondRegister = VectorRegisters.second;
   if (FirstRegister == SecondRegister) {
-    SecondRegister = MRI.createGenericVirtualRegister(FirstRegisterType);
-    Builder.buildUndef(SecondRegister);
+    SecondRegister = Builder.buildUndef(FirstRegisterType).getReg(0);
   }
 
   Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,

From 1ef577367df4a2e33d9c96c1d64b4b51589b539a Mon Sep 17 00:00:00 2001
From: Valentijn van de Beek
Date: Fri, 11 Oct 2024 13:48:01 +0200
Subject: [PATCH 09/11] fixup! Directly run the shuffle vector analysis code

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   9 +-
 .../include/llvm/Target/GlobalISel/Combine.td |  12 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  57 +--
 .../GlobalISel/combine-build-vector.mir       | 339 ++++++++----------
 llvm/test/CodeGen/AArch64/arm64-neon-copy.ll  |  28 +-
 llvm/test/CodeGen/AArch64/arm64-rev.ll        |   9 +-
 6 files changed, 198 insertions(+), 256 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 007fbef8eeb84..4323f41182477 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -594,12 +594,9 @@ class CombinerHelper {
 
   /// Combine a G_BUILD_VECTOR whose elements are all extracted from (at
   /// most) two source vectors into a shuffle vector.
-  bool matchCombineExtractToShuffle(
-      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
-      std::pair<Register, Register> &VectorRegisters);
-  void applyCombineExtractToShuffle(
-      MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
-      std::pair<Register, Register> &VectorRegisters);
+  bool
+  matchCombineExtractToShuffle(MachineInstr &MI, SmallVectorImpl<Register> &Ops,
+                               std::pair<Register, Register> &VectorRegisters);
 
   bool matchExtractAllEltsFromBuildVector(
       MachineInstr &MI,
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 0525bfe1b0ddb..256dcc2815546 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -905,15 +905,15 @@ def extract_vec_elt_build_vec : GICombineRule<
          [{ return Helper.matchExtractVecEltBuildVec(*${root}, ${matchinfo}); }]),
   (apply [{ Helper.applyExtractVecEltBuildVec(*${root}, ${matchinfo}); }])>;
 
-def extract_vector_register_to_id_mapping_matchinfo :
-    GIDefMatchData<"SmallVector<std::pair<Register, int>>">;
+def extract_vector_register_sources_matchinfo :
+    GIDefMatchData<"SmallVector<Register>">;
 def vector_reg_pair_matchinfo :
     GIDefMatchData<"std::pair<Register, Register>">;
 def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule<
-  (defs root:$root, extract_vector_register_to_id_mapping_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
+  (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair),
   (match (wip_match_opcode G_BUILD_VECTOR):$root,
          [{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]),
-  (apply [{ Helper.applyCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }])>;
+  (apply [{ Helper.applyCombineShuffleVector(*${root}, ${matchinfo}); }])>;
 
 // Fold away full elt extracts from a build_vector.
 def extract_all_elts_from_build_vector_matchinfo :
   GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr *>>">;
@@ -926,8 +926,8 @@ def extract_all_elts_from_build_vector : GICombineRule<
 
 def extract_vec_elt_combines : GICombineGroup<[
   extract_vec_elt_build_vec,
-  extract_all_elts_from_build_vector,
-  extract_vector_element_build_vector_to_shuffle_vector]>;
+  extract_vector_element_build_vector_to_shuffle_vector,
+  extract_all_elts_from_build_vector]>;
 
 def funnel_shift_from_or_shift : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$info),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d972852e0b776..ae5c0da461661 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4225,9 +4225,12 @@ void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
 }
 
 bool CombinerHelper::matchCombineExtractToShuffle(
-    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &VecIndexPair,
+    MachineInstr &MI, SmallVectorImpl<Register> &Ops,
     std::pair<Register, Register> &VectorRegisters) {
   const GBuildVector *Build = cast<GBuildVector>(&MI);
+  const unsigned SrcNumElts =
+      MRI.getType(MI.getOperand(0).getReg()).getNumElements();
+
   // This combine tries to find all the build vectors whose source elements
   // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors.
   // One example where this may happen is for AI chips where there are a lot
   // of matrix multiplications. Typically these vectors are dissected and
   // then rearranged to fit the transformation.
@@ -4245,6 +4248,7 @@ bool CombinerHelper::matchCombineExtractToShuffle(
   // replace with:
   // %vector = G_SHUFFLE_VECTOR %donor1, %donor2, shufflemask(0, 1, 2, 3)
   SmallSetVector<Register, 2> RegisterVector;
+  SmallVector<int> VectorMask;
   const unsigned NumElements = Build->getNumSources();
   for (unsigned Index = 0; Index < NumElements; Index++) {
     Register SrcReg = peekThroughBitcast(Build->getSourceReg(Index), MRI);
@@ -4252,17 +4256,21 @@ bool CombinerHelper::matchCombineExtractToShuffle(
     if (!ExtractInstr)
       return false;
 
+    RegisterVector.insert(ExtractInstr->getVectorReg());
+
     // For shufflemasks we need to know exactly what index to place each
     // element, so if this build vector doesn't use exclusively constants
     // then we can't replace it with a shufflevector.
     auto Cst = getIConstantVRegVal(ExtractInstr->getIndexReg(), MRI);
     if (!Cst)
       return false;
+    unsigned Idx = Cst->getZExtValue();
+    if (ExtractInstr->getVectorReg() != RegisterVector.front()) {
+      Idx += SrcNumElts;
+    }
 
-    Register VectorReg = ExtractInstr->getVectorReg();
-    RegisterVector.insert(VectorReg);
-    VecIndexPair.emplace_back(std::make_pair(VectorReg, Idx));
+    VectorMask.emplace_back(Idx);
   }
 
   // Create a pair so that we don't need to look for them later. This code is
@@ -4273,44 +4281,17 @@ bool CombinerHelper::matchCombineExtractToShuffle(
   // We check that they're the same type before running. We can also grow the
   // smaller one to the target size, but there isn't an elegant way to do that
   // until we have a good lowering for G_EXTRACT_SUBVECTOR.
+  // Apparently even if they are the same, they don't necessarily have the
+  // same type?
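+  // Note that a shufflemask can only address two source vectors, so a build
+  // vector that draws from three or more donors (rejected below) can never
+  // become a single G_SHUFFLE_VECTOR.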
+  if (MRI.getType(VectorRegisters.first) != MRI.getType(VectorRegisters.second))
     return false;
 
-  return RegisterVector.size() <= 2;
-}
-
-void CombinerHelper::applyCombineExtractToShuffle(
-    MachineInstr &MI, SmallVectorImpl<std::pair<Register, int>> &MatchInfo,
-    std::pair<Register, Register> &VectorRegisters) {
-  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
-
-  const Register FirstRegister = VectorRegisters.first;
-  const LLT FirstRegisterType = MRI.getType(FirstRegister);
-  const unsigned VectorSize = FirstRegisterType.getNumElements();
-  SmallVector<int> ShuffleMask;
-  for (auto &Pair : MatchInfo) {
-    const Register VectorReg = Pair.first;
-    int Idx = Pair.second;
-
-    if (VectorReg != VectorRegisters.first) {
-      Idx += VectorSize;
-    }
-    ShuffleMask.emplace_back(Idx);
-  }
-
-  // We could reuse the same vector register and shuffle them both together
-  // but it is nicer for later optimizations to explicitly make it undef.
-  const GBuildVector *BuildVector = cast<GBuildVector>(&MI);
-  Register SecondRegister = VectorRegisters.second;
-  if (FirstRegister == SecondRegister) {
-    SecondRegister = Builder.buildUndef(FirstRegisterType).getReg(0);
-  }
+  if (RegisterVector.size() > 2)
+    return false;
 
-  Builder.buildShuffleVector(BuildVector->getOperand(0), FirstRegister,
-                             SecondRegister, ShuffleMask);
-  MI.eraseFromParent();
+  return analysePatternVectorMask(MI, Ops, MI.getOperand(0).getReg(),
+                                  VectorRegisters, VectorMask);
 }
 
 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
     MachineInstr &MI,
     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
index 3abb334bb5b81..a231bc8f29ef6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir
@@ -152,248 +152,196 @@ body: |
 
 ...
--- -name: reverse_concat_buildvector_shuffle +name: concat_buildvector_shuffle tracksRegLiveness: true body: | bb.1: liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle + ; CHECK-LABEL: name: concat_buildvector_shuffle ; CHECK: liveins: $q0, $q1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 7, 6, 5, 4) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(s64) = G_CONSTANT i64 0 %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) - %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ RET_ReallyLR implicit %18 ... 
--- -name: reverse_interweave_buildvector_shuffle +name: interweave_buildvector_shuffle tracksRegLiveness: true body: | bb.1: liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_interweave_buildvector_shuffle + ; CHECK-LABEL: name: interweave_buildvector_shuffle ; CHECK: liveins: $q0, $q1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4, 7, 2, 5, 0) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<8 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<8 x s32>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(s64) = G_CONSTANT i64 0 %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) - %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %15:_, %12:_, %17:_, %14:_, %11:_, %16:_, %13:_ RET_ReallyLR implicit %18 ... 
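The interweave_buildvector_shuffle test above folds for the same reason as the plain concat case: read in build-vector operand order, its (source, lane) pairs produce the identity mask over both sources. A small standalone sketch (plain data, not MIR) that recovers the effective mask:

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const int SrcNumElts = 4;
  // Operands of %18 in build-vector order: {source (0 for %0, 1 for %1), lane}.
  std::vector<std::pair<int, int>> Ops = {
      {0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3}};
  // Lanes from the second source are offset by the source width.
  for (auto [Src, Lane] : Ops)
    std::printf("%d ", Src == 0 ? Lane : Lane + SrcNumElts);
  std::printf("\n"); // prints: 0 1 2 3 4 5 6 7, i.e. concat(%0, %1)
}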
--- -name: reverse_interweave_same_size_as_dest_buildvector_shuffle +name: interweave_same_size_as_dest_buildvector_shuffle tracksRegLiveness: true body: | bb.1: - liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_interweave_same_size_as_dest_buildvector_shuffle - ; CHECK: liveins: $q0, $q1 + liveins: $q0 + ; CHECK-LABEL: name: interweave_same_size_as_dest_buildvector_shuffle + ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 6, 1, 4) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit [[COPY]](<4 x s32>) %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64) = G_CONSTANT i64 1 - %4:_(s64) = G_CONSTANT i64 2 - %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %1:_(s64) = G_CONSTANT i64 0 + %2:_(s64) = G_CONSTANT i64 1 + %3:_(s64) = G_CONSTANT i64 2 + %4:_(s64) = G_CONSTANT i64 3 + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) %14:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_ RET_ReallyLR implicit %14 ... --- -name: reverse_interweave_half_size_as_dest_buildvector_shuffle +name: half_size_as_dest_buildvector_shuffle tracksRegLiveness: true body: | bb.1: - liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_interweave_half_size_as_dest_buildvector_shuffle - ; CHECK: liveins: $q0, $q1 + liveins: $q0 + ; CHECK-LABEL: name: half_size_as_dest_buildvector_shuffle + ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<2 x s32>) - %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) - %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_ - RET_ReallyLR implicit %12 -... 
---- -name: reverse_concat_single_buildvector_shuffle -tracksRegLiveness: true -body: | - bb.1: - liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_single_buildvector_shuffle - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[DEF]], shufflemask(3, 1, 0, 2) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $q0 %1:_(s64) = G_CONSTANT i64 0 %2:_(s64) = G_CONSTANT i64 1 - %3:_(s64) = G_CONSTANT i64 2 - %4:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64) %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %1:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_ - RET_ReallyLR implicit %18 -... ---- -name: reverse_concat_double_buildvector_shuffle -tracksRegLiveness: true -body: | - bb.1: - liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_double_buildvector_shuffle - ; CHECK: liveins: $q0, $q1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 2, 1, 0, 6, 4, 5, 7, 1, 0, 2, 0, 5, 4, 1, 7) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<16 x s32>) - %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 - %2:_(s64) = G_CONSTANT i64 0 - %3:_(s64) = G_CONSTANT i64 1 - %4:_(s64) = G_CONSTANT i64 2 - %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) - %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) - %18:_(<16 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %12:_, %13:_, %11:_, %13:_, %16:_, %15:_, %12:_, %17:_ - RET_ReallyLR implicit %18 + %12:_(<2 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_ + RET_ReallyLR implicit %12 ... 
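Why the same-size case above degenerates to a copy while the half-size case keeps its extracts: a destination of D elements can only be rebuilt from whole S-element sources when S divides D. A simplified standalone sketch of that arithmetic (the function name is illustrative; the real matcher checks more than this):

#include <cstdio>

bool concatFeasible(unsigned DstNumElts, unsigned SrcNumElts) {
  // Each concatenated chunk consumes one whole source vector.
  return DstNumElts % SrcNumElts == 0;
}

int main() {
  std::printf("8 from 4: %d\n", concatFeasible(8, 4)); // 1: concat of two
  std::printf("4 from 4: %d\n", concatFeasible(4, 4)); // 1: a single copy
  std::printf("2 from 4: %d\n", concatFeasible(2, 4)); // 0: extracts remain
}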
--- -name: reverse_concat_buildvector_shuffle_three_sources +name: concat_buildvector_shuffle_three_sources tracksRegLiveness: true body: | bb.1: - liveins: $q0, $q1, $q2 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_three_sources - ; CHECK: liveins: $q0, $q1, $q2 + liveins: $d0, $d1, $d2, $d3 + ; CHECK-LABEL: name: concat_buildvector_shuffle_three_sources + ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC1]](s32), [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC1]](s32) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[DEF]](s32), [[EVEC1]](s32), [[DEF]](s32), [[EVEC2]](s32), [[DEF]](s32), [[EVEC3]](s32), [[DEF]](s32) ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) - %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 - %2:_(<4 x s32>) = COPY $q2 - %3:_(s64) = G_CONSTANT i64 1 - %4:_(s64) = G_CONSTANT i64 2 - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %4:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<4 x s32>), %3:_(s64) - %18:_(<8 x s32>) = G_BUILD_VECTOR %12:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %12:_ - RET_ReallyLR implicit %18 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = COPY $d2 + %3:_(<2 x s32>) = COPY $d3 + %4:_(s64) = G_CONSTANT i64 1 + %5:_(s64) = G_CONSTANT i64 2 + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<2 x s32>), %4:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<2 x s32>), %5:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<2 x s32>), 
%4:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<2 x s32>), %5:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s32>), %4:_(s64) + %18:_(s32) = G_EXTRACT_VECTOR_ELT %3:_(<2 x s32>), %5:_(s64) + %19:_(<8 x s32>) = G_BUILD_VECTOR %11:_(s32), %12:_, %13:_, %14:_, %15:_, %16:_, %17:_, %18:_ + RET_ReallyLR implicit %19 ... --- -name: reverse_concat_buildvector_shuffle_different_element_size +name: concat_buildvector_shuffle_different_element_size tracksRegLiveness: true body: | bb.1: liveins: $q0, $d0 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_element_size + ; CHECK-LABEL: name: concat_buildvector_shuffle_different_element_size ; CHECK: liveins: $q0, $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[DEF]](s32), [[DEF]](s32), [[EVEC1]](s32), [[EVEC2]](s32) - ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<6 x s32>) %0:_(<4 x s32>) = COPY $q0 %1:_(<2 x s32>) = COPY $d0 %2:_(s64) = G_CONSTANT i64 0 %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %5:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %4:_(s64) - %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %11:_, %12:_ + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<2 x s32>), %2:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT 
%1:_(<2 x s32>), %3:_(s64) + %18:_(<6 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_ RET_ReallyLR implicit %18 ... --- -name: reverse_concat_buildvector_shuffle_different_type +name: concat_buildvector_shuffle_different_type tracksRegLiveness: true body: | bb.1: liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_different_type + ; CHECK-LABEL: name: concat_buildvector_shuffle_different_type ; CHECK: liveins: $q0, $q1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 @@ -402,14 +350,14 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64) - ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64) - ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C3]](s64) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32) ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<8 x s32>) %0:_(<4 x s32>) = COPY $q0 @@ -418,24 +366,24 @@ body: | %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %3:_(s64) - %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64) + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %2:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT 
%1:_(<8 x s16>), %3:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %4:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s16>), %5:_(s64) %18:_(<8 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ RET_ReallyLR implicit %18 ... --- -name: reverse_concat_buildvector_shuffle_non_constant_id +name: concat_buildvector_shuffle_non_constant_id tracksRegLiveness: true body: | bb.1: liveins: $d0, $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_non_constant_id + ; CHECK-LABEL: name: concat_buildvector_shuffle_non_constant_id ; CHECK: liveins: $d0, $q0, $q1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 @@ -454,26 +402,25 @@ body: | RET_ReallyLR implicit %12 ... --- -name: reverse_concat_buildvector_shuffle_other_sources +name: concat_buildvector_shuffle_other_sources tracksRegLiveness: true body: | bb.1: - liveins: $q0, $q1 - ; CHECK-LABEL: name: reverse_concat_buildvector_shuffle_other_sources - ; CHECK: liveins: $q0, $q1 + liveins: $q0 + ; CHECK-LABEL: name: concat_buildvector_shuffle_other_sources + ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C2]](s32), [[EVEC2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[C3]](s32), [[EVEC2]](s32) ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $q0 - %1:_(<4 x s32>) = COPY $q1 %2:_(s64) = G_CONSTANT i64 0 %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 @@ -481,7 +428,7 @@ body: | %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) %12:_(s32) = G_CONSTANT i32 42 - %13:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) %18:_(<4 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_, %12:_, %13:_ RET_ReallyLR implicit %18 ... 
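The _other_sources test above pins down an operand-level bail-out: one operand of the G_BUILD_VECTOR is the constant 42 rather than an extract, so no shufflemask can describe the result. A simplified standalone model of the two bail-outs these tests exercise (types and names here are illustrative, not the patch's):

#include <cstdio>
#include <optional>
#include <set>
#include <vector>

struct Operand {
  std::optional<int> SourceVec; // nullopt: not a G_EXTRACT_VECTOR_ELT
  int Lane = 0;
};

bool canTurnIntoShuffle(const std::vector<Operand> &Ops) {
  std::set<int> Sources;
  for (const Operand &Op : Ops) {
    if (!Op.SourceVec)
      return false; // e.g. the constant 42 in _other_sources
    Sources.insert(*Op.SourceVec);
  }
  return Sources.size() <= 2; // a shufflemask addresses at most two vectors
}

int main() {
  // v0[0], v0[1], constant, v0[3]: must not combine.
  std::vector<Operand> WithConstant = {{0, 0}, {0, 1}, {std::nullopt, 0}, {0, 3}};
  std::printf("%d\n", canTurnIntoShuffle(WithConstant)); // prints: 0
}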
@@ -500,14 +447,14 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) - ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) - ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64) - ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64) - ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) - ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C3]](s64) ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32), [[EVEC4]](s32), [[EVEC5]](s32), [[EVEC6]](s32), [[EVEC7]](s32) ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR_TRUNC]](<8 x s16>) %0:_(<4 x s32>) = COPY $q0 @@ -516,14 +463,14 @@ body: | %3:_(s64) = G_CONSTANT i64 1 %4:_(s64) = G_CONSTANT i64 2 %5:_(s64) = G_CONSTANT i64 3 - %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) - %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) - %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) - %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) - %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) - %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) - %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) - %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %10:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %2:_(s64) + %11:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %3:_(s64) + %12:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %4:_(s64) + %13:_(s32) = G_EXTRACT_VECTOR_ELT %0:_(<4 x s32>), %5:_(s64) + %14:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %2:_(s64) + %15:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %3:_(s64) + %16:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %4:_(s64) + %17:_(s32) = G_EXTRACT_VECTOR_ELT %1:_(<4 x s32>), %5:_(s64) %18:_(<8 x s16>) = G_BUILD_VECTOR_TRUNC %10:_(s32), %11:_, %12:_, %13:_, %14:_, %15:_, %16:_, %17:_ RET_ReallyLR implicit %18 ... 
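The final patch below replaces the wip_match_opcode match with explicit MIR patterns. Those patterns have a fixed operand count, while G_BUILD_VECTOR is variadic, hence one near-identical rule per supported width from 2 through 128. The repetition is mechanical; a hypothetical generator like the following could emit the same rules (names copied from the patch; note the 2-wide rule in the patch shares a single $idx between its two extracts, unlike the wider rules):

#include <cstdio>

// Emits one fixed-arity GICombineRule in the style of PATCH 11 below.
static void emitRule(int N) {
  std::printf("def extract_vector_element_build_vector_to_shuffle_vector%d"
              " : GICombineRule<\n", N);
  std::printf("  (defs root:$root, "
              "extract_vector_register_sources_matchinfo:$matchinfo, "
              "vector_reg_pair_matchinfo:$regpair),\n  (match\n");
  for (int I = N; I >= 1; --I) // extracts are listed highest-first
    std::printf("    (G_EXTRACT_VECTOR_ELT $el%d, $vec%d, $idx%d),\n", I, I, I);
  std::printf("    (G_BUILD_VECTOR $root");
  for (int I = 1; I <= N; ++I)
    std::printf(", $el%d", I);
  std::printf("):$build,\n");
  std::printf("    [{ return Helper.matchCombineExtractToShuffle(*${build}, "
              "${matchinfo}, ${regpair}); }]),\n");
  std::printf("  (apply [{ Helper.applyCombineShuffleVector(*${build}, "
              "${matchinfo}); }])>;\n\n");
}

int main() {
  for (int N : {2, 4, 8, 16, 32, 64, 128})
    emitRule(N);
}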
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 07cb5379a075c..98bd794b2d97e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1351,10 +1351,30 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { } define <8 x i8> @getl(<16 x i8> %x) #0 { -; CHECK-LABEL: getl: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: getl: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: getl: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov v1.b[0], v0.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[3] +; CHECK-GI-NEXT: mov v1.b[2], v3.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[4] +; CHECK-GI-NEXT: mov v1.b[3], v2.b[0] +; CHECK-GI-NEXT: mov b2, v0.b[5] +; CHECK-GI-NEXT: mov v1.b[4], v3.b[0] +; CHECK-GI-NEXT: mov b3, v0.b[6] +; CHECK-GI-NEXT: mov b0, v0.b[7] +; CHECK-GI-NEXT: mov v1.b[5], v2.b[0] +; CHECK-GI-NEXT: mov v1.b[6], v3.b[0] +; CHECK-GI-NEXT: mov v1.b[7], v0.b[0] +; CHECK-GI-NEXT: fmov d0, d1 +; CHECK-GI-NEXT: ret %vecext = extractelement <16 x i8> %x, i32 0 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 %vecext1 = extractelement <16 x i8> %x, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll index a728836fb0558..f548a0e01feee 100644 --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -397,13 +397,10 @@ define void @test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp ; ; CHECK-GI-LABEL: test_vrev64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI27_0 ; CHECK-GI-NEXT: ldr q0, [x0] -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI27_0] -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: mov h1, v0[1] -; CHECK-GI-NEXT: str h0, [x1] -; CHECK-GI-NEXT: str h1, [x1, #2] +; CHECK-GI-NEXT: add x8, x1, #2 +; CHECK-GI-NEXT: st1.h { v0 }[6], [x1] +; CHECK-GI-NEXT: st1.h { v0 }[5], [x8] ; CHECK-GI-NEXT: ret entry: %tmp2 = load <8 x i16>, ptr %source, align 4 From 3bdebfa0da83638a2f6f0f56a7b03469cf5df83b Mon Sep 17 00:00:00 2001 From: Valentijn van de Beek Date: Tue, 22 Oct 2024 14:35:13 +0200 Subject: [PATCH 10/11] Nit! remove use case from comment --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index ae5c0da461661..940106f02ce9e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4233,9 +4233,6 @@ bool CombinerHelper::matchCombineExtractToShuffle( // This combine tries to find all the build vectors whose source elements // all originate from a G_EXTRACT_VECTOR_ELT from one or two donor vectors. - // One example where this may happen is for AI chips where there are a lot - // of matrix multiplications. Typically there vectors are disected and then - // rearranged into the right transformation. // E.g. // %donor1(<2 x s32>) = COPY $d0 // %donor2(<2 x s32>) = COPY $d1 From baa7cd8f0a9d78889e9b7ea1011139a5d323e65c Mon Sep 17 00:00:00 2001 From: Valentijn van de Beek Date: Tue, 22 Oct 2024 23:35:48 +0200 Subject: [PATCH 11/11] Fixup! 
Use the gallery MIR pattern to be more precise --- .../include/llvm/Target/GlobalISel/Combine.td | 312 +++++++++++++++++- 1 file changed, 306 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 256dcc2815546..e5ef655c4f1ca 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -909,11 +909,305 @@ def extract_vector_register_sources_matchinfo : GIDefMatchData<"SmallVector">; def vector_reg_pair_matchinfo : GIDefMatchData<"std::pair">; -def extract_vector_element_build_vector_to_shuffle_vector : GICombineRule< +def extract_vector_element_build_vector_to_shuffle_vector2 : GICombineRule< (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), - (match (wip_match_opcode G_BUILD_VECTOR):$root, - [{ return Helper.matchCombineExtractToShuffle(*${root}, ${matchinfo}, ${regpair}); }]), - (apply [{ Helper.applyCombineShuffleVector(*${root}, ${matchinfo}); }])>; + (match (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx), + (G_BUILD_VECTOR $root, $el1, $el2):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_to_shuffle_vector4 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match (G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_to_shuffle_vector8 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match + (G_EXTRACT_VECTOR_ELT $el8, $vec8, $idx8), + (G_EXTRACT_VECTOR_ELT $el7, $vec7, $idx7), + (G_EXTRACT_VECTOR_ELT $el6, $vec6, $idx6), + (G_EXTRACT_VECTOR_ELT $el5, $vec5, $idx5), + (G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4, $el5, $el6, $el7, $el8):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_to_shuffle_vector16 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match + (G_EXTRACT_VECTOR_ELT $el16, $vec16, $idx16), + (G_EXTRACT_VECTOR_ELT $el15, $vec15, $idx15), + (G_EXTRACT_VECTOR_ELT $el14, $vec14, $idx14), + (G_EXTRACT_VECTOR_ELT $el13, $vec13, $idx13), + (G_EXTRACT_VECTOR_ELT $el12, $vec12, $idx12), + (G_EXTRACT_VECTOR_ELT $el11, $vec11, $idx11), + (G_EXTRACT_VECTOR_ELT $el10, $vec10, $idx10), + (G_EXTRACT_VECTOR_ELT $el9, $vec9, $idx9), + (G_EXTRACT_VECTOR_ELT $el8, $vec8, $idx8), + (G_EXTRACT_VECTOR_ELT $el7, $vec7, $idx7), + (G_EXTRACT_VECTOR_ELT $el6, $vec6, $idx6), + (G_EXTRACT_VECTOR_ELT $el5, $vec5, $idx5), + 
(G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4, $el5, $el6, $el7, $el8, $el9, $el10, $el11, $el12, $el13, $el14, $el15, $el16):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_to_shuffle_vector32 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match + (G_EXTRACT_VECTOR_ELT $el32, $vec32, $idx32), + (G_EXTRACT_VECTOR_ELT $el31, $vec31, $idx31), + (G_EXTRACT_VECTOR_ELT $el30, $vec30, $idx30), + (G_EXTRACT_VECTOR_ELT $el29, $vec29, $idx29), + (G_EXTRACT_VECTOR_ELT $el28, $vec28, $idx28), + (G_EXTRACT_VECTOR_ELT $el27, $vec27, $idx27), + (G_EXTRACT_VECTOR_ELT $el26, $vec26, $idx26), + (G_EXTRACT_VECTOR_ELT $el25, $vec25, $idx25), + (G_EXTRACT_VECTOR_ELT $el24, $vec24, $idx24), + (G_EXTRACT_VECTOR_ELT $el23, $vec23, $idx23), + (G_EXTRACT_VECTOR_ELT $el22, $vec22, $idx22), + (G_EXTRACT_VECTOR_ELT $el21, $vec21, $idx21), + (G_EXTRACT_VECTOR_ELT $el20, $vec20, $idx20), + (G_EXTRACT_VECTOR_ELT $el19, $vec19, $idx19), + (G_EXTRACT_VECTOR_ELT $el18, $vec18, $idx18), + (G_EXTRACT_VECTOR_ELT $el17, $vec17, $idx17), + (G_EXTRACT_VECTOR_ELT $el16, $vec16, $idx16), + (G_EXTRACT_VECTOR_ELT $el15, $vec15, $idx15), + (G_EXTRACT_VECTOR_ELT $el14, $vec14, $idx14), + (G_EXTRACT_VECTOR_ELT $el13, $vec13, $idx13), + (G_EXTRACT_VECTOR_ELT $el12, $vec12, $idx12), + (G_EXTRACT_VECTOR_ELT $el11, $vec11, $idx11), + (G_EXTRACT_VECTOR_ELT $el10, $vec10, $idx10), + (G_EXTRACT_VECTOR_ELT $el9, $vec9, $idx9), + (G_EXTRACT_VECTOR_ELT $el8, $vec8, $idx8), + (G_EXTRACT_VECTOR_ELT $el7, $vec7, $idx7), + (G_EXTRACT_VECTOR_ELT $el6, $vec6, $idx6), + (G_EXTRACT_VECTOR_ELT $el5, $vec5, $idx5), + (G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4, $el5, $el6, $el7, $el8, $el9, $el10, $el11, $el12, $el13, $el14, $el15, $el16, $el17, $el18, $el19, $el20, $el21, $el22, $el23, $el24, $el25, $el26, $el27, $el28, $el29, $el30, $el31, $el32):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_to_shuffle_vector64 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match (G_EXTRACT_VECTOR_ELT $el64, $vec64, $idx64), + (G_EXTRACT_VECTOR_ELT $el63, $vec63, $idx63), + (G_EXTRACT_VECTOR_ELT $el62, $vec62, $idx62), + (G_EXTRACT_VECTOR_ELT $el61, $vec61, $idx61), + (G_EXTRACT_VECTOR_ELT $el60, $vec60, $idx60), + (G_EXTRACT_VECTOR_ELT $el59, $vec59, $idx59), + (G_EXTRACT_VECTOR_ELT $el58, $vec58, $idx58), + (G_EXTRACT_VECTOR_ELT $el57, $vec57, $idx57), + (G_EXTRACT_VECTOR_ELT $el56, $vec56, $idx56), + (G_EXTRACT_VECTOR_ELT $el55, $vec55, $idx55), + (G_EXTRACT_VECTOR_ELT $el54, $vec54, $idx54), + (G_EXTRACT_VECTOR_ELT $el53, $vec53, $idx53), + (G_EXTRACT_VECTOR_ELT $el52, $vec52, $idx52), + (G_EXTRACT_VECTOR_ELT $el51, $vec51, $idx51), + (G_EXTRACT_VECTOR_ELT $el50, $vec50, $idx50), + (G_EXTRACT_VECTOR_ELT 
$el49, $vec49, $idx49), + (G_EXTRACT_VECTOR_ELT $el48, $vec48, $idx48), + (G_EXTRACT_VECTOR_ELT $el47, $vec47, $idx47), + (G_EXTRACT_VECTOR_ELT $el46, $vec46, $idx46), + (G_EXTRACT_VECTOR_ELT $el45, $vec45, $idx45), + (G_EXTRACT_VECTOR_ELT $el44, $vec44, $idx44), + (G_EXTRACT_VECTOR_ELT $el43, $vec43, $idx43), + (G_EXTRACT_VECTOR_ELT $el42, $vec42, $idx42), + (G_EXTRACT_VECTOR_ELT $el41, $vec41, $idx41), + (G_EXTRACT_VECTOR_ELT $el40, $vec40, $idx40), + (G_EXTRACT_VECTOR_ELT $el39, $vec39, $idx39), + (G_EXTRACT_VECTOR_ELT $el38, $vec38, $idx38), + (G_EXTRACT_VECTOR_ELT $el37, $vec37, $idx37), + (G_EXTRACT_VECTOR_ELT $el36, $vec36, $idx36), + (G_EXTRACT_VECTOR_ELT $el35, $vec35, $idx35), + (G_EXTRACT_VECTOR_ELT $el34, $vec34, $idx34), + (G_EXTRACT_VECTOR_ELT $el33, $vec33, $idx33), + (G_EXTRACT_VECTOR_ELT $el32, $vec32, $idx32), + (G_EXTRACT_VECTOR_ELT $el31, $vec31, $idx31), + (G_EXTRACT_VECTOR_ELT $el30, $vec30, $idx30), + (G_EXTRACT_VECTOR_ELT $el29, $vec29, $idx29), + (G_EXTRACT_VECTOR_ELT $el28, $vec28, $idx28), + (G_EXTRACT_VECTOR_ELT $el27, $vec27, $idx27), + (G_EXTRACT_VECTOR_ELT $el26, $vec26, $idx26), + (G_EXTRACT_VECTOR_ELT $el25, $vec25, $idx25), + (G_EXTRACT_VECTOR_ELT $el24, $vec24, $idx24), + (G_EXTRACT_VECTOR_ELT $el23, $vec23, $idx23), + (G_EXTRACT_VECTOR_ELT $el22, $vec22, $idx22), + (G_EXTRACT_VECTOR_ELT $el21, $vec21, $idx21), + (G_EXTRACT_VECTOR_ELT $el20, $vec20, $idx20), + (G_EXTRACT_VECTOR_ELT $el19, $vec19, $idx19), + (G_EXTRACT_VECTOR_ELT $el18, $vec18, $idx18), + (G_EXTRACT_VECTOR_ELT $el17, $vec17, $idx17), + (G_EXTRACT_VECTOR_ELT $el16, $vec16, $idx16), + (G_EXTRACT_VECTOR_ELT $el15, $vec15, $idx15), + (G_EXTRACT_VECTOR_ELT $el14, $vec14, $idx14), + (G_EXTRACT_VECTOR_ELT $el13, $vec13, $idx13), + (G_EXTRACT_VECTOR_ELT $el12, $vec12, $idx12), + (G_EXTRACT_VECTOR_ELT $el11, $vec11, $idx11), + (G_EXTRACT_VECTOR_ELT $el10, $vec10, $idx10), + (G_EXTRACT_VECTOR_ELT $el9, $vec9, $idx9), + (G_EXTRACT_VECTOR_ELT $el8, $vec8, $idx8), + (G_EXTRACT_VECTOR_ELT $el7, $vec7, $idx7), + (G_EXTRACT_VECTOR_ELT $el6, $vec6, $idx6), + (G_EXTRACT_VECTOR_ELT $el5, $vec5, $idx5), + (G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4, $el5, $el6, $el7, $el8, $el9, $el10, $el11, $el12, $el13, $el14, $el15, $el16, $el17, $el18, $el19, $el20, $el21, $el22, $el23, $el24, $el25, $el26, $el27, $el28, $el29, $el30, $el31, $el32, $el33, $el34, $el35, $el36, $el37, $el38, $el39, $el40, $el41, $el42, $el43, $el44, $el45, $el46, $el47, $el48, $el49, $el50, $el51, $el52, $el53, $el54, $el55, $el56, $el57, $el58, $el59, $el60, $el61, $el62, $el63, $el64):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; + + +def extract_vector_element_build_vector_to_shuffle_vector128 : GICombineRule< + (defs root:$root, extract_vector_register_sources_matchinfo:$matchinfo, vector_reg_pair_matchinfo:$regpair), + (match (G_EXTRACT_VECTOR_ELT $el128, $vec128, $idx128), + (G_EXTRACT_VECTOR_ELT $el127, $vec127, $idx127), + (G_EXTRACT_VECTOR_ELT $el126, $vec126, $idx126), + (G_EXTRACT_VECTOR_ELT $el125, $vec125, $idx125), + (G_EXTRACT_VECTOR_ELT $el124, $vec124, $idx124), + (G_EXTRACT_VECTOR_ELT $el123, $vec123, $idx123), + (G_EXTRACT_VECTOR_ELT $el122, $vec122, $idx122), + (G_EXTRACT_VECTOR_ELT $el121, $vec121, $idx121), + 
(G_EXTRACT_VECTOR_ELT $el120, $vec120, $idx120), + (G_EXTRACT_VECTOR_ELT $el119, $vec119, $idx119), + (G_EXTRACT_VECTOR_ELT $el118, $vec118, $idx118), + (G_EXTRACT_VECTOR_ELT $el117, $vec117, $idx117), + (G_EXTRACT_VECTOR_ELT $el116, $vec116, $idx116), + (G_EXTRACT_VECTOR_ELT $el115, $vec115, $idx115), + (G_EXTRACT_VECTOR_ELT $el114, $vec114, $idx114), + (G_EXTRACT_VECTOR_ELT $el113, $vec113, $idx113), + (G_EXTRACT_VECTOR_ELT $el112, $vec112, $idx112), + (G_EXTRACT_VECTOR_ELT $el111, $vec111, $idx111), + (G_EXTRACT_VECTOR_ELT $el110, $vec110, $idx110), + (G_EXTRACT_VECTOR_ELT $el109, $vec109, $idx109), + (G_EXTRACT_VECTOR_ELT $el108, $vec108, $idx108), + (G_EXTRACT_VECTOR_ELT $el107, $vec107, $idx107), + (G_EXTRACT_VECTOR_ELT $el106, $vec106, $idx106), + (G_EXTRACT_VECTOR_ELT $el105, $vec105, $idx105), + (G_EXTRACT_VECTOR_ELT $el104, $vec104, $idx104), + (G_EXTRACT_VECTOR_ELT $el103, $vec103, $idx103), + (G_EXTRACT_VECTOR_ELT $el102, $vec102, $idx102), + (G_EXTRACT_VECTOR_ELT $el101, $vec101, $idx101), + (G_EXTRACT_VECTOR_ELT $el100, $vec100, $idx100), + (G_EXTRACT_VECTOR_ELT $el99, $vec99, $idx99), + (G_EXTRACT_VECTOR_ELT $el98, $vec98, $idx98), + (G_EXTRACT_VECTOR_ELT $el97, $vec97, $idx97), + (G_EXTRACT_VECTOR_ELT $el96, $vec96, $idx96), + (G_EXTRACT_VECTOR_ELT $el95, $vec95, $idx95), + (G_EXTRACT_VECTOR_ELT $el94, $vec94, $idx94), + (G_EXTRACT_VECTOR_ELT $el93, $vec93, $idx93), + (G_EXTRACT_VECTOR_ELT $el92, $vec92, $idx92), + (G_EXTRACT_VECTOR_ELT $el91, $vec91, $idx91), + (G_EXTRACT_VECTOR_ELT $el90, $vec90, $idx90), + (G_EXTRACT_VECTOR_ELT $el89, $vec89, $idx89), + (G_EXTRACT_VECTOR_ELT $el88, $vec88, $idx88), + (G_EXTRACT_VECTOR_ELT $el87, $vec87, $idx87), + (G_EXTRACT_VECTOR_ELT $el86, $vec86, $idx86), + (G_EXTRACT_VECTOR_ELT $el85, $vec85, $idx85), + (G_EXTRACT_VECTOR_ELT $el84, $vec84, $idx84), + (G_EXTRACT_VECTOR_ELT $el83, $vec83, $idx83), + (G_EXTRACT_VECTOR_ELT $el82, $vec82, $idx82), + (G_EXTRACT_VECTOR_ELT $el81, $vec81, $idx81), + (G_EXTRACT_VECTOR_ELT $el80, $vec80, $idx80), + (G_EXTRACT_VECTOR_ELT $el79, $vec79, $idx79), + (G_EXTRACT_VECTOR_ELT $el78, $vec78, $idx78), + (G_EXTRACT_VECTOR_ELT $el77, $vec77, $idx77), + (G_EXTRACT_VECTOR_ELT $el76, $vec76, $idx76), + (G_EXTRACT_VECTOR_ELT $el75, $vec75, $idx75), + (G_EXTRACT_VECTOR_ELT $el74, $vec74, $idx74), + (G_EXTRACT_VECTOR_ELT $el73, $vec73, $idx73), + (G_EXTRACT_VECTOR_ELT $el72, $vec72, $idx72), + (G_EXTRACT_VECTOR_ELT $el71, $vec71, $idx71), + (G_EXTRACT_VECTOR_ELT $el70, $vec70, $idx70), + (G_EXTRACT_VECTOR_ELT $el69, $vec69, $idx69), + (G_EXTRACT_VECTOR_ELT $el68, $vec68, $idx68), + (G_EXTRACT_VECTOR_ELT $el67, $vec67, $idx67), + (G_EXTRACT_VECTOR_ELT $el66, $vec66, $idx66), + (G_EXTRACT_VECTOR_ELT $el65, $vec65, $idx65), + (G_EXTRACT_VECTOR_ELT $el64, $vec64, $idx64), + (G_EXTRACT_VECTOR_ELT $el63, $vec63, $idx63), + (G_EXTRACT_VECTOR_ELT $el62, $vec62, $idx62), + (G_EXTRACT_VECTOR_ELT $el61, $vec61, $idx61), + (G_EXTRACT_VECTOR_ELT $el60, $vec60, $idx60), + (G_EXTRACT_VECTOR_ELT $el59, $vec59, $idx59), + (G_EXTRACT_VECTOR_ELT $el58, $vec58, $idx58), + (G_EXTRACT_VECTOR_ELT $el57, $vec57, $idx57), + (G_EXTRACT_VECTOR_ELT $el56, $vec56, $idx56), + (G_EXTRACT_VECTOR_ELT $el55, $vec55, $idx55), + (G_EXTRACT_VECTOR_ELT $el54, $vec54, $idx54), + (G_EXTRACT_VECTOR_ELT $el53, $vec53, $idx53), + (G_EXTRACT_VECTOR_ELT $el52, $vec52, $idx52), + (G_EXTRACT_VECTOR_ELT $el51, $vec51, $idx51), + (G_EXTRACT_VECTOR_ELT $el50, $vec50, $idx50), + (G_EXTRACT_VECTOR_ELT $el49, $vec49, $idx49), + (G_EXTRACT_VECTOR_ELT $el48, 
$vec48, $idx48), + (G_EXTRACT_VECTOR_ELT $el47, $vec47, $idx47), + (G_EXTRACT_VECTOR_ELT $el46, $vec46, $idx46), + (G_EXTRACT_VECTOR_ELT $el45, $vec45, $idx45), + (G_EXTRACT_VECTOR_ELT $el44, $vec44, $idx44), + (G_EXTRACT_VECTOR_ELT $el43, $vec43, $idx43), + (G_EXTRACT_VECTOR_ELT $el42, $vec42, $idx42), + (G_EXTRACT_VECTOR_ELT $el41, $vec41, $idx41), + (G_EXTRACT_VECTOR_ELT $el40, $vec40, $idx40), + (G_EXTRACT_VECTOR_ELT $el39, $vec39, $idx39), + (G_EXTRACT_VECTOR_ELT $el38, $vec38, $idx38), + (G_EXTRACT_VECTOR_ELT $el37, $vec37, $idx37), + (G_EXTRACT_VECTOR_ELT $el36, $vec36, $idx36), + (G_EXTRACT_VECTOR_ELT $el35, $vec35, $idx35), + (G_EXTRACT_VECTOR_ELT $el34, $vec34, $idx34), + (G_EXTRACT_VECTOR_ELT $el33, $vec33, $idx33), + (G_EXTRACT_VECTOR_ELT $el32, $vec32, $idx32), + (G_EXTRACT_VECTOR_ELT $el31, $vec31, $idx31), + (G_EXTRACT_VECTOR_ELT $el30, $vec30, $idx30), + (G_EXTRACT_VECTOR_ELT $el29, $vec29, $idx29), + (G_EXTRACT_VECTOR_ELT $el28, $vec28, $idx28), + (G_EXTRACT_VECTOR_ELT $el27, $vec27, $idx27), + (G_EXTRACT_VECTOR_ELT $el26, $vec26, $idx26), + (G_EXTRACT_VECTOR_ELT $el25, $vec25, $idx25), + (G_EXTRACT_VECTOR_ELT $el24, $vec24, $idx24), + (G_EXTRACT_VECTOR_ELT $el23, $vec23, $idx23), + (G_EXTRACT_VECTOR_ELT $el22, $vec22, $idx22), + (G_EXTRACT_VECTOR_ELT $el21, $vec21, $idx21), + (G_EXTRACT_VECTOR_ELT $el20, $vec20, $idx20), + (G_EXTRACT_VECTOR_ELT $el19, $vec19, $idx19), + (G_EXTRACT_VECTOR_ELT $el18, $vec18, $idx18), + (G_EXTRACT_VECTOR_ELT $el17, $vec17, $idx17), + (G_EXTRACT_VECTOR_ELT $el16, $vec16, $idx16), + (G_EXTRACT_VECTOR_ELT $el15, $vec15, $idx15), + (G_EXTRACT_VECTOR_ELT $el14, $vec14, $idx14), + (G_EXTRACT_VECTOR_ELT $el13, $vec13, $idx13), + (G_EXTRACT_VECTOR_ELT $el12, $vec12, $idx12), + (G_EXTRACT_VECTOR_ELT $el11, $vec11, $idx11), + (G_EXTRACT_VECTOR_ELT $el10, $vec10, $idx10), + (G_EXTRACT_VECTOR_ELT $el9, $vec9, $idx9), + (G_EXTRACT_VECTOR_ELT $el8, $vec8, $idx8), + (G_EXTRACT_VECTOR_ELT $el7, $vec7, $idx7), + (G_EXTRACT_VECTOR_ELT $el6, $vec6, $idx6), + (G_EXTRACT_VECTOR_ELT $el5, $vec5, $idx5), + (G_EXTRACT_VECTOR_ELT $el4, $vec4, $idx4), + (G_EXTRACT_VECTOR_ELT $el3, $vec3, $idx3), + (G_EXTRACT_VECTOR_ELT $el2, $vec2, $idx2), + (G_EXTRACT_VECTOR_ELT $el1, $vec1, $idx1), + (G_BUILD_VECTOR $root, $el1, $el2, $el3, $el4, $el5, $el6, $el7, $el8, $el9, $el10, $el11, $el12, $el13, $el14, $el15, $el16, $el17, $el18, $el19, $el20, $el21, $el22, $el23, $el24, $el25, $el26, $el27, $el28, $el29, $el30, $el31, $el32, $el33, $el34, $el35, $el36, $el37, $el38, $el39, $el40, $el41, $el42, $el43, $el44, $el45, $el46, $el47, $el48, $el49, $el50, $el51, $el52, $el53, $el54, $el55, $el56, $el57, $el58, $el59, $el60, $el61, $el62, $el63, $el64, $el65, $el66, $el67, $el68, $el69, $el70, $el71, $el72, $el73, $el74, $el75, $el76, $el77, $el78, $el79, $el80, $el81, $el82, $el83, $el84, $el85, $el86, $el87, $el88, $el89, $el90, $el91, $el92, $el93, $el94, $el95, $el96, $el97, $el98, $el99, $el100, $el101, $el102, $el103, $el104, $el105, $el106, $el107, $el108, $el109, $el110, $el111, $el112, $el113, $el114, $el115, $el116, $el117, $el118, $el119, $el120, $el121, $el122, $el123, $el124, $el125, $el126, $el127, $el128):$build, + [{ return Helper.matchCombineExtractToShuffle(*${build}, ${matchinfo}, ${regpair}); }]), + (apply [{ Helper.applyCombineShuffleVector(*${build}, ${matchinfo}); }])>; // Fold away full elt extracts from a build_vector. 
 def extract_all_elts_from_build_vector_matchinfo :
   GIDefMatchData<"SmallVector<std::pair<Register, MachineInstr*>>">;
@@ -926,7 +1220,13 @@ def extract_all_elts_from_build_vector : GICombineRule<
 
 def extract_vec_elt_combines : GICombineGroup<[
   extract_vec_elt_build_vec,
-  extract_vector_element_build_vector_to_shuffle_vector,
+  extract_vector_element_build_vector_to_shuffle_vector2,
+  extract_vector_element_build_vector_to_shuffle_vector4,
+  extract_vector_element_build_vector_to_shuffle_vector8,
+  extract_vector_element_build_vector_to_shuffle_vector16,
+  extract_vector_element_build_vector_to_shuffle_vector32,
+  extract_vector_element_build_vector_to_shuffle_vector64,
+  extract_vector_element_build_vector_to_shuffle_vector128,
   extract_all_elts_from_build_vector]>;
 
 def funnel_shift_from_or_shift : GICombineRule<
@@ -1927,7 +2227,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
-    sub_add_reg, select_to_minmax,
+    sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
     combine_concat_vector, match_addos, sext_trunc, zext_trunc,
     prefer_sign_combines, combine_shuffle_concat,