From 3afd4eba40468069f4ceb95f7470e4344e9fc5b1 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Wed, 29 Oct 2025 10:26:54 +0000 Subject: [PATCH 01/11] [AArch64][GlobalISel] Add combine for build_vector(unmerge, unmerge, undef, undef) --- llvm/lib/Target/AArch64/AArch64Combine.td | 9 +- .../GISel/AArch64PostLegalizerCombiner.cpp | 96 ++++++++++++++++++- llvm/test/CodeGen/AArch64/fptrunc.ll | 18 +--- llvm/test/CodeGen/AArch64/itofp.ll | 48 +++------- 4 files changed, 120 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 278314792bfb9..056e6145487d2 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -332,6 +332,13 @@ def combine_mul_cmlt : GICombineRule< (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }]) >; +def combine_build_unmerge : GICombineRule< + (defs root:$root, register_matchinfo:$unmergeSrc), + (match (wip_match_opcode G_BUILD_VECTOR):$root, + [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]), + (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }]) +>; + // Post-legalization combines which should happen at all optimization levels. // (E.g. ones that facilitate matching for the selector) For example, matching // pseudos. @@ -366,7 +373,7 @@ def AArch64PostLegalizerCombiner select_to_minmax, or_to_bsp, combine_concat_vector, commute_constant_to_rhs, extract_vec_elt_combines, push_freeze_to_prevent_poison_from_propagating, - combine_mul_cmlt, combine_use_vector_truncate, + combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge, extmultomull, truncsat_combines, lshr_of_trunc_of_lshr, funnel_shift_from_or_shift_constants_are_legal]> { } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index fa7bb6ecc35ee..2f17fd33559ee 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" +#include #define GET_GICOMBINER_DEPS #include "AArch64GenPostLegalizeGICombiner.inc" @@ -133,6 +134,99 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; } +// This pattern aims to match the following shape to avoid extra mov +// instructions +// G_BUILD_VECTOR( +// G_UNMERGE_VALUES(src, 0) +// G_UNMERGE_VALUES(src, 1) +// G_IMPLICIT_DEF +// G_IMPLICIT_DEF +// ) +// -> +// G_CONCAT_VECTORS( +// undef +// src +// ) +bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + Register &UnmergeSrc) { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + + unsigned UnmergeInstrCount = 0; + unsigned UndefInstrCount = 0; + + unsigned UnmergeEltCount = 0; + unsigned UnmergeEltSize = 0; + + Register UnmergeSrcTemp; + + std::set KnownRegs; + + for (auto Use : MI.all_uses()) { + auto *Def = getDefIgnoringCopies(Use.getReg(), MRI); + + if (!Def) { + return false; + } + + unsigned Opcode = Def->getOpcode(); + + switch (Opcode) { + default: + return false; + case TargetOpcode::G_IMPLICIT_DEF: + ++UndefInstrCount; + break; + case TargetOpcode::G_UNMERGE_VALUES: + ++UnmergeInstrCount; + + UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits(); + UnmergeEltCount = Def->getNumDefs(); + if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && + UnmergeEltSize * UnmergeEltCount != 128)) { + return false; + } + + // Unmerge should only use one register so we can use the last one + for (auto UnmergeUse : Def->all_uses()) + UnmergeSrcTemp = UnmergeUse.getReg(); + + // Track unique sources for the G_UNMERGE_VALUES + unsigned RegId = UnmergeSrcTemp.id(); + if (KnownRegs.find(RegId) != KnownRegs.end()) + continue; + + KnownRegs.insert(RegId); + + // We know the unmerge is a valid target now so store the register. + UnmergeSrc = UnmergeSrcTemp; + + break; + } + } + + // Only want to match patterns that pad half of a vector with undefined. We + // also want to ensure that these values come from a single unmerge and all + // unmerged values are consumed. + if (UndefInstrCount != UnmergeInstrCount || + UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) { + return false; + } + + return true; +} + +void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, Register &UnmergeSrc) { + assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES"); + B.setInstrAndDebugLoc(MI); + + Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc)); + B.buildUndef(UndefVec); + B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec}); + + MI.eraseFromParent(); +} + bool matchAArch64MulConstCombine( MachineInstr &MI, MachineRegisterInfo &MRI, std::function &ApplyFn) { @@ -890,4 +984,4 @@ namespace llvm { FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { return new AArch64PostLegalizerCombiner(IsOptNone); } -} // end namespace llvm +} // end namespace llvm \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll index 1f84c944d7c16..ce1a2fc48c2e7 100644 --- a/llvm/test/CodeGen/AArch64/fptrunc.ll +++ b/llvm/test/CodeGen/AArch64/fptrunc.ll @@ -345,19 +345,11 @@ entry: } define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) { -; CHECK-SD-LABEL: fptrunc_v2f32_v2f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fptrunc_v2f32_v2f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-GI-NEXT: fcvtn v0.4h, v1.4s -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fptrunc_v2f32_v2f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: ret entry: %c = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %c diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll index caf87a13f283b..6d168edf180a4 100644 --- a/llvm/test/CodeGen/AArch64/itofp.ll +++ b/llvm/test/CodeGen/AArch64/itofp.ll @@ -5763,18 +5763,14 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: scvtf v0.2d, v0.2d ; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i64_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: scvtf v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = sitofp <2 x i64> %a to <2 x half> @@ -5808,18 +5804,14 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) { ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2d, v0.2d ; CHECK-NOFP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i64_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: ucvtf v0.2d, v0.2d ; CHECK-FP16-GI-NEXT: fcvtn v0.2s, v0.2d -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = uitofp <2 x i64> %a to <2 x half> @@ -6232,17 +6224,13 @@ define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-NOFP16-GI-LABEL: stofp_v2i32_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i32_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = sitofp <2 x i32> %a to <2 x half> @@ -6267,17 +6255,13 @@ define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) { ; CHECK-NOFP16-GI-LABEL: utofp_v2i32_v2f16: ; CHECK-NOFP16-GI: // %bb.0: // %entry ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i32_v2f16: ; CHECK-FP16-GI: // %bb.0: // %entry ; CHECK-FP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-FP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-FP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-FP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-FP16-GI-NEXT: ret entry: %c = uitofp <2 x i32> %a to <2 x half> @@ -6480,9 +6464,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #16 ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret entry: %c = sitofp <2 x i16> %a to <2 x half> @@ -6509,9 +6491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) { ; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret entry: %c = uitofp <2 x i16> %a to <2 x half> @@ -6766,9 +6746,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-NOFP16-GI-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NOFP16-GI-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NOFP16-GI-NEXT: scvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: stofp_v2i8_v2f16: @@ -6817,9 +6795,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) { ; CHECK-NOFP16-GI-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NOFP16-GI-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NOFP16-GI-NEXT: ucvtf v0.2s, v0.2s -; CHECK-NOFP16-GI-NEXT: mov v1.s[0], v0.s[0] -; CHECK-NOFP16-GI-NEXT: mov v1.s[1], v0.s[1] -; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v1.4s +; CHECK-NOFP16-GI-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NOFP16-GI-NEXT: ret ; ; CHECK-FP16-GI-LABEL: utofp_v2i8_v2f16: From 372742e8a80a52bbd5c4f779bc94266f4b0bbe4a Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Wed, 29 Oct 2025 11:15:21 +0000 Subject: [PATCH 02/11] Fix unnecessary copying from using auto. --- .../lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 2f17fd33559ee..c768d4b7a9a1c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -161,7 +161,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, std::set KnownRegs; - for (auto Use : MI.all_uses()) { + for (auto &Use : MI.all_uses()) { auto *Def = getDefIgnoringCopies(Use.getReg(), MRI); if (!Def) { @@ -187,7 +187,7 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, } // Unmerge should only use one register so we can use the last one - for (auto UnmergeUse : Def->all_uses()) + for (auto &UnmergeUse : Def->all_uses()) UnmergeSrcTemp = UnmergeUse.getReg(); // Track unique sources for the G_UNMERGE_VALUES From a0376ab6ba0a31cd6f318fee1c8e210503dac715 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Thu, 30 Oct 2025 12:06:59 +0000 Subject: [PATCH 03/11] Remove wip_match_opcode and check operand order and consecutiveness --- llvm/lib/Target/AArch64/AArch64Combine.td | 2 +- .../GISel/AArch64PostLegalizerCombiner.cpp | 36 +++++++++++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 056e6145487d2..5d6feeaa363e8 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -334,7 +334,7 @@ def combine_mul_cmlt : GICombineRule< def combine_build_unmerge : GICombineRule< (defs root:$root, register_matchinfo:$unmergeSrc), - (match (wip_match_opcode G_BUILD_VECTOR):$root, + (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root, [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]), (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }]) >; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index c768d4b7a9a1c..33de0af5808e2 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -144,20 +144,24 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { // ) // -> // G_CONCAT_VECTORS( +// src, // undef -// src // ) bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) { assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - unsigned UnmergeInstrCount = 0; + unsigned UnmergeUseCount = 0; unsigned UndefInstrCount = 0; unsigned UnmergeEltCount = 0; unsigned UnmergeEltSize = 0; + unsigned BuildOperandCount = MI.getNumOperands(); + bool EncounteredUndef = false; + Register UnmergeSrcTemp; + MachineInstr *UnmergeInstr; std::set KnownRegs; @@ -170,14 +174,21 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, unsigned Opcode = Def->getOpcode(); + // Ensure that the unmerged instructions are consecutive and before the + // undefined values by checking we don't encounter an undef before we reach + // half way + if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2) + return false; + switch (Opcode) { default: return false; case TargetOpcode::G_IMPLICIT_DEF: ++UndefInstrCount; + EncounteredUndef = true; break; case TargetOpcode::G_UNMERGE_VALUES: - ++UnmergeInstrCount; + ++UnmergeUseCount; UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits(); UnmergeEltCount = Def->getNumDefs(); @@ -197,8 +208,10 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, KnownRegs.insert(RegId); - // We know the unmerge is a valid target now so store the register. + // We know the unmerge is a valid target now so store the register & the + // instruction. UnmergeSrc = UnmergeSrcTemp; + UnmergeInstr = Def; break; } @@ -207,11 +220,22 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, // Only want to match patterns that pad half of a vector with undefined. We // also want to ensure that these values come from a single unmerge and all // unmerged values are consumed. - if (UndefInstrCount != UnmergeInstrCount || - UnmergeEltCount != UnmergeInstrCount || KnownRegs.size() != 1) { + if (UndefInstrCount != UnmergeUseCount || + UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) { return false; } + // Check the operands of the unmerge are used in the same order they are + // defined G_BUILD_VECTOR always defines 1 output so we know the uses start + // from index 1 + for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount; + ++OperandIndex) { + Register BuildReg = MI.getOperand(OperandIndex + 1).getReg(); + Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg(); + if (BuildReg != UnmergeReg) + return false; + } + return true; } From 3f8d7da31402bf0af864060f60442b1d51955d40 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 3 Nov 2025 10:12:49 +0000 Subject: [PATCH 04/11] Simplify logic & check for legality of resulting concatenation --- .../GISel/AArch64PostLegalizerCombiner.cpp | 100 ++++++------------ 1 file changed, 31 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 33de0af5808e2..5d6b47806722d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -151,91 +151,53 @@ bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) { assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - unsigned UnmergeUseCount = 0; - unsigned UndefInstrCount = 0; + unsigned BuildUseCount = MI.getNumOperands() - 1; - unsigned UnmergeEltCount = 0; - unsigned UnmergeEltSize = 0; - - unsigned BuildOperandCount = MI.getNumOperands(); - bool EncounteredUndef = false; + if (BuildUseCount % 2 != 0) + return false; - Register UnmergeSrcTemp; - MachineInstr *UnmergeInstr; + unsigned HalfWayIndex = BuildUseCount / 2; - std::set KnownRegs; + // Check the first operand is an unmerge + auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); + if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; - for (auto &Use : MI.all_uses()) { - auto *Def = getDefIgnoringCopies(Use.getReg(), MRI); + // Check that the resultant concat will be legal + auto UnmergeEltSize = + MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits(); + auto UnmergeEltCount = MaybeUnmerge->getNumDefs(); - if (!Def) { - return false; - } + if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && + UnmergeEltSize * UnmergeEltCount != 128)) + return false; - unsigned Opcode = Def->getOpcode(); + // Check that all of the operands before the midpoint come from the same + // unmerge and are in the same order as they are used in the build_vector + for (unsigned I = 0; I < HalfWayIndex; ++I) { + auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg(); + auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI); - // Ensure that the unmerged instructions are consecutive and before the - // undefined values by checking we don't encounter an undef before we reach - // half way - if (EncounteredUndef && UnmergeUseCount < BuildOperandCount / 2) + if (Unmerge != MaybeUnmerge) return false; - switch (Opcode) { - default: + if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg) return false; - case TargetOpcode::G_IMPLICIT_DEF: - ++UndefInstrCount; - EncounteredUndef = true; - break; - case TargetOpcode::G_UNMERGE_VALUES: - ++UnmergeUseCount; - - UnmergeEltSize = MRI.getType(Use.getReg()).getScalarSizeInBits(); - UnmergeEltCount = Def->getNumDefs(); - if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && - UnmergeEltSize * UnmergeEltCount != 128)) { - return false; - } - - // Unmerge should only use one register so we can use the last one - for (auto &UnmergeUse : Def->all_uses()) - UnmergeSrcTemp = UnmergeUse.getReg(); - - // Track unique sources for the G_UNMERGE_VALUES - unsigned RegId = UnmergeSrcTemp.id(); - if (KnownRegs.find(RegId) != KnownRegs.end()) - continue; - - KnownRegs.insert(RegId); - - // We know the unmerge is a valid target now so store the register & the - // instruction. - UnmergeSrc = UnmergeSrcTemp; - UnmergeInstr = Def; - - break; - } } - // Only want to match patterns that pad half of a vector with undefined. We - // also want to ensure that these values come from a single unmerge and all - // unmerged values are consumed. - if (UndefInstrCount != UnmergeUseCount || - UnmergeEltCount != UnmergeUseCount || KnownRegs.size() != 1) { - return false; - } + // Check that all of the operands after the mid point are undefs. + for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) { + auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI); - // Check the operands of the unmerge are used in the same order they are - // defined G_BUILD_VECTOR always defines 1 output so we know the uses start - // from index 1 - for (unsigned OperandIndex = 0; OperandIndex < UnmergeUseCount; - ++OperandIndex) { - Register BuildReg = MI.getOperand(OperandIndex + 1).getReg(); - Register UnmergeReg = UnmergeInstr->getOperand(OperandIndex).getReg(); - if (BuildReg != UnmergeReg) + if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF) return false; } + // Unmerge should only use one register so we can use the last one + for (auto &UnmergeUse : + getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses()) + UnmergeSrc = UnmergeUse.getReg(); + return true; } From 69ba5ab26d6bad07c0e0c655d55733160ff7186f Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 3 Nov 2025 10:13:58 +0000 Subject: [PATCH 05/11] Make this a non-backend specific optimisation & check all unmerged values are used --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++ .../include/llvm/Target/GlobalISel/Combine.td | 12 ++- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 85 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64Combine.td | 9 +- .../GISel/AArch64PostLegalizerCombiner.cpp | 79 ----------------- 5 files changed, 103 insertions(+), 88 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 36cb90b1bc134..968bbbf778254 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -647,6 +647,12 @@ class CombinerHelper { bool matchRotateOutOfRange(MachineInstr &MI) const; void applyRotateOutOfRange(MachineInstr &MI) const; + bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + Register &UnmergeSrc) const; + void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, + Register &UnmergeSrc) const; + bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 119695e53c3cb..0ab2d9487a295 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -921,6 +921,15 @@ def merge_of_x_and_zero : GICombineRule < [{ return Helper.matchMergeXAndZero(*${MI}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${MI}, ${matchinfo}); }])>; +// Transform build_vector(unmerge(src, 0), ... unmerge(src, n), undef, ..., undef) +// => concat_vectors(src, undef) +def combine_build_unmerge : GICombineRule< + (defs root:$root, register_matchinfo:$unmergeSrc), + (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root, + [{ return Helper.matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]), + (apply [{ Helper.applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }]) +>; + def merge_combines: GICombineGroup<[ unmerge_anyext_build_vector, unmerge_merge, @@ -930,7 +939,8 @@ def merge_combines: GICombineGroup<[ unmerge_dead_to_trunc, unmerge_zext_to_zext, merge_of_x_and_undef, - merge_of_x_and_zero + merge_of_x_and_zero, + combine_build_unmerge ]>; // Under certain conditions, transform: diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9ace7d65413ad..b7ade264cfc64 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3463,6 +3463,91 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, isConstTrueVal(TLI, Cst, IsVector, IsFP); } +// This pattern aims to match the following shape to avoid extra mov +// instructions +// G_BUILD_VECTOR( +// G_UNMERGE_VALUES(src, 0) +// G_UNMERGE_VALUES(src, 1) +// G_IMPLICIT_DEF +// G_IMPLICIT_DEF +// ) +// -> +// G_CONCAT_VECTORS( +// src, +// undef +// ) +bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, + MachineRegisterInfo &MRI, + Register &UnmergeSrc) const { + assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); + + unsigned BuildUseCount = MI.getNumOperands() - 1; + + if (BuildUseCount % 2 != 0) + return false; + + unsigned NumUnmerge = BuildUseCount / 2; + + // Check the first operand is an unmerge + auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); + if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + // Check that the resultant concat will be legal + auto UnmergeEltSize = + MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits(); + auto UnmergeEltCount = MaybeUnmerge->getNumDefs(); + + if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && + UnmergeEltSize * UnmergeEltCount != 128)) + return false; + + // Check that all of the operands before the midpoint come from the same + // unmerge and are in the same order as they are used in the build_vector + for (unsigned I = 0; I < NumUnmerge; ++I) { + auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg(); + auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI); + + if (Unmerge != MaybeUnmerge) + return false; + + if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg) + return false; + } + + // Check that all of the unmerged values are used + if (UnmergeEltCount != NumUnmerge) + return false; + + // Check that all of the operands after the mid point are undefs. + for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) { + auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI); + + if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF) + return false; + } + + // Unmerge should only use one register so we can use the last one + for (auto &UnmergeUse : + getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses()) + UnmergeSrc = UnmergeUse.getReg(); + + return true; +} + +void CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B, + Register &UnmergeSrc) const { + assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES"); + B.setInstrAndDebugLoc(MI); + + Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0); + B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec}); + + MI.eraseFromParent(); +} + // This combine tries to reduce the number of scalarised G_TRUNC instructions by // using vector truncates instead // diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 5d6feeaa363e8..278314792bfb9 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -332,13 +332,6 @@ def combine_mul_cmlt : GICombineRule< (apply [{ applyCombineMulCMLT(*${root}, MRI, B, ${matchinfo}); }]) >; -def combine_build_unmerge : GICombineRule< - (defs root:$root, register_matchinfo:$unmergeSrc), - (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root, - [{ return matchCombineBuildUnmerge(*${root}, MRI, ${unmergeSrc}); }]), - (apply [{ applyCombineBuildUnmerge(*${root}, MRI, B, ${unmergeSrc}); }]) ->; - // Post-legalization combines which should happen at all optimization levels. // (E.g. ones that facilitate matching for the selector) For example, matching // pseudos. @@ -373,7 +366,7 @@ def AArch64PostLegalizerCombiner select_to_minmax, or_to_bsp, combine_concat_vector, commute_constant_to_rhs, extract_vec_elt_combines, push_freeze_to_prevent_poison_from_propagating, - combine_mul_cmlt, combine_use_vector_truncate, combine_build_unmerge, + combine_mul_cmlt, combine_use_vector_truncate, extmultomull, truncsat_combines, lshr_of_trunc_of_lshr, funnel_shift_from_or_shift_constants_are_legal]> { } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 5d6b47806722d..9b07d84a292f5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -134,85 +134,6 @@ bool isZeroExtended(Register R, MachineRegisterInfo &MRI) { return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT; } -// This pattern aims to match the following shape to avoid extra mov -// instructions -// G_BUILD_VECTOR( -// G_UNMERGE_VALUES(src, 0) -// G_UNMERGE_VALUES(src, 1) -// G_IMPLICIT_DEF -// G_IMPLICIT_DEF -// ) -// -> -// G_CONCAT_VECTORS( -// src, -// undef -// ) -bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, - Register &UnmergeSrc) { - assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - - unsigned BuildUseCount = MI.getNumOperands() - 1; - - if (BuildUseCount % 2 != 0) - return false; - - unsigned HalfWayIndex = BuildUseCount / 2; - - // Check the first operand is an unmerge - auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); - if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES) - return false; - - // Check that the resultant concat will be legal - auto UnmergeEltSize = - MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits(); - auto UnmergeEltCount = MaybeUnmerge->getNumDefs(); - - if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && - UnmergeEltSize * UnmergeEltCount != 128)) - return false; - - // Check that all of the operands before the midpoint come from the same - // unmerge and are in the same order as they are used in the build_vector - for (unsigned I = 0; I < HalfWayIndex; ++I) { - auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg(); - auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI); - - if (Unmerge != MaybeUnmerge) - return false; - - if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg) - return false; - } - - // Check that all of the operands after the mid point are undefs. - for (unsigned I = HalfWayIndex; I < BuildUseCount; ++I) { - auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI); - - if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF) - return false; - } - - // Unmerge should only use one register so we can use the last one - for (auto &UnmergeUse : - getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses()) - UnmergeSrc = UnmergeUse.getReg(); - - return true; -} - -void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, - MachineIRBuilder &B, Register &UnmergeSrc) { - assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES"); - B.setInstrAndDebugLoc(MI); - - Register UndefVec = MRI.createGenericVirtualRegister(MRI.getType(UnmergeSrc)); - B.buildUndef(UndefVec); - B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec}); - - MI.eraseFromParent(); -} - bool matchAArch64MulConstCombine( MachineInstr &MI, MachineRegisterInfo &MRI, std::function &ApplyFn) { From aee79e7f70a21e2fb37a96b22351d311ca3fd0bc Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 3 Nov 2025 13:51:54 +0000 Subject: [PATCH 06/11] Remove unused set import --- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 9b07d84a292f5..73384f3b4798e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -39,7 +39,6 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/Support/Debug.h" -#include #define GET_GICOMBINER_DEPS #include "AArch64GenPostLegalizeGICombiner.inc" From 8df8592bc5a7ddbef00e320632ac985f5e5d830f Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 3 Nov 2025 15:46:41 +0000 Subject: [PATCH 07/11] Update missed test, check legality properly and remove unnecessary loop --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 20 +++---- .../build-vector-packed-partial-undef.ll | 54 +++++-------------- 2 files changed, 22 insertions(+), 52 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b7ade264cfc64..285b6393941a3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" @@ -3493,14 +3494,12 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES) return false; - // Check that the resultant concat will be legal - auto UnmergeEltSize = - MRI.getType(MaybeUnmerge->getOperand(1).getReg()).getScalarSizeInBits(); - auto UnmergeEltCount = MaybeUnmerge->getNumDefs(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT UnmergeSrcTy = MRI.getType(MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg()); - if (UnmergeEltCount < 2 || (UnmergeEltSize * UnmergeEltCount != 64 && - UnmergeEltSize * UnmergeEltCount != 128)) - return false; + // Ensure we only generate legal instructions post-legalizer + if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy, UnmergeSrcTy}})) + return false; // Check that all of the operands before the midpoint come from the same // unmerge and are in the same order as they are used in the build_vector @@ -3516,7 +3515,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, } // Check that all of the unmerged values are used - if (UnmergeEltCount != NumUnmerge) + if (MaybeUnmerge->getNumDefs() != NumUnmerge) return false; // Check that all of the operands after the mid point are undefs. @@ -3527,10 +3526,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, return false; } - // Unmerge should only use one register so we can use the last one - for (auto &UnmergeUse : - getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI)->all_uses()) - UnmergeSrc = UnmergeUse.getReg(); + UnmergeSrc = MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll index c1b8bc6031b18..f7dbcd137e742 100644 --- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll @@ -762,25 +762,13 @@ define void @undef_hi3_v4f16(half %arg0) { } define void @undef_hi2_v4i16(<2 x i16> %arg0) { -; GFX8-SDAG-LABEL: undef_hi2_v4i16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: ;;#ASMSTART -; GFX8-SDAG-NEXT: ; use v[0:1] -; GFX8-SDAG-NEXT: ;;#ASMEND -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: undef_hi2_v4i16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-GISEL-NEXT: ;;#ASMSTART -; GFX8-GISEL-NEXT: ; use v[0:1] -; GFX8-GISEL-NEXT: ;;#ASMEND -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: undef_hi2_v4i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: undef_hi2_v4i16: ; GFX9: ; %bb.0: @@ -803,25 +791,13 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) { } define void @undef_hi2_v4f16(<2 x half> %arg0) { -; GFX8-SDAG-LABEL: undef_hi2_v4f16: -; GFX8-SDAG: ; %bb.0: -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: ;;#ASMSTART -; GFX8-SDAG-NEXT: ; use v[0:1] -; GFX8-SDAG-NEXT: ;;#ASMEND -; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-GISEL-LABEL: undef_hi2_v4f16: -; GFX8-GISEL: ; %bb.0: -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-GISEL-NEXT: ;;#ASMSTART -; GFX8-GISEL-NEXT: ; use v[0:1] -; GFX8-GISEL-NEXT: ;;#ASMEND -; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX8-LABEL: undef_hi2_v4f16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: ;;#ASMSTART +; GFX8-NEXT: ; use v[0:1] +; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: undef_hi2_v4f16: ; GFX9: ; %bb.0: @@ -842,5 +818,3 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) { call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi); ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX8: {{.*}} From c9ef45fa7a610fe7a29a9f247d9873a7d05815f8 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 3 Nov 2025 15:50:50 +0000 Subject: [PATCH 08/11] Linting --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 285b6393941a3..394beeee1ab18 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3495,11 +3495,13 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, return false; LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - LLT UnmergeSrcTy = MRI.getType(MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg()); + LLT UnmergeSrcTy = MRI.getType( + MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg()); // Ensure we only generate legal instructions post-legalizer - if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy, UnmergeSrcTy}})) - return false; + if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, + {DstTy, UnmergeSrcTy, UnmergeSrcTy}})) + return false; // Check that all of the operands before the midpoint come from the same // unmerge and are in the same order as they are used in the build_vector @@ -3526,7 +3528,8 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, return false; } - UnmergeSrc = MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands()-1).getReg(); + UnmergeSrc = + MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg(); return true; } From c9cd6a0198407122573ca7a9be0e721e8dbf872c Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Fri, 7 Nov 2025 10:23:03 +0000 Subject: [PATCH 09/11] Use cast templates & convenience functions. Also simplify legality check --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 394beeee1ab18..fdd9d012c399e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3480,57 +3480,56 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const { - assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR); - - unsigned BuildUseCount = MI.getNumOperands() - 1; + auto &BV = cast(MI); + unsigned BuildUseCount = BV.getNumSources(); if (BuildUseCount % 2 != 0) return false; unsigned NumUnmerge = BuildUseCount / 2; - // Check the first operand is an unmerge - auto *MaybeUnmerge = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI); - if (MaybeUnmerge->getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + auto *Unmerge = getOpcodeDef(BV.getSourceReg(0), MRI); + + // Check the first operand is an unmerge and has the correct number of + // operands + if (!Unmerge || Unmerge->getNumOperands() != NumUnmerge + 1) return false; + UnmergeSrc = Unmerge->getSourceReg(); + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - LLT UnmergeSrcTy = MRI.getType( - MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg()); + LLT UnmergeSrcTy = MRI.getType(UnmergeSrc); // Ensure we only generate legal instructions post-legalizer - if (!IsPreLegalize && !isLegal({TargetOpcode::G_CONCAT_VECTORS, - {DstTy, UnmergeSrcTy, UnmergeSrcTy}})) + if (!IsPreLegalize && + !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}})) return false; // Check that all of the operands before the midpoint come from the same // unmerge and are in the same order as they are used in the build_vector for (unsigned I = 0; I < NumUnmerge; ++I) { - auto MaybeUnmergeReg = MI.getOperand(I + 1).getReg(); - auto *Unmerge = getDefIgnoringCopies(MaybeUnmergeReg, MRI); + auto MaybeUnmergeReg = BV.getSourceReg(I); + auto *LoopUnmerge = getOpcodeDef(MaybeUnmergeReg, MRI); - if (Unmerge != MaybeUnmerge) + if (!LoopUnmerge || LoopUnmerge != Unmerge) return false; - if (Unmerge->getOperand(I).getReg() != MaybeUnmergeReg) + if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg) return false; } // Check that all of the unmerged values are used - if (MaybeUnmerge->getNumDefs() != NumUnmerge) + if (Unmerge->getNumDefs() != NumUnmerge) return false; // Check that all of the operands after the mid point are undefs. for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) { - auto *Undef = getDefIgnoringCopies(MI.getOperand(I + 1).getReg(), MRI); + auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI); if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF) return false; } - UnmergeSrc = - MaybeUnmerge->getOperand(MaybeUnmerge->getNumOperands() - 1).getReg(); - return true; } @@ -8510,4 +8509,4 @@ bool CombinerHelper::matchSuboCarryOut(const MachineInstr &MI, } return false; -} +} \ No newline at end of file From 586fed999b98336c824c6b0a096f4592046315e4 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Mon, 10 Nov 2025 10:46:06 +0000 Subject: [PATCH 10/11] Correct trailing newline --- llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index 73384f3b4798e..fa7bb6ecc35ee 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -890,4 +890,4 @@ namespace llvm { FunctionPass *createAArch64PostLegalizerCombiner(bool IsOptNone) { return new AArch64PostLegalizerCombiner(IsOptNone); } -} // end namespace llvm \ No newline at end of file +} // end namespace llvm From 51c76e2ee8ba802f353ce58413b12e7502adcc75 Mon Sep 17 00:00:00 2001 From: Ryan Cowan Date: Thu, 13 Nov 2025 14:40:13 +0000 Subject: [PATCH 11/11] Remove unnecessary include & make further use of cast --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index fdd9d012c399e..eb59ae6684689 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -30,7 +30,6 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" @@ -3492,7 +3491,7 @@ bool CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI, // Check the first operand is an unmerge and has the correct number of // operands - if (!Unmerge || Unmerge->getNumOperands() != NumUnmerge + 1) + if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge) return false; UnmergeSrc = Unmerge->getSourceReg();