From d5282a773f6bae02a162b710036bd39e47080fe6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 10 Jan 2025 12:25:57 +0000 Subject: [PATCH] [X86] widenSubVector - widen from smaller build vector if the upper elements are already the same padding elements --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 +++++++++++++-- llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 4 ++-- .../CodeGen/X86/vector-shuffle-combining-avx.ll | 3 +-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b0eb38e7e095..fbfcfc700ed62 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4144,9 +4144,20 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, const X86Subtarget &Subtarget, SelectionDAG &DAG, const SDLoc &dl) { - assert(Vec.getValueSizeInBits().getFixedValue() <= VT.getFixedSizeInBits() && - Vec.getValueType().getScalarType() == VT.getScalarType() && + EVT VecVT = Vec.getValueType(); + assert(VecVT.getFixedSizeInBits() <= VT.getFixedSizeInBits() && + VecVT.getScalarType() == VT.getScalarType() && "Unsupported vector widening type"); + // If the upper 128-bits of a build vector are already undef/zero, then try to + // widen from the lower 128-bits. + if (Vec.getOpcode() == ISD::BUILD_VECTOR && VecVT.is256BitVector()) { + unsigned NumSrcElts = VecVT.getVectorNumElements(); + ArrayRef Hi = Vec->ops().drop_front(NumSrcElts / 2); + if (all_of(Hi, [&](SDValue V) { + return V.isUndef() || (ZeroNewElements && X86::isZeroNode(V)); + })) + Vec = extract128BitVector(Vec, 0, DAG, dl); + } SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl) : DAG.getUNDEF(VT); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec, diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index 445468d06fb04..e7557134b1486 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -442,7 +442,7 @@ define <4 x double> @PR34175(ptr %p) { ; ; AVX512BW-LABEL: PR34175: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40] +; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0] ; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1 ; AVX512BW-NEXT: vmovdqu 32(%rdi), %ymm2 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 @@ -461,7 +461,7 @@ define <4 x double> @PR34175(ptr %p) { ; ; AVX512VBMI-LABEL: PR34175: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} ymm0 = [0,8,32,40,0,8,32,40,0,8,32,40,0,8,32,40] +; AVX512VBMI-NEXT: vmovq {{.*#+}} xmm0 = [0,8,32,40,0,0,0,0] ; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 ; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %ymm2 ; AVX512VBMI-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 81ce14132c879..05071064fc60e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -598,8 +598,7 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n ; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm3 = [0,3,10,1] ; X64-AVX512-NEXT: vpermi2pd %zmm0, %zmm4, %zmm3 ; X64-AVX512-NEXT: vmovapd %ymm3, (%rsi) -; X64-AVX512-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [3,11,3,11] -; X64-AVX512-NEXT: # ymm3 = mem[0,1,0,1] +; X64-AVX512-NEXT: vmovapd {{.*#+}} xmm3 = [3,11] ; X64-AVX512-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3 ; X64-AVX512-NEXT: vmovapd {{.*#+}} ymm0 = [2,8,9,3] ; X64-AVX512-NEXT: vpermi2pd %zmm3, %zmm2, %zmm0