From fa2912b6b831b12ed5305660061d32fcdbfc951f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 12 Jun 2025 13:50:25 +0100
Subject: [PATCH] [X86] getTargetConstantBitsFromNode - handle
 EXTRACT_SUBVECTOR through bitcasts

Generalize the extraction index/width to account for any changes in type
through bitcasts
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 38 ++++++++---------
 .../zero_extend_vector_inreg_of_broadcast.ll  | 41 ++++++++-----------
 ...d_vector_inreg_of_broadcast_from_memory.ll | 30 ++++++--------
 3 files changed, 50 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0fbf55e97be9..b4670e270141f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5242,25 +5242,25 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
   }
 
   // Extract constant bits from a subvector's source.
-  if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
-    // TODO - support extract_subvector through bitcasts.
-    if (EltSizeInBits != VT.getScalarSizeInBits())
-      return false;
-
-    if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
-                                      UndefElts, EltBits, AllowWholeUndefs,
-                                      AllowPartialUndefs)) {
-      EVT SrcVT = Op.getOperand(0).getValueType();
-      unsigned NumSrcElts = SrcVT.getVectorNumElements();
-      unsigned NumSubElts = VT.getVectorNumElements();
-      unsigned BaseIdx = Op.getConstantOperandVal(1);
-      UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
-      if ((BaseIdx + NumSubElts) != NumSrcElts)
-        EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
-      if (BaseIdx != 0)
-        EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
-      return true;
-    }
+  if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, UndefElts,
+                                    EltBits, AllowWholeUndefs,
+                                    AllowPartialUndefs)) {
+    EVT SrcVT = Op.getOperand(0).getValueType();
+    unsigned NumSrcElts = SrcVT.getSizeInBits() / EltSizeInBits;
+    unsigned NumSubElts = VT.getSizeInBits() / EltSizeInBits;
+    unsigned BaseOfs = Op.getConstantOperandVal(1) * VT.getScalarSizeInBits();
+    unsigned BaseIdx = BaseOfs / EltSizeInBits;
+    assert((SrcVT.getSizeInBits() % EltSizeInBits) == 0 &&
+           (VT.getSizeInBits() % EltSizeInBits) == 0 &&
+           (BaseOfs % EltSizeInBits) == 0 && "Bad subvector index");
+
+    UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
+    if ((BaseIdx + NumSubElts) != NumSrcElts)
+      EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
+    if (BaseIdx != 0)
+      EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
+    return true;
   }
 
   // Extract constant bits from shuffle node sources.
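A minimal standalone sketch of the BaseOfs/BaseIdx rescaling above, with plain
unsigned widths standing in for the real EVT/APInt queries. The v32i16 source,
v8i16 subvector and extract index 8 are made-up values chosen only to show the
arithmetic; they are not taken from the patch or its tests.

// Standalone model of the rescaling in the hunk above. Plain unsigneds
// stand in for the EVT/APInt machinery, so this is illustrative only.
#include <cassert>
#include <cstdio>

int main() {
  const unsigned EltSizeInBits = 32; // caller asks for 32-bit constant elements
  const unsigned SrcBits = 512;      // hypothetical v32i16 source vector
  const unsigned SubBits = 128;      // hypothetical v8i16 extracted subvector
  const unsigned SubScalarBits = 16; // scalar width of the extract's own type
  const unsigned ExtractIdx = 8;     // constant extract index, in i16 units

  // Convert the index into bits first, then into EltSizeInBits units,
  // exactly as BaseOfs/BaseIdx do above.
  const unsigned NumSrcElts = SrcBits / EltSizeInBits; // 16 elements
  const unsigned NumSubElts = SubBits / EltSizeInBits; // 4 elements
  const unsigned BaseOfs = ExtractIdx * SubScalarBits; // 128 bits
  const unsigned BaseIdx = BaseOfs / EltSizeInBits;    // element 4
  assert((SrcBits % EltSizeInBits) == 0 && (SubBits % EltSizeInBits) == 0 &&
         (BaseOfs % EltSizeInBits) == 0 && "Bad subvector index");

  // The old code rejected this case outright (EltSizeInBits != 16); the
  // generalized form keeps the slice [BaseIdx, BaseIdx + NumSubElts).
  std::printf("keep elements [%u, %u) of %u\n", BaseIdx, BaseIdx + NumSubElts,
              NumSrcElts);
  return 0;
}

With these widths the removed early-out would have bailed because the
requested 32-bit element width differs from the extract's 16-bit scalar type;
the rescaled form instead returns the 32-bit slice [4, 8) of the 16-element
source, which is what enables the constant-pool folds in the tests below.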
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
index 7ad9fb0c27170..4311a64f86a04 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll
@@ -3567,14 +3567,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
 ; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
 ; AVX-NEXT:    vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
 ; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
-; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rcx)
+; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
 ; AVX-NEXT:    vmovdqa %xmm2, 32(%rcx)
 ; AVX-NEXT:    retq
 ;
@@ -3757,14 +3756,14 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT:    vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpaddb 32(%rdx), %xmm3, %xmm3
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3],xmm0[4],xmm2[5,6],xmm0[7]
 ; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rcx)
-; AVX-NEXT:    vmovdqa %xmm2, 32(%rcx)
+; AVX-NEXT:    vmovdqa %xmm3, 32(%rcx)
 ; AVX-NEXT:    retq
 ;
 ; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
@@ -3955,10 +3954,9 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT:    vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpaddb 32(%rdx), %xmm0, %xmm2
 ; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rcx)
@@ -4181,17 +4179,16 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX-NEXT:    vpaddb (%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5],xmm2[6],xmm1[7]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
 ; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0[2],xmm3[3,4,5,6,7]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT:    vpaddb 32(%rdx), %xmm0, %xmm0
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
-; AVX-NEXT:    vpaddb 16(%rdx), %xmm2, %xmm2
+; AVX-NEXT:    vpaddb 32(%rdx), %xmm2, %xmm2
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa %xmm0, 16(%rcx)
 ; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
-; AVX-NEXT:    vmovdqa %xmm2, 16(%rcx)
-; AVX-NEXT:    vmovdqa %xmm0, 32(%rcx)
+; AVX-NEXT:    vmovdqa %xmm2, 32(%rcx)
 ; AVX-NEXT:    retq
 ;
 ; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
@@ -4379,10 +4376,9 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
 ; AVX-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT:    vpaddb 32(%rdx), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpaddb 32(%rdx), %xmm0, %xmm2
 ; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rcx)
@@ -4517,10 +4513,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
 ; AVX-NEXT:    vpaddb (%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb 48(%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX-NEXT:    vmovaps 32(%rdx), %ymm2
 ; AVX-NEXT:    vpaddb (%rdx), %xmm1, %xmm1
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    vpaddb 16(%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vmovaps %ymm2, 32(%rcx)
 ; AVX-NEXT:    vmovdqa %xmm1, (%rcx)
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 3bef834bbd902..38ed3ec0a975e 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -2868,14 +2868,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
 ; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
 ; AVX-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
 ; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
 ; AVX-NEXT:    vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero,xmm0[0,1],zero,zero
+; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
 ; AVX-NEXT:    vmovdqa %xmm2, 32(%rdx)
 ; AVX-NEXT:    retq
 ;
@@ -2986,7 +2985,8 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2],xmm0[3],mem[4,5],xmm0[6],mem[7]
 ; AVX-NEXT:    vmovdqa (%rdi), %xmm2
 ; AVX-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero,xmm2[0,1],zero,zero,zero,zero
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0],xmm0[1],xmm3[2,3],xmm0[4],xmm3[5,6],xmm0[7]
 ; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vpaddb 32(%rsi), %xmm2, %xmm2
@@ -3135,9 +3135,8 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = mem[0,1,0,1]
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
-; AVX-NEXT:    vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; AVX-NEXT:    vpaddb 32(%rsi), %xmm0, %xmm2
 ; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
@@ -3319,13 +3318,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
 ; AVX-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5,6,7]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX-NEXT:    vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
 ; AVX-NEXT:    vmovdqa %xmm2, 32(%rdx)
 ; AVX-NEXT:    retq
 ;
@@ -3469,9 +3467,8 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
 ; AVX-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
-; AVX-NEXT:    vpaddb 32(%rsi), %xmm2, %xmm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpaddb 32(%rsi), %xmm0, %xmm2
 ; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vmovdqa %xmm1, (%rdx)
@@ -3584,9 +3581,8 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovdqa (%rdi), %xmm0
 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5,6,7]
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; AVX-NEXT:    vmovaps 32(%rsi), %ymm2
-; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
 ; AVX-NEXT:    vpaddb 16(%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpaddb (%rsi), %xmm1, %xmm1
 ; AVX-NEXT:    vmovaps %ymm2, 32(%rdx)