From e1a005a1ce833fc2dc5630903daed61a9c05edbd Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 20 Jun 2025 15:49:32 +0100 Subject: [PATCH] [X86] combineConcatVectorOps - only concat AVX1 v4i64 shift-by-32 to a shuffle if the concat is free --- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 ++++++++++--------- .../test/CodeGen/X86/vector-shift-lshr-256.ll | 18 ++++++----------- llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 18 ++++++----------- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index defb7730b4c7d..ebc616b31434a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58839,16 +58839,18 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, llvm::all_of(Ops, [](SDValue Op) { return Op.getConstantOperandAPInt(1) == 32; })) { - SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0)); - SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL); - if (Opcode == X86ISD::VSHLI) { - Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero, - {8, 0, 8, 2, 8, 4, 8, 6}); - } else { - Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero, - {1, 8, 3, 8, 5, 8, 7, 8}); + if (SDValue Res = CombineSubOperand(VT, Ops, 0)) { + SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL); + Res = DAG.getBitcast(MVT::v8i32, Res); + if (Opcode == X86ISD::VSHLI) { + Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero, + {8, 0, 8, 2, 8, 4, 8, 6}); + } else { + Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero, + {1, 8, 3, 8, 5, 8, 7, 8}); + } + return DAG.getBitcast(VT, Res); } - return DAG.getBitcast(VT, Res); } [[fallthrough]]; case X86ISD::VSRAI: diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll index b45525b6e20f9..3a4bb223618dd 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -1974,11 +1974,9 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind { define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; AVX1-LABEL: shift32_v4i64_concat: ; AVX1: # %bb.0: -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] -; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shift32_v4i64_concat: @@ -1990,11 +1988,9 @@ define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; ; XOPAVX1-LABEL: shift32_v4i64_concat: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; XOPAVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] -; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: shift32_v4i64_concat: @@ -2020,11 +2016,9 @@ define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; ; X86-AVX1-LABEL: shift32_v4i64_concat: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; X86-AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] -; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: shift32_v4i64_concat: diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll index 2248ee997d525..b56a8b5b2294c 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -1827,11 +1827,9 @@ define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind { define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; AVX1-LABEL: shift32_v4i64_concat: ; AVX1: # %bb.0: -; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] -; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shift32_v4i64_concat: @@ -1843,11 +1841,9 @@ define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; ; XOPAVX1-LABEL: shift32_v4i64_concat: ; XOPAVX1: # %bb.0: -; XOPAVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; XOPAVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; XOPAVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; XOPAVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] -; XOPAVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; XOPAVX1-NEXT: retq ; ; XOPAVX2-LABEL: shift32_v4i64_concat: @@ -1873,11 +1869,9 @@ define <4 x i64> @shift32_v4i64_concat(<2 x i64> %lo, <2 x i64> %hi) nounwind { ; ; X86-AVX1-LABEL: shift32_v4i64_concat: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; X86-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsllq $32, %xmm1, %xmm1 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6] -; X86-AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: shift32_v4i64_concat: