Skip to content

Commit 851ecfa

Browse files
committed
[X86] combineConcatVectorOps - convert X86ISD::PACKSS/US concatenation to use combineConcatVectorOps recursion
Only concatenate X86ISD::PACKSS/US nodes when concatenating at least one of their operands is itself beneficial, instead of unconditionally concatenating both operands.
1 parent cf5aa55 commit 851ecfa

File tree

4 files changed: 179 additions and 179 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58442,9 +58442,13 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5844258442
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
5844358443
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
5844458444
NumOps * SrcVT.getVectorNumElements());
58445-
return DAG.getNode(Op0.getOpcode(), DL, VT,
58446-
ConcatSubOperand(SrcVT, Ops, 0),
58447-
ConcatSubOperand(SrcVT, Ops, 1));
58445+
SDValue Concat0 = CombineSubOperand(SrcVT, Ops, 0);
58446+
SDValue Concat1 = CombineSubOperand(SrcVT, Ops, 1);
58447+
if (Concat0 || Concat1)
58448+
return DAG.getNode(
58449+
Op0.getOpcode(), DL, VT,
58450+
Concat0 ? Concat0 : ConcatSubOperand(SrcVT, Ops, 0),
58451+
Concat1 ? Concat1 : ConcatSubOperand(SrcVT, Ops, 1));
5844858452
}
5844958453
break;
5845058454
case X86ISD::PALIGNR:

llvm/test/CodeGen/X86/vector-pack-512.ll

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -245,21 +245,12 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
245245
}
246246

247247
define <32 x i16> @concat_packsswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
248-
; AVX512F-LABEL: concat_packsswd_int_2x256:
249-
; AVX512F: # %bb.0:
250-
; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
251-
; AVX512F-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
252-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
253-
; AVX512F-NEXT: retq
254-
;
255-
; AVX512BW-LABEL: concat_packsswd_int_2x256:
256-
; AVX512BW: # %bb.0:
257-
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
258-
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
259-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
260-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
261-
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
262-
; AVX512BW-NEXT: retq
248+
; AVX512-LABEL: concat_packsswd_int_2x256:
249+
; AVX512: # %bb.0:
250+
; AVX512-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
251+
; AVX512-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
252+
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
253+
; AVX512-NEXT: retq
263254
%lo = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
264255
%hi = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a2, <8 x i32> %a3)
265256
%res = shufflevector <16 x i16> %lo, <16 x i16> %hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -268,21 +259,12 @@ define <32 x i16> @concat_packsswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x
268259
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
269260

270261
define <32 x i16> @concat_packuswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
271-
; AVX512F-LABEL: concat_packuswd_int_2x256:
272-
; AVX512F: # %bb.0:
273-
; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
274-
; AVX512F-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
275-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
276-
; AVX512F-NEXT: retq
277-
;
278-
; AVX512BW-LABEL: concat_packuswd_int_2x256:
279-
; AVX512BW: # %bb.0:
280-
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
281-
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
282-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
283-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
284-
; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
285-
; AVX512BW-NEXT: retq
262+
; AVX512-LABEL: concat_packuswd_int_2x256:
263+
; AVX512: # %bb.0:
264+
; AVX512-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
265+
; AVX512-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
266+
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
267+
; AVX512-NEXT: retq
286268
%lo = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
287269
%hi = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a2, <8 x i32> %a3)
288270
%res = shufflevector <16 x i16> %lo, <16 x i16> %hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>

llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -817,15 +817,13 @@ define <32 x i8> @concat_alignr_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1
817817
ret <32 x i8> %res
818818
}
819819

820-
; TODO: Not beneficial to concatenate both inputs just to create a 256-bit packss
821-
define <32 x i8> @concat_packsr_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
822-
; CHECK-LABEL: concat_packsr_unnecessary:
820+
; Not beneficial to concatenate both inputs just to create a 256-bit packss
821+
define <32 x i8> @concat_packss_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
822+
; CHECK-LABEL: concat_packss_unnecessary:
823823
; CHECK: # %bb.0:
824-
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
825-
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
826-
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
827-
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
828-
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
824+
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm1
825+
; CHECK-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
826+
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
829827
; CHECK-NEXT: ret{{[l|q]}}
830828
%lo = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
831829
%hi = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a2)

0 commit comments

Comments
 (0)