Skip to content

Commit c97c117

Browse files
committed
[X86] combineConcatVectorOps - convert X86ISD::PACKSS/US concatenation to use combineConcatVectorOps recursion
Only concatenate X86ISD::PACKSS/US nodes if at least one of their operands is beneficial to concatenate.
1 parent 8c8eff2 commit c97c117

File tree

4 files changed

+179
-179
lines changed

4 files changed

+179
-179
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58431,9 +58431,13 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5843158431
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
5843258432
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
5843358433
NumOps * SrcVT.getVectorNumElements());
58434-
return DAG.getNode(Op0.getOpcode(), DL, VT,
58435-
ConcatSubOperand(SrcVT, Ops, 0),
58436-
ConcatSubOperand(SrcVT, Ops, 1));
58434+
SDValue Concat0 = CombineSubOperand(SrcVT, Ops, 0);
58435+
SDValue Concat1 = CombineSubOperand(SrcVT, Ops, 1);
58436+
if (Concat0 || Concat1)
58437+
return DAG.getNode(
58438+
Op0.getOpcode(), DL, VT,
58439+
Concat0 ? Concat0 : ConcatSubOperand(SrcVT, Ops, 0),
58440+
Concat1 ? Concat1 : ConcatSubOperand(SrcVT, Ops, 1));
5843758441
}
5843858442
break;
5843958443
case X86ISD::PALIGNR:

llvm/test/CodeGen/X86/vector-pack-512.ll

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -245,21 +245,12 @@ define <64 x i8> @concat_trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) noun
245245
}
246246

247247
define <32 x i16> @concat_packsswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
248-
; AVX512F-LABEL: concat_packsswd_int_2x256:
249-
; AVX512F: # %bb.0:
250-
; AVX512F-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
251-
; AVX512F-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
252-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
253-
; AVX512F-NEXT: retq
254-
;
255-
; AVX512BW-LABEL: concat_packsswd_int_2x256:
256-
; AVX512BW: # %bb.0:
257-
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
258-
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
259-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
260-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
261-
; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
262-
; AVX512BW-NEXT: retq
248+
; AVX512-LABEL: concat_packsswd_int_2x256:
249+
; AVX512: # %bb.0:
250+
; AVX512-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
251+
; AVX512-NEXT: vpackssdw %ymm3, %ymm2, %ymm1
252+
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
253+
; AVX512-NEXT: retq
263254
%lo = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
264255
%hi = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a2, <8 x i32> %a3)
265256
%res = shufflevector <16 x i16> %lo, <16 x i16> %hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -268,21 +259,12 @@ define <32 x i16> @concat_packsswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x
268259
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
269260

270261
define <32 x i16> @concat_packuswd_int_2x256(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3) {
271-
; AVX512F-LABEL: concat_packuswd_int_2x256:
272-
; AVX512F: # %bb.0:
273-
; AVX512F-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
274-
; AVX512F-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
275-
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
276-
; AVX512F-NEXT: retq
277-
;
278-
; AVX512BW-LABEL: concat_packuswd_int_2x256:
279-
; AVX512BW: # %bb.0:
280-
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
281-
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
282-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1
283-
; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
284-
; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
285-
; AVX512BW-NEXT: retq
262+
; AVX512-LABEL: concat_packuswd_int_2x256:
263+
; AVX512: # %bb.0:
264+
; AVX512-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
265+
; AVX512-NEXT: vpackusdw %ymm3, %ymm2, %ymm1
266+
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
267+
; AVX512-NEXT: retq
286268
%lo = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
287269
%hi = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a2, <8 x i32> %a3)
288270
%res = shufflevector <16 x i16> %lo, <16 x i16> %hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>

llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -791,15 +791,13 @@ define <32 x i8> @concat_alignr_unnecessary(<16 x i8> %a0, <16 x i8> noundef %a1
791791
ret <32 x i8> %res
792792
}
793793

794-
; TODO: Not beneficial to concatenate both inputs just to create a 256-bit packss
795-
define <32 x i8> @concat_packsr_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
796-
; CHECK-LABEL: concat_packsr_unnecessary:
794+
; Not beneficial to concatenate both inputs just to create a 256-bit packss
795+
define <32 x i8> @concat_packss_unnecessary(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) nounwind {
796+
; CHECK-LABEL: concat_packss_unnecessary:
797797
; CHECK: # %bb.0:
798-
; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
799-
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
800-
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
801-
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
802-
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
798+
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm1
799+
; CHECK-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
800+
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
803801
; CHECK-NEXT: ret{{[l|q]}}
804802
%lo = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
805803
%hi = tail call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a2)

0 commit comments

Comments
 (0)