Skip to content

Commit b1166e1

Browse files
committed
[X86][AVX] combineX86ShufflesRecursively - attempt to constant fold before widening shuffle inputs
combineX86ShufflesConstants and canonicalizeShuffleMaskWithHorizOp can both handle (or early-out on) shuffles whose inputs have different widths, so delay widening the inputs until as late as possible; this makes it easier to match constant folds, etc. The plan is to eventually move the widening inside combineX86ShuffleChain so that we don't create any new nodes unless we successfully combine the shuffles.
1 parent a4914dc commit b1166e1

File tree

2 files changed

+17
-18
lines changed

2 files changed

+17
-18
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -36610,6 +36610,17 @@ static SDValue combineX86ShufflesRecursively(
3661036610
}
3661136611
}
3661236612

36613+
// Attempt to constant fold all of the constant source ops.
36614+
if (SDValue Cst = combineX86ShufflesConstants(
36615+
Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
36616+
return Cst;
36617+
36618+
// Canonicalize the combined shuffle mask chain with horizontal ops.
36619+
// NOTE: This will update the Ops and Mask.
36620+
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
36621+
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
36622+
return DAG.getBitcast(Root.getValueType(), HOp);
36623+
3661336624
// Widen any subvector shuffle inputs we've collected.
3661436625
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
3661536626
return Op.getValueSizeInBits() < RootSizeInBits;
@@ -36622,17 +36633,6 @@ static SDValue combineX86ShufflesRecursively(
3662236633
resolveTargetShuffleInputsAndMask(Ops, Mask);
3662336634
}
3662436635

36625-
// Attempt to constant fold all of the constant source ops.
36626-
if (SDValue Cst = combineX86ShufflesConstants(
36627-
Ops, Mask, Root, HasVariableMask, DAG, Subtarget))
36628-
return Cst;
36629-
36630-
// Canonicalize the combined shuffle mask chain with horizontal ops.
36631-
// NOTE: This will update the Ops and Mask.
36632-
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
36633-
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
36634-
return DAG.getBitcast(Root.getValueType(), HOp);
36635-
3663636636
// We can only combine unary and binary shuffle mask cases.
3663736637
if (Ops.size() <= 2) {
3663836638
// Minor canonicalization of the accumulated shuffle mask to make it easier

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -108,13 +108,12 @@ define void @PR46178(i16* %0) {
108108
; X86-NEXT: vmovdqu (%eax), %ymm1
109109
; X86-NEXT: vpmovqw %ymm0, %xmm0
110110
; X86-NEXT: vpmovqw %ymm1, %xmm1
111-
; X86-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
112-
; X86-NEXT: vpsllw $8, %ymm0, %ymm0
113-
; X86-NEXT: vpsraw $8, %ymm0, %ymm0
114-
; X86-NEXT: vmovapd {{.*#+}} ymm1 = [0,0,2,0,4,0,4,0]
115-
; X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
116-
; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
117-
; X86-NEXT: vmovupd %ymm1, (%eax)
111+
; X86-NEXT: vpsllw $8, %xmm1, %xmm1
112+
; X86-NEXT: vpsraw $8, %xmm1, %xmm1
113+
; X86-NEXT: vpsllw $8, %xmm0, %xmm0
114+
; X86-NEXT: vpsraw $8, %xmm0, %xmm0
115+
; X86-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
116+
; X86-NEXT: vmovupd %ymm0, (%eax)
118117
; X86-NEXT: vzeroupper
119118
; X86-NEXT: retl
120119
;

0 commit comments

Comments
 (0)