Skip to content

Commit 24f34e2

Browse files
RKSimon authored and Honey Goyal committed
[X86] LowerShuffle - don't call canonicalizeShuffleMaskWithHorizOp if we could shuffle whole lanes (llvm#170838)
canonicalizeShuffleMaskWithHorizOp was getting stuck: it canonicalized a SHUFFLE(HADD(X,X)) to refer only to the results of the LHS X, but the original shuffle was shuffling entire lanes (with VPERM2F128), and the canonicalized shuffle was then lowered back to the original VPERM2F128 pattern, repeating the cycle. I think we can drop this call to canonicalizeShuffleMaskWithHorizOp once llvm#143000 is addressed, as VectorCombine should then fold away all the patterns this addresses. Fixes llvm#167793.
1 parent a48c935 commit 24f34e2

File tree

2 files changed

+43
-9
lines changed

2 files changed

+43
-9
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18462,16 +18462,20 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
1846218462
SmallVector<int> Mask(OrigMask);
1846318463

1846418464
// Canonicalize the shuffle with any horizontal ops inputs.
18465+
// Don't attempt this if the shuffle can still be widened as we may lose
18466+
// whole lane shuffle patterns.
1846518467
// NOTE: This may update Ops and Mask.
18466-
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
18467-
Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
18468-
return DAG.getBitcast(VT, HOp);
18469-
18470-
V1 = DAG.getBitcast(VT, Ops[0]);
18471-
V2 = DAG.getBitcast(VT, Ops[1]);
18472-
assert(NumElements == (int)Mask.size() &&
18473-
"canonicalizeShuffleMaskWithHorizOp "
18474-
"shouldn't alter the shuffle mask size");
18468+
if (!canWidenShuffleElements(Mask)) {
18469+
if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
18470+
Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
18471+
return DAG.getBitcast(VT, HOp);
18472+
18473+
V1 = DAG.getBitcast(VT, Ops[0]);
18474+
V2 = DAG.getBitcast(VT, Ops[1]);
18475+
assert(NumElements == (int)Mask.size() &&
18476+
"canonicalizeShuffleMaskWithHorizOp "
18477+
"shouldn't alter the shuffle mask size");
18478+
}
1847518479

1847618480
// Canonicalize zeros/ones/fp splat constants to ensure no undefs.
1847718481
// These will be materialized uniformly anyway, so make splat matching easier.

llvm/test/CodeGen/X86/pr167793.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
3+
4+
define <4 x double> @PR167793(<4 x double> %a0, <4 x double> %a1) {
5+
; CHECK-LABEL: PR167793:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vhaddpd %ymm0, %ymm0, %ymm0
8+
; CHECK-NEXT: vhaddpd %ymm1, %ymm1, %ymm1
9+
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
10+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm0[2,3],ymm1[2,3]
11+
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
12+
; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
13+
; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
14+
; CHECK-NEXT: retq
15+
%i5 = shufflevector <4 x double> %a0, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
16+
%i6 = fadd <4 x double> %a0, %i5
17+
%i8 = shufflevector <4 x double> %a1, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
18+
%i9 = fadd <4 x double> %a1, %i8
19+
%i10 = shufflevector <4 x double> %i6, <4 x double> poison, <2 x i32> <i32 poison, i32 3>
20+
%i11 = shufflevector <4 x double> %i6, <4 x double> poison, <2 x i32> <i32 poison, i32 1>
21+
%i12 = fadd <2 x double> %i10, %i11
22+
%i13 = shufflevector <4 x double> %i9, <4 x double> poison, <2 x i32> <i32 poison, i32 3>
23+
%i14 = shufflevector <4 x double> %i9, <4 x double> poison, <2 x i32> <i32 poison, i32 1>
24+
%i15 = fadd <2 x double> %i13, %i14
25+
%i16 = shufflevector <4 x double> zeroinitializer, <4 x double> poison, <2 x i32> <i32 poison, i32 1>
26+
%i18 = shufflevector <2 x double> %i15, <2 x double> %i16, <4 x i32> <i32 poison, i32 poison, i32 1, i32 3>
27+
%i19 = shufflevector <2 x double> %i12, <2 x double> poison, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
28+
%i20 = shufflevector <4 x double> %i19, <4 x double> %i18, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
29+
ret <4 x double> %i20
30+
}

0 commit comments

Comments
 (0)