Skip to content

Commit 14cec6d

Browse files
committed
[X86] Handle undef/zero/one cases after modifying Ops and Masks
1 parent acea1f5 commit 14cec6d

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41567,6 +41567,17 @@ static SDValue combineX86ShufflesRecursively(
4156741567
resolveTargetShuffleInputsAndMask(Ops, Mask);
4156841568
}
4156941569

41570+
// Handle the cases where the mask is all undef, all undef/zero, or selects only from a single all-ones input.
41571+
if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
41572+
return DAG.getUNDEF(RootVT);
41573+
if (all_of(Mask, [](int Idx) { return Idx < 0; }))
41574+
return getZeroVector(RootVT, Subtarget, DAG, DL);
41575+
if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
41576+
!llvm::is_contained(Mask, SM_SentinelZero))
41577+
return getOnesVector(RootVT, DAG, DL);
41578+
41579+
assert(!Ops.empty() && "Shuffle with no inputs detected");
41580+
4157041581
// We can only combine unary and binary shuffle mask cases.
4157141582
if (Ops.size() <= 2) {
4157241583
// Minor canonicalization of the accumulated shuffle mask to make it easier

llvm/test/CodeGen/X86/pr158415.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64 -mattr=+avx2 | FileCheck %s
3+
4+
; Test input: widens an <8 x i8> argument to <32 x i16> through the chain
; shufflevector -> zext -> shufflevector -> xor -> shufflevector, where every
; shufflevector has a zeroinitializer operand.
; Presumably the reproducer for issue #158415 (inferred from the file name
; pr158415.ll) -- confirm against the bug report.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <32 x i16> @test(<8 x i8> %arg) {
5+
; CHECK-LABEL: test:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
8+
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
9+
; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
10+
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u]
11+
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
12+
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3]
13+
; CHECK-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31]
14+
; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2]
15+
; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
16+
; CHECK-NEXT: vpxor %ymm2, %ymm1, %ymm1
17+
; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm2
18+
; CHECK-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
19+
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
20+
; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
21+
; CHECK-NEXT: vpbroadcastw %xmm1, %ymm3
22+
; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15]
23+
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
24+
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
25+
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero
26+
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
27+
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
28+
; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
29+
; CHECK-NEXT: retq
30+
entry:
31+
%shuffle2 = shufflevector <8 x i8> %arg, <8 x i8> zeroinitializer, <32 x i32> <i32 2, i32 2, i32 9, i32 3, i32 1, i32 0, i32 0, i32 2, i32 0, i32 5, i32 9, i32 6, i32 5, i32 4, i32 7, i32 2, i32 7, i32 9, i32 4, i32 0, i32 9, i32 2, i32 4, i32 3, i32 3, i32 2, i32 2, i32 3, i32 9, i32 0, i32 6, i32 4>
32+
%conv3 = zext <32 x i8> %shuffle2 to <32 x i16>
33+
%shuffle4 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %conv3, <32 x i32> <i32 5, i32 3, i32 4, i32 47, i32 5, i32 5, i32 3, i32 63, i32 4, i32 4, i32 60, i32 2, i32 2, i32 5, i32 4, i32 0, i32 38, i32 1, i32 0, i32 3, i32 59, i32 2, i32 3, i32 1, i32 1, i32 0, i32 3, i32 34, i32 0, i32 0, i32 62, i32 5>
34+
%not = xor <32 x i16> %shuffle4, splat (i16 1)
35+
%shuffle5 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %not, <32 x i32> <i32 3, i32 9, i32 3, i32 1, i32 9, i32 8, i32 9, i32 2, i32 0, i32 8, i32 48, i32 8, i32 35, i32 3, i32 0, i32 4, i32 4, i32 7, i32 4, i32 39, i32 9, i32 0, i32 59, i32 6, i32 0, i32 4, i32 9, i32 1, i32 1, i32 2, i32 8, i32 9>
36+
ret <32 x i16> %shuffle5
37+
}
38+

0 commit comments

Comments
 (0)