Skip to content

Commit 62db78a

Browse files
RKSimon authored and anonymouspc committed
[X86] combineHorizOpWithShuffle - ensure we handle undef elements from widened shuffle (llvm#172014)
Since llvm#170838, we no longer canonicalise away whole-lane shuffles of horizontal ops, so we need to better handle cases where widened shuffle masks might still contain undefs. Fixes llvm#172010.
1 parent 12bfd05 commit 62db78a

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50515,9 +50515,11 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
5051550515
}
5051650516
if ((Op00 == Op10) && (Op01 == Op11)) {
5051750517
const int Map[4] = {0, 2, 1, 3};
50518-
SmallVector<int, 4> ShuffleMask(
50519-
{Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
50520-
Map[ScaledMask1[1]]});
50518+
int ShuffleMask[] = {ScaledMask0[0], ScaledMask1[0], ScaledMask0[1],
50519+
ScaledMask1[1]};
50520+
for (int &M : ShuffleMask)
50521+
if (0 <= M)
50522+
M = Map[M];
5052150523
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
5052250524
SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),
5052350525
DAG.getBitcast(SrcVT, Op01));

llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1215,3 +1215,36 @@ entry:
12151215
%shuffle5 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %not, <32 x i32> <i32 3, i32 9, i32 3, i32 1, i32 9, i32 8, i32 9, i32 2, i32 0, i32 8, i32 48, i32 8, i32 35, i32 3, i32 0, i32 4, i32 4, i32 7, i32 4, i32 39, i32 9, i32 0, i32 59, i32 6, i32 0, i32 4, i32 9, i32 1, i32 1, i32 2, i32 8, i32 9>
12161216
ret <32 x i16> %shuffle5
12171217
}
1218+
1219+
define <9 x i16> @PR172010(<4 x i64> %a0) {
1220+
; AVX2-LABEL: PR172010:
1221+
; AVX2: # %bb.0:
1222+
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,1,1,1]
1223+
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm1
1224+
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
1225+
; AVX2-NEXT: vpackusdw %ymm0, %ymm0, %ymm0
1226+
; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0
1227+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1228+
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1229+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[14,15,4,5,8,9,0,1],zero,zero,zero,zero,zero,zero,zero,zero
1230+
; AVX2-NEXT: ret{{[l|q]}}
1231+
;
1232+
; AVX512-LABEL: PR172010:
1233+
; AVX512: # %bb.0:
1234+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1235+
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[0,1,0,1,2,3,6,7]
1236+
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
1237+
; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
1238+
; AVX512-NEXT: vpmovqw %zmm1, %xmm1
1239+
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
1240+
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,1]
1241+
; AVX512-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,4,6,7]
1242+
; AVX512-NEXT: vprolq $16, %zmm0, %zmm0
1243+
; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1244+
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1245+
; AVX512-NEXT: ret{{[l|q]}}
1246+
%shuffle = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <16 x i32> <i32 5, i32 3, i32 0, i32 5, i32 2, i32 2, i32 1, i32 1, i32 4, i32 6, i32 5, i32 3, i32 1, i32 7, i32 2, i32 1>
1247+
%trunc = trunc nuw <16 x i64> %shuffle to <16 x i16>
1248+
%result = shufflevector <16 x i16> zeroinitializer, <16 x i16> %trunc, <9 x i32> <i32 31, i32 18, i32 28, i32 20, i32 7, i32 5, i32 8, i32 4, i32 7>
1249+
ret <9 x i16> %result
1250+
}

0 commit comments

Comments
 (0)