Skip to content

Commit cc3d3f1

Browse files
committed
Merging r354034 and r354117:
------------------------------------------------------------------------ r354034 | rksimon | 2019-02-14 15:45:32 +0100 (Thu, 14 Feb 2019) | 1 line [X86][AVX] Add PR40730 test case ------------------------------------------------------------------------ ------------------------------------------------------------------------ r354117 | rksimon | 2019-02-15 12:39:21 +0100 (Fri, 15 Feb 2019) | 9 lines [X86][AVX] lowerShuffleAsLanePermuteAndPermute - fully populate the lane shuffle mask (PR40730) As detailed on PR40730, we are not correctly filling in the lane shuffle mask (D53148/rL344446) - we fill in for the correct src lane but don't add it to the correct mask element, so any reference to the correct element is likely to see an UNDEF mask index. This allows constant folding to propagate UNDEFs prior to the lane mask being (correctly) lowered to vperm2f128. This patch fixes the issue by fully populating the lane shuffle mask - this is more than is necessary (if we only filled in the required mask elements we might be able to match other shuffle instructions - broadcasts etc.), but it's the most cautious approach as this needs to be cherry-picked into the 8.0.0 release branch. Differential Revision: https://reviews.llvm.org/D58237 ------------------------------------------------------------------------ llvm-svn: 354260
1 parent 6051407 commit cc3d3f1

File tree

2 files changed

+47
-2
lines changed

2 files changed

+47
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13884,7 +13884,6 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
1388413884
int NumEltsPerLane = NumElts / NumLanes;
1388513885

1388613886
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
13887-
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
1388813887
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
1388913888

1389013889
for (int i = 0; i != NumElts; ++i) {
@@ -13899,10 +13898,20 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
1389913898
return SDValue();
1390013899
SrcLaneMask[DstLane] = SrcLane;
1390113900

13902-
LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
1390313901
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
1390413902
}
1390513903

13904+
// Make sure we set all elements of the lane mask, to avoid undef propagation.
13905+
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
13906+
for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
13907+
int SrcLane = SrcLaneMask[DstLane];
13908+
if (0 <= SrcLane)
13909+
for (int j = 0; j != NumEltsPerLane; ++j) {
13910+
LaneMask[(DstLane * NumEltsPerLane) + j] =
13911+
(SrcLane * NumEltsPerLane) + j;
13912+
}
13913+
}
13914+
1390613915
// If we're only shuffling a single lowest lane and the rest are identity
1390713916
// then don't bother.
1390813917
// TODO - isShuffleMaskInputInPlace could be extended to something like this.

llvm/test/CodeGen/X86/pr40730.ll

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
3+
4+
define <8 x i32> @shuffle_v8i32_0dcd3f14(<8 x i32> %a, <8 x i32> %b) {
5+
; CHECK-LABEL: shuffle_v8i32_0dcd3f14:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
8+
; CHECK-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3]
9+
; CHECK-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,1,0]
10+
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
11+
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,2,3]
12+
; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
13+
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5],ymm0[6,7]
14+
; CHECK-NEXT: retq
15+
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
16+
ret <8 x i32> %shuffle
17+
}
18+
19+
; CHECK: .LCPI1_0:
20+
; CHECK-NEXT: .quad 60129542157
21+
; CHECK-NEXT: .quad 60129542157
22+
; CHECK-NEXT: .quad 68719476736
23+
; CHECK-NEXT: .quad 60129542157
24+
25+
define <8 x i32> @shuffle_v8i32_0dcd3f14_constant(<8 x i32> %a0) {
26+
; CHECK-LABEL: shuffle_v8i32_0dcd3f14_constant:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
29+
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
30+
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,1,0]
31+
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
32+
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5],ymm0[6,7]
33+
; CHECK-NEXT: retq
34+
%res = shufflevector <8 x i32> %a0, <8 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, <8 x i32> <i32 0, i32 13, i32 12, i32 13, i32 3, i32 15, i32 1, i32 4>
35+
ret <8 x i32> %res
36+
}

0 commit comments

Comments
 (0)