Skip to content

Commit 86b0c6e

Browse files
nemanjai authored and tstellar committed
[SelectionDAG] Correctly reduce BV to shuffle with zero on big endian
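This DAG combine is correct on little-endian targets but incorrect on big-endian targets: it always places the extracted element in the first narrow lane of each zero-extended group, whereas on big-endian targets the low bits live in the last lane (index ZextRatio - 1). Select the lane according to the target's endianness to correct it. Differential revision: https://reviews.llvm.org/D146460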
1 parent b273386 commit 86b0c6e

File tree

2 files changed

+136
-4
lines changed

2 files changed

+136
-4
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21361,10 +21361,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
2136121361
// the source vector. The high bits map to zero. We will use a zero vector
2136221362
// as the 2nd source operand of the shuffle, so use the 1st element of
2136321363
// that vector (mask value is number-of-elements) for the high bits.
21364-
if (i % ZextRatio == 0)
21365-
ShufMask[i] = Extract.getConstantOperandVal(1);
21366-
else
21367-
ShufMask[i] = NumMaskElts;
21364+
int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
21365+
ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
21366+
: NumMaskElts;
2136821367
}
2136921368

2137021369
// Undef elements of the build vector remain undef because we initialize

llvm/test/CodeGen/PowerPC/pr61315.ll

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
2+
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
3+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
4+
; CHECK: .LCPI0_0:
5+
; CHECK-NEXT: .byte 7 # 0x7
6+
; CHECK-NEXT: .byte 7 # 0x7
7+
; CHECK-NEXT: .byte 7 # 0x7
8+
; CHECK-NEXT: .byte 7 # 0x7
9+
; CHECK-NEXT: .byte 7 # 0x7
10+
; CHECK-NEXT: .byte 7 # 0x7
11+
; CHECK-NEXT: .byte 7 # 0x7
12+
; CHECK-NEXT: .byte 7 # 0x7
13+
; CHECK-NEXT: .byte 16 # 0x10
14+
; CHECK-NEXT: .byte 16 # 0x10
15+
; CHECK-NEXT: .byte 16 # 0x10
16+
; CHECK-NEXT: .byte 16 # 0x10
17+
; CHECK-NEXT: .byte 16 # 0x10
18+
; CHECK-NEXT: .byte 16 # 0x10
19+
; CHECK-NEXT: .byte 16 # 0x10
20+
; CHECK-NEXT: .byte 16 # 0x10
21+
; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
22+
; CHECK: # %bb.0:
23+
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
24+
; CHECK-NEXT: xxlxor v4, v4, v4
25+
; CHECK-NEXT: xxlxor v3, v3, v3
26+
; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l
27+
; CHECK-NEXT: lxv vs0, 0(r3)
28+
; CHECK-NEXT: addis r3, r2, .LCPI0_1@toc@ha
29+
; CHECK-NEXT: addi r3, r3, .LCPI0_1@toc@l
30+
; CHECK-NEXT: xxperm v4, v2, vs0
31+
; CHECK-NEXT: lxv vs0, 0(r3)
32+
; CHECK-NEXT: xxland v2, v4, vs0
33+
; CHECK-NEXT: vcmpequb v2, v2, v3
34+
; CHECK-NEXT: xxlnor v2, v2, v2
35+
; CHECK-NEXT: blr
36+
%a4 = extractelement <16 x i8> %0, i64 7
37+
%a5 = zext i8 %a4 to i16
38+
%a6 = insertelement <8 x i16> poison, i16 %a5, i64 0
39+
%a7 = bitcast <8 x i16> %a6 to <16 x i8>
40+
%a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
41+
%a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
42+
%a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
43+
%a11 = sext <16 x i1> %a10 to <16 x i8>
44+
ret <16 x i8> %a11
45+
}
46+
47+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
48+
; CHECK: .LCPI1_0:
49+
; CHECK-NEXT: .byte 7 # 0x7
50+
; CHECK-NEXT: .byte 7 # 0x7
51+
; CHECK-NEXT: .byte 7 # 0x7
52+
; CHECK-NEXT: .byte 7 # 0x7
53+
; CHECK-NEXT: .byte 7 # 0x7
54+
; CHECK-NEXT: .byte 7 # 0x7
55+
; CHECK-NEXT: .byte 7 # 0x7
56+
; CHECK-NEXT: .byte 7 # 0x7
57+
; CHECK-NEXT: .byte 16 # 0x10
58+
; CHECK-NEXT: .byte 16 # 0x10
59+
; CHECK-NEXT: .byte 16 # 0x10
60+
; CHECK-NEXT: .byte 16 # 0x10
61+
; CHECK-NEXT: .byte 16 # 0x10
62+
; CHECK-NEXT: .byte 16 # 0x10
63+
; CHECK-NEXT: .byte 16 # 0x10
64+
; CHECK-NEXT: .byte 16 # 0x10
65+
; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
66+
; CHECK: # %bb.0:
67+
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
68+
; CHECK-NEXT: xxlxor v4, v4, v4
69+
; CHECK-NEXT: xxlxor v3, v3, v3
70+
; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l
71+
; CHECK-NEXT: lxv vs0, 0(r3)
72+
; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha
73+
; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l
74+
; CHECK-NEXT: xxperm v4, v2, vs0
75+
; CHECK-NEXT: lxv vs0, 0(r3)
76+
; CHECK-NEXT: xxland v2, v4, vs0
77+
; CHECK-NEXT: vcmpequb v2, v2, v3
78+
; CHECK-NEXT: xxlnor v2, v2, v2
79+
; CHECK-NEXT: blr
80+
%a4 = extractelement <16 x i8> %0, i64 7
81+
%a5 = zext i8 %a4 to i32
82+
%a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
83+
%a7 = bitcast <4 x i32> %a6 to <16 x i8>
84+
%a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
85+
%a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
86+
%a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
87+
%a11 = sext <16 x i1> %a10 to <16 x i8>
88+
ret <16 x i8> %a11
89+
}
90+
91+
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
92+
; CHECK: .LCPI2_0:
93+
; CHECK-NEXT: .byte 6 # 0x6
94+
; CHECK-NEXT: .byte 7 # 0x7
95+
; CHECK-NEXT: .byte 6 # 0x6
96+
; CHECK-NEXT: .byte 7 # 0x7
97+
; CHECK-NEXT: .byte 6 # 0x6
98+
; CHECK-NEXT: .byte 7 # 0x7
99+
; CHECK-NEXT: .byte 6 # 0x6
100+
; CHECK-NEXT: .byte 7 # 0x7
101+
; CHECK-NEXT: .byte 16 # 0x10
102+
; CHECK-NEXT: .byte 16 # 0x10
103+
; CHECK-NEXT: .byte 16 # 0x10
104+
; CHECK-NEXT: .byte 16 # 0x10
105+
; CHECK-NEXT: .byte 16 # 0x10
106+
; CHECK-NEXT: .byte 16 # 0x10
107+
; CHECK-NEXT: .byte 16 # 0x10
108+
; CHECK-NEXT: .byte 16 # 0x10
109+
; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
110+
; CHECK: # %bb.0:
111+
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
112+
; CHECK-NEXT: xxlxor v4, v4, v4
113+
; CHECK-NEXT: xxlxor v3, v3, v3
114+
; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l
115+
; CHECK-NEXT: lxv vs0, 0(r3)
116+
; CHECK-NEXT: addis r3, r2, .LCPI2_1@toc@ha
117+
; CHECK-NEXT: addi r3, r3, .LCPI2_1@toc@l
118+
; CHECK-NEXT: xxperm v4, v2, vs0
119+
; CHECK-NEXT: lxv vs0, 0(r3)
120+
; CHECK-NEXT: xxland v2, v4, vs0
121+
; CHECK-NEXT: vcmpequb v2, v2, v3
122+
; CHECK-NEXT: xxlnor v2, v2, v2
123+
; CHECK-NEXT: blr
124+
%a4 = extractelement <8 x i16> %0, i64 3
125+
%a5 = zext i16 %a4 to i32
126+
%a6 = insertelement <4 x i32> poison, i32 %a5, i64 0
127+
%a7 = bitcast <4 x i32> %a6 to <16 x i8>
128+
%a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
129+
%a9 = and <16 x i8> %a8, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
130+
%a10 = icmp eq <16 x i8> %a9, <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>
131+
%a11 = sext <16 x i1> %a10 to <16 x i8>
132+
ret <16 x i8> %a11
133+
}

0 commit comments

Comments
 (0)