11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK
3- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK
4- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK
5- ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK
2+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
5+ ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
66
77declare void @use (<4 x i1 >)
88
99; icmp-eq v4i32 is cheap
1010
1111define <4 x i32 > @shuf_icmp_eq_v4i32 (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z , <4 x i32 > %w ) {
12- ; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
13- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
14- ; CHECK-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
15- ; CHECK-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
16- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
17- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
18- ; CHECK-NEXT: ret <4 x i32> [[R]]
12+ ; SSE-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
13+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
14+ ; SSE-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
15+ ; SSE-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
16+ ; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
17+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
18+ ; SSE-NEXT: ret <4 x i32> [[R]]
19+ ;
20+ ; AVX2-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
21+ ; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
22+ ; AVX2-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
23+ ; AVX2-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
24+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
25+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
26+ ; AVX2-NEXT: ret <4 x i32> [[R]]
27+ ;
28+ ; AVX512-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
29+ ; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
30+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
31+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
32+ ; AVX512-NEXT: [[S:%.*]] = icmp eq <4 x i32> [[TMP1]], [[TMP2]]
33+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
34+ ; AVX512-NEXT: ret <4 x i32> [[R]]
1935;
2036 %c0 = icmp eq <4 x i32 > %x , %y
2137 %c1 = icmp eq <4 x i32 > %z , %w
@@ -27,13 +43,37 @@ define <4 x i32> @shuf_icmp_eq_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <
2743; icmp-eq v2i64 is only cheap on SSE4+ targets with PCMPEQQ
2844
2945define <2 x i64 > @shuf_icmp_eq_v2i64 (<2 x i64 > %x , <2 x i64 > %y , <2 x i64 > %z , <2 x i64 > %w ) {
30- ; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
31- ; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
32- ; CHECK-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
33- ; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
34- ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
35- ; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
36- ; CHECK-NEXT: ret <2 x i64> [[R]]
46+ ; SSE2-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
47+ ; SSE2-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
48+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 1, i32 3>
49+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> [[W]], <2 x i32> <i32 1, i32 3>
50+ ; SSE2-NEXT: [[S:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
51+ ; SSE2-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
52+ ; SSE2-NEXT: ret <2 x i64> [[R]]
53+ ;
54+ ; SSE4-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
55+ ; SSE4-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
56+ ; SSE4-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
57+ ; SSE4-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
58+ ; SSE4-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
59+ ; SSE4-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
60+ ; SSE4-NEXT: ret <2 x i64> [[R]]
61+ ;
62+ ; AVX2-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
63+ ; AVX2-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
64+ ; AVX2-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
65+ ; AVX2-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
66+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
67+ ; AVX2-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
68+ ; AVX2-NEXT: ret <2 x i64> [[R]]
69+ ;
70+ ; AVX512-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
71+ ; AVX512-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
72+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 1, i32 3>
73+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> [[W]], <2 x i32> <i32 1, i32 3>
74+ ; AVX512-NEXT: [[S:%.*]] = icmp eq <2 x i64> [[TMP1]], [[TMP2]]
75+ ; AVX512-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
76+ ; AVX512-NEXT: ret <2 x i64> [[R]]
3777;
3878 %c0 = icmp eq <2 x i64 > %x , %y
3979 %c1 = icmp eq <2 x i64 > %z , %w
@@ -46,10 +86,10 @@ define <2 x i64> @shuf_icmp_eq_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z, <
4686
4787define <4 x i32 > @shuf_icmp_ugt_v4i32 (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z , <4 x i32 > %w ) {
4888; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32(
49- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
50- ; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
51- ; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
52- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
89+ ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
90+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
91+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[W]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
92+ ; CHECK-NEXT: [[S:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[TMP2]]
5393; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
5494; CHECK-NEXT: ret <4 x i32> [[R]]
5595;
@@ -60,16 +100,32 @@ define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z,
60100 ret <4 x i32 > %r
61101}
62102
63- ; Common operand is op0 of the fcmps.
103+ ; Common operand is op0 of the fcmps (CMPPS cheaper on SSE4+).
64104
65105define <4 x i32 > @shuf_fcmp_oeq_v4i32 (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
66- ; CHECK-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
67- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
68- ; CHECK-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
69- ; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
70- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
71- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
72- ; CHECK-NEXT: ret <4 x i32> [[R]]
106+ ; SSE2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
107+ ; SSE2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
108+ ; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
109+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
110+ ; SSE2-NEXT: [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
111+ ; SSE2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
112+ ; SSE2-NEXT: ret <4 x i32> [[R]]
113+ ;
114+ ; SSE4-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
115+ ; SSE4-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
116+ ; SSE4-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
117+ ; SSE4-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
118+ ; SSE4-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
119+ ; SSE4-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
120+ ; SSE4-NEXT: ret <4 x i32> [[R]]
121+ ;
122+ ; AVX-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
123+ ; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124+ ; AVX-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
125+ ; AVX-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
126+ ; AVX-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
127+ ; AVX-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
128+ ; AVX-NEXT: ret <4 x i32> [[R]]
73129;
74130 %b0 = fcmp oeq <4 x float > %x , %y
75131 %b1 = fcmp oeq <4 x float > %x , %z
@@ -81,13 +137,29 @@ define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float
81137; For commutative instructions, common operand may be swapped
82138
83139define <4 x i32 > @shuf_fcmp_one_v4f32_swap (<4 x float > %x , <4 x float > %y , <4 x float > %z ) {
84- ; CHECK-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
85- ; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
86- ; CHECK-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
87- ; CHECK-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
88- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
89- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
90- ; CHECK-NEXT: ret <4 x i32> [[R]]
140+ ; SSE-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
141+ ; SSE-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
142+ ; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
143+ ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
144+ ; SSE-NEXT: [[S:%.*]] = fcmp one <4 x float> [[TMP1]], [[TMP2]]
145+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
146+ ; SSE-NEXT: ret <4 x i32> [[R]]
147+ ;
148+ ; AVX2-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
149+ ; AVX2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
150+ ; AVX2-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
151+ ; AVX2-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
152+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
153+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
154+ ; AVX2-NEXT: ret <4 x i32> [[R]]
155+ ;
156+ ; AVX512-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
157+ ; AVX512-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
158+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
159+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3>
160+ ; AVX512-NEXT: [[S:%.*]] = fcmp one <4 x float> [[TMP1]], [[TMP2]]
161+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
162+ ; AVX512-NEXT: ret <4 x i32> [[R]]
91163;
92164 %b0 = fcmp one <4 x float > %x , %y
93165 %b1 = fcmp one <4 x float > %z , %x
@@ -99,13 +171,29 @@ define <4 x i32> @shuf_fcmp_one_v4f32_swap(<4 x float> %x, <4 x float> %y, <4 x
99171; non-commutative pred, but common op0
100172
101173define <4 x i32 > @shuf_icmp_sgt_v4i32_swap (<4 x i32 > %x , <4 x i32 > %y , <4 x i32 > %z ) {
102- ; CHECK-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
103- ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
104- ; CHECK-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
105- ; CHECK-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
106- ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
107- ; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
108- ; CHECK-NEXT: ret <4 x i32> [[R]]
174+ ; SSE-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
175+ ; SSE-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
176+ ; SSE-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
177+ ; SSE-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
178+ ; SSE-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
179+ ; SSE-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
180+ ; SSE-NEXT: ret <4 x i32> [[R]]
181+ ;
182+ ; AVX2-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
183+ ; AVX2-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
184+ ; AVX2-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
185+ ; AVX2-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
186+ ; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
187+ ; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
188+ ; AVX2-NEXT: ret <4 x i32> [[R]]
189+ ;
190+ ; AVX512-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
191+ ; AVX512-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
192+ ; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 2>
193+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
194+ ; AVX512-NEXT: [[S:%.*]] = icmp sgt <4 x i32> [[TMP1]], [[TMP2]]
195+ ; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
196+ ; AVX512-NEXT: ret <4 x i32> [[R]]
109197;
110198 %b0 = icmp sgt <4 x i32 > %x , %y
111199 %b1 = icmp sgt <4 x i32 > %x , %z
0 commit comments