Skip to content

Commit a9215ed

Browse files
committed
[InstCombine][X86] simplifyDemandedVectorEltsIntrinsic - handle avx2 per-element vector shifts
1 parent 75e1cf4 commit a9215ed

File tree

3 files changed

+77
-80
lines changed

3 files changed

+77
-80
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,6 +1931,23 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
19311931
break;
19321932
}
19331933

1934+
// General per-element vector operations.
1935+
case Intrinsic::x86_avx2_psllv_d:
1936+
case Intrinsic::x86_avx2_psllv_d_256:
1937+
case Intrinsic::x86_avx2_psllv_q:
1938+
case Intrinsic::x86_avx2_psllv_q_256:
1939+
case Intrinsic::x86_avx2_psrlv_d:
1940+
case Intrinsic::x86_avx2_psrlv_d_256:
1941+
case Intrinsic::x86_avx2_psrlv_q:
1942+
case Intrinsic::x86_avx2_psrlv_q_256:
1943+
case Intrinsic::x86_avx2_psrav_d:
1944+
case Intrinsic::x86_avx2_psrav_d_256: {
1945+
simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1946+
simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
1947+
UndefElts &= UndefElts2;
1948+
break;
1949+
}
1950+
19341951
case Intrinsic::x86_sse2_packssdw_128:
19351952
case Intrinsic::x86_sse2_packsswb_128:
19361953
case Intrinsic::x86_sse2_packuswb_128:

llvm/test/Transforms/InstCombine/X86/x86-avx2-inseltpoison.ll

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,9 @@ define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
108108

109109
define <2 x i64> @elts_test_vpsllvq(<2 x i64> %a0, <2 x i64> %a1) {
110110
; CHECK-LABEL: @elts_test_vpsllvq(
111-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[A1:%.*]], i64 0, i64 1
112-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[TMP1]])
113-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
114-
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
111+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
112+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
113+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
115114
;
116115
%1 = insertelement <2 x i64> %a1, i64 0, i64 1
117116
%2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %1)
@@ -121,10 +120,9 @@ define <2 x i64> @elts_test_vpsllvq(<2 x i64> %a0, <2 x i64> %a1) {
121120

122121
define <2 x i64> @elts_test_vpsrlvq(<2 x i64> %a0, <2 x i64> %a1) {
123122
; CHECK-LABEL: @elts_test_vpsrlvq(
124-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[A1:%.*]], i64 0, i64 1
125-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[TMP1]])
126-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
127-
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
123+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
124+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
125+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
128126
;
129127
%1 = insertelement <2 x i64> %a1, i64 0, i64 1
130128
%2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %1)
@@ -134,10 +132,9 @@ define <2 x i64> @elts_test_vpsrlvq(<2 x i64> %a0, <2 x i64> %a1) {
134132

135133
define <4 x i64> @elts_test_vpsllvq_256(<4 x i64> %a0, <4 x i64> %a1) {
136134
; CHECK-LABEL: @elts_test_vpsllvq_256(
137-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[A1:%.*]], i64 0, i64 2
138-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[TMP1]])
139-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> zeroinitializer
140-
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
135+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
136+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
137+
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
141138
;
142139
%1 = insertelement <4 x i64> %a1, i64 0, i64 2
143140
%2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %1)
@@ -147,10 +144,9 @@ define <4 x i64> @elts_test_vpsllvq_256(<4 x i64> %a0, <4 x i64> %a1) {
147144

148145
define <4 x i64> @elts_test_vpsrlvq_256(<4 x i64> %a0, <4 x i64> %a1) {
149146
; CHECK-LABEL: @elts_test_vpsrlvq_256(
150-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[A1:%.*]], i64 0, i64 3
151-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[TMP1]])
152-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> zeroinitializer
153-
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
147+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
148+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
149+
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
154150
;
155151
%1 = insertelement <4 x i64> %a1, i64 0, i64 3
156152
%2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %1)
@@ -160,10 +156,9 @@ define <4 x i64> @elts_test_vpsrlvq_256(<4 x i64> %a0, <4 x i64> %a1) {
160156

161157
define <4 x i32> @elts_test_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
162158
; CHECK-LABEL: @elts_test_vpsllvd(
163-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 3
164-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
165-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
166-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
159+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
160+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
161+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
167162
;
168163
%1 = insertelement <4 x i32> %a1, i32 0, i64 3
169164
%2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %1)
@@ -173,10 +168,9 @@ define <4 x i32> @elts_test_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
173168

174169
define <4 x i32> @elts_test_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
175170
; CHECK-LABEL: @elts_test_vpsravd(
176-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 1
177-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
178-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
179-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
171+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
172+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
173+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
180174
;
181175
%1 = insertelement <4 x i32> %a1, i32 0, i64 1
182176
%2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %1)
@@ -186,10 +180,9 @@ define <4 x i32> @elts_test_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
186180

187181
define <4 x i32> @elts_test_vpsrlvd(<4 x i32> %a0, <4 x i32> %a1) {
188182
; CHECK-LABEL: @elts_test_vpsrlvd(
189-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 2
190-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
191-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
192-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
183+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
184+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
185+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
193186
;
194187
%1 = insertelement <4 x i32> %a1, i32 0, i64 2
195188
%2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %1)
@@ -199,10 +192,9 @@ define <4 x i32> @elts_test_vpsrlvd(<4 x i32> %a0, <4 x i32> %a1) {
199192

200193
define <8 x i32> @elts_test_vpsllvd_256(<8 x i32> %a0, <8 x i32> %a1) {
201194
; CHECK-LABEL: @elts_test_vpsllvd_256(
202-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 3
203-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
204-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
205-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
195+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
196+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
197+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
206198
;
207199
%1 = insertelement <8 x i32> %a1, i32 0, i64 3
208200
%2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %1)
@@ -212,10 +204,9 @@ define <8 x i32> @elts_test_vpsllvd_256(<8 x i32> %a0, <8 x i32> %a1) {
212204

213205
define <8 x i32> @elts_test_vpsravd_256(<8 x i32> %a0, <8 x i32> %a1) {
214206
; CHECK-LABEL: @elts_test_vpsravd_256(
215-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 4
216-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
217-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
218-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
207+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
208+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
209+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
219210
;
220211
%1 = insertelement <8 x i32> %a1, i32 0, i64 4
221212
%2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %1)
@@ -225,10 +216,9 @@ define <8 x i32> @elts_test_vpsravd_256(<8 x i32> %a0, <8 x i32> %a1) {
225216

226217
define <8 x i32> @elts_test_vpsrlvd_256(<8 x i32> %a0, <8 x i32> %a1) {
227218
; CHECK-LABEL: @elts_test_vpsrlvd_256(
228-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 5
229-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
230-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
231-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
219+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
220+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
221+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
232222
;
233223
%1 = insertelement <8 x i32> %a1, i32 0, i64 5
234224
%2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %1)

llvm/test/Transforms/InstCombine/X86/x86-avx2.ll

Lines changed: 30 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,9 @@ define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
108108

109109
define <2 x i64> @elts_test_vpsllvq(<2 x i64> %a0, <2 x i64> %a1) {
110110
; CHECK-LABEL: @elts_test_vpsllvq(
111-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[A1:%.*]], i64 0, i64 1
112-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[TMP1]])
113-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> zeroinitializer
114-
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
111+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
112+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> undef, <2 x i32> zeroinitializer
113+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
115114
;
116115
%1 = insertelement <2 x i64> %a1, i64 0, i64 1
117116
%2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %1)
@@ -121,10 +120,9 @@ define <2 x i64> @elts_test_vpsllvq(<2 x i64> %a0, <2 x i64> %a1) {
121120

122121
define <2 x i64> @elts_test_vpsrlvq(<2 x i64> %a0, <2 x i64> %a1) {
123122
; CHECK-LABEL: @elts_test_vpsrlvq(
124-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> [[A1:%.*]], i64 0, i64 1
125-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[TMP1]])
126-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> zeroinitializer
127-
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
123+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
124+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> undef, <2 x i32> zeroinitializer
125+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
128126
;
129127
%1 = insertelement <2 x i64> %a1, i64 0, i64 1
130128
%2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %1)
@@ -134,10 +132,9 @@ define <2 x i64> @elts_test_vpsrlvq(<2 x i64> %a0, <2 x i64> %a1) {
134132

135133
define <4 x i64> @elts_test_vpsllvq_256(<4 x i64> %a0, <4 x i64> %a1) {
136134
; CHECK-LABEL: @elts_test_vpsllvq_256(
137-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[A1:%.*]], i64 0, i64 2
138-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[TMP1]])
139-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> zeroinitializer
140-
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
135+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
136+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> zeroinitializer
137+
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
141138
;
142139
%1 = insertelement <4 x i64> %a1, i64 0, i64 2
143140
%2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %1)
@@ -147,10 +144,9 @@ define <4 x i64> @elts_test_vpsllvq_256(<4 x i64> %a0, <4 x i64> %a1) {
147144

148145
define <4 x i64> @elts_test_vpsrlvq_256(<4 x i64> %a0, <4 x i64> %a1) {
149146
; CHECK-LABEL: @elts_test_vpsrlvq_256(
150-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[A1:%.*]], i64 0, i64 3
151-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[TMP1]])
152-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> zeroinitializer
153-
; CHECK-NEXT: ret <4 x i64> [[TMP3]]
147+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
148+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> zeroinitializer
149+
; CHECK-NEXT: ret <4 x i64> [[TMP2]]
154150
;
155151
%1 = insertelement <4 x i64> %a1, i64 0, i64 3
156152
%2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %1)
@@ -160,10 +156,9 @@ define <4 x i64> @elts_test_vpsrlvq_256(<4 x i64> %a0, <4 x i64> %a1) {
160156

161157
define <4 x i32> @elts_test_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
162158
; CHECK-LABEL: @elts_test_vpsllvd(
163-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 3
164-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
165-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
166-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
159+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
160+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
161+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
167162
;
168163
%1 = insertelement <4 x i32> %a1, i32 0, i64 3
169164
%2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %1)
@@ -173,10 +168,9 @@ define <4 x i32> @elts_test_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
173168

174169
define <4 x i32> @elts_test_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
175170
; CHECK-LABEL: @elts_test_vpsravd(
176-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 1
177-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
178-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
179-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
171+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
172+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
173+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
180174
;
181175
%1 = insertelement <4 x i32> %a1, i32 0, i64 1
182176
%2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %1)
@@ -186,10 +180,9 @@ define <4 x i32> @elts_test_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
186180

187181
define <4 x i32> @elts_test_vpsrlvd(<4 x i32> %a0, <4 x i32> %a1) {
188182
; CHECK-LABEL: @elts_test_vpsrlvd(
189-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[A1:%.*]], i32 0, i64 2
190-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]])
191-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
192-
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
183+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
184+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
185+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
193186
;
194187
%1 = insertelement <4 x i32> %a1, i32 0, i64 2
195188
%2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %1)
@@ -199,10 +192,9 @@ define <4 x i32> @elts_test_vpsrlvd(<4 x i32> %a0, <4 x i32> %a1) {
199192

200193
define <8 x i32> @elts_test_vpsllvd_256(<8 x i32> %a0, <8 x i32> %a1) {
201194
; CHECK-LABEL: @elts_test_vpsllvd_256(
202-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 3
203-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
204-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> zeroinitializer
205-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
195+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
196+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
197+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
206198
;
207199
%1 = insertelement <8 x i32> %a1, i32 0, i64 3
208200
%2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %1)
@@ -212,10 +204,9 @@ define <8 x i32> @elts_test_vpsllvd_256(<8 x i32> %a0, <8 x i32> %a1) {
212204

213205
define <8 x i32> @elts_test_vpsravd_256(<8 x i32> %a0, <8 x i32> %a1) {
214206
; CHECK-LABEL: @elts_test_vpsravd_256(
215-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 4
216-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
217-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> zeroinitializer
218-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
207+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
208+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
209+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
219210
;
220211
%1 = insertelement <8 x i32> %a1, i32 0, i64 4
221212
%2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %1)
@@ -225,10 +216,9 @@ define <8 x i32> @elts_test_vpsravd_256(<8 x i32> %a0, <8 x i32> %a1) {
225216

226217
define <8 x i32> @elts_test_vpsrlvd_256(<8 x i32> %a0, <8 x i32> %a1) {
227218
; CHECK-LABEL: @elts_test_vpsrlvd_256(
228-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[A1:%.*]], i32 0, i64 5
229-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[TMP1]])
230-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> zeroinitializer
231-
; CHECK-NEXT: ret <8 x i32> [[TMP3]]
219+
; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
220+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> undef, <8 x i32> zeroinitializer
221+
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
232222
;
233223
%1 = insertelement <8 x i32> %a1, i32 0, i64 5
234224
%2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %1)

0 commit comments

Comments
 (0)