@@ -30,28 +30,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: shl_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm4, %xmm4
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm2, %xmm5, %xmm6
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm6, %ymm4
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm1, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $1, %xmm0, %xmm8, %xmm9
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm7, %ymm9, %ymm7
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm4, %zmm7, %zmm4
-; AVX512VBMI-NEXT: vpshldq $1, %xmm8, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vpshldq $1, %xmm5, %xmm3, %xmm1
-; AVX512VBMI-NEXT: vpshldq $1, %xmm6, %xmm2, %xmm2
+; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $1, %xmm3, %xmm2, %xmm3
+; AVX512VBMI-NEXT: vpsllq $1, %xmm0, %xmm4
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
 ; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm4[0],zmm0[2],zmm4[2],zmm0[4],zmm4[4],zmm0[6],zmm4[6]
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512VBMI-NEXT: vpshldq $1, %ymm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $1, %zmm0, %zmm2, %zmm0
+; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
 ; AVX512VBMI-NEXT: retq
 ;
 ; ZNVER4-LABEL: shl_i512_1:
@@ -131,27 +123,22 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: lshr_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT: vpsrlq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT: retq
 ;
@@ -251,27 +238,22 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
 ;
 ; AVX512VBMI-LABEL: ashr_i512_1:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm1
 ; AVX512VBMI-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; AVX512VBMI-NEXT: vextracti32x4 $3, %zmm0, %xmm3
+; AVX512VBMI-NEXT: vextracti128 $1, %ymm0, %xmm3
 ; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm3, %xmm4, %xmm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm6, %xmm7
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm1, %xmm7, %xmm8
-; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; AVX512VBMI-NEXT: vpshldq $63, %xmm0, %xmm9, %xmm0
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm0
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; AVX512VBMI-NEXT: vpshldq $63, %xmm7, %xmm2, %xmm2
-; AVX512VBMI-NEXT: vpshldq $63, %xmm9, %xmm1, %xmm1
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512VBMI-NEXT: vpshldq $63, %xmm6, %xmm3, %xmm2
-; AVX512VBMI-NEXT: vpsraq $1, %xmm4, %xmm3
-; AVX512VBMI-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VBMI-NEXT: vpshldq $63, %xmm4, %xmm2, %xmm4
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm5, %xmm3, %xmm3
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; AVX512VBMI-NEXT: vpshldq $63, %xmm2, %xmm1, %xmm2
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX512VBMI-NEXT: vpsraq $1, %xmm1, %xmm1
+; AVX512VBMI-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512VBMI-NEXT: vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; AVX512VBMI-NEXT: vpshufd {{.*#+}} zmm2 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; AVX512VBMI-NEXT: vpshldq $63, %zmm0, %zmm2, %zmm0
 ; AVX512VBMI-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; AVX512VBMI-NEXT: retq
 ;