@@ -48,28 +48,20 @@ define <8 x i64> @shl_i512_1(<8 x i64> %a) {
 ;
 ; ZNVER4-LABEL: shl_i512_1:
 ; ZNVER4:       # %bb.0:
-; ZNVER4-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
-; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; ZNVER4-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm10 = xmm1[2,3,2,3]
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; ZNVER4-NEXT:    vpshldq $1, %xmm0, %xmm8, %xmm9
-; ZNVER4-NEXT:    vpsllq $1, %xmm0, %xmm0
-; ZNVER4-NEXT:    vpshldq $1, %xmm1, %xmm10, %xmm7
-; ZNVER4-NEXT:    vpshldq $1, %xmm8, %xmm1, %xmm1
-; ZNVER4-NEXT:    vpshldq $1, %xmm2, %xmm5, %xmm6
-; ZNVER4-NEXT:    vpshldq $1, %xmm3, %xmm4, %xmm4
-; ZNVER4-NEXT:    vpshldq $1, %xmm10, %xmm2, %xmm2
-; ZNVER4-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ZNVER4-NEXT:    vpshldq $1, %xmm5, %xmm3, %xmm1
-; ZNVER4-NEXT:    vinserti128 $1, %xmm4, %ymm6, %ymm4
-; ZNVER4-NEXT:    vinserti128 $1, %xmm7, %ymm9, %ymm7
-; ZNVER4-NEXT:    vinserti64x4 $1, %ymm4, %zmm7, %zmm4
+; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; ZNVER4-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; ZNVER4-NEXT:    vpsllq $1, %xmm0, %xmm4
 ; ZNVER4-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
-; ZNVER4-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ZNVER4-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm4[0],zmm0[2],zmm4[2],zmm0[4],zmm4[4],zmm0[6],zmm4[6]
+; ZNVER4-NEXT:    vpshldq $1, %xmm3, %xmm2, %xmm3
+; ZNVER4-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; ZNVER4-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+; ZNVER4-NEXT:    vpshldq $1, %ymm1, %ymm2, %ymm1
+; ZNVER4-NEXT:    vinserti128 $1, %xmm3, %ymm4, %ymm3
+; ZNVER4-NEXT:    vinserti64x4 $1, %ymm1, %zmm3, %zmm1
+; ZNVER4-NEXT:    vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; ZNVER4-NEXT:    vpshldq $1, %zmm0, %zmm3, %zmm0
+; ZNVER4-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[2],zmm0[2],zmm1[4],zmm0[4],zmm1[6],zmm0[6]
 ; ZNVER4-NEXT:    retq
   %d = bitcast <8 x i64> %a to i512
   %s = shl i512 %d, 1
@@ -124,26 +116,19 @@ define <8 x i64> @lshr_i512_1(<8 x i64> %a) {
 ;
 ; ZNVER4-LABEL: lshr_i512_1:
 ; ZNVER4:       # %bb.0:
+; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; ZNVER4-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; ZNVER4-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm10 = xmm1[2,3,2,3]
+; ZNVER4-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
 ; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; ZNVER4-NEXT:    vpshldq $63, %xmm0, %xmm9, %xmm0
-; ZNVER4-NEXT:    vpshldq $63, %xmm1, %xmm10, %xmm8
-; ZNVER4-NEXT:    vpshldq $63, %xmm2, %xmm6, %xmm7
-; ZNVER4-NEXT:    vpshldq $63, %xmm10, %xmm2, %xmm2
-; ZNVER4-NEXT:    vpshldq $63, %xmm9, %xmm1, %xmm1
-; ZNVER4-NEXT:    vpshldq $63, %xmm3, %xmm4, %xmm5
-; ZNVER4-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; ZNVER4-NEXT:    vpshldq $63, %xmm6, %xmm3, %xmm2
-; ZNVER4-NEXT:    vpsrlq $1, %xmm4, %xmm3
-; ZNVER4-NEXT:    vinserti128 $1, %xmm5, %ymm7, %ymm5
-; ZNVER4-NEXT:    vinserti128 $1, %xmm8, %ymm0, %ymm0
-; ZNVER4-NEXT:    vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; ZNVER4-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; ZNVER4-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
+; ZNVER4-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
+; ZNVER4-NEXT:    vpshldq $63, %xmm4, %xmm2, %xmm4
+; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; ZNVER4-NEXT:    vpshldq $63, %ymm3, %ymm1, %ymm1
+; ZNVER4-NEXT:    vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; ZNVER4-NEXT:    vpsrlq $1, %xmm2, %xmm2
+; ZNVER4-NEXT:    vpshldq $63, %zmm0, %zmm3, %zmm0
+; ZNVER4-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
 ; ZNVER4-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
 ; ZNVER4-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; ZNVER4-NEXT:    retq
@@ -200,26 +185,19 @@ define <8 x i64> @ashr_i512_1(<8 x i64> %a) {
 ;
 ; ZNVER4-LABEL: ashr_i512_1:
 ; ZNVER4:       # %bb.0:
+; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; ZNVER4-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; ZNVER4-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
-; ZNVER4-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,2,3]
-; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm10 = xmm1[2,3,2,3]
+; ZNVER4-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
 ; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
-; ZNVER4-NEXT:    vpshldq $63, %xmm0, %xmm9, %xmm0
-; ZNVER4-NEXT:    vpshldq $63, %xmm1, %xmm10, %xmm8
-; ZNVER4-NEXT:    vpshldq $63, %xmm2, %xmm6, %xmm7
-; ZNVER4-NEXT:    vpshldq $63, %xmm10, %xmm2, %xmm2
-; ZNVER4-NEXT:    vpshldq $63, %xmm9, %xmm1, %xmm1
-; ZNVER4-NEXT:    vpshldq $63, %xmm3, %xmm4, %xmm5
-; ZNVER4-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
-; ZNVER4-NEXT:    vpshldq $63, %xmm6, %xmm3, %xmm2
-; ZNVER4-NEXT:    vpsraq $1, %xmm4, %xmm3
-; ZNVER4-NEXT:    vinserti128 $1, %xmm5, %ymm7, %ymm5
-; ZNVER4-NEXT:    vinserti128 $1, %xmm8, %ymm0, %ymm0
-; ZNVER4-NEXT:    vinserti64x4 $1, %ymm5, %zmm0, %zmm0
-; ZNVER4-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
+; ZNVER4-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
+; ZNVER4-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
+; ZNVER4-NEXT:    vpshldq $63, %xmm4, %xmm2, %xmm4
+; ZNVER4-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; ZNVER4-NEXT:    vpshldq $63, %ymm3, %ymm1, %ymm1
+; ZNVER4-NEXT:    vpshufd {{.*#+}} zmm3 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
+; ZNVER4-NEXT:    vpsraq $1, %xmm2, %xmm2
+; ZNVER4-NEXT:    vpshldq $63, %zmm0, %zmm3, %zmm0
+; ZNVER4-NEXT:    vinserti128 $1, %xmm2, %ymm4, %ymm2
 ; ZNVER4-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
 ; ZNVER4-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
 ; ZNVER4-NEXT:    retq