@@ -255,35 +255,35 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_srem_by_pow2b:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrld $29, %xmm2
-; SSE-NEXT: movdqa %xmm1, %xmm3
-; SSE-NEXT: psrld $31, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: psrld $30, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: paddd %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrad $3, %xmm2
-; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrld $31, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrad $31, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm3
+; SSE-NEXT: psrld $29, %xmm3
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; SSE-NEXT: psrld $30, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT: paddd %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: psrad $3, %xmm1
+; SSE-NEXT: movdqa %xmm2, %xmm3
 ; SSE-NEXT: psrad $1, %xmm3
-; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: psubd %xmm1, %xmm0
+; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT: psrad $2, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; SSE-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: psubd %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX1-LABEL: combine_vec_srem_by_pow2b:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm3
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $29, %xmm2, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $30, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
 ; AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
@@ -320,10 +320,10 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; SSE-NEXT: movdqa %xmm1, %xmm3
 ; SSE-NEXT: psrld $30, %xmm3
 ; SSE-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrld $29, %xmm2
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrld $31, %xmm2
+; SSE-NEXT: psrld $29, %xmm1
+; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
 ; SSE-NEXT: paddd %xmm0, %xmm1
 ; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -346,9 +346,9 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; AVX1-NEXT: vpsrld $28, %xmm1, %xmm2
 ; AVX1-NEXT: vpsrld $30, %xmm1, %xmm3
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpsrld $29, %xmm1, %xmm3
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm3
+; AVX1-NEXT: vpsrld $29, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT: vpsrad $4, %xmm1, %xmm2