@@ -276,8 +276,8 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
   ret <2 x double> %6
 }

-define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
-; X32-LABEL: signbits_ashr_sextinreg_bitops_extract_sitofp:
+define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
+; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl %ebp
 ; X32-NEXT:    movl %esp, %ebp
@@ -300,9 +300,7 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
 ; X32-NEXT:    vpsrad $20, %xmm1, %xmm2
 ; X32-NEXT:    vpsrlq $20, %xmm1, %xmm1
 ; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; X32-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
-; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
 ; X32-NEXT:    fildll {{[0-9]+}}(%esp)
 ; X32-NEXT:    fstps {{[0-9]+}}(%esp)
@@ -311,7 +309,7 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
 ; X32-NEXT:    popl %ebp
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: signbits_ashr_sextinreg_bitops_extract_sitofp:
+; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X64:       # BB#0:
 ; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
 ; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
@@ -325,21 +323,71 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
 ; X64-NEXT:    vpsrad $20, %xmm1, %xmm2
 ; X64-NEXT:    vpsrlq $20, %xmm1, %xmm1
 ; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vmovq %xmm0, %rax
+; X64-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
+; X64-NEXT:    retq
+  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
+  %2 = sext i32 %a2 to i64
+  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
+  %4 = shl <2 x i64> %3, <i64 20, i64 20>
+  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
+  %6 = and <2 x i64> %1, %5
+  %7 = extractelement <2 x i64> %6, i32 0
+  %8 = sitofp i64 %7 to float
+  ret float %8
+}
+
+define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
+; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
+; X32:       # BB#0:
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    movl %esp, %ebp
+; X32-NEXT:    andl $-8, %esp
+; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
+; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
+; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
+; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
+; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X32-NEXT:    vpmovsxdq %xmm1, %xmm1
+; X32-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    fildll {{[0-9]+}}(%esp)
+; X32-NEXT:    fstps {{[0-9]+}}(%esp)
+; X32-NEXT:    flds {{[0-9]+}}(%esp)
+; X32-NEXT:    movl %ebp, %esp
+; X32-NEXT:    popl %ebp
+; X32-NEXT:    retl
+;
+; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
+; X64:       # BB#0:
+; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
+; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
+; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
+; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X64-NEXT:    vpmovsxdq %xmm1, %xmm1
 ; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
 ; X64-NEXT:    vpor %xmm1, %xmm2, %xmm1
 ; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
 ; X64-NEXT:    vmovq %xmm0, %rax
 ; X64-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
 ; X64-NEXT:    retq
-  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
-  %2 = sext i32 %a2 to i64
-  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
-  %4 = shl <2 x i64> %3, <i64 20, i64 20>
-  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
-  %6 = and <2 x i64> %1, %5
-  %7 = or <2 x i64> %6, %5
-  %8 = xor <2 x i64> %7, %1
-  %9 = extractelement <2 x i64> %8, i32 0
-  %10 = sitofp i64 %9 to float
-  ret float %10
+  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
+  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %3 = sext <2 x i32> %2 to <2 x i64>
+  %4 = and <2 x i64> %1, %3
+  %5 = or <2 x i64> %4, %3
+  %6 = xor <2 x i64> %5, %1
+  %7 = extractelement <2 x i64> %6, i32 0
+  %8 = sitofp i64 %7 to float
+  ret float %8
 }