Skip to content

Commit 3b6aec7

Browse files
committed
[X86] Add test cases for v4i64->v4f32 and v8i64->v8f32 strict_sint_to_fp/strict_uint_to_fp to vec-strict-inttofp-256.ll and vec-strict-inttofp-512.ll. NFC
1 parent b6cf400 commit 3b6aec7

File tree

2 files changed

+474
-0
lines changed

2 files changed

+474
-0
lines changed

llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>
2828
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
2929
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
3030
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
31+
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
32+
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
3133

3234
define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
3335
; CHECK-LABEL: sitofp_v8i1_v8f32:
@@ -515,4 +517,244 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
515517
ret <4 x double> %result
516518
}
517519

520+
define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
521+
; AVX-32-LABEL: sitofp_v4i64_v4f32:
522+
; AVX-32: # %bb.0:
523+
; AVX-32-NEXT: pushl %ebp
524+
; AVX-32-NEXT: .cfi_def_cfa_offset 8
525+
; AVX-32-NEXT: .cfi_offset %ebp, -8
526+
; AVX-32-NEXT: movl %esp, %ebp
527+
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
528+
; AVX-32-NEXT: andl $-8, %esp
529+
; AVX-32-NEXT: subl $48, %esp
530+
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
531+
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
532+
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
533+
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
534+
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
535+
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
536+
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
537+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
538+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
539+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
540+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
541+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
542+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
543+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
544+
; AVX-32-NEXT: fstps (%esp)
545+
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
546+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
547+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
548+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
549+
; AVX-32-NEXT: movl %ebp, %esp
550+
; AVX-32-NEXT: popl %ebp
551+
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
552+
; AVX-32-NEXT: vzeroupper
553+
; AVX-32-NEXT: retl
554+
;
555+
; AVX-64-LABEL: sitofp_v4i64_v4f32:
556+
; AVX-64: # %bb.0:
557+
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
558+
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
559+
; AVX-64-NEXT: vmovq %xmm0, %rax
560+
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
561+
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
562+
; AVX-64-NEXT: vextracti128 $1, %ymm0, %xmm0
563+
; AVX-64-NEXT: vmovq %xmm0, %rax
564+
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
565+
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
566+
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
567+
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
568+
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
569+
; AVX-64-NEXT: vzeroupper
570+
; AVX-64-NEXT: retq
571+
;
572+
; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
573+
; AVX512DQ: # %bb.0:
574+
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
575+
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
576+
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
577+
; AVX512DQ-NEXT: vzeroupper
578+
; AVX512DQ-NEXT: ret{{[l|q]}}
579+
;
580+
; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
581+
; AVX512DQVL: # %bb.0:
582+
; AVX512DQVL-NEXT: vcvtqq2ps %ymm0, %xmm0
583+
; AVX512DQVL-NEXT: vzeroupper
584+
; AVX512DQVL-NEXT: ret{{[l|q]}}
585+
%result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
586+
metadata !"round.dynamic",
587+
metadata !"fpexcept.strict") #0
588+
ret <4 x float> %result
589+
}
590+
591+
define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
592+
; AVX-32-LABEL: uitofp_v4i64_v4f32:
593+
; AVX-32: # %bb.0:
594+
; AVX-32-NEXT: pushl %ebp
595+
; AVX-32-NEXT: .cfi_def_cfa_offset 8
596+
; AVX-32-NEXT: .cfi_offset %ebp, -8
597+
; AVX-32-NEXT: movl %esp, %ebp
598+
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
599+
; AVX-32-NEXT: andl $-8, %esp
600+
; AVX-32-NEXT: subl $48, %esp
601+
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
602+
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
603+
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
604+
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1
605+
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
606+
; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
607+
; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
608+
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
609+
; AVX-32-NEXT: xorl %ecx, %ecx
610+
; AVX-32-NEXT: testl %eax, %eax
611+
; AVX-32-NEXT: setns %cl
612+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
613+
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
614+
; AVX-32-NEXT: fstps (%esp)
615+
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
616+
; AVX-32-NEXT: xorl %ecx, %ecx
617+
; AVX-32-NEXT: testl %eax, %eax
618+
; AVX-32-NEXT: setns %cl
619+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
620+
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
621+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
622+
; AVX-32-NEXT: vextractps $1, %xmm1, %eax
623+
; AVX-32-NEXT: xorl %ecx, %ecx
624+
; AVX-32-NEXT: testl %eax, %eax
625+
; AVX-32-NEXT: setns %cl
626+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
627+
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
628+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
629+
; AVX-32-NEXT: vextractps $3, %xmm1, %eax
630+
; AVX-32-NEXT: xorl %ecx, %ecx
631+
; AVX-32-NEXT: testl %eax, %eax
632+
; AVX-32-NEXT: setns %cl
633+
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
634+
; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
635+
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
636+
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
637+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
638+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
639+
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
640+
; AVX-32-NEXT: movl %ebp, %esp
641+
; AVX-32-NEXT: popl %ebp
642+
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
643+
; AVX-32-NEXT: vzeroupper
644+
; AVX-32-NEXT: retl
645+
;
646+
; AVX1-64-LABEL: uitofp_v4i64_v4f32:
647+
; AVX1-64: # %bb.0:
648+
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
649+
; AVX1-64-NEXT: movq %rax, %rcx
650+
; AVX1-64-NEXT: shrq %rcx
651+
; AVX1-64-NEXT: movl %eax, %edx
652+
; AVX1-64-NEXT: andl $1, %edx
653+
; AVX1-64-NEXT: orq %rcx, %rdx
654+
; AVX1-64-NEXT: testq %rax, %rax
655+
; AVX1-64-NEXT: cmovnsq %rax, %rdx
656+
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
657+
; AVX1-64-NEXT: jns .LBB19_2
658+
; AVX1-64-NEXT: # %bb.1:
659+
; AVX1-64-NEXT: vaddss %xmm1, %xmm1, %xmm1
660+
; AVX1-64-NEXT: .LBB19_2:
661+
; AVX1-64-NEXT: vmovq %xmm0, %rax
662+
; AVX1-64-NEXT: movq %rax, %rcx
663+
; AVX1-64-NEXT: shrq %rcx
664+
; AVX1-64-NEXT: movl %eax, %edx
665+
; AVX1-64-NEXT: andl $1, %edx
666+
; AVX1-64-NEXT: orq %rcx, %rdx
667+
; AVX1-64-NEXT: testq %rax, %rax
668+
; AVX1-64-NEXT: cmovnsq %rax, %rdx
669+
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
670+
; AVX1-64-NEXT: jns .LBB19_4
671+
; AVX1-64-NEXT: # %bb.3:
672+
; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
673+
; AVX1-64-NEXT: .LBB19_4:
674+
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
675+
; AVX1-64-NEXT: vextracti128 $1, %ymm0, %xmm0
676+
; AVX1-64-NEXT: vmovq %xmm0, %rax
677+
; AVX1-64-NEXT: movq %rax, %rcx
678+
; AVX1-64-NEXT: shrq %rcx
679+
; AVX1-64-NEXT: movl %eax, %edx
680+
; AVX1-64-NEXT: andl $1, %edx
681+
; AVX1-64-NEXT: orq %rcx, %rdx
682+
; AVX1-64-NEXT: testq %rax, %rax
683+
; AVX1-64-NEXT: cmovnsq %rax, %rdx
684+
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
685+
; AVX1-64-NEXT: jns .LBB19_6
686+
; AVX1-64-NEXT: # %bb.5:
687+
; AVX1-64-NEXT: vaddss %xmm2, %xmm2, %xmm2
688+
; AVX1-64-NEXT: .LBB19_6:
689+
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
690+
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
691+
; AVX1-64-NEXT: movq %rax, %rcx
692+
; AVX1-64-NEXT: shrq %rcx
693+
; AVX1-64-NEXT: movl %eax, %edx
694+
; AVX1-64-NEXT: andl $1, %edx
695+
; AVX1-64-NEXT: orq %rcx, %rdx
696+
; AVX1-64-NEXT: testq %rax, %rax
697+
; AVX1-64-NEXT: cmovnsq %rax, %rdx
698+
; AVX1-64-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
699+
; AVX1-64-NEXT: jns .LBB19_8
700+
; AVX1-64-NEXT: # %bb.7:
701+
; AVX1-64-NEXT: vaddss %xmm0, %xmm0, %xmm0
702+
; AVX1-64-NEXT: .LBB19_8:
703+
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
704+
; AVX1-64-NEXT: vzeroupper
705+
; AVX1-64-NEXT: retq
706+
;
707+
; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
708+
; AVX512F-64: # %bb.0:
709+
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
710+
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
711+
; AVX512F-64-NEXT: vmovq %xmm0, %rax
712+
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
713+
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
714+
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0
715+
; AVX512F-64-NEXT: vmovq %xmm0, %rax
716+
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
717+
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
718+
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
719+
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
720+
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
721+
; AVX512F-64-NEXT: vzeroupper
722+
; AVX512F-64-NEXT: retq
723+
;
724+
; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
725+
; AVX512VL-64: # %bb.0:
726+
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
727+
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
728+
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
729+
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
730+
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
731+
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0
732+
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
733+
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
734+
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
735+
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
736+
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
737+
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
738+
; AVX512VL-64-NEXT: vzeroupper
739+
; AVX512VL-64-NEXT: retq
740+
;
741+
; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
742+
; AVX512DQ: # %bb.0:
743+
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
744+
; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
745+
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
746+
; AVX512DQ-NEXT: vzeroupper
747+
; AVX512DQ-NEXT: ret{{[l|q]}}
748+
;
749+
; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
750+
; AVX512DQVL: # %bb.0:
751+
; AVX512DQVL-NEXT: vcvtuqq2ps %ymm0, %xmm0
752+
; AVX512DQVL-NEXT: vzeroupper
753+
; AVX512DQVL-NEXT: ret{{[l|q]}}
754+
%result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
755+
metadata !"round.dynamic",
756+
metadata !"fpexcept.strict") #0
757+
ret <4 x float> %result
758+
}
759+
518760
attributes #0 = { strictfp }

0 commit comments

Comments (0)