@@ -28,6 +28,8 @@ declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>
 declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
 
 define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
 ; CHECK-LABEL: sitofp_v8i1_v8f32:
@@ -515,4 +517,244 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
   ret <4 x double> %result
 }
 
+define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: sitofp_v4i64_v4f32:
+; AVX-32:       # %bb.0:
+; AVX-32-NEXT:    pushl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_offset 8
+; AVX-32-NEXT:    .cfi_offset %ebp, -8
+; AVX-32-NEXT:    movl %esp, %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
+; AVX-32-NEXT:    andl $-8, %esp
+; AVX-32-NEXT:    subl $48, %esp
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fstps (%esp)
+; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT:    movl %ebp, %esp
+; AVX-32-NEXT:    popl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
+; AVX-32-NEXT:    vzeroupper
+; AVX-32-NEXT:    retl
+;
+; AVX-64-LABEL: sitofp_v4i64_v4f32:
+; AVX-64:       # %bb.0:
+; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX-64-NEXT:    vmovq %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm2
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX-64-NEXT:    vmovq %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX-64-NEXT:    vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-64-NEXT:    vzeroupper
+; AVX-64-NEXT:    retq
+;
+; AVX512DQ-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: sitofp_v4i64_v4f32:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vcvtqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT:    vzeroupper
+; AVX512DQVL-NEXT:    ret{{[l|q]}}
+  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x float> %result
+}
+
+define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
+; AVX-32-LABEL: uitofp_v4i64_v4f32:
+; AVX-32:       # %bb.0:
+; AVX-32-NEXT:    pushl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_offset 8
+; AVX-32-NEXT:    .cfi_offset %ebp, -8
+; AVX-32-NEXT:    movl %esp, %ebp
+; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
+; AVX-32-NEXT:    andl $-8, %esp
+; AVX-32-NEXT:    subl $48, %esp
+; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX-32-NEXT:    vmovlps %xmm1, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; AVX-32-NEXT:    vmovlps %xmm2, {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $1, %xmm0, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps (%esp)
+; AVX-32-NEXT:    vextractps $3, %xmm0, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $1, %xmm1, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vextractps $3, %xmm1, %eax
+; AVX-32-NEXT:    xorl %ecx, %ecx
+; AVX-32-NEXT:    testl %eax, %eax
+; AVX-32-NEXT:    setns %cl
+; AVX-32-NEXT:    fildll {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    fadds {{\.LCPI.*}}(,%ecx,4)
+; AVX-32-NEXT:    fstps {{[0-9]+}}(%esp)
+; AVX-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-32-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; AVX-32-NEXT:    movl %ebp, %esp
+; AVX-32-NEXT:    popl %ebp
+; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
+; AVX-32-NEXT:    vzeroupper
+; AVX-32-NEXT:    retl
+;
+; AVX1-64-LABEL: uitofp_v4i64_v4f32:
+; AVX1-64:       # %bb.0:
+; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-64-NEXT:    jns .LBB19_2
+; AVX1-64-NEXT:  # %bb.1:
+; AVX1-64-NEXT:    vaddss %xmm1, %xmm1, %xmm1
+; AVX1-64-NEXT:  .LBB19_2:
+; AVX1-64-NEXT:    vmovq %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-64-NEXT:    jns .LBB19_4
+; AVX1-64-NEXT:  # %bb.3:
+; AVX1-64-NEXT:    vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT:  .LBB19_4:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX1-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX1-64-NEXT:    vmovq %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm2
+; AVX1-64-NEXT:    jns .LBB19_6
+; AVX1-64-NEXT:  # %bb.5:
+; AVX1-64-NEXT:    vaddss %xmm2, %xmm2, %xmm2
+; AVX1-64-NEXT:  .LBB19_6:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX1-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX1-64-NEXT:    movq %rax, %rcx
+; AVX1-64-NEXT:    shrq %rcx
+; AVX1-64-NEXT:    movl %eax, %edx
+; AVX1-64-NEXT:    andl $1, %edx
+; AVX1-64-NEXT:    orq %rcx, %rdx
+; AVX1-64-NEXT:    testq %rax, %rax
+; AVX1-64-NEXT:    cmovnsq %rax, %rdx
+; AVX1-64-NEXT:    vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-64-NEXT:    jns .LBB19_8
+; AVX1-64-NEXT:  # %bb.7:
+; AVX1-64-NEXT:    vaddss %xmm0, %xmm0, %xmm0
+; AVX1-64-NEXT:  .LBB19_8:
+; AVX1-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX1-64-NEXT:    vzeroupper
+; AVX1-64-NEXT:    retq
+;
+; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512F-64:       # %bb.0:
+; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512F-64-NEXT:    vmovq %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512F-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512F-64-NEXT:    vmovq %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512F-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512F-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512F-64-NEXT:    vzeroupper
+; AVX512F-64-NEXT:    retq
+;
+; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
+; AVX512VL-64:       # %bb.0:
+; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
+; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm2
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX512VL-64-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-64-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm2
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX512VL-64-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512VL-64-NEXT:    vcvtusi2ss %rax, %xmm3, %xmm0
+; AVX512VL-64-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX512VL-64-NEXT:    vzeroupper
+; AVX512VL-64-NEXT:    retq
+;
+; AVX512DQ-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vmovaps %ymm0, %ymm0
+; AVX512DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    ret{{[l|q]}}
+;
+; AVX512DQVL-LABEL: uitofp_v4i64_v4f32:
+; AVX512DQVL:       # %bb.0:
+; AVX512DQVL-NEXT:    vcvtuqq2ps %ymm0, %xmm0
+; AVX512DQVL-NEXT:    vzeroupper
+; AVX512DQVL-NEXT:    ret{{[l|q]}}
+  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x,
+                                                              metadata !"round.dynamic",
+                                                              metadata !"fpexcept.strict") #0
+  ret <4 x float> %result
+}
+
 attributes #0 = { strictfp }