@@ -581,145 +581,63 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
581
581
ret { half , half } %insert.1
582
582
}
583
583
584
- ; This one asserted with -enable-no-signed-zeros-fp-math
585
- define amdgpu_ps half @fneg_fadd_0_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #0 {
586
- ; SI-SAFE-LABEL: fneg_fadd_0_f16:
587
- ; SI-SAFE: ; %bb.0: ; %.entry
588
- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
589
- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s1
590
- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, s0
591
- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
592
- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
593
- ; SI-SAFE-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
594
- ; SI-SAFE-NEXT: v_rcp_f32_e32 v3, v2
595
- ; SI-SAFE-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
596
- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
597
- ; SI-SAFE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
598
- ; SI-SAFE-NEXT: v_fma_f32 v3, v5, v3, v3
599
- ; SI-SAFE-NEXT: v_mul_f32_e32 v5, v4, v3
600
- ; SI-SAFE-NEXT: v_fma_f32 v6, -v2, v5, v4
601
- ; SI-SAFE-NEXT: v_fma_f32 v5, v6, v3, v5
602
- ; SI-SAFE-NEXT: v_fma_f32 v2, -v2, v5, v4
603
- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
604
- ; SI-SAFE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
605
- ; SI-SAFE-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
606
- ; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
607
- ; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
608
- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
609
- ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
610
- ; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
611
- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
612
- ; SI-SAFE-NEXT: ; return to shader part epilog
613
- ;
614
- ; SI-NSZ-LABEL: fneg_fadd_0_f16:
615
- ; SI-NSZ: ; %bb.0: ; %.entry
616
- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
617
- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
618
- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
619
- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
620
- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
621
- ; SI-NSZ-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
622
- ; SI-NSZ-NEXT: v_rcp_f32_e32 v3, v2
623
- ; SI-NSZ-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
624
- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
625
- ; SI-NSZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
626
- ; SI-NSZ-NEXT: v_fma_f32 v3, v5, v3, v3
627
- ; SI-NSZ-NEXT: v_mul_f32_e32 v5, v4, v3
628
- ; SI-NSZ-NEXT: v_fma_f32 v6, -v2, v5, v4
629
- ; SI-NSZ-NEXT: v_fma_f32 v5, v6, v3, v5
630
- ; SI-NSZ-NEXT: v_fma_f32 v2, -v2, v5, v4
631
- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
632
- ; SI-NSZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
633
- ; SI-NSZ-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
634
- ; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
635
- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
636
- ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
637
- ; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
638
- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
639
- ; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
640
- ; SI-NSZ-NEXT: ; return to shader part epilog
641
- ;
642
- ; VI-SAFE-LABEL: fneg_fadd_0_f16:
643
- ; VI-SAFE: ; %bb.0: ; %.entry
644
- ; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
645
- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
646
- ; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
647
- ; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
648
- ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
649
- ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
650
- ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
651
- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
652
- ; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
653
- ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
654
- ; VI-SAFE-NEXT: ; return to shader part epilog
655
- ;
656
- ; VI-NSZ-LABEL: fneg_fadd_0_f16:
657
- ; VI-NSZ: ; %bb.0: ; %.entry
658
- ; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
659
- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
660
- ; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
661
- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
662
- ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
663
- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
664
- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
665
- ; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
666
- ; VI-NSZ-NEXT: ; return to shader part epilog
667
- ;
668
- ; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
669
- ; GFX11-SAFE: ; %bb.0: ; %.entry
670
- ; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
671
- ; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
672
- ; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
673
- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
674
- ; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
675
- ; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
676
- ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
677
- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
678
- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
679
- ; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
680
- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
681
- ; GFX11-SAFE-NEXT: ; return to shader part epilog
682
- ;
683
- ; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
684
- ; GFX11-NSZ: ; %bb.0: ; %.entry
685
- ; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
686
- ; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
687
- ; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
688
- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
689
- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
690
- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
691
- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
692
- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
693
- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
694
- ; GFX11-NSZ-NEXT: ; return to shader part epilog
695
- ; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
696
- ; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
697
- ; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
698
- ; GFX11-SAFE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
699
- ; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0, v0.l
700
- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
701
- ; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, 0, v0.l
702
- ; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
703
- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
704
- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
705
- ; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
706
- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
707
- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
708
- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
709
- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
710
- ; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
711
- ; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_f16:
712
- ; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
713
- ; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
714
- ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
715
- ; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
716
- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
717
- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
718
- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
719
- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
720
- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
721
- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
722
- ; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
584
+ define amdgpu_ps half @fneg_fadd_0_safe_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #0 {
585
+ ; SI-LABEL: fneg_fadd_0_safe_f16:
586
+ ; SI: ; %bb.0: ; %.entry
587
+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
588
+ ; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
589
+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
590
+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
591
+ ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
592
+ ; SI-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
593
+ ; SI-NEXT: v_rcp_f32_e32 v3, v2
594
+ ; SI-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
595
+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
596
+ ; SI-NEXT: v_fma_f32 v5, -v2, v3, 1.0
597
+ ; SI-NEXT: v_fma_f32 v3, v5, v3, v3
598
+ ; SI-NEXT: v_mul_f32_e32 v5, v4, v3
599
+ ; SI-NEXT: v_fma_f32 v6, -v2, v5, v4
600
+ ; SI-NEXT: v_fma_f32 v5, v6, v3, v5
601
+ ; SI-NEXT: v_fma_f32 v2, -v2, v5, v4
602
+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
603
+ ; SI-NEXT: v_div_fmas_f32 v2, v2, v3, v5
604
+ ; SI-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
605
+ ; SI-NEXT: v_mad_f32 v0, v0, 0, 0
606
+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
607
+ ; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
608
+ ; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
609
+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
610
+ ; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
611
+ ; SI-NEXT: ; return to shader part epilog
612
+ ;
613
+ ; VI-LABEL: fneg_fadd_0_safe_f16:
614
+ ; VI: ; %bb.0: ; %.entry
615
+ ; VI-NEXT: v_rcp_f16_e32 v0, s1
616
+ ; VI-NEXT: v_mov_b32_e32 v1, s0
617
+ ; VI-NEXT: v_mul_f16_e32 v0, 0, v0
618
+ ; VI-NEXT: v_add_f16_e32 v0, 0, v0
619
+ ; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v0
620
+ ; VI-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
621
+ ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
622
+ ; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
623
+ ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
624
+ ; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
625
+ ; VI-NEXT: ; return to shader part epilog
626
+ ;
627
+ ; GFX11-LABEL: fneg_fadd_0_safe_f16:
628
+ ; GFX11: ; %bb.0: ; %.entry
629
+ ; GFX11-NEXT: v_rcp_f16_e32 v0, s1
630
+ ; GFX11-NEXT: s_waitcnt_depctr 0xfff
631
+ ; GFX11-NEXT: v_mul_f16_e32 v0, 0, v0
632
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
633
+ ; GFX11-NEXT: v_add_f16_e32 v0, 0, v0
634
+ ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
635
+ ; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
636
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
637
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
638
+ ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
639
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
640
+ ; GFX11-NEXT: ; return to shader part epilog
723
641
.entry:
724
642
%tmp7 = fdiv half 1 .000000e+00 , %tmp6
725
643
%tmp8 = fmul half 0 .000000e+00 , %tmp7
@@ -733,108 +651,51 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
733
651
ret half %.i198
734
652
}
735
653
736
- ; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
737
- ; function attribute unsafe-fp-math automatically. Combine with the previous test
738
- ; when that is done.
739
654
define amdgpu_ps half @fneg_fadd_0_nsz_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #2 {
740
- ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
741
- ; SI-SAFE: ; %bb.0: ; %.entry
742
- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
743
- ; SI-SAFE-NEXT: s_brev_b32 s0, 1
744
- ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
745
- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
746
- ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
747
- ; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
748
- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
749
- ; SI-SAFE-NEXT: ; return to shader part epilog
750
- ;
751
- ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
752
- ; SI-NSZ: ; %bb.0: ; %.entry
753
- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
754
- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
755
- ; SI-NSZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
756
- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
757
- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
758
- ; SI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
759
- ; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
760
- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
761
- ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
762
- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
763
- ; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
764
- ; SI-NSZ-NEXT: ; return to shader part epilog
765
- ;
766
- ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
767
- ; VI-SAFE: ; %bb.0: ; %.entry
768
- ; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
769
- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
770
- ; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
771
- ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
772
- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
773
- ; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
774
- ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
775
- ; VI-SAFE-NEXT: ; return to shader part epilog
776
- ;
777
- ; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
778
- ; VI-NSZ: ; %bb.0: ; %.entry
779
- ; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
780
- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
781
- ; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
782
- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
783
- ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
784
- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
785
- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
786
- ; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
787
- ; VI-NSZ-NEXT: ; return to shader part epilog
788
- ;
789
- ; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
790
- ; GFX11-SAFE: ; %bb.0: ; %.entry
791
- ; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
792
- ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
793
- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
794
- ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
795
- ; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
796
- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
797
- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
798
- ; GFX11-SAFE-NEXT: ; return to shader part epilog
799
- ;
800
- ; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
801
- ; GFX11-NSZ: ; %bb.0: ; %.entry
802
- ; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
803
- ; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
804
- ; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
805
- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
806
- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
807
- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
808
- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
809
- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
810
- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
811
- ; GFX11-NSZ-NEXT: ; return to shader part epilog
812
- ; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
813
- ; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
814
- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
815
- ; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000
816
- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
817
- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
818
- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
819
- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
820
- ; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
821
- ; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
822
- ; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
823
- ; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
824
- ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
825
- ; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
826
- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
827
- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
828
- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
829
- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
830
- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
831
- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
832
- ; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
655
+ ; SI-LABEL: fneg_fadd_0_nsz_f16:
656
+ ; SI: ; %bb.0: ; %.entry
657
+ ; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
658
+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
659
+ ; SI-NEXT: v_mov_b32_e32 v2, 0x7fc00000
660
+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
661
+ ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
662
+ ; SI-NEXT: v_rcp_f32_e32 v0, v0
663
+ ; SI-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
664
+ ; SI-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
665
+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
666
+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
667
+ ; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
668
+ ; SI-NEXT: ; return to shader part epilog
669
+ ;
670
+ ; VI-LABEL: fneg_fadd_0_nsz_f16:
671
+ ; VI: ; %bb.0: ; %.entry
672
+ ; VI-NEXT: v_rcp_f16_e32 v0, s1
673
+ ; VI-NEXT: v_mov_b32_e32 v1, s0
674
+ ; VI-NEXT: v_mul_f16_e32 v0, 0x8000, v0
675
+ ; VI-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
676
+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
677
+ ; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
678
+ ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
679
+ ; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
680
+ ; VI-NEXT: ; return to shader part epilog
681
+ ;
682
+ ; GFX11-LABEL: fneg_fadd_0_nsz_f16:
683
+ ; GFX11: ; %bb.0: ; %.entry
684
+ ; GFX11-NEXT: v_rcp_f16_e32 v0, s1
685
+ ; GFX11-NEXT: s_waitcnt_depctr 0xfff
686
+ ; GFX11-NEXT: v_mul_f16_e32 v0, 0x8000, v0
687
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
688
+ ; GFX11-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
689
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
690
+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
691
+ ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
692
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
693
+ ; GFX11-NEXT: ; return to shader part epilog
833
694
.entry:
834
695
%tmp7 = fdiv afn half 1 .000000e+00 , %tmp6
835
696
%tmp8 = fmul contract half 0 .000000e+00 , %tmp7
836
697
%tmp9 = fmul reassoc nnan arcp contract half 0 .000000e+00 , %tmp8
837
- %.i188 = fadd nnan ninf contract half %tmp9 , 0 .000000e+00
698
+ %.i188 = fadd nsz half %tmp9 , 0 .000000e+00
838
699
%tmp10 = fcmp uge half %.i188 , %tmp2
839
700
%tmp11 = fneg half %.i188
840
701
%.i092 = select i1 %tmp10 , half %tmp2 , half %tmp11
0 commit comments