@@ -581,145 +581,63 @@ define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c
581581 ret { half , half } %insert.1
582582}
583583
584- ; This one asserted with -enable-no-signed-zeros-fp-math
585- define amdgpu_ps half @fneg_fadd_0_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #0 {
586- ; SI-SAFE-LABEL: fneg_fadd_0_f16:
587- ; SI-SAFE: ; %bb.0: ; %.entry
588- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
589- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s1
590- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, s0
591- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
592- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
593- ; SI-SAFE-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
594- ; SI-SAFE-NEXT: v_rcp_f32_e32 v3, v2
595- ; SI-SAFE-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
596- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
597- ; SI-SAFE-NEXT: v_fma_f32 v5, -v2, v3, 1.0
598- ; SI-SAFE-NEXT: v_fma_f32 v3, v5, v3, v3
599- ; SI-SAFE-NEXT: v_mul_f32_e32 v5, v4, v3
600- ; SI-SAFE-NEXT: v_fma_f32 v6, -v2, v5, v4
601- ; SI-SAFE-NEXT: v_fma_f32 v5, v6, v3, v5
602- ; SI-SAFE-NEXT: v_fma_f32 v2, -v2, v5, v4
603- ; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
604- ; SI-SAFE-NEXT: v_div_fmas_f32 v2, v2, v3, v5
605- ; SI-SAFE-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
606- ; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0
607- ; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
608- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
609- ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
610- ; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
611- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
612- ; SI-SAFE-NEXT: ; return to shader part epilog
613- ;
614- ; SI-NSZ-LABEL: fneg_fadd_0_f16:
615- ; SI-NSZ: ; %bb.0: ; %.entry
616- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
617- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
618- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
619- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
620- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
621- ; SI-NSZ-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
622- ; SI-NSZ-NEXT: v_rcp_f32_e32 v3, v2
623- ; SI-NSZ-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
624- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
625- ; SI-NSZ-NEXT: v_fma_f32 v5, -v2, v3, 1.0
626- ; SI-NSZ-NEXT: v_fma_f32 v3, v5, v3, v3
627- ; SI-NSZ-NEXT: v_mul_f32_e32 v5, v4, v3
628- ; SI-NSZ-NEXT: v_fma_f32 v6, -v2, v5, v4
629- ; SI-NSZ-NEXT: v_fma_f32 v5, v6, v3, v5
630- ; SI-NSZ-NEXT: v_fma_f32 v2, -v2, v5, v4
631- ; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
632- ; SI-NSZ-NEXT: v_div_fmas_f32 v2, v2, v3, v5
633- ; SI-NSZ-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
634- ; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
635- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
636- ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
637- ; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000
638- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
639- ; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
640- ; SI-NSZ-NEXT: ; return to shader part epilog
641- ;
642- ; VI-SAFE-LABEL: fneg_fadd_0_f16:
643- ; VI-SAFE: ; %bb.0: ; %.entry
644- ; VI-SAFE-NEXT: v_rcp_f16_e32 v0, s1
645- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
646- ; VI-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
647- ; VI-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
648- ; VI-SAFE-NEXT: v_xor_b32_e32 v2, 0x8000, v0
649- ; VI-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
650- ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
651- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
652- ; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
653- ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
654- ; VI-SAFE-NEXT: ; return to shader part epilog
655- ;
656- ; VI-NSZ-LABEL: fneg_fadd_0_f16:
657- ; VI-NSZ: ; %bb.0: ; %.entry
658- ; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
659- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
660- ; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
661- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
662- ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
663- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
664- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
665- ; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
666- ; VI-NSZ-NEXT: ; return to shader part epilog
667- ;
668- ; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
669- ; GFX11-SAFE: ; %bb.0: ; %.entry
670- ; GFX11-SAFE-NEXT: v_rcp_f16_e32 v0, s1
671- ; GFX11-SAFE-NEXT: s_waitcnt_depctr 0xfff
672- ; GFX11-SAFE-NEXT: v_mul_f16_e32 v0, 0, v0
673- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
674- ; GFX11-SAFE-NEXT: v_add_f16_e32 v0, 0, v0
675- ; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v0
676- ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
677- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
678- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
679- ; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
680- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
681- ; GFX11-SAFE-NEXT: ; return to shader part epilog
682- ;
683- ; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
684- ; GFX11-NSZ: ; %bb.0: ; %.entry
685- ; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
686- ; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
687- ; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
688- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
689- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
690- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
691- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
692- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
693- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
694- ; GFX11-NSZ-NEXT: ; return to shader part epilog
695- ; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_f16:
696- ; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
697- ; GFX11-SAFE-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
698- ; GFX11-SAFE-TRUE16-NEXT: s_waitcnt_depctr 0xfff
699- ; GFX11-SAFE-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0, v0.l
700- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
701- ; GFX11-SAFE-TRUE16-NEXT: v_add_f16_e32 v0.l, 0, v0.l
702- ; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
703- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0.l
704- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
705- ; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v1
706- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0/*Invalid register, operand has 'VS_16' register class*/, s0, vcc_lo
707- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
708- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
709- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
710- ; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
711- ; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_f16:
712- ; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
713- ; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
714- ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
715- ; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
716- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
717- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
718- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
719- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
720- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
721- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
722- ; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
584+ define amdgpu_ps half @fneg_fadd_0_safe_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #0 {
585+ ; SI-LABEL: fneg_fadd_0_safe_f16:
586+ ; SI: ; %bb.0: ; %.entry
587+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
588+ ; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
589+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
590+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
591+ ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
592+ ; SI-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
593+ ; SI-NEXT: v_rcp_f32_e32 v3, v2
594+ ; SI-NEXT: v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
595+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
596+ ; SI-NEXT: v_fma_f32 v5, -v2, v3, 1.0
597+ ; SI-NEXT: v_fma_f32 v3, v5, v3, v3
598+ ; SI-NEXT: v_mul_f32_e32 v5, v4, v3
599+ ; SI-NEXT: v_fma_f32 v6, -v2, v5, v4
600+ ; SI-NEXT: v_fma_f32 v5, v6, v3, v5
601+ ; SI-NEXT: v_fma_f32 v2, -v2, v5, v4
602+ ; SI-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
603+ ; SI-NEXT: v_div_fmas_f32 v2, v2, v3, v5
604+ ; SI-NEXT: v_div_fixup_f32 v0, v2, v0, 1.0
605+ ; SI-NEXT: v_mad_f32 v0, v0, 0, 0
606+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v1
607+ ; SI-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc
608+ ; SI-NEXT: v_mov_b32_e32 v1, 0x7fc00000
609+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
610+ ; SI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
611+ ; SI-NEXT: ; return to shader part epilog
612+ ;
613+ ; VI-LABEL: fneg_fadd_0_safe_f16:
614+ ; VI: ; %bb.0: ; %.entry
615+ ; VI-NEXT: v_rcp_f16_e32 v0, s1
616+ ; VI-NEXT: v_mov_b32_e32 v1, s0
617+ ; VI-NEXT: v_mul_f16_e32 v0, 0, v0
618+ ; VI-NEXT: v_add_f16_e32 v0, 0, v0
619+ ; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v0
620+ ; VI-NEXT: v_cmp_ngt_f16_e32 vcc, s0, v0
621+ ; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
622+ ; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
623+ ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
624+ ; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
625+ ; VI-NEXT: ; return to shader part epilog
626+ ;
627+ ; GFX11-LABEL: fneg_fadd_0_safe_f16:
628+ ; GFX11: ; %bb.0: ; %.entry
629+ ; GFX11-NEXT: v_rcp_f16_e32 v0, s1
630+ ; GFX11-NEXT: s_waitcnt_depctr 0xfff
631+ ; GFX11-NEXT: v_mul_f16_e32 v0, 0, v0
632+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
633+ ; GFX11-NEXT: v_add_f16_e32 v0, 0, v0
634+ ; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v0
635+ ; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, s0, v0
636+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
637+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, s0, vcc_lo
638+ ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
639+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
640+ ; GFX11-NEXT: ; return to shader part epilog
723641.entry:
724642 %tmp7 = fdiv half 1 .000000e+00 , %tmp6
725643 %tmp8 = fmul half 0 .000000e+00 , %tmp7
@@ -733,108 +651,51 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
733651 ret half %.i198
734652}
735653
736- ; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
737- ; function attribute unsafe-fp-math automatically. Combine with the previous test
738- ; when that is done.
739654define amdgpu_ps half @fneg_fadd_0_nsz_f16 (half inreg %tmp2 , half inreg %tmp6 , <4 x i32 > %arg ) #2 {
740- ; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
741- ; SI-SAFE: ; %bb.0: ; %.entry
742- ; SI-SAFE-NEXT: v_cvt_f16_f32_e32 v0, s0
743- ; SI-SAFE-NEXT: s_brev_b32 s0, 1
744- ; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000
745- ; SI-SAFE-NEXT: v_cvt_f32_f16_e32 v0, v0
746- ; SI-SAFE-NEXT: v_min_legacy_f32_e32 v0, 0, v0
747- ; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0
748- ; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
749- ; SI-SAFE-NEXT: ; return to shader part epilog
750- ;
751- ; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
752- ; SI-NSZ: ; %bb.0: ; %.entry
753- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v0, s1
754- ; SI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, s0
755- ; SI-NSZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000
756- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v0, v0
757- ; SI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
758- ; SI-NSZ-NEXT: v_rcp_f32_e32 v0, v0
759- ; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
760- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
761- ; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
762- ; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
763- ; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
764- ; SI-NSZ-NEXT: ; return to shader part epilog
765- ;
766- ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
767- ; VI-SAFE: ; %bb.0: ; %.entry
768- ; VI-SAFE-NEXT: v_mov_b32_e32 v0, 0x8000
769- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0
770- ; VI-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc, s0, 0
771- ; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
772- ; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7e00
773- ; VI-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
774- ; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
775- ; VI-SAFE-NEXT: ; return to shader part epilog
776- ;
777- ; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
778- ; VI-NSZ: ; %bb.0: ; %.entry
779- ; VI-NSZ-NEXT: v_rcp_f16_e32 v0, s1
780- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, s0
781- ; VI-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
782- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
783- ; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
784- ; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7e00
785- ; VI-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
786- ; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
787- ; VI-NSZ-NEXT: ; return to shader part epilog
788- ;
789- ; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
790- ; GFX11-SAFE: ; %bb.0: ; %.entry
791- ; GFX11-SAFE-NEXT: v_mov_b32_e32 v0, s0
792- ; GFX11-SAFE-NEXT: v_cmp_ngt_f16_e64 vcc_lo, s0, 0
793- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
794- ; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
795- ; GFX11-SAFE-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
796- ; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_3)
797- ; GFX11-SAFE-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
798- ; GFX11-SAFE-NEXT: ; return to shader part epilog
799- ;
800- ; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
801- ; GFX11-NSZ: ; %bb.0: ; %.entry
802- ; GFX11-NSZ-NEXT: v_rcp_f16_e32 v0, s1
803- ; GFX11-NSZ-NEXT: s_waitcnt_depctr 0xfff
804- ; GFX11-NSZ-NEXT: v_mul_f16_e32 v0, 0x8000, v0
805- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
806- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
807- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
808- ; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_1)
809- ; GFX11-NSZ-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
810- ; GFX11-NSZ-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
811- ; GFX11-NSZ-NEXT: ; return to shader part epilog
812- ; GFX11-SAFE-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
813- ; GFX11-SAFE-TRUE16: ; %bb.0: ; %.entry
814- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_ngt_f16_e64 s1, s0, 0
815- ; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x8000
816- ; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
817- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
818- ; GFX11-SAFE-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
819- ; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
820- ; GFX11-SAFE-TRUE16-NEXT: ; return to shader part epilog
821- ; GFX11-NSZ-TRUE16-LABEL: fneg_fadd_0_nsz_f16:
822- ; GFX11-NSZ-TRUE16: ; %bb.0: ; %.entry
823- ; GFX11-NSZ-TRUE16-NEXT: v_rcp_f16_e32 v0.l, s1
824- ; GFX11-NSZ-TRUE16-NEXT: s_waitcnt_depctr 0xfff
825- ; GFX11-NSZ-TRUE16-NEXT: v_mul_f16_e32 v0.l, 0x8000, v0.l
826- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
827- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e64 s1, -v0.l, s0
828- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, s0, s1
829- ; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
830- ; GFX11-NSZ-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0.l
831- ; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, 0, vcc_lo
832- ; GFX11-NSZ-TRUE16-NEXT: ; return to shader part epilog
655+ ; SI-LABEL: fneg_fadd_0_nsz_f16:
656+ ; SI: ; %bb.0: ; %.entry
657+ ; SI-NEXT: v_cvt_f16_f32_e32 v0, s1
658+ ; SI-NEXT: v_cvt_f16_f32_e32 v1, s0
659+ ; SI-NEXT: v_mov_b32_e32 v2, 0x7fc00000
660+ ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
661+ ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
662+ ; SI-NEXT: v_rcp_f32_e32 v0, v0
663+ ; SI-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
664+ ; SI-NEXT: v_cmp_nlt_f32_e64 vcc, -v0, v1
665+ ; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
666+ ; SI-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
667+ ; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc
668+ ; SI-NEXT: ; return to shader part epilog
669+ ;
670+ ; VI-LABEL: fneg_fadd_0_nsz_f16:
671+ ; VI: ; %bb.0: ; %.entry
672+ ; VI-NEXT: v_rcp_f16_e32 v0, s1
673+ ; VI-NEXT: v_mov_b32_e32 v1, s0
674+ ; VI-NEXT: v_mul_f16_e32 v0, 0x8000, v0
675+ ; VI-NEXT: v_cmp_nlt_f16_e64 vcc, -v0, s0
676+ ; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
677+ ; VI-NEXT: v_mov_b32_e32 v1, 0x7e00
678+ ; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0, v0
679+ ; VI-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc
680+ ; VI-NEXT: ; return to shader part epilog
681+ ;
682+ ; GFX11-LABEL: fneg_fadd_0_nsz_f16:
683+ ; GFX11: ; %bb.0: ; %.entry
684+ ; GFX11-NEXT: v_rcp_f16_e32 v0, s1
685+ ; GFX11-NEXT: s_waitcnt_depctr 0xfff
686+ ; GFX11-NEXT: v_mul_f16_e32 v0, 0x8000, v0
687+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
688+ ; GFX11-NEXT: v_cmp_nlt_f16_e64 s1, -v0, s0
689+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s0, s1
690+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
691+ ; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0, v0
692+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
693+ ; GFX11-NEXT: ; return to shader part epilog
833694.entry:
834695 %tmp7 = fdiv afn half 1 .000000e+00 , %tmp6
835696 %tmp8 = fmul contract half 0 .000000e+00 , %tmp7
836697 %tmp9 = fmul reassoc nnan arcp contract half 0 .000000e+00 , %tmp8
837- %.i188 = fadd nnan ninf contract half %tmp9 , 0 .000000e+00
698+ %.i188 = fadd nsz half %tmp9 , 0 .000000e+00
838699 %tmp10 = fcmp uge half %.i188 , %tmp2
839700 %tmp11 = fneg half %.i188
840701 %.i092 = select i1 %tmp10 , half %tmp2 , half %tmp11
0 commit comments