@@ -3766,69 +3766,3 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
37663766 %insert.1 = insertelement <2 x double > %insert.0 , double %max1 , i32 1
37673767 ret <2 x double > %insert.1
37683768}
3769-
3770- ; Negative test: performMinMaxCombine() should not fold chained vector maximum operations into maximum3,
3771- ; since there are no packed instructions for fmaximum3; each <2 x half> maximum must be lowered separately.
3772- define <2 x half > @no_fmaximum3_v2f16 (<2 x half > %a , <2 x half > %b , <2 x half > %c , <2 x half > %d ) { ; chains three llvm.maximum.v2f16 calls; the checks below expect three separate max operations per target
3773- ; GFX12-LABEL: no_fmaximum3_v2f16:
3774- ; GFX12: ; %bb.0: ; %entry
3775- ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
3776- ; GFX12-NEXT: s_wait_expcnt 0x0
3777- ; GFX12-NEXT: s_wait_samplecnt 0x0
3778- ; GFX12-NEXT: s_wait_bvhcnt 0x0
3779- ; GFX12-NEXT: s_wait_kmcnt 0x0
3780- ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1
3781- ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3782- ; GFX12-NEXT: v_pk_maximum_f16 v0, v2, v0
3783- ; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v3
3784- ; GFX12-NEXT: s_setpc_b64 s[30:31]
3785- ;
3786- ; GFX940-LABEL: no_fmaximum3_v2f16:
3787- ; GFX940: ; %bb.0: ; %entry
3788- ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3789- ; GFX940-NEXT: v_pk_max_f16 v4, v0, v1
3790- ; GFX940-NEXT: v_mov_b32_e32 v5, 0x7e00
3791- ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v0, v1
3792- ; GFX940-NEXT: s_mov_b32 s0, 0x5040100
3793- ; GFX940-NEXT: s_nop 0
3794- ; GFX940-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc
3795- ; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4
3796- ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:WORD_1
3797- ; GFX940-NEXT: s_nop 1
3798- ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
3799- ; GFX940-NEXT: v_perm_b32 v1, v0, v6, s0
3800- ; GFX940-NEXT: v_pk_max_f16 v1, v2, v1
3801- ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v2, v6
3802- ; GFX940-NEXT: s_nop 1
3803- ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v1, vcc
3804- ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3805- ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v2, v0 src0_sel:WORD_1 src1_sel:DWORD
3806- ; GFX940-NEXT: s_nop 1
3807- ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
3808- ; GFX940-NEXT: v_perm_b32 v1, v0, v4, s0
3809- ; GFX940-NEXT: v_pk_max_f16 v1, v1, v3
3810- ; GFX940-NEXT: v_cmp_o_f16_e32 vcc, v4, v3
3811- ; GFX940-NEXT: s_nop 1
3812- ; GFX940-NEXT: v_cndmask_b32_e32 v2, v5, v1, vcc
3813- ; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1
3814- ; GFX940-NEXT: v_cmp_o_f16_sdwa vcc, v0, v3 src0_sel:DWORD src1_sel:WORD_1
3815- ; GFX940-NEXT: s_nop 1
3816- ; GFX940-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc
3817- ; GFX940-NEXT: v_perm_b32 v0, v0, v2, s0
3818- ; GFX940-NEXT: s_setpc_b64 s[30:31]
3819- ;
3820- ; GFX950-LABEL: no_fmaximum3_v2f16:
3821- ; GFX950: ; %bb.0: ; %entry
3822- ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3823- ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v1, v1
3824- ; GFX950-NEXT: s_nop 0
3825- ; GFX950-NEXT: v_pk_maximum3_f16 v0, v2, v0, v0
3826- ; GFX950-NEXT: s_nop 0
3827- ; GFX950-NEXT: v_pk_maximum3_f16 v0, v0, v3, v3
3828- ; GFX950-NEXT: s_setpc_b64 s[30:31]
3829- entry:
3830- %max = call <2 x half > @llvm.maximum.v2f16 (<2 x half > %a , <2 x half > %b ) ; maximum(a, b)
3831- %max1 = call <2 x half > @llvm.maximum.v2f16 (<2 x half > %c , <2 x half > %max ) ; maximum(c, maximum(a, b)) -- inner result is the second operand
3832- %res = call <2 x half > @llvm.maximum.v2f16 (<2 x half > %max1 , <2 x half > %d ) ; maximum(%max1, d) -- inner result is the first operand
3833- ret <2 x half > %res
3834- }
0 commit comments