@@ -2425,30 +2425,30 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
24252425; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
24262426; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
24272427; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2428- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0 .l, v0.l, v5.l, v4 .l
2428+ ; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v4 .l, v0.l, v5.l
24292429; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v5, v3.l
2430- ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v4 , v2.l
2430+ ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0 , v2.l
24312431; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v5, v5
24322432; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2433- ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v4, v4 , v5
2434- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v4 , v2 op_sel_hi:[1,0,1]
2433+ ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0 , v5
2434+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v0 , v2 op_sel_hi:[1,0,1]
24352435; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2436- ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v4 , v6, v5
2437- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v4 , v2 op_sel_hi:[1,0,1]
2436+ ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0 , v6, v5
2437+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v0 , v2 op_sel_hi:[1,0,1]
24382438; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24392439; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v5, v6, v5
24402440; GFX1150-TRUE16-NEXT: v_and_b32_e32 v5, 0xff800000, v5
24412441; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2442- ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v4 , v5, v4
2443- ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v4
2442+ ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0 , v5, v0
2443+ ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
24442444; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2445- ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h , v0.h , v3.l, v2.l
2446- ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v4 .l, v0.h
2445+ ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l , v0.l , v3.l, v2.l
2446+ ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0 .l, v0.l
24472447; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2448- ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v4 , 0x8000, v4
2449- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v4. l, v3 .l, v2 .l
2448+ ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0 , 0x8000, v0
2449+ ; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v2. l, v0 .l, v3 .l
24502450; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
2451- ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0 .l
2451+ ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v0, v2.l, v4 .l
24522452; GFX1150-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
24532453; GFX1150-TRUE16-NEXT: s_endpgm
24542454;
@@ -3215,31 +3215,31 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
32153215; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32163216; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
32173217; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3218- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0 .l, v0.l, v7.l, v6 .l
3218+ ; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v6 .l, v0.l, v7.l
32193219; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v7, v3.l
3220- ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v6 , v1.l
3220+ ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0 , v1.l
32213221; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v7, v7
32223222; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3223- ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v6, v6 , v7
3224- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v10, -v8, v6 , v9 op_sel_hi:[1,0,1]
3223+ ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0 , v7
3224+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v10, -v8, v0 , v9 op_sel_hi:[1,0,1]
32253225; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3226- ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v6 , v10, v7
3227- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v8, -v8, v6 , v9 op_sel_hi:[1,0,1]
3226+ ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0 , v10, v7
3227+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v8, -v8, v0 , v9 op_sel_hi:[1,0,1]
32283228; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
32293229; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v7, v8, v7
32303230; GFX1150-TRUE16-NEXT: v_and_b32_e32 v7, 0xff800000, v7
32313231; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3232- ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v6 , v7, v6
3233- ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v6
3232+ ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0 , v7, v0
3233+ ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
32343234; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3235- ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h , v0.h , v3.l, v1.l
3236- ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v6 .l, v0.h
3235+ ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l , v0.l , v3.l, v1.l
3236+ ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0 .l, v0.l
32373237; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3238- ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v6 , 0x8000, v6
3239- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v6 .l, v3.l, v1.l
3238+ ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0 , 0x8000, v0
3239+ ; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0 .l, v3.l, v1.l
32403240; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v4.h
32413241; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3242- ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v1, v0.h, v0 .l
3242+ ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, v6 .l
32433243; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
32443244; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.h
32453245; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
@@ -3262,30 +3262,30 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
32623262; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32633263; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
32643264; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3265- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0 .l, v0.l, v6.l, v3 .l
3265+ ; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v3 .l, v0.l, v6.l
32663266; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v4.l
3267- ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3 , v2.l
3267+ ; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0 , v2.l
32683268; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v6, v6
32693269; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3270- ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v3, v3 , v6
3271- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v3 , v2 op_sel_hi:[1,0,1]
3270+ ; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0 , v6
3271+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v0 , v2 op_sel_hi:[1,0,1]
32723272; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3273- ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v3 , v7, v6
3274- ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v3 , v2 op_sel_hi:[1,0,1]
3273+ ; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0 , v7, v6
3274+ ; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v0 , v2 op_sel_hi:[1,0,1]
32753275; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
32763276; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v6, v7, v6
32773277; GFX1150-TRUE16-NEXT: v_and_b32_e32 v6, 0xff800000, v6
32783278; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3279- ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3 , v6, v3
3280- ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
3279+ ; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0 , v6, v0
3280+ ; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
32813281; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3282- ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h , v0.h , v4.l, v2.l
3283- ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v3 .l, v0.h
3282+ ; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l , v0.l , v4.l, v2.l
3283+ ; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0 .l, v0.l
32843284; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3285- ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v3 , 0x8000, v3
3286- ; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v3 .l, v4.l, v2.l
3285+ ; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0 , 0x8000, v0
3286+ ; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0 .l, v4.l, v2.l
32873287; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
3288- ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v2, v0.h, v0 .l
3288+ ; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v2, v0.l, v3 .l
32893289; GFX1150-TRUE16-NEXT: global_store_b64 v5, v[1:2], s[0:1]
32903290; GFX1150-TRUE16-NEXT: s_endpgm
32913291;
0 commit comments