Skip to content

Commit f68998c

Browse files
committed
[AMDGPU] Turn on real True16 instructions for GFX11 (add FeatureRealTrue16Insts to FeatureISAVersion11_Common)
1 parent 4a07c9a commit f68998c

File tree

2 files changed

+42
-40
lines changed

2 files changed

+42
-40
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1780,7 +1780,9 @@ def FeatureISAVersion11_Common : FeatureSet<
17801780
FeatureImageInsts,
17811781
FeaturePackedTID,
17821782
FeatureVcmpxPermlaneHazard,
1783-
FeatureMemoryAtomicFAddF32DenormalSupport]>;
1783+
FeatureMemoryAtomicFAddF32DenormalSupport,
1784+
FeatureRealTrue16Insts
1785+
]>;
17841786

17851787
// There are few workarounds that need to be
17861788
// added to all targets. This pessimizes codegen

llvm/test/CodeGen/AMDGPU/frem.ll

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2425,30 +2425,30 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
24252425
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
24262426
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
24272427
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2428-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v5.l, v4.l
2428+
; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v4.l, v0.l, v5.l
24292429
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v5, v3.l
2430-
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v4, v2.l
2430+
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.l
24312431
; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v5, v5
24322432
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2433-
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v4, v4, v5
2434-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v4, v2 op_sel_hi:[1,0,1]
2433+
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v5
2434+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v0, v2 op_sel_hi:[1,0,1]
24352435
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2436-
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v4, v6, v5
2437-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v4, v2 op_sel_hi:[1,0,1]
2436+
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0, v6, v5
2437+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v6, -v3, v0, v2 op_sel_hi:[1,0,1]
24382438
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24392439
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v5, v6, v5
24402440
; GFX1150-TRUE16-NEXT: v_and_b32_e32 v5, 0xff800000, v5
24412441
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2442-
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v4, v5, v4
2443-
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v4
2442+
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0, v5, v0
2443+
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
24442444
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2445-
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v3.l, v2.l
2446-
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v4.l, v0.h
2445+
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v3.l, v2.l
2446+
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
24472447
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2448-
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v4, 0x8000, v4
2449-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v4.l, v3.l, v2.l
2448+
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
2449+
; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v3.l
24502450
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
2451-
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
2451+
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v0, v2.l, v4.l
24522452
; GFX1150-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
24532453
; GFX1150-TRUE16-NEXT: s_endpgm
24542454
;
@@ -3215,31 +3215,31 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
32153215
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32163216
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
32173217
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3218-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v7.l, v6.l
3218+
; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v6.l, v0.l, v7.l
32193219
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v7, v3.l
3220-
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v1.l
3220+
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v1.l
32213221
; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v7, v7
32223222
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3223-
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v6, v6, v7
3224-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v10, -v8, v6, v9 op_sel_hi:[1,0,1]
3223+
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v7
3224+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v10, -v8, v0, v9 op_sel_hi:[1,0,1]
32253225
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3226-
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v6, v10, v7
3227-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v8, -v8, v6, v9 op_sel_hi:[1,0,1]
3226+
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0, v10, v7
3227+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v8, -v8, v0, v9 op_sel_hi:[1,0,1]
32283228
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
32293229
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v7, v8, v7
32303230
; GFX1150-TRUE16-NEXT: v_and_b32_e32 v7, 0xff800000, v7
32313231
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3232-
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v6, v7, v6
3233-
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v6
3232+
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0, v7, v0
3233+
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
32343234
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3235-
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v3.l, v1.l
3236-
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v6.l, v0.h
3235+
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v3.l, v1.l
3236+
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32373237
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3238-
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v6, 0x8000, v6
3239-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v6.l, v3.l, v1.l
3238+
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3239+
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v3.l, v1.l
32403240
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v4.h
32413241
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3242-
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v1, v0.h, v0.l
3242+
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v1, v0.l, v6.l
32433243
; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v3, v3
32443244
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.h
32453245
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1)
@@ -3262,30 +3262,30 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
32623262
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32633263
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
32643264
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3265-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v6.l, v3.l
3265+
; GFX1150-TRUE16-NEXT: v_fmac_f16_e32 v3.l, v0.l, v6.l
32663266
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v6, v4.l
3267-
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v3, v2.l
3267+
; GFX1150-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v2.l
32683268
; GFX1150-TRUE16-NEXT: v_rcp_f32_e32 v6, v6
32693269
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3270-
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v3, v3, v6
3271-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v3, v2 op_sel_hi:[1,0,1]
3270+
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v0, v0, v6
3271+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v0, v2 op_sel_hi:[1,0,1]
32723272
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3273-
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v3, v7, v6
3274-
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v3, v2 op_sel_hi:[1,0,1]
3273+
; GFX1150-TRUE16-NEXT: v_fmac_f32_e32 v0, v7, v6
3274+
; GFX1150-TRUE16-NEXT: v_fma_mix_f32 v7, -v4, v0, v2 op_sel_hi:[1,0,1]
32753275
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
32763276
; GFX1150-TRUE16-NEXT: v_mul_f32_e32 v6, v7, v6
32773277
; GFX1150-TRUE16-NEXT: v_and_b32_e32 v6, 0xff800000, v6
32783278
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3279-
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v3, v6, v3
3280-
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v3
3279+
; GFX1150-TRUE16-NEXT: v_add_f32_e32 v0, v6, v0
3280+
; GFX1150-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
32813281
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3282-
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.h, v0.h, v4.l, v2.l
3283-
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v3.l, v0.h
3282+
; GFX1150-TRUE16-NEXT: v_div_fixup_f16 v0.l, v0.l, v4.l, v2.l
3283+
; GFX1150-TRUE16-NEXT: v_trunc_f16_e32 v0.l, v0.l
32843284
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3285-
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v3, 0x8000, v3
3286-
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.h, v3.l, v4.l, v2.l
3285+
; GFX1150-TRUE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
3286+
; GFX1150-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v4.l, v2.l
32873287
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
3288-
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v2, v0.h, v0.l
3288+
; GFX1150-TRUE16-NEXT: v_pack_b32_f16 v2, v0.l, v3.l
32893289
; GFX1150-TRUE16-NEXT: global_store_b64 v5, v[1:2], s[0:1]
32903290
; GFX1150-TRUE16-NEXT: s_endpgm
32913291
;

0 commit comments

Comments
 (0)