@@ -18652,12 +18652,20 @@ define amdgpu_ps i32 @s_fabs_bf16(bfloat inreg %a) {
1865218652; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
1865318653; GFX10-NEXT: ; return to shader part epilog
1865418654;
18655- ; GFX11-LABEL: s_fabs_bf16:
18656- ; GFX11: ; %bb.0:
18657- ; GFX11-NEXT: s_and_b32 s0, s0, 0x7fff
18658- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18659- ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
18660- ; GFX11-NEXT: ; return to shader part epilog
18655+ ; GFX11TRUE16-LABEL: s_fabs_bf16:
18656+ ; GFX11TRUE16: ; %bb.0:
18657+ ; GFX11TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, s0
18658+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18659+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
18660+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
18661+ ; GFX11TRUE16-NEXT: ; return to shader part epilog
18662+ ;
18663+ ; GFX11FAKE16-LABEL: s_fabs_bf16:
18664+ ; GFX11FAKE16: ; %bb.0:
18665+ ; GFX11FAKE16-NEXT: s_and_b32 s0, s0, 0x7fff
18666+ ; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18667+ ; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
18668+ ; GFX11FAKE16-NEXT: ; return to shader part epilog
1866118669 %op = call bfloat @llvm.fabs.bf16(bfloat %a)
1866218670 %cast = bitcast bfloat %op to i16
1866318671 %zext = zext i16 %cast to i32
@@ -18747,12 +18755,20 @@ define amdgpu_ps i32 @s_fneg_bf16(bfloat inreg %a) {
1874718755; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
1874818756; GFX10-NEXT: ; return to shader part epilog
1874918757;
18750- ; GFX11-LABEL: s_fneg_bf16:
18751- ; GFX11: ; %bb.0:
18752- ; GFX11-NEXT: s_xor_b32 s0, s0, 0x8000
18753- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18754- ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
18755- ; GFX11-NEXT: ; return to shader part epilog
18758+ ; GFX11TRUE16-LABEL: s_fneg_bf16:
18759+ ; GFX11TRUE16: ; %bb.0:
18760+ ; GFX11TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, s0
18761+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18762+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
18763+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
18764+ ; GFX11TRUE16-NEXT: ; return to shader part epilog
18765+ ;
18766+ ; GFX11FAKE16-LABEL: s_fneg_bf16:
18767+ ; GFX11FAKE16: ; %bb.0:
18768+ ; GFX11FAKE16-NEXT: s_xor_b32 s0, s0, 0x8000
18769+ ; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18770+ ; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
18771+ ; GFX11FAKE16-NEXT: ; return to shader part epilog
1875618772 %op = fneg bfloat %a
1875718773 %cast = bitcast bfloat %op to i16
1875818774 %zext = zext i16 %cast to i32
@@ -18859,12 +18875,20 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) {
1885918875; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
1886018876; GFX10-NEXT: ; return to shader part epilog
1886118877;
18862- ; GFX11-LABEL: s_fneg_fabs_bf16:
18863- ; GFX11: ; %bb.0:
18864- ; GFX11-NEXT: s_bitset1_b32 s0, 15
18865- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18866- ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
18867- ; GFX11-NEXT: ; return to shader part epilog
18878+ ; GFX11TRUE16-LABEL: s_fneg_fabs_bf16:
18879+ ; GFX11TRUE16: ; %bb.0:
18880+ ; GFX11TRUE16-NEXT: v_or_b16 v0.l, 0x8000, s0
18881+ ; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
18882+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
18883+ ; GFX11TRUE16-NEXT: v_readfirstlane_b32 s0, v0
18884+ ; GFX11TRUE16-NEXT: ; return to shader part epilog
18885+ ;
18886+ ; GFX11FAKE16-LABEL: s_fneg_fabs_bf16:
18887+ ; GFX11FAKE16: ; %bb.0:
18888+ ; GFX11FAKE16-NEXT: s_bitset1_b32 s0, 15
18889+ ; GFX11FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
18890+ ; GFX11FAKE16-NEXT: s_and_b32 s0, 0xffff, s0
18891+ ; GFX11FAKE16-NEXT: ; return to shader part epilog
1886818892 %fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
1886918893 %op = fneg bfloat %fabs
1887018894 %cast = bitcast bfloat %op to i16
0 commit comments