|
2 | 2 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GFX1250-TRUE16 %s
|
3 | 3 | ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GFX1250-FAKE16 %s
|
4 | 4 |
|
5 |
| -/* TODO: Support safe bf16 fdiv lowering. |
; v_fdiv_bf16: takes two bfloat arguments and returns the result of a single
; `fdiv bfloat %x, %y`.
;
; The GFX1250-TRUE16 / GFX1250-FAKE16 check lines below are the expected llc
; output for the two RUN lines (-mattr=+real-true16 and -mattr=-real-true16).
; Both expected sequences place each operand in the upper 16 bits of a 32-bit
; VGPR (v_mov_b16 into .h with a zeroed .l for TRUE16, lshlrev by 16 for FAKE16),
; run a v_div_scale_f32 / v_rcp_f32 / v_fma_f32 / v_div_fmas_f32 /
; v_div_fixup_f32 chain, and finish with v_cvt_pk_bf16_f32.
; The FMA steps sit between `s_denorm_mode 15` and `s_denorm_mode 12`.
; NOTE(review): presumably this toggles f32 denormal handling around the
; refinement because the RUN lines pass -denormal-fp-math-f32=preserve-sign;
; confirm against the AMDGPU backend docs.
; NOTE(review): the check lines look tool-generated; regenerate them rather
; than editing by hand (confirm which update script this test uses).
6 | 5 | define bfloat @v_fdiv_bf16(bfloat %x, bfloat %y) {
|
| 6 | +; GFX1250-TRUE16-LABEL: v_fdiv_bf16: |
| 7 | +; GFX1250-TRUE16: ; %bb.0: |
| 8 | +; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 9 | +; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0 |
| 10 | +; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 |
| 11 | +; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l |
| 12 | +; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l |
| 13 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 14 | +; GFX1250-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l |
| 15 | +; GFX1250-TRUE16-NEXT: v_div_scale_f32 v0, null, v2, v2, v1 |
| 16 | +; GFX1250-TRUE16-NEXT: v_div_scale_f32 v4, vcc_lo, v1, v2, v1 |
| 17 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(TRANS32_DEP_1) |
| 18 | +; GFX1250-TRUE16-NEXT: v_rcp_f32_e32 v3, v0 |
| 19 | +; GFX1250-TRUE16-NEXT: s_denorm_mode 15 |
| 20 | +; GFX1250-TRUE16-NEXT: v_nop |
| 21 | +; GFX1250-TRUE16-NEXT: v_fma_f32 v5, -v0, v3, 1.0 |
| 22 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 23 | +; GFX1250-TRUE16-NEXT: v_fmac_f32_e32 v3, v5, v3 |
| 24 | +; GFX1250-TRUE16-NEXT: v_mul_f32_e32 v5, v4, v3 |
| 25 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 26 | +; GFX1250-TRUE16-NEXT: v_fma_f32 v6, -v0, v5, v4 |
| 27 | +; GFX1250-TRUE16-NEXT: v_fmac_f32_e32 v5, v6, v3 |
| 28 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| 29 | +; GFX1250-TRUE16-NEXT: v_fma_f32 v0, -v0, v5, v4 |
| 30 | +; GFX1250-TRUE16-NEXT: s_denorm_mode 12 |
| 31 | +; GFX1250-TRUE16-NEXT: v_div_fmas_f32 v0, v0, v3, v5 |
| 32 | +; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 33 | +; GFX1250-TRUE16-NEXT: v_div_fixup_f32 v0, v0, v2, v1 |
| 34 | +; GFX1250-TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 |
| 35 | +; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31] |
| 36 | +; |
| 37 | +; GFX1250-FAKE16-LABEL: v_fdiv_bf16: |
| 38 | +; GFX1250-FAKE16: ; %bb.0: |
| 39 | +; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| 40 | +; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| 41 | +; GFX1250-FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0 |
| 42 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) |
| 43 | +; GFX1250-FAKE16-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 |
| 44 | +; GFX1250-FAKE16-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 |
| 45 | +; GFX1250-FAKE16-NEXT: v_rcp_f32_e32 v3, v2 |
| 46 | +; GFX1250-FAKE16-NEXT: s_denorm_mode 15 |
| 47 | +; GFX1250-FAKE16-NEXT: v_nop |
| 48 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 49 | +; GFX1250-FAKE16-NEXT: v_fma_f32 v5, -v2, v3, 1.0 |
| 50 | +; GFX1250-FAKE16-NEXT: v_fmac_f32_e32 v3, v5, v3 |
| 51 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 52 | +; GFX1250-FAKE16-NEXT: v_mul_f32_e32 v5, v4, v3 |
| 53 | +; GFX1250-FAKE16-NEXT: v_fma_f32 v6, -v2, v5, v4 |
| 54 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 55 | +; GFX1250-FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v3 |
| 56 | +; GFX1250-FAKE16-NEXT: v_fma_f32 v2, -v2, v5, v4 |
| 57 | +; GFX1250-FAKE16-NEXT: s_denorm_mode 12 |
| 58 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| 59 | +; GFX1250-FAKE16-NEXT: v_div_fmas_f32 v2, v2, v3, v5 |
| 60 | +; GFX1250-FAKE16-NEXT: v_div_fixup_f32 v0, v2, v1, v0 |
| 61 | +; GFX1250-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 62 | +; GFX1250-FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0 |
| 63 | +; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31] |
7 | 64 | %fdiv = fdiv bfloat %x, %y
|
8 | 65 | ret bfloat %fdiv
|
9 | 66 | }
|
10 |
| -*/ |
11 | 67 |
|
12 | 68 | define bfloat @v_rcp_bf16(bfloat %x) {
|
13 | 69 | ; GFX1250-TRUE16-LABEL: v_rcp_bf16:
|
|
0 commit comments