|
7 | 7 | ; FIXME: t16 doesn't work at the moment because the store of s16 under t16 mode fails to select. |
8 | 8 | ; FIXME: GlobalISel does not work with bf16 |
9 | 9 |
|
| 10 | +declare float @llvm.amdgcn.tanh.f32(float) #0 |
10 | 11 | declare bfloat @llvm.amdgcn.tanh.bf16(bfloat) #0 |
11 | 12 |
|
| | +; Basic f32 case: calls llvm.amdgcn.tanh.f32 on the kernel argument %src and |
| | +; stores the result to %out. Both check prefixes expect one v_tanh_f32_e32 |
| | +; reading s2, followed by a global_store_b32 of its result. |
| 13 | +define amdgpu_kernel void @tanh_f32(ptr addrspace(1) %out, float %src) #1 { |
| 14 | +; SDAG-REAL16-LABEL: tanh_f32: |
| 15 | +; SDAG-REAL16: ; %bb.0: |
| 16 | +; SDAG-REAL16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 |
| 17 | +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 |
| 18 | +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 |
| 19 | +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, s2 |
| 20 | +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 21 | +; SDAG-REAL16-NEXT: s_endpgm |
| 22 | +; |
| 23 | +; SDAG-FAKE16-LABEL: tanh_f32: |
| 24 | +; SDAG-FAKE16: ; %bb.0: |
| 25 | +; SDAG-FAKE16-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 |
| 26 | +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 27 | +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| 28 | +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, s2 |
| 29 | +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 30 | +; SDAG-FAKE16-NEXT: s_endpgm |
| 31 | + %tanh = call float @llvm.amdgcn.tanh.f32(float %src) #0 |
| 32 | + store float %tanh, ptr addrspace(1) %out, align 4 |
| 33 | + ret void |
| 34 | +} |
| 35 | + |
| 36 | +; TODO: Really these should be constant folded |
| | +; Constant operand 4.0: the call is currently not constant folded, so the |
| | +; checks still expect a v_tanh_f32_e32 instruction, with the operand printed |
| | +; as 4.0, followed by the store of its result. |
| 37 | +define amdgpu_kernel void @tanh_f32_constant_4.0(ptr addrspace(1) %out) #1 { |
| 38 | +; SDAG-REAL16-LABEL: tanh_f32_constant_4.0: |
| 39 | +; SDAG-REAL16: ; %bb.0: |
| 40 | +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 41 | +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 4.0 |
| 42 | +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 |
| 43 | +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 |
| 44 | +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 45 | +; SDAG-REAL16-NEXT: s_endpgm |
| 46 | +; |
| 47 | +; SDAG-FAKE16-LABEL: tanh_f32_constant_4.0: |
| 48 | +; SDAG-FAKE16: ; %bb.0: |
| 49 | +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 50 | +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 4.0 |
| 51 | +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 52 | +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| 53 | +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 54 | +; SDAG-FAKE16-NEXT: s_endpgm |
| 55 | + %tanh = call float @llvm.amdgcn.tanh.f32(float 4.0) #0 |
| 56 | + store float %tanh, ptr addrspace(1) %out, align 4 |
| 57 | + ret void |
| 58 | +} |
| 59 | + |
| | +; Constant operand 100.0: as with the 4.0 case the call is not folded. Here |
| | +; the checks expect the operand printed as 0x42c80000, which is the IEEE-754 |
| | +; single-precision bit pattern of 100.0. |
| 60 | +define amdgpu_kernel void @tanh_f32_constant_100.0(ptr addrspace(1) %out) #1 { |
| 61 | +; SDAG-REAL16-LABEL: tanh_f32_constant_100.0: |
| 62 | +; SDAG-REAL16: ; %bb.0: |
| 63 | +; SDAG-REAL16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 64 | +; SDAG-REAL16-NEXT: v_tanh_f32_e32 v0, 0x42c80000 |
| 65 | +; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0 |
| 66 | +; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0 |
| 67 | +; SDAG-REAL16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 68 | +; SDAG-REAL16-NEXT: s_endpgm |
| 69 | +; |
| 70 | +; SDAG-FAKE16-LABEL: tanh_f32_constant_100.0: |
| 71 | +; SDAG-FAKE16: ; %bb.0: |
| 72 | +; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 |
| 73 | +; SDAG-FAKE16-NEXT: v_tanh_f32_e32 v0, 0x42c80000 |
| 74 | +; SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
| 75 | +; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| 76 | +; SDAG-FAKE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 77 | +; SDAG-FAKE16-NEXT: s_endpgm |
| 78 | + %tanh = call float @llvm.amdgcn.tanh.f32(float 100.0) #0 |
| 79 | + store float %tanh, ptr addrspace(1) %out, align 4 |
| 80 | + ret void |
| 81 | +} |
| 82 | + |
| | +; undef operand: both check prefixes expect the kernel body to be just |
| | +; s_endpgm, i.e. neither a tanh instruction nor the store appears in the output. |
| | +; NOTE(review): unlike the other tanh.f32 calls in this file, the call below |
| | +; carries no #0 attribute group -- confirm whether that is intentional. |
| | +; NOTE(review): this uses undef rather than poison -- confirm which one the |
| | +; test is meant to cover before changing it, since the checks are generated. |
| 83 | +define amdgpu_kernel void @tanh_undef_f32(ptr addrspace(1) %out) #1 { |
| 84 | +; SDAG-REAL16-LABEL: tanh_undef_f32: |
| 85 | +; SDAG-REAL16: ; %bb.0: |
| 86 | +; SDAG-REAL16-NEXT: s_endpgm |
| 87 | +; |
| 88 | +; SDAG-FAKE16-LABEL: tanh_undef_f32: |
| 89 | +; SDAG-FAKE16: ; %bb.0: |
| 90 | +; SDAG-FAKE16-NEXT: s_endpgm |
| 91 | + %tanh = call float @llvm.amdgcn.tanh.f32(float undef) |
| 92 | + store float %tanh, ptr addrspace(1) %out, align 4 |
| 93 | + ret void |
| 94 | +} |
| 95 | + |
12 | 96 | define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 { |
13 | 97 | ; SDAG-REAL16-LABEL: tanh_bf16: |
14 | 98 | ; SDAG-REAL16: ; %bb.0: |
|
0 commit comments