Skip to content

Commit e801a10

Browse files
authored
[AMDGPU] Add the code generation support for llvm.[sin/cos].bf16 (#149631)
This is only partial support because some other instructions have not been upstreamed yet.
1 parent ba81903 commit e801a10

File tree

3 files changed

+77
-1
lines changed

3 files changed

+77
-1
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
620620

621621
// BF16 - VOP1 Actions.
622622
if (Subtarget->hasBF16TransInsts())
623-
setOperationAction(ISD::FDIV, MVT::bf16, Custom);
623+
setOperationAction({ISD::FCOS, ISD::FSIN, ISD::FDIV}, MVT::bf16, Custom);
624624

625625
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::f16, Promote);
626626
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote);
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=GCN %s
3+
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
4+
5+
; FIXME: GlobalISel does not work with bf16
6+
7+
declare bfloat @llvm.cos.bf16(bfloat) #0
8+
9+
define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
10+
; GCN-LABEL: cos_bf16_constant_4:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13+
; GCN-NEXT: v_cos_bf16_e32 v0, 0x3f23
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
17+
; GCN-NEXT: s_endpgm
18+
%cos = call bfloat @llvm.cos.bf16(bfloat 4.0) #0
19+
store bfloat %cos, ptr addrspace(1) %out, align 2
20+
ret void
21+
}
22+
23+
define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 {
24+
; GCN-LABEL: cos_bf16_constant_100:
25+
; GCN: ; %bb.0:
26+
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
27+
; GCN-NEXT: v_cos_bf16_e32 v0, 0x417f
28+
; GCN-NEXT: v_mov_b32_e32 v1, 0
29+
; GCN-NEXT: s_wait_kmcnt 0x0
30+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
31+
; GCN-NEXT: s_endpgm
32+
%cos = call bfloat @llvm.cos.bf16(bfloat 100.0) #0
33+
store bfloat %cos, ptr addrspace(1) %out, align 2
34+
ret void
35+
}
36+
37+
attributes #0 = { nounwind readnone }
38+
attributes #1 = { nounwind }
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=GCN %s
3+
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
4+
5+
; FIXME: GlobalISel does not work with bf16
6+
7+
declare bfloat @llvm.sin.bf16(bfloat) #0
8+
9+
define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
10+
; GCN-LABEL: sin_bf16_constant_4:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
13+
; GCN-NEXT: v_sin_bf16_e32 v0, 0x3f23
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
17+
; GCN-NEXT: s_endpgm
18+
%sin = call bfloat @llvm.sin.bf16(bfloat 4.0) #0
19+
store bfloat %sin, ptr addrspace(1) %out, align 2
20+
ret void
21+
}
22+
23+
define amdgpu_kernel void @sin_bf16_constant_100(ptr addrspace(1) %out) #1 {
24+
; GCN-LABEL: sin_bf16_constant_100:
25+
; GCN: ; %bb.0:
26+
; GCN-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
27+
; GCN-NEXT: v_sin_bf16_e32 v0, 0x417f
28+
; GCN-NEXT: v_mov_b32_e32 v1, 0
29+
; GCN-NEXT: s_wait_kmcnt 0x0
30+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
31+
; GCN-NEXT: s_endpgm
32+
%sin = call bfloat @llvm.sin.bf16(bfloat 100.0) #0
33+
store bfloat %sin, ptr addrspace(1) %out, align 2
34+
ret void
35+
}
36+
37+
attributes #0 = { nounwind readnone }
38+
attributes #1 = { nounwind }

0 commit comments

Comments
 (0)