Skip to content

Commit 43a9b66

Browse files
rampitec and shiltian authored
[AMDGPU] gfx1250 trans instructions bf16 codegen tests update. NFC (#155310)
Co-authored-by: Shilei Tian <[email protected]>
1 parent efd8143 commit 43a9b66

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,68 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
3131
ret void
3232
}
3333

34+
; Constant-input case: calls the bf16 reciprocal intrinsic on the literal 4.0
; and stores the result to %out.
; Both check-prefix groups below expect no reciprocal instruction at all, only
; a 16-bit store of the immediate 0x3e80 (0.25 encoded as bf16). The two
; groups differ only in the store opcode they expect (flat_store_b16 versus
; global_store_b16).
define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
35+
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
36+
; SDAG-TRUE16: ; %bb.0:
37+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
38+
; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
39+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
40+
; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
41+
; SDAG-TRUE16-NEXT: s_endpgm
42+
;
43+
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
44+
; SDAG-FAKE16: ; %bb.0:
45+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
46+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
47+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
48+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
49+
; SDAG-FAKE16-NEXT: s_endpgm
50+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 4.0) #0
51+
store bfloat %rcp, ptr addrspace(1) %out, align 2
52+
ret void
53+
}
54+
55+
; Constant-input case: calls the bf16 reciprocal intrinsic on the literal
; 100.0 and stores the result to %out.
; Both check-prefix groups below expect no reciprocal instruction, only a
; 16-bit store of the immediate 0x3c24 (roughly 0.01 when read as bf16, so
; the expected value is inexact, unlike the 4.0 case). The two groups differ
; only in the store opcode they expect (flat_store_b16 versus
; global_store_b16).
define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
56+
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
57+
; SDAG-TRUE16: ; %bb.0:
58+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
59+
; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
60+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
61+
; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
62+
; SDAG-TRUE16-NEXT: s_endpgm
63+
;
64+
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
65+
; SDAG-FAKE16: ; %bb.0:
66+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
67+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
68+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
69+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
70+
; SDAG-FAKE16-NEXT: s_endpgm
71+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 100.0) #0
72+
store bfloat %rcp, ptr addrspace(1) %out, align 2
73+
ret void
74+
}
75+
76+
; Undefined-input case: calls the bf16 reciprocal intrinsic on `undef` and
; stores the result to %out. Unlike the constant tests in this file, the call
; carries no #0 attribute group.
; Both check-prefix groups below expect no reciprocal instruction, only a
; 16-bit store of the immediate 0x7fc0 (exponent bits all ones with the top
; mantissa bit set, i.e. a NaN bit pattern in bf16).
; NOTE(review): `undef` is discouraged in new LLVM tests in favour of
; `poison`. Switching the operand may change the expected output, so the
; checks would need regenerating. Confirm before changing.
define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
77+
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
78+
; SDAG-TRUE16: ; %bb.0:
79+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
80+
; SDAG-TRUE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
81+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
82+
; SDAG-TRUE16-NEXT: flat_store_b16 v0, v1, s[0:1]
83+
; SDAG-TRUE16-NEXT: s_endpgm
84+
;
85+
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
86+
; SDAG-FAKE16: ; %bb.0:
87+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
88+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
89+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
90+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
91+
; SDAG-FAKE16-NEXT: s_endpgm
92+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat undef)
93+
store bfloat %rcp, ptr addrspace(1) %out, align 2
94+
ret void
95+
}
96+
3497
attributes #0 = { nounwind readnone }
3598
attributes #1 = { nounwind }

llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,23 @@
66

77
declare bfloat @llvm.cos.bf16(bfloat) #0
88

9+
; Variable-input case: calls the generic bf16 cosine intrinsic on the kernel
; argument %src and stores the result to %out.
; The checks expect a single 96-bit scalar load of the kernel arguments, then
; v_fma_mixlo_bf16 combining s2 with the constant 0x3e230000 and a zero
; addend, then v_cos_bf16_e32 on that result, and finally a 16-bit global
; store.
; NOTE(review): 0x3e230000 read as f32 is about 0.15918, so this is
; presumably a 1/(2*pi) pre-scale of the input. Confirm against the lowering.
define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
10+
; GCN-LABEL: cos_bf16:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
13+
; GCN-NEXT: s_mov_b32 s3, 0x3e230000
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: v_fma_mixlo_bf16 v0, s2, s3, 0 op_sel_hi:[1,0,0]
17+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GCN-NEXT: v_cos_bf16_e32 v0, v0
19+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
20+
; GCN-NEXT: s_endpgm
21+
%cos = call bfloat @llvm.cos.bf16(bfloat %src) #0
22+
store bfloat %cos, ptr addrspace(1) %out, align 2
23+
ret void
24+
}
25+
926
define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
1027
; GCN-LABEL: cos_bf16_constant_4:
1128
; GCN: ; %bb.0:

llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,23 @@
66

77
declare bfloat @llvm.sin.bf16(bfloat) #0
88

9+
; Variable-input case: calls the generic bf16 sine intrinsic on the kernel
; argument %src and stores the result to %out.
; The checks expect a single 96-bit scalar load of the kernel arguments, then
; v_fma_mixlo_bf16 combining s2 with the constant 0x3e230000 and a zero
; addend, then v_sin_bf16_e32 on that result, and finally a 16-bit global
; store.
; NOTE(review): 0x3e230000 read as f32 is about 0.15918, so this is
; presumably a 1/(2*pi) pre-scale of the input. Confirm against the lowering.
define amdgpu_kernel void @sin_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
10+
; GCN-LABEL: sin_bf16:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
13+
; GCN-NEXT: s_mov_b32 s3, 0x3e230000
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: v_fma_mixlo_bf16 v0, s2, s3, 0 op_sel_hi:[1,0,0]
17+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GCN-NEXT: v_sin_bf16_e32 v0, v0
19+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
20+
; GCN-NEXT: s_endpgm
21+
%sin = call bfloat @llvm.sin.bf16(bfloat %src) #0
22+
store bfloat %sin, ptr addrspace(1) %out, align 2
23+
ret void
24+
}
25+
926
define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
1027
; GCN-LABEL: sin_bf16_constant_4:
1128
; GCN: ; %bb.0:

0 commit comments

Comments
 (0)