Skip to content

Commit 71ce079

Browse files
authored
Reapply "[AMDGPU] gfx1250 trans instructions bf16 codegen tests update. NFC (llvm#155310)" (llvm#155515)
1 parent 329e706 commit 71ce079

File tree

3 files changed

+100
-0
lines changed

3 files changed

+100
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll

Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,5 +31,71 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
3131
ret void
3232
}
3333

34+
; Constant-folding test: reciprocal of the bfloat constant 4.0.
; The kernel calls llvm.amdgcn.rcp.bf16 on 4.0 and stores the result to %out.
; Both check prefixes expect no rcp instruction in the output, only a move of
; the immediate 0x3e80 (the bf16 bit pattern of 0.25) followed by a 16-bit
; store, i.e. the call is expected to be folded at compile time.
; NOTE(review): SDAG-TRUE16 / SDAG-FAKE16 presumably correspond to the
; true16 and fake16 RUN configurations; the RUN lines are not visible here.
define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
35+
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
36+
; SDAG-TRUE16: ; %bb.0:
37+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
38+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
39+
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
40+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
41+
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
42+
; SDAG-TRUE16-NEXT: s_endpgm
43+
;
44+
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
45+
; SDAG-FAKE16: ; %bb.0:
46+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
47+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
48+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
49+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
50+
; SDAG-FAKE16-NEXT: s_endpgm
51+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 4.0) #0
52+
store bfloat %rcp, ptr addrspace(1) %out, align 2
53+
ret void
54+
}
55+
56+
; Constant-folding test: reciprocal of the bfloat constant 100.0.
; The kernel calls llvm.amdgcn.rcp.bf16 on 100.0 and stores the result to %out.
; Both check prefixes expect a move of the immediate 0x3c24 (a bf16 bit
; pattern of roughly 0.01) and a 16-bit store, with no rcp instruction emitted.
; Unlike the 4.0 case, 1/100 is not exactly representable in bfloat, so the
; expected immediate also pins the rounded result.
define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
57+
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
58+
; SDAG-TRUE16: ; %bb.0:
59+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
60+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
61+
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
62+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
63+
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
64+
; SDAG-TRUE16-NEXT: s_endpgm
65+
;
66+
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
67+
; SDAG-FAKE16: ; %bb.0:
68+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
69+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
70+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
71+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
72+
; SDAG-FAKE16-NEXT: s_endpgm
73+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 100.0) #0
74+
store bfloat %rcp, ptr addrspace(1) %out, align 2
75+
ret void
76+
}
77+
78+
; Reciprocal of an undef bfloat operand.
; The kernel calls llvm.amdgcn.rcp.bf16 on undef and stores the result to %out.
; Both check prefixes expect the stored value to be the immediate 0x7fc0
; (a bf16 quiet-NaN bit pattern) with no rcp instruction emitted.
; NOTE(review): this call site carries no #0 attribute group, unlike the
; constant-operand tests in this file -- confirm whether that is intentional.
; NOTE(review): the operand is `undef`; if it is ever switched to `poison`
; the expected output may change and the checks must be regenerated.
define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
79+
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
80+
; SDAG-TRUE16: ; %bb.0:
81+
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
82+
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
83+
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
84+
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
85+
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
86+
; SDAG-TRUE16-NEXT: s_endpgm
87+
;
88+
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
89+
; SDAG-FAKE16: ; %bb.0:
90+
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
91+
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
92+
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
93+
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
94+
; SDAG-FAKE16-NEXT: s_endpgm
95+
%rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat undef)
96+
store bfloat %rcp, ptr addrspace(1) %out, align 2
97+
ret void
98+
}
99+
34100
; Attribute groups shared by the tests in this file:
;   #0 is attached to the intrinsic call sites,
;   #1 is attached to the amdgpu_kernel definitions.
attributes #0 = { nounwind readnone }
35101
attributes #1 = { nounwind }

llvm/test/CodeGen/AMDGPU/llvm.cos.bf16.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,23 @@
66

77
; Declaration of the bfloat cosine intrinsic exercised by the tests below.
declare bfloat @llvm.cos.bf16(bfloat) #0
88

9+
; Codegen test for llvm.cos.bf16 with a non-constant operand.
; The kernel takes %src as a kernel argument, calls llvm.cos.bf16 on it and
; stores the bfloat result to %out.
; The GCN checks expect the input to be multiplied by the 32-bit literal
; 0x3e230000 (about 0.159 as an f32; presumably 1/(2*pi) at bf16 precision --
; TODO confirm) through v_fma_mixlo_bf16 before v_cos_bf16 is issued, and the
; result to be written with a 16-bit global store.
define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
10+
; GCN-LABEL: cos_bf16:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
13+
; GCN-NEXT: s_mov_b32 s3, 0x3e230000
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: v_fma_mixlo_bf16 v0, s2, s3, 0 op_sel_hi:[1,0,0]
17+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GCN-NEXT: v_cos_bf16_e32 v0, v0
19+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
20+
; GCN-NEXT: s_endpgm
21+
%cos = call bfloat @llvm.cos.bf16(bfloat %src) #0
22+
store bfloat %cos, ptr addrspace(1) %out, align 2
23+
ret void
24+
}
25+
926
define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 {
1027
; GCN-LABEL: cos_bf16_constant_4:
1128
; GCN: ; %bb.0:

llvm/test/CodeGen/AMDGPU/llvm.sin.bf16.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,23 @@
66

77
; Declaration of the bfloat sine intrinsic exercised by the tests below.
declare bfloat @llvm.sin.bf16(bfloat) #0
88

9+
; Codegen test for llvm.sin.bf16 with a non-constant operand.
; The kernel takes %src as a kernel argument, calls llvm.sin.bf16 on it and
; stores the bfloat result to %out.
; The GCN checks expect the input to be multiplied by the 32-bit literal
; 0x3e230000 (about 0.159 as an f32; presumably 1/(2*pi) at bf16 precision --
; TODO confirm) through v_fma_mixlo_bf16 before v_sin_bf16 is issued, and the
; result to be written with a 16-bit global store.
define amdgpu_kernel void @sin_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
10+
; GCN-LABEL: sin_bf16:
11+
; GCN: ; %bb.0:
12+
; GCN-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
13+
; GCN-NEXT: s_mov_b32 s3, 0x3e230000
14+
; GCN-NEXT: v_mov_b32_e32 v1, 0
15+
; GCN-NEXT: s_wait_kmcnt 0x0
16+
; GCN-NEXT: v_fma_mixlo_bf16 v0, s2, s3, 0 op_sel_hi:[1,0,0]
17+
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
18+
; GCN-NEXT: v_sin_bf16_e32 v0, v0
19+
; GCN-NEXT: global_store_b16 v1, v0, s[0:1]
20+
; GCN-NEXT: s_endpgm
21+
%sin = call bfloat @llvm.sin.bf16(bfloat %src) #0
22+
store bfloat %sin, ptr addrspace(1) %out, align 2
23+
ret void
24+
}
25+
926
define amdgpu_kernel void @sin_bf16_constant_4(ptr addrspace(1) %out) #1 {
1027
; GCN-LABEL: sin_bf16_constant_4:
1128
; GCN: ; %bb.0:

0 commit comments

Comments
 (0)