@@ -31,68 +31,5 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
3131 ret void
3232}
3333
; Reciprocal of the bfloat literal 4.0 through llvm.amdgcn.rcp.bf16.
; The assertions below contain no rcp instruction: both run configurations
; expect the immediate 0x3e80 (0.25 encoded as bf16) to be moved into v1 and
; written to %out with a 16-bit store. The two configurations differ only in
; the store opcode (flat_store_b16 vs. global_store_b16).
define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
; SDAG-TRUE16:       ; %bb.0:
; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT:    flat_store_b16 v0, v1, s[0:1]
; SDAG-TRUE16-NEXT:    s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
; SDAG-FAKE16:       ; %bb.0:
; SDAG-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
; SDAG-FAKE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT:    s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 4.0) #0
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
54-
; Reciprocal of the bfloat literal 100.0 through llvm.amdgcn.rcp.bf16.
; As in the assertions below, no rcp instruction is expected: the result is
; materialized as the immediate 0x3c24 (the bf16 encoding of ~0.01) and
; written to %out with a 16-bit store.
define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
; SDAG-TRUE16:       ; %bb.0:
; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT:    flat_store_b16 v0, v1, s[0:1]
; SDAG-TRUE16-NEXT:    s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
; SDAG-FAKE16:       ; %bb.0:
; SDAG-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
; SDAG-FAKE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT:    s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 100.0) #0
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
75-
; llvm.amdgcn.rcp.bf16 applied to an undef operand.
; The assertions below expect the immediate 0x7fc0 (a bf16 quiet-NaN bit
; pattern) to be stored to %out, with no rcp instruction emitted.
; NOTE(review): newer LLVM discourages `undef` in tests in favour of `poison`;
; switching would likely change the expected output, so regenerate the
; assertions if that is done -- confirm against current project policy.
define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
; SDAG-TRUE16:       ; %bb.0:
; SDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
; SDAG-TRUE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT:    flat_store_b16 v0, v1, s[0:1]
; SDAG-TRUE16-NEXT:    s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
; SDAG-FAKE16:       ; %bb.0:
; SDAG-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
; SDAG-FAKE16-NEXT:    s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT:    s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat undef)
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
96-
; Attribute group #0 is the one referenced by `#0` on intrinsic call sites.
attributes #0 = { nounwind readnone }
; Attribute group #1 is the one referenced by `#1` on kernel definitions.
attributes #1 = { nounwind }
0 commit comments