@@ -31,5 +31,71 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
31
31
ret void
32
32
}
33
33
34
; Reciprocal of a compile-time constant. 1/4.0 is exactly 0.25, whose bfloat
; bit pattern is 0x3e80; the checks below expect that immediate to be stored
; directly, with no reciprocal instruction in the output.
define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3e80
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 4.0) #0
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
55
+
56
; Reciprocal of a compile-time constant whose result is not exactly
; representable. The checks below expect the immediate 0x3c24 (which decodes
; to 0.010009765625 as a bfloat) to be stored directly, with no reciprocal
; instruction in the output.
define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x3c24
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat 100.0) #0
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
77
+
78
; Reciprocal of an undef operand. The checks below expect the constant 0x7fc0
; (the bfloat quiet-NaN bit pattern) to be stored directly, with no reciprocal
; instruction in the output.
define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
; SDAG-FAKE16: ; %bb.0:
; SDAG-FAKE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; SDAG-FAKE16-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7fc0
; SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; SDAG-FAKE16-NEXT: s_endpgm
  %rcp = call bfloat @llvm.amdgcn.rcp.bf16(bfloat undef)
  store bfloat %rcp, ptr addrspace(1) %out, align 2
  ret void
}
99
+
34
100
; Attribute groups referenced above: #0 is attached at intrinsic call sites,
; #1 on the kernel definitions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
0 commit comments