@@ -134,6 +134,117 @@ entry:
134134 ret float %cond6
135135}
136136
; Safe-math fract pattern whose NaN guard is written as an ordered compare
; (fcmp ord) that selects the clamped fraction on true and %x on false.
; NOTE(review): the "_swap" suffix presumably refers to this select operand
; order relative to an unordered-compare variant elsewhere in the file - confirm.
;
; The function computes floor(%x), clamps %x - floor(%x) with minnum against
; 0x3FEFFFFFE0000000 (emitted as 0x3f7fffff in the GFX6 assembly below),
; returns 0.0 when |%x| equals 0x7FF0000000000000 (0x7f800000 in the assembly,
; i.e. infinity), and stores floor(%x) through %ip.
;
; What the recorded expectations show:
;   - the GFX6-IR checks keep the floor/fsub/minnum/select sequence unchanged;
;   - the IR-FRACT checks replace the fsub/minnum/ord-select chain with a single
;     call to @llvm.amdgcn.fract.f32, while the infinity select and the store of
;     floor(%x) remain;
;   - the GFX6 assembly expands to v_floor/v_sub/v_min/v_cmp_o/v_cndmask, whereas
;     the GFX7, GFX8, GFX11 and GFX12 assembly uses v_fract_f32 plus v_floor_f32.
137+ define float @safe_math_fract_f32_swap (float %x , ptr addrspace (1 ) writeonly captures(none) %ip ) {
138+ ; GFX6-IR-LABEL: define float @safe_math_fract_f32_swap(
139+ ; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
140+ ; GFX6-IR-NEXT: [[ENTRY:.*:]]
141+ ; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
142+ ; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
143+ ; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
144+ ; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00
145+ ; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]]
146+ ; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
147+ ; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
148+ ; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
149+ ; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
150+ ; GFX6-IR-NEXT: ret float [[COND6]]
151+ ;
152+ ; IR-FRACT-LABEL: define float @safe_math_fract_f32_swap(
153+ ; IR-FRACT-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
154+ ; IR-FRACT-NEXT: [[ENTRY:.*:]]
155+ ; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
156+ ; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
157+ ; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
158+ ; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
159+ ; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
160+ ; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
161+ ; IR-FRACT-NEXT: ret float [[COND6]]
162+ ;
163+ ; GFX6-LABEL: safe_math_fract_f32_swap:
164+ ; GFX6: ; %bb.0: ; %entry
165+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166+ ; GFX6-NEXT: v_floor_f32_e32 v3, v0
167+ ; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
168+ ; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
169+ ; GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0
170+ ; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
171+ ; GFX6-NEXT: s_mov_b32 s6, 0
172+ ; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc
173+ ; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
174+ ; GFX6-NEXT: s_mov_b32 s7, 0xf000
175+ ; GFX6-NEXT: s_mov_b32 s4, s6
176+ ; GFX6-NEXT: s_mov_b32 s5, s6
177+ ; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
178+ ; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
179+ ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
180+ ; GFX6-NEXT: s_setpc_b64 s[30:31]
181+ ;
182+ ; GFX7-LABEL: safe_math_fract_f32_swap:
183+ ; GFX7: ; %bb.0: ; %entry
184+ ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+ ; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
186+ ; GFX7-NEXT: s_mov_b32 s6, 0
187+ ; GFX7-NEXT: v_fract_f32_e32 v4, v0
188+ ; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
189+ ; GFX7-NEXT: s_mov_b32 s7, 0xf000
190+ ; GFX7-NEXT: s_mov_b32 s4, s6
191+ ; GFX7-NEXT: s_mov_b32 s5, s6
192+ ; GFX7-NEXT: v_floor_f32_e32 v3, v0
193+ ; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
194+ ; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
195+ ; GFX7-NEXT: s_waitcnt vmcnt(0)
196+ ; GFX7-NEXT: s_setpc_b64 s[30:31]
197+ ;
198+ ; GFX8-LABEL: safe_math_fract_f32_swap:
199+ ; GFX8: ; %bb.0: ; %entry
200+ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201+ ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
202+ ; GFX8-NEXT: v_fract_f32_e32 v4, v0
203+ ; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
204+ ; GFX8-NEXT: v_floor_f32_e32 v3, v0
205+ ; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
206+ ; GFX8-NEXT: global_store_dword v[1:2], v3, off
207+ ; GFX8-NEXT: s_waitcnt vmcnt(0)
208+ ; GFX8-NEXT: s_setpc_b64 s[30:31]
209+ ;
210+ ; GFX11-LABEL: safe_math_fract_f32_swap:
211+ ; GFX11: ; %bb.0: ; %entry
212+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213+ ; GFX11-NEXT: v_fract_f32_e32 v3, v0
214+ ; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
215+ ; GFX11-NEXT: v_floor_f32_e32 v4, v0
216+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
217+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
218+ ; GFX11-NEXT: global_store_b32 v[1:2], v4, off
219+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
220+ ;
221+ ; GFX12-LABEL: safe_math_fract_f32_swap:
222+ ; GFX12: ; %bb.0: ; %entry
223+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
224+ ; GFX12-NEXT: s_wait_expcnt 0x0
225+ ; GFX12-NEXT: s_wait_samplecnt 0x0
226+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
227+ ; GFX12-NEXT: s_wait_kmcnt 0x0
228+ ; GFX12-NEXT: v_fract_f32_e32 v3, v0
229+ ; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
230+ ; GFX12-NEXT: v_floor_f32_e32 v4, v0
231+ ; GFX12-NEXT: s_wait_alu 0xfffd
232+ ; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
233+ ; GFX12-NEXT: global_store_b32 v[1:2], v4, off
234+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
235+ entry:
; Fractional part candidate: x - floor(x), clamped from above by the minnum constant.
236+ %floor = tail call float @llvm.floor.f32 (float %x )
237+ %sub = fsub float %x , %floor
238+ %min = tail call float @llvm.minnum.f32 (float %sub , float 0x3FEFFFFFE0000000 )
; Despite its name, %uno is an *ordered* compare: it is true when %x is not NaN,
; so the select below takes the clamped value for ordinary inputs and passes a
; NaN %x through unchanged.
239+ %uno = fcmp ord float %x , 0 .000000e+00
240+ %cond = select i1 %uno , float %min , float %x
; Infinite inputs (either sign, because of the fabs) produce 0.0.
241+ %fabs = tail call float @llvm.fabs.f32 (float %x )
242+ %cmpinf = fcmp oeq float %fabs , 0x7FF0000000000000
243+ %cond6 = select i1 %cmpinf , float 0 .000000e+00 , float %cond
; floor(%x) stays live because it is also written out through %ip.
244+ store float %floor , ptr addrspace (1 ) %ip , align 4
245+ ret float %cond6
246+ }
247+
137248define float @safe_math_fract_f32_noinf_check (float %x , ptr addrspace (1 ) writeonly captures(none) %ip ) {
138249; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check(
139250; GFX6-IR-SAME: float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
0 commit comments