88; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
99; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
1010
11+ ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s
12+ ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s
13+
1114define float @v_constained_fsub_f32_fpexcept_strict (float %x , float %y ) #0 {
1215; GCN-LABEL: v_constained_fsub_f32_fpexcept_strict:
1316; GCN: ; %bb.0:
@@ -20,6 +23,26 @@ define float @v_constained_fsub_f32_fpexcept_strict(float %x, float %y) #0 {
2023; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
2225; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
26+ ;
27+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_strict:
28+ ; GFX12-SDAG: ; %bb.0:
29+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
30+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
31+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
32+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
33+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
34+ ; GFX12-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
35+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
36+ ;
37+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_strict:
38+ ; GFX12-GISEL: ; %bb.0:
39+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
40+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
41+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
42+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
43+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
44+ ; GFX12-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
45+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
2346 %val = call float @llvm.experimental.constrained.fsub.f32 (float %x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
2447 ret float %val
2548}
@@ -36,6 +59,26 @@ define float @v_constained_fsub_f32_fpexcept_ignore(float %x, float %y) #0 {
3659; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3760; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
3861; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
62+ ;
63+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_ignore:
64+ ; GFX12-SDAG: ; %bb.0:
65+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
66+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
67+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
68+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
69+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
70+ ; GFX12-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
71+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
72+ ;
73+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_ignore:
74+ ; GFX12-GISEL: ; %bb.0:
75+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
76+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
77+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
78+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
79+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
80+ ; GFX12-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
81+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
3982 %val = call float @llvm.experimental.constrained.fsub.f32 (float %x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.ignore" )
4083 ret float %val
4184}
@@ -52,6 +95,26 @@ define float @v_constained_fsub_f32_fpexcept_maytrap(float %x, float %y) #0 {
5295; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5396; GFX10PLUS-NEXT: v_sub_f32_e32 v0, v0, v1
5497; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
98+ ;
99+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
100+ ; GFX12-SDAG: ; %bb.0:
101+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
102+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
103+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
104+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
105+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
106+ ; GFX12-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
107+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
108+ ;
109+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_maytrap:
110+ ; GFX12-GISEL: ; %bb.0:
111+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
112+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
113+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
114+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
115+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
116+ ; GFX12-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
117+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
55118 %val = call float @llvm.experimental.constrained.fsub.f32 (float %x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.maytrap" )
56119 ret float %val
57120}
@@ -76,6 +139,26 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_strict(<2 x float> %x, <2 x
76139; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77140; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
78141; GFX11-NEXT: s_setpc_b64 s[30:31]
142+ ;
143+ ; GFX12-SDAG-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
144+ ; GFX12-SDAG: ; %bb.0:
145+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
146+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
147+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
148+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
149+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
150+ ; GFX12-SDAG-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
151+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
152+ ;
153+ ; GFX12-GISEL-LABEL: v_constained_fsub_v2f32_fpexcept_strict:
154+ ; GFX12-GISEL: ; %bb.0:
155+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
156+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
157+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
158+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
159+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
160+ ; GFX12-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
161+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
79162 %val = call <2 x float > @llvm.experimental.constrained.fsub.v2f32 (<2 x float > %x , <2 x float > %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
80163 ret <2 x float > %val
81164}
@@ -100,6 +183,26 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_ignore(<2 x float> %x, <2 x
100183; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101184; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
102185; GFX11-NEXT: s_setpc_b64 s[30:31]
186+ ;
187+ ; GFX12-SDAG-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
188+ ; GFX12-SDAG: ; %bb.0:
189+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
190+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
191+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
192+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
193+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
194+ ; GFX12-SDAG-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
195+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
196+ ;
197+ ; GFX12-GISEL-LABEL: v_constained_fsub_v2f32_fpexcept_ignore:
198+ ; GFX12-GISEL: ; %bb.0:
199+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
200+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
201+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
202+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
203+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
204+ ; GFX12-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
205+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
103206 %val = call <2 x float > @llvm.experimental.constrained.fsub.v2f32 (<2 x float > %x , <2 x float > %y , metadata !"round.tonearest" , metadata !"fpexcept.ignore" )
104207 ret <2 x float > %val
105208}
@@ -124,6 +227,26 @@ define <2 x float> @v_constained_fsub_v2f32_fpexcept_maytrap(<2 x float> %x, <2
124227; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125228; GFX11-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
126229; GFX11-NEXT: s_setpc_b64 s[30:31]
230+ ;
231+ ; GFX12-SDAG-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
232+ ; GFX12-SDAG: ; %bb.0:
233+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
234+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
235+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
236+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
237+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
238+ ; GFX12-SDAG-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
239+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
240+ ;
241+ ; GFX12-GISEL-LABEL: v_constained_fsub_v2f32_fpexcept_maytrap:
242+ ; GFX12-GISEL: ; %bb.0:
243+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
244+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
245+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
246+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
247+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
248+ ; GFX12-GISEL-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
249+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
127250 %val = call <2 x float > @llvm.experimental.constrained.fsub.v2f32 (<2 x float > %x , <2 x float > %y , metadata !"round.tonearest" , metadata !"fpexcept.maytrap" )
128251 ret <2 x float > %val
129252}
@@ -151,6 +274,28 @@ define <3 x float> @v_constained_fsub_v3f32_fpexcept_strict(<3 x float> %x, <3 x
151274; GFX11-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
152275; GFX11-NEXT: v_sub_f32_e32 v2, v2, v5
153276; GFX11-NEXT: s_setpc_b64 s[30:31]
277+ ;
278+ ; GFX12-SDAG-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
279+ ; GFX12-SDAG: ; %bb.0:
280+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
281+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
282+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
283+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
284+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
285+ ; GFX12-SDAG-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
286+ ; GFX12-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5
287+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
288+ ;
289+ ; GFX12-GISEL-LABEL: v_constained_fsub_v3f32_fpexcept_strict:
290+ ; GFX12-GISEL: ; %bb.0:
291+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
292+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
293+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
294+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
295+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
296+ ; GFX12-GISEL-NEXT: v_dual_sub_f32 v0, v0, v3 :: v_dual_sub_f32 v1, v1, v4
297+ ; GFX12-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5
298+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
154299 %val = call <3 x float > @llvm.experimental.constrained.fsub.v3f32 (<3 x float > %x , <3 x float > %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
155300 ret <3 x float > %val
156301}
@@ -166,6 +311,20 @@ define amdgpu_ps float @s_constained_fsub_f32_fpexcept_strict(float inreg %x, fl
166311; GFX10PLUS: ; %bb.0:
167312; GFX10PLUS-NEXT: v_sub_f32_e64 v0, s2, s3
168313; GFX10PLUS-NEXT: ; return to shader part epilog
314+ ;
315+ ; GFX12-SDAG-LABEL: s_constained_fsub_f32_fpexcept_strict:
316+ ; GFX12-SDAG: ; %bb.0:
317+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s2, s3
318+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
319+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
320+ ; GFX12-SDAG-NEXT: ; return to shader part epilog
321+ ;
322+ ; GFX12-GISEL-LABEL: s_constained_fsub_f32_fpexcept_strict:
323+ ; GFX12-GISEL: ; %bb.0:
324+ ; GFX12-GISEL-NEXT: s_sub_f32 s0, s2, s3
325+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
326+ ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
327+ ; GFX12-GISEL-NEXT: ; return to shader part epilog
169328 %val = call float @llvm.experimental.constrained.fsub.f32 (float %x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
170329 ret float %val
171330}
@@ -182,6 +341,26 @@ define float @v_constained_fsub_f32_fpexcept_strict_fabs_lhs(float %x, float %y)
182341; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183342; GFX10PLUS-NEXT: v_sub_f32_e64 v0, |v0|, v1
184343; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
344+ ;
345+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
346+ ; GFX12-SDAG: ; %bb.0:
347+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
348+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
349+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
350+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
351+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
352+ ; GFX12-SDAG-NEXT: v_sub_f32_e64 v0, |v0|, v1
353+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
354+ ;
355+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_lhs:
356+ ; GFX12-GISEL: ; %bb.0:
357+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
358+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
359+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
360+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
361+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
362+ ; GFX12-GISEL-NEXT: v_sub_f32_e64 v0, |v0|, v1
363+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
185364 %fabs.x = call float @llvm.fabs.f32 (float %x ) #0
186365 %val = call float @llvm.experimental.constrained.fsub.f32 (float %fabs.x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
187366 ret float %val
@@ -199,6 +378,26 @@ define float @v_constained_fsub_f32_fpexcept_strict_fabs_rhs(float %x, float %y)
199378; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200379; GFX10PLUS-NEXT: v_sub_f32_e64 v0, v0, |v1|
201380; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
381+ ;
382+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
383+ ; GFX12-SDAG: ; %bb.0:
384+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
385+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
386+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
387+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
388+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
389+ ; GFX12-SDAG-NEXT: v_sub_f32_e64 v0, v0, |v1|
390+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
391+ ;
392+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_strict_fabs_rhs:
393+ ; GFX12-GISEL: ; %bb.0:
394+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
395+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
396+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
397+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
398+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
399+ ; GFX12-GISEL-NEXT: v_sub_f32_e64 v0, v0, |v1|
400+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
202401 %fabs.y = call float @llvm.fabs.f32 (float %y ) #0
203402 %val = call float @llvm.experimental.constrained.fsub.f32 (float %x , float %fabs.y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
204403 ret float %val
@@ -216,6 +415,26 @@ define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
216415; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217416; GFX10PLUS-NEXT: v_sub_f32_e64 v0, -|v0|, v1
218417; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
418+ ;
419+ ; GFX12-SDAG-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
420+ ; GFX12-SDAG: ; %bb.0:
421+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
422+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
423+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
424+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
425+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
426+ ; GFX12-SDAG-NEXT: v_sub_f32_e64 v0, -|v0|, v1
427+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
428+ ;
429+ ; GFX12-GISEL-LABEL: v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs:
430+ ; GFX12-GISEL: ; %bb.0:
431+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
432+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
433+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
434+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
435+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
436+ ; GFX12-GISEL-NEXT: v_sub_f32_e64 v0, -|v0|, v1
437+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
219438 %fabs.x = call float @llvm.fabs.f32 (float %x ) #0
220439 %neg.fabs.x = fneg float %fabs.x
221440 %val = call float @llvm.experimental.constrained.fsub.f32 (float %neg.fabs.x , float %y , metadata !"round.tonearest" , metadata !"fpexcept.strict" )
0 commit comments