22; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
33; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
44
5+ ; TODO: GlobalISel should avoid generating v_ldexp_f32.
; v_s_exp_f32 test: calls llvm.exp2.f32 on the inreg float argument %src and
; returns the result.
; Reading this diff: the removed ('-') checks used the shared GFX12 prefix; the
; added ('+') checks are split into the GFX12-SDAG and GFX12-GISEL prefixes
; because the expected output of the two RUN lines now differs.
56define amdgpu_cs float @v_s_exp_f32 (float inreg %src ) {
6- ; GFX12-LABEL: v_s_exp_f32:
7- ; GFX12: ; %bb.0:
8- ; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
9- ; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0
10- ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
11- ; GFX12-NEXT: s_add_f32 s0, s0, s1
12- ; GFX12-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
13- ; GFX12-NEXT: v_s_exp_f32 s0, s0
14- ; GFX12-NEXT: s_wait_alu 0xf1ff
15- ; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
16- ; GFX12-NEXT: v_ldexp_f32 v0, s0, s1
17- ; GFX12-NEXT: ; return to shader part epilog
; SelectionDAG expectation: the value after v_s_exp_f32 is scaled with
; s_mul_f32 and then copied to v0 with v_mov_b32, instead of v_ldexp_f32.
7+ ; GFX12-SDAG-LABEL: v_s_exp_f32:
8+ ; GFX12-SDAG: ; %bb.0:
9+ ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
10+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42800000, 0
11+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
12+ ; GFX12-SDAG-NEXT: s_add_f32 s0, s0, s1
13+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0
14+ ; GFX12-SDAG-NEXT: v_s_exp_f32 s0, s0
15+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
16+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
17+ ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
18+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
19+ ; GFX12-SDAG-NEXT: ; return to shader part epilog
20+ ;
; GlobalISel expectation: same instruction sequence as the removed shared
; checks, still ending in v_ldexp_f32 writing v0.
21+ ; GFX12-GISEL-LABEL: v_s_exp_f32:
22+ ; GFX12-GISEL: ; %bb.0:
23+ ; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000
24+ ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42800000, 0
25+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
26+ ; GFX12-GISEL-NEXT: s_add_f32 s0, s0, s1
27+ ; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0xffffffc0, 0
28+ ; GFX12-GISEL-NEXT: v_s_exp_f32 s0, s0
29+ ; GFX12-GISEL-NEXT: s_wait_alu 0xf1ff
30+ ; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
31+ ; GFX12-GISEL-NEXT: v_ldexp_f32 v0, s0, s1
32+ ; GFX12-GISEL-NEXT: ; return to shader part epilog
1833 %result = call float @llvm.exp2.f32 (float %src )
1934 ret float %result
2035}
@@ -59,14 +74,15 @@ define amdgpu_cs float @v_s_log_f32(float inreg %src) {
5974; GFX12-SDAG-LABEL: v_s_log_f32:
6075; GFX12-SDAG: ; %bb.0:
6176; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0x800000
62- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
63- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
64- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, s1
65- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
66- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
77+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
78+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
79+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
80+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
81+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
82+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
83+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
6784; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
68- ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
69- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
85+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
7086; GFX12-SDAG-NEXT: ; return to shader part epilog
7187;
7288; GFX12-GISEL-LABEL: v_s_log_f32:
@@ -147,7 +163,7 @@ define amdgpu_cs half @v_s_rcp_f16(half inreg %src) {
147163 ret half %result
148164}
149165
150- ; TODO-GFX12 : GlobalISel should generate v_s_rsq.
166+ ; TODO: GlobalISel should generate v_s_rsq.
151167define amdgpu_cs float @v_s_rsq_f32 (float inreg %src ) {
152168; GFX12-SDAG-LABEL: v_s_rsq_f32:
153169; GFX12-SDAG: ; %bb.0:
@@ -184,7 +200,7 @@ define amdgpu_cs half @v_s_rsq_f16(half inreg %src) {
184200 ret half %result
185201}
186202
187- ; TODO-GFX12 : Should not use any VALU instructions .
203+ ; TODO: Should avoid generating v_cmp_class_f32.
188204define amdgpu_cs float @v_s_sqrt_f32 (float inreg %src ) {
189205; GFX12-SDAG-LABEL: v_s_sqrt_f32:
190206; GFX12-SDAG: ; %bb.0:
@@ -298,16 +314,18 @@ define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) {
298314define amdgpu_cs float @srcmods_abs_f32 (float inreg %src ) {
299315; GFX12-SDAG-LABEL: srcmods_abs_f32:
300316; GFX12-SDAG: ; %bb.0:
301- ; GFX12-SDAG-NEXT: s_and_b32 s1, s0, 0x7fffffff
317+ ; GFX12-SDAG-NEXT: s_bitset0_b32 s0, 31
302318; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
303- ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s1, 0x800000
304- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
305- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, |s0|, s1
306- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
307- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
308- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
319+ ; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0x800000
320+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0
321+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s0, s1
322+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
323+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(TRANS32_DEP_1)
324+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
325+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
309326; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
310- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
327+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
328+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
311329; GFX12-SDAG-NEXT: ; return to shader part epilog
312330;
313331; GFX12-GISEL-LABEL: srcmods_abs_f32:
@@ -333,15 +351,17 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) {
333351define amdgpu_cs float @srcmods_neg_f32 (float inreg %src ) {
334352; GFX12-SDAG-LABEL: srcmods_neg_f32:
335353; GFX12-SDAG: ; %bb.0:
354+ ; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000
336355; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000
337- ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 32, 0
338- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
339- ; GFX12-SDAG-NEXT: v_ldexp_f32 v0, -s0, s1
340- ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0
341- ; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0
356+ ; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0
357+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
358+ ; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0
359+ ; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0
360+ ; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0
361+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2)
362+ ; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1
342363; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
343- ; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
344- ; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0
364+ ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0
345365; GFX12-SDAG-NEXT: ; return to shader part epilog
346366;
347367; GFX12-GISEL-LABEL: srcmods_neg_f32:
0 commit comments