Skip to content

Commit b6e7c69

Browse files
committed
Updated the test cases and extended the coverage of dagCombine-fmul-to-ldexp to include GlobalISel in the pipeline as well.
1 parent 5bb2713 commit b6e7c69

File tree

11 files changed

+7952
-5559
lines changed

11 files changed

+7952
-5559
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll

Lines changed: 402 additions & 367 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll

Lines changed: 61 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@ define i16 @v_powi_f16(i16 %l, i32 %r) {
1818
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
1919
; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
2020
; GFX7-NEXT: v_exp_f32_e32 v0, v0
21-
; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000
22-
; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
23-
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1
21+
; GFX7-NEXT: v_not_b32_e32 v1, 63
22+
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
23+
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
2424
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
2525
; GFX7-NEXT: s_setpc_b64 s[30:31]
2626
;
@@ -75,53 +75,80 @@ define i16 @v_powi_f16(i16 %l, i32 %r) {
7575
}
7676

7777
define float @v_powi_f32(float %l, i32 %r) {
78-
; GFX78-LABEL: v_powi_f32:
79-
; GFX78: ; %bb.0:
80-
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81-
; GFX78-NEXT: v_mov_b32_e32 v2, 0x800000
82-
; GFX78-NEXT: v_mov_b32_e32 v3, 0x4f800000
83-
; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
84-
; GFX78-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc
85-
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v2
86-
; GFX78-NEXT: v_log_f32_e32 v0, v0
87-
; GFX78-NEXT: v_cvt_f32_i32_e32 v1, v1
88-
; GFX78-NEXT: v_mov_b32_e32 v2, 0x42000000
89-
; GFX78-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
90-
; GFX78-NEXT: v_sub_f32_e32 v0, v0, v2
91-
; GFX78-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
92-
; GFX78-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
93-
; GFX78-NEXT: v_mov_b32_e32 v2, 0x42800000
94-
; GFX78-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
95-
; GFX78-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
96-
; GFX78-NEXT: v_add_f32_e32 v0, v0, v1
97-
; GFX78-NEXT: v_exp_f32_e32 v0, v0
98-
; GFX78-NEXT: v_mov_b32_e32 v1, 0x1f800000
99-
; GFX78-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc
100-
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v1
101-
; GFX78-NEXT: s_setpc_b64 s[30:31]
78+
; GFX7-LABEL: v_powi_f32:
79+
; GFX7: ; %bb.0:
80+
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81+
; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000
82+
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
83+
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
84+
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2
85+
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2
86+
; GFX7-NEXT: v_log_f32_e32 v0, v0
87+
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
88+
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000
89+
; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
90+
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
91+
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
92+
; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
93+
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42800000
94+
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
95+
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
96+
; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
97+
; GFX7-NEXT: v_exp_f32_e32 v0, v0
98+
; GFX7-NEXT: v_not_b32_e32 v1, 63
99+
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
100+
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
101+
; GFX7-NEXT: s_setpc_b64 s[30:31]
102+
;
103+
; GFX8-LABEL: v_powi_f32:
104+
; GFX8: ; %bb.0:
105+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106+
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
107+
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
108+
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
109+
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
110+
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
111+
; GFX8-NEXT: v_log_f32_e32 v0, v0
112+
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
113+
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
114+
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
115+
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
116+
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
117+
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
118+
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
119+
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
120+
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
121+
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
122+
; GFX8-NEXT: v_exp_f32_e32 v0, v0
123+
; GFX8-NEXT: v_not_b32_e32 v1, 63
124+
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
125+
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
126+
; GFX8-NEXT: s_setpc_b64 s[30:31]
102127
;
103128
; GFX11-LABEL: v_powi_f32:
104129
; GFX11: ; %bb.0:
105130
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106131
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
107132
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
108-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo
109-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
110-
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2
133+
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
134+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
135+
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
136+
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
111137
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
138+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
112139
; GFX11-NEXT: v_log_f32_e32 v0, v0
113140
; GFX11-NEXT: s_waitcnt_depctr 0xfff
114141
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
115-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
116142
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
143+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
117144
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
118145
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
119-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
120146
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
121-
; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo
147+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
148+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
122149
; GFX11-NEXT: v_exp_f32_e32 v0, v0
123150
; GFX11-NEXT: s_waitcnt_depctr 0xfff
124-
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
151+
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
125152
; GFX11-NEXT: s_setpc_b64 s[30:31]
126153
%res = call float @llvm.powi.f32.i32(float %l, i32 %r)
127154
ret float %res

0 commit comments

Comments
 (0)