Skip to content

Commit 751f45b

Browse files
committed
[SelectionDAG] Use the target hook convertSelectOfConstantsToMath in the SelectWithConstant scenario.
1 parent ba7858c commit 751f45b

22 files changed

2648 additions and 3020 deletions (lines changed)

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27692,7 +27692,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
2769227692
if ((Fold || Swap) &&
2769327693
TLI.getBooleanContents(CmpOpVT) ==
2769427694
TargetLowering::ZeroOrOneBooleanContent &&
27695-
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27695+
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT)) &&
27696+
TLI.convertSelectOfConstantsToMath(VT)) {
2769627697

2769727698
if (Swap) {
2769827699
CC = ISD::getSetCCInverse(CC, CmpOpVT);

llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
1919
; GCN-ALLOCA: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v{{[0-9]+}}, v0
2020

2121
; GCN-PROMOTE: s_cmp_eq_u32 [[IN]], 1
22-
; GCN-PROMOTE-NEXT: s_cselect_b64 vcc, -1, 0
23-
; GCN-PROMOTE-NEXT: v_addc_u32_e32 [[RESULT:v[0-9]+]], vcc, 0, v0, vcc
22+
; GCN-PROMOTE-NEXT: s_cselect_b32 [[SCC:s[0-9]+]], 1, 0
23+
; GCN-PROMOTE-NEXT: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[SCC]], v0
2424

2525
; GCN: buffer_store_dword [[RESULT]]
2626
define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 31 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -24693,8 +24693,7 @@ define bfloat @v_log_bf16(bfloat %a) {
2469324693
; GCN-NEXT: v_mov_b32_e32 v1, 0x41b17218
2469424694
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2469524695
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24696-
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
24697-
; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2
24696+
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
2469824697
; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2
2469924698
; GCN-NEXT: v_log_f32_e32 v0, v0
2470024699
; GCN-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
@@ -24720,8 +24719,7 @@ define bfloat @v_log_bf16(bfloat %a) {
2472024719
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2472124720
; GFX7-NEXT: s_mov_b32 s4, 0x800000
2472224721
; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24723-
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
24724-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24722+
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2472524723
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
2472624724
; GFX7-NEXT: v_log_f32_e32 v0, v0
2472724725
; GFX7-NEXT: s_mov_b32 s4, 0x3f317217
@@ -24745,8 +24743,7 @@ define bfloat @v_log_bf16(bfloat %a) {
2474524743
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2474624744
; GFX8-NEXT: s_mov_b32 s4, 0x800000
2474724745
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24748-
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
24749-
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24746+
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2475024747
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
2475124748
; GFX8-NEXT: v_log_f32_e32 v0, v0
2475224749
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
@@ -24779,8 +24776,7 @@ define bfloat @v_log_bf16(bfloat %a) {
2477924776
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2478024777
; GFX9-NEXT: s_mov_b32 s4, 0x800000
2478124778
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24782-
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
24783-
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24779+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2478424780
; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
2478524781
; GFX9-NEXT: v_log_f32_e32 v0, v0
2478624782
; GFX9-NEXT: s_mov_b32 s4, 0x3f317217
@@ -24809,8 +24805,7 @@ define bfloat @v_log_bf16(bfloat %a) {
2480924805
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2481024806
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2481124807
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
24812-
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
24813-
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24808+
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
2481424809
; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
2481524810
; GFX10-NEXT: v_log_f32_e32 v0, v0
2481624811
; GFX10-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
@@ -24835,30 +24830,28 @@ define bfloat @v_log_bf16(bfloat %a) {
2483524830
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2483624831
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2483724832
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
24838-
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
24839-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24840-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
24833+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
2484124834
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
24835+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2484224836
; GFX11-NEXT: v_log_f32_e32 v0, v0
2484324837
; GFX11-NEXT: s_waitcnt_depctr 0xfff
2484424838
; GFX11-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
24845-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2484624839
; GFX11-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
24840+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2484724841
; GFX11-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
24848-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2484924842
; GFX11-NEXT: v_add_f32_e32 v1, v1, v2
2485024843
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, vcc_lo
2485124844
; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
24845+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2485224846
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
24853-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2485424847
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
24848+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2485524849
; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1
2485624850
; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0
2485724851
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
24858-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2485924852
; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
24853+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2486024854
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
24861-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2486224855
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2486324856
; GFX11-NEXT: s_setpc_b64 s[30:31]
2486424857
%op = call bfloat @llvm.log.bf16(bfloat %a)
@@ -24874,8 +24867,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
2487424867
; GCN-NEXT: v_mov_b32_e32 v1, 0x42000000
2487524868
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2487624869
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24877-
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
24878-
; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2
24870+
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
2487924871
; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2
2488024872
; GCN-NEXT: v_log_f32_e32 v0, v0
2488124873
; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -24890,8 +24882,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
2489024882
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2489124883
; GFX7-NEXT: s_mov_b32 s4, 0x800000
2489224884
; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24893-
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
24894-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24885+
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2489524886
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
2489624887
; GFX7-NEXT: v_log_f32_e32 v0, v0
2489724888
; GFX7-NEXT: v_mov_b32_e32 v1, 0x42000000
@@ -24906,8 +24897,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
2490624897
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2490724898
; GFX8-NEXT: s_mov_b32 s4, 0x800000
2490824899
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24909-
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
24910-
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
24900+
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2491124901
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
2491224902
; GFX8-NEXT: v_log_f32_e32 v0, v0
2491324903
; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000
@@ -24928,8 +24918,7 @@ define bfloat @v_log2_bf16(bfloat %a) {
2492824918
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2492924919
; GFX9-NEXT: s_mov_b32 s4, 0x800000
2493024920
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
24931-
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
24932-
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2
24921+
; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
2493324922
; GFX9-NEXT: v_ldexp_f32 v0, v0, v2
2493424923
; GFX9-NEXT: v_log_f32_e32 v0, v0
2493524924
; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000
@@ -24949,9 +24938,8 @@ define bfloat @v_log2_bf16(bfloat %a) {
2494924938
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2495024939
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2495124940
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
24952-
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
24941+
; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo
2495324942
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
24954-
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2
2495524943
; GFX10-NEXT: v_ldexp_f32 v0, v0, v2
2495624944
; GFX10-NEXT: v_log_f32_e32 v0, v0
2495724945
; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1
@@ -24969,21 +24957,20 @@ define bfloat @v_log2_bf16(bfloat %a) {
2496924957
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2497024958
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2497124959
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
24972-
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
24960+
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc_lo
2497324961
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
24974-
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
24975-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2497624962
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
24963+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2497724964
; GFX11-NEXT: v_log_f32_e32 v0, v0
2497824965
; GFX11-NEXT: s_waitcnt_depctr 0xfff
2497924966
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
24980-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2498124967
; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1
2498224968
; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0
2498324969
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
24970+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2498424971
; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
24985-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2498624972
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
24973+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2498724974
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2498824975
; GFX11-NEXT: s_setpc_b64 s[30:31]
2498924976
%op = call bfloat @llvm.log2.bf16(bfloat %a)
@@ -25000,8 +24987,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
2500024987
; GCN-NEXT: v_mov_b32_e32 v1, 0x411a209b
2500124988
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2500224989
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
25003-
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
25004-
; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2
24990+
; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 32, vcc
2500524991
; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2
2500624992
; GCN-NEXT: v_log_f32_e32 v0, v0
2500724993
; GCN-NEXT: v_and_b32_e32 v2, 0xfffff000, v0
@@ -25027,8 +25013,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
2502725013
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
2502825014
; GFX7-NEXT: s_mov_b32 s4, 0x800000
2502925015
; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
25030-
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
25031-
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1
25016+
; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2503225017
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
2503325018
; GFX7-NEXT: v_log_f32_e32 v0, v0
2503425019
; GFX7-NEXT: s_mov_b32 s4, 0x3e9a209a
@@ -25052,8 +25037,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
2505225037
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2505325038
; GFX8-NEXT: s_mov_b32 s4, 0x800000
2505425039
; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
25055-
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
25056-
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1
25040+
; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2505725041
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
2505825042
; GFX8-NEXT: v_log_f32_e32 v0, v0
2505925043
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
@@ -25086,8 +25070,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
2508625070
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2508725071
; GFX9-NEXT: s_mov_b32 s4, 0x800000
2508825072
; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
25089-
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
25090-
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1
25073+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc
2509125074
; GFX9-NEXT: v_ldexp_f32 v0, v0, v1
2509225075
; GFX9-NEXT: v_log_f32_e32 v0, v0
2509325076
; GFX9-NEXT: s_mov_b32 s4, 0x3e9a209a
@@ -25116,8 +25099,7 @@ define bfloat @v_log10_bf16(bfloat %a) {
2511625099
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2511725100
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2511825101
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
25119-
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
25120-
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1
25102+
; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
2512125103
; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
2512225104
; GFX10-NEXT: v_log_f32_e32 v0, v0
2512325105
; GFX10-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
@@ -25142,30 +25124,28 @@ define bfloat @v_log10_bf16(bfloat %a) {
2514225124
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0
2514325125
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2514425126
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
25145-
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
25146-
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1
25147-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
25127+
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
2514825128
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
25129+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2514925130
; GFX11-NEXT: v_log_f32_e32 v0, v0
2515025131
; GFX11-NEXT: s_waitcnt_depctr 0xfff
2515125132
; GFX11-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
25152-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2515325133
; GFX11-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
25134+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2515425135
; GFX11-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
25155-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2515625136
; GFX11-NEXT: v_add_f32_e32 v1, v1, v2
2515725137
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, vcc_lo
2515825138
; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
25139+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2515925140
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
25160-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2516125141
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
25142+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2516225143
; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1
2516325144
; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0
2516425145
; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
25165-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
2516625146
; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff
25147+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2516725148
; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo
25168-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2516925149
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0
2517025150
; GFX11-NEXT: s_setpc_b64 s[30:31]
2517125151
%op = call bfloat @llvm.log10.bf16(bfloat %a)

llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -337,8 +337,7 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
337337
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338338
; GFX9-NEXT: s_mov_b32 s4, 0x800000
339339
; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
340-
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
341-
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 5, v3
340+
; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 32, vcc
342341
; GFX9-NEXT: v_ldexp_f32 v3, |v0|, v3
343342
; GFX9-NEXT: v_log_f32_e32 v3, v3
344343
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1

0 commit comments

Comments
 (0)