Skip to content

Commit 6752a29

Browse files
committed
update
1 parent 82336e5 commit 6752a29

File tree

4 files changed

+33
-37
lines changed

4 files changed

+33
-37
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19029,6 +19029,10 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
1902919029
if (SDValue C = DAG.FoldConstantArithmetic(ISD::FABS, DL, VT, {N0}))
1903019030
return C;
1903119031

19032+
// fold (fabs (fabs x)) -> (fabs x)
19033+
if (N0.getOpcode() == ISD::FABS)
19034+
return N->getOperand(0);
19035+
1903219036
if (SimplifyDemandedBits(N0,
1903319037
APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
1903419038
return SDValue(N, 0);

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2980,7 +2980,8 @@ bool TargetLowering::SimplifyDemandedBits(
29802980
if (Known.isNonNegative())
29812981
return TLO.CombineTo(Op, Op0);
29822982
if (Known.isNegative())
2983-
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0));
2983+
return TLO.CombineTo(
2984+
Op, TLO.DAG.getNode(ISD::FNEG, dl, VT, Op0, Op->getFlags()));
29842985

29852986
Known.Zero |= SignMask;
29862987
Known.One &= ~SignMask;
@@ -2996,21 +2997,18 @@ bool TargetLowering::SimplifyDemandedBits(
29962997
return TLO.CombineTo(Op, Op0);
29972998

29982999
if (SimplifyDemandedBits(Op0, ~SignMask & DemandedBits, DemandedElts, Known,
2999-
TLO, Depth + 1))
3000-
return true;
3001-
if (SimplifyDemandedBits(Op1, SignMask, DemandedElts, Known2, TLO,
3000+
TLO, Depth + 1) ||
3001+
SimplifyDemandedBits(Op1, SignMask, DemandedElts, Known2, TLO,
30023002
Depth + 1))
30033003
return true;
30043004

3005-
if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
3006-
return true;
3007-
30083005
if ((Known.isNonNegative() && Known2.isNonNegative()) ||
30093006
(Known.isNegative() && Known2.isNegative()))
30103007
return TLO.CombineTo(Op, Op0);
30113008

30123009
if (Known2.isNonNegative())
3013-
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0));
3010+
return TLO.CombineTo(
3011+
Op, TLO.DAG.getNode(ISD::FABS, dl, VT, Op0, Op->getFlags()));
30143012

30153013
if (Known2.isNegative()) {
30163014
Known.One |= SignMask;

llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Lines changed: 17 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -427,18 +427,16 @@ entry:
427427
define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
428428
; GFX-942-LABEL: fptrunc_f64_to_bf16_abs:
429429
; GFX-942: ; %bb.0: ; %entry
430-
; GFX-942-NEXT: v_cvt_f32_f64_e64 v8, |v[0:1]|
431-
; GFX-942-NEXT: v_and_b32_e32 v5, 0x7fffffff, v1
432-
; GFX-942-NEXT: v_mov_b32_e32 v4, v0
433-
; GFX-942-NEXT: v_cvt_f64_f32_e32 v[6:7], v8
434-
; GFX-942-NEXT: v_and_b32_e32 v9, 1, v8
435-
; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[4:5]|, |v[6:7]|
436-
; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[6:7]
437-
; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v9
430+
; GFX-942-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
431+
; GFX-942-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
432+
; GFX-942-NEXT: v_and_b32_e32 v7, 1, v6
433+
; GFX-942-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
434+
; GFX-942-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
435+
; GFX-942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
438436
; GFX-942-NEXT: v_cndmask_b32_e64 v4, -1, 1, s[2:3]
439-
; GFX-942-NEXT: v_add_u32_e32 v4, v8, v4
437+
; GFX-942-NEXT: v_add_u32_e32 v4, v6, v4
440438
; GFX-942-NEXT: s_or_b64 vcc, s[0:1], vcc
441-
; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
439+
; GFX-942-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
442440
; GFX-942-NEXT: v_bfe_u32 v5, v4, 16, 1
443441
; GFX-942-NEXT: s_movk_i32 s0, 0x7fff
444442
; GFX-942-NEXT: v_add3_u32 v5, v5, v4, s0
@@ -451,18 +449,16 @@ define amdgpu_ps void @fptrunc_f64_to_bf16_abs(double %a, ptr %out) {
451449
;
452450
; GFX-950-LABEL: fptrunc_f64_to_bf16_abs:
453451
; GFX-950: ; %bb.0: ; %entry
454-
; GFX-950-NEXT: v_cvt_f32_f64_e64 v8, |v[0:1]|
455-
; GFX-950-NEXT: v_and_b32_e32 v5, 0x7fffffff, v1
456-
; GFX-950-NEXT: v_mov_b32_e32 v4, v0
457-
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[6:7], v8
458-
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[6:7]
459-
; GFX-950-NEXT: v_and_b32_e32 v0, 1, v8
460-
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[4:5]|, |v[6:7]|
461-
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
462-
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
452+
; GFX-950-NEXT: v_cvt_f32_f64_e64 v6, |v[0:1]|
453+
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
454+
; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
455+
; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
456+
; GFX-950-NEXT: v_cmp_nlg_f64_e64 s[0:1], |v[0:1]|, v[4:5]
457+
; GFX-950-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7
463458
; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
464-
; GFX-950-NEXT: v_add_u32_e32 v0, v8, v0
465-
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
459+
; GFX-950-NEXT: v_add_u32_e32 v0, v6, v0
460+
; GFX-950-NEXT: s_or_b64 vcc, s[0:1], vcc
461+
; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
466462
; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
467463
; GFX-950-NEXT: flat_store_short v[2:3], v0
468464
; GFX-950-NEXT: s_endpgm

llvm/test/CodeGen/AMDGPU/rcp-pattern.ll

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -559,11 +559,10 @@ define float @v_rcp_fabs_f32_ieee_ulp25(float %x) #3 {
559559
; SI-LABEL: v_rcp_fabs_f32_ieee_ulp25:
560560
; SI: ; %bb.0:
561561
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562-
; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
563562
; SI-NEXT: s_mov_b32 s4, 0x7f800000
564-
; SI-NEXT: v_frexp_mant_f32_e64 v2, |v0|
565-
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s4
566-
; SI-NEXT: v_cndmask_b32_e64 v1, |v0|, v2, s[4:5]
563+
; SI-NEXT: v_frexp_mant_f32_e64 v1, |v0|
564+
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
565+
; SI-NEXT: v_cndmask_b32_e64 v1, |v0|, v1, s[4:5]
567566
; SI-NEXT: v_rcp_f32_e32 v1, v1
568567
; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
569568
; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0
@@ -701,11 +700,10 @@ define float @v_rcp_neg_fabs_f32_ieee_ulp25(float %x) #3 {
701700
; SI-LABEL: v_rcp_neg_fabs_f32_ieee_ulp25:
702701
; SI: ; %bb.0:
703702
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704-
; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0
705703
; SI-NEXT: s_mov_b32 s4, 0x7f800000
706-
; SI-NEXT: v_frexp_mant_f32_e64 v2, -|v0|
707-
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s4
708-
; SI-NEXT: v_cndmask_b32_e64 v1, -|v0|, v2, s[4:5]
704+
; SI-NEXT: v_frexp_mant_f32_e64 v1, -|v0|
705+
; SI-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4
706+
; SI-NEXT: v_cndmask_b32_e64 v1, -|v0|, v1, s[4:5]
709707
; SI-NEXT: v_rcp_f32_e32 v1, v1
710708
; SI-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
711709
; SI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0

0 commit comments

Comments
 (0)