Skip to content

Commit 45e974f

Browse files
committed
Remove unnecessary node duplication
1 parent 57f8903 commit 45e974f

File tree

2 files changed

+26
-29
lines changed

2 files changed

+26
-29
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,18 @@ static bool selectSupportsSourceMods(const SDNode *N) {
719719
return N->getValueType(0) == MVT::f32;
720720
}
721721

722+
// Decides whether the node N (dispatched here for ISD::BUILD_VECTOR) is
// reported as supporting source modifiers.
//
// Returns false only when N's first result type is MVT::v2f32 and both
// operand 0 and operand 1 are ISD::SELECT nodes; returns true in every other
// case (any other result type, or at least one operand that is not a select).
//
// Only operands 0 and 1 are inspected, which matches the two elements of a
// v2f32 vector.
// NOTE(review): presumably the v2f32-of-two-selects case is excluded so that a
// modifier such as fneg is not pulled out past the build_vector and can
// instead be handled at the selects - confirm against the callers.
LLVM_READONLY
723+
static bool buildVectorSupportsSourceMods(const SDNode *N) {
724+
if (N->getValueType(0) != MVT::v2f32)
725+
return true;
726+
727+
if (N->getOperand(0)->getOpcode() != ISD::SELECT ||
728+
N->getOperand(1)->getOpcode() != ISD::SELECT)
729+
return true;
730+
731+
return false;
732+
}
733+
722734
// Most FP instructions support source modifiers, but this could be refined
723735
// slightly.
724736
LLVM_READONLY
@@ -752,6 +764,8 @@ static bool hasSourceMods(const SDNode *N) {
752764
return true;
753765
}
754766
}
767+
case ISD::BUILD_VECTOR:
768+
return buildVectorSupportsSourceMods(N);
755769
case ISD::SELECT:
756770
return selectSupportsSourceMods(N);
757771
default:
@@ -4087,9 +4101,10 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
40874101
LHSAND, Zero);
40884102
SDValue Hi =
40894103
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, LHSAND, One);
4090-
SDValue AndMask = DAG.getConstant(0x1f, SL, MVT::i32);
4091-
SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, AndMask);
4092-
SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, AndMask);
4104+
SDValue LoAnd =
4105+
DAG.getNode(ISD::AND, SL, MVT::i32, Lo, RHSAND->getOperand(0));
4106+
SDValue HiAnd =
4107+
DAG.getNode(ISD::AND, SL, MVT::i32, Hi, RHSAND->getOperand(0));
40934108
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
40944109
if (AndIndex == 0 || AndIndex == 1)
40954110
return DAG.getNode(ISD::SHL, SL, MVT::i32, Trunc,
@@ -4858,24 +4873,6 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
48584873
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
48594874
return SDValue();
48604875

4861-
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4862-
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4863-
// out in this case. For now I've made the logic as specific to the case as
4864-
// possible, hopefully this can be relaxed in future.
4865-
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4866-
SDValue LHSB = LHS.getOperand(0);
4867-
SDValue RHSB = RHS.getOperand(0);
4868-
if (LHSB.getOpcode() == ISD::BITCAST &&
4869-
RHSB->getOpcode() == ISD::BITCAST) {
4870-
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4871-
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4872-
if (LHSB.getValueType() == MVT::f32 &&
4873-
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4874-
RHSBOpTy == MVT::i32)
4875-
return SDValue();
4876-
}
4877-
}
4878-
48794876
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
48804877
RHS);
48814878
}

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,12 +1634,12 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16341634
; GFX7-NEXT: v_mov_b32_e32 v0, s3
16351635
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16361636
; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1637-
; GFX7-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1637+
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16381638
; GFX7-NEXT: s_cselect_b32 s0, s0, s2
16391639
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16401640
; GFX7-NEXT: v_mov_b32_e32 v2, s4
16411641
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
1642-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1642+
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16431643
; GFX7-NEXT: v_mov_b32_e32 v0, s0
16441644
; GFX7-NEXT: v_mov_b32_e32 v3, s5
16451645
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -1658,10 +1658,10 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16581658
; GFX9-NEXT: v_mov_b32_e32 v0, s3
16591659
; GFX9-NEXT: v_mov_b32_e32 v1, s1
16601660
; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1661-
; GFX9-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1661+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16621662
; GFX9-NEXT: s_cselect_b32 s0, s0, s2
16631663
; GFX9-NEXT: v_mov_b32_e32 v1, s1
1664-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1664+
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16651665
; GFX9-NEXT: v_mov_b32_e32 v0, s0
16661666
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
16671667
; GFX9-NEXT: s_endpgm
@@ -1672,17 +1672,17 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16721672
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
16731673
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
16741674
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18
1675+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
16751676
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
16761677
; GFX11-NEXT: v_mov_b32_e32 v0, s1
16771678
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
16781679
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1679-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1680-
; GFX11-NEXT: v_cndmask_b32_e64 v0, -s3, -v0, vcc_lo
1680+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1681+
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
16811682
; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo
16821683
; GFX11-NEXT: s_cselect_b32 s1, s1, s3
16831684
; GFX11-NEXT: s_cselect_b32 s0, s0, s2
1684-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1685-
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, s1, v0
1685+
; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, -v0, vcc_lo
16861686
; GFX11-NEXT: v_mov_b32_e32 v0, s0
16871687
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
16881688
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)