Skip to content

Commit 2c0de2f

Browse files
committed
Modify allUsesHaveSourceMods() instead of foldFreeOpFromSelect()
This prevents any regressions in fneg-modifier-casting.ll.
1 parent 62edfa9 commit 2c0de2f

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,19 @@ static bool selectSupportsSourceMods(const SDNode *N) {
726726
return N->getValueType(0) == MVT::f32;
727727
}
728728

729+
// Decides whether a BUILD_VECTOR node \p N is reported as supporting source
// modifiers. hasSourceMods() dispatches here for ISD::BUILD_VECTOR.
//
// Returns false only when N produces v2f32 and both of its first two operands
// are ISD::SELECT nodes; returns true in every other case.
// NOTE(review): presumably this keeps an fneg from being pulled out of selects
// that feed a v2f32 build_vector -- confirm against fneg-modifier-casting.ll.
LLVM_READONLY
730+
static bool buildVectorSupportsSourceMods(const SDNode *N) {
731+
// Only the v2f32 result type is restricted; any other type reports true.
if (N->getValueType(0) != MVT::v2f32)
732+
return true;
733+
734+
// Inspect the two element operands of the vector.
SDValue LHS = N->getOperand(0);
735+
SDValue RHS = N->getOperand(1);
736+
// If either element is not a SELECT, report true.
if (LHS->getOpcode() != ISD::SELECT || RHS->getOpcode() != ISD::SELECT)
737+
return true;
738+
739+
// Both elements are SELECT nodes: report no source-modifier support.
return false;
740+
}
741+
729742
// Most FP instructions support source modifiers, but this could be refined
730743
// slightly.
731744
LLVM_READONLY
@@ -759,6 +772,8 @@ static bool hasSourceMods(const SDNode *N) {
759772
return true;
760773
}
761774
}
775+
case ISD::BUILD_VECTOR:
776+
return buildVectorSupportsSourceMods(N);
762777
case ISD::SELECT:
763778
return selectSupportsSourceMods(N);
764779
default:
@@ -4866,24 +4881,6 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
48664881
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
48674882
return SDValue();
48684883

4869-
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4870-
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4871-
// out in this case. For now I've made the logic as specific to the case as
4872-
// possible, hopefully this can be relaxed in future.
4873-
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4874-
SDValue LHSB = LHS.getOperand(0);
4875-
SDValue RHSB = RHS.getOperand(0);
4876-
if (LHSB.getOpcode() == ISD::BITCAST &&
4877-
RHSB->getOpcode() == ISD::BITCAST) {
4878-
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4879-
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4880-
if (LHSB.getValueType() == MVT::f32 &&
4881-
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4882-
RHSBOpTy == MVT::i32)
4883-
return SDValue();
4884-
}
4885-
}
4886-
48874884
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
48884885
RHS);
48894886
}

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,12 +1634,12 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16341634
; GFX7-NEXT: v_mov_b32_e32 v0, s3
16351635
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16361636
; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1637-
; GFX7-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1637+
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16381638
; GFX7-NEXT: s_cselect_b32 s0, s0, s2
16391639
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16401640
; GFX7-NEXT: v_mov_b32_e32 v2, s4
16411641
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
1642-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1642+
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16431643
; GFX7-NEXT: v_mov_b32_e32 v0, s0
16441644
; GFX7-NEXT: v_mov_b32_e32 v3, s5
16451645
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -1658,10 +1658,10 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16581658
; GFX9-NEXT: v_mov_b32_e32 v0, s3
16591659
; GFX9-NEXT: v_mov_b32_e32 v1, s1
16601660
; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1661-
; GFX9-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1661+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16621662
; GFX9-NEXT: s_cselect_b32 s0, s0, s2
16631663
; GFX9-NEXT: v_mov_b32_e32 v1, s1
1664-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1664+
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16651665
; GFX9-NEXT: v_mov_b32_e32 v0, s0
16661666
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
16671667
; GFX9-NEXT: s_endpgm
@@ -1672,17 +1672,17 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16721672
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
16731673
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
16741674
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18
1675+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
16751676
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
16761677
; GFX11-NEXT: v_mov_b32_e32 v0, s1
16771678
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
16781679
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1679-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1680-
; GFX11-NEXT: v_cndmask_b32_e64 v0, -s3, -v0, vcc_lo
1680+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1681+
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
16811682
; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo
16821683
; GFX11-NEXT: s_cselect_b32 s1, s1, s3
16831684
; GFX11-NEXT: s_cselect_b32 s0, s0, s2
1684-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1685-
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, s1, v0
1685+
; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, -v0, vcc_lo
16861686
; GFX11-NEXT: v_mov_b32_e32 v0, s0
16871687
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
16881688
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)