Skip to content

Commit 1d3f754

Browse files
committed
Modify allUsesHaveSourceMods() instead of foldFreeOpFromSelect()
This prevents any regressions in fneg-modifier-casting.ll.
1 parent d60d011 commit 1d3f754

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,19 @@ static bool selectSupportsSourceMods(const SDNode *N) {
725725
return N->getValueType(0) == MVT::f32;
726726
}
727727

728+
// Decides whether a BUILD_VECTOR node is treated as supporting source
// modifiers (hasSourceMods dispatches ISD::BUILD_VECTOR here).
// Returns false only when the node's result type is v2f32 and its first two
// operands are both ISD::SELECT nodes; returns true in every other case.
LLVM_READONLY
729+
static bool buildVectorSupportsSourceMods(const SDNode *N) {
730+
// Only the v2f32 result type is special-cased; anything else reports true.
if (N->getValueType(0) != MVT::v2f32)
731+
return true;
732+
733+
SDValue LHS = N->getOperand(0);
734+
SDValue RHS = N->getOperand(1);
735+
// If either element is not produced by a select, report true.
if (LHS->getOpcode() != ISD::SELECT || RHS->getOpcode() != ISD::SELECT)
736+
return true;
737+
738+
// v2f32 whose two elements are both selects: report no source-modifier
// support.
return false;
739+
}
740+
728741
// Most FP instructions support source modifiers, but this could be refined
729742
// slightly.
730743
LLVM_READONLY
@@ -758,6 +771,8 @@ static bool hasSourceMods(const SDNode *N) {
758771
return true;
759772
}
760773
}
774+
case ISD::BUILD_VECTOR:
775+
return buildVectorSupportsSourceMods(N);
761776
case ISD::SELECT:
762777
return selectSupportsSourceMods(N);
763778
default:
@@ -4865,24 +4880,6 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
48654880
if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
48664881
return SDValue();
48674882

4868-
// select c, (fneg (f32 bitcast i32 x)), (fneg (f32 bitcast i32 y)) can be
4869-
// lowered directly to a V_CNDMASK_. So prevent the fneg from being pulled
4870-
// out in this case. For now I've made the logic as specific to the case as
4871-
// possible, hopefully this can be relaxed in future.
4872-
if (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG) {
4873-
SDValue LHSB = LHS.getOperand(0);
4874-
SDValue RHSB = RHS.getOperand(0);
4875-
if (LHSB.getOpcode() == ISD::BITCAST &&
4876-
RHSB->getOpcode() == ISD::BITCAST) {
4877-
EVT LHSBOpTy = LHSB->getOperand(0).getValueType();
4878-
EVT RHSBOpTy = RHSB->getOperand(0).getValueType();
4879-
if (LHSB.getValueType() == MVT::f32 &&
4880-
RHSB.getValueType() == MVT::f32 && LHSBOpTy == MVT::i32 &&
4881-
RHSBOpTy == MVT::i32)
4882-
return SDValue();
4883-
}
4884-
}
4885-
48864883
return distributeOpThroughSelect(DCI, LHS.getOpcode(), SDLoc(N), Cond, LHS,
48874884
RHS);
48884885
}

llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1634,12 +1634,12 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16341634
; GFX7-NEXT: v_mov_b32_e32 v0, s3
16351635
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16361636
; GFX7-NEXT: s_cselect_b32 s1, s1, s3
1637-
; GFX7-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1637+
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16381638
; GFX7-NEXT: s_cselect_b32 s0, s0, s2
16391639
; GFX7-NEXT: v_mov_b32_e32 v1, s1
16401640
; GFX7-NEXT: v_mov_b32_e32 v2, s4
16411641
; GFX7-NEXT: s_mov_b32 flat_scratch_lo, s13
1642-
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1642+
; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16431643
; GFX7-NEXT: v_mov_b32_e32 v0, s0
16441644
; GFX7-NEXT: v_mov_b32_e32 v3, s5
16451645
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
@@ -1658,10 +1658,10 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16581658
; GFX9-NEXT: v_mov_b32_e32 v0, s3
16591659
; GFX9-NEXT: v_mov_b32_e32 v1, s1
16601660
; GFX9-NEXT: s_cselect_b32 s1, s1, s3
1661-
; GFX9-NEXT: v_cndmask_b32_e64 v0, -v0, -v1, vcc
1661+
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
16621662
; GFX9-NEXT: s_cselect_b32 s0, s0, s2
16631663
; GFX9-NEXT: v_mov_b32_e32 v1, s1
1664-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc
1664+
; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc
16651665
; GFX9-NEXT: v_mov_b32_e32 v0, s0
16661666
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
16671667
; GFX9-NEXT: s_endpgm
@@ -1672,17 +1672,17 @@ define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i
16721672
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
16731673
; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10
16741674
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18
1675+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
16751676
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
16761677
; GFX11-NEXT: v_mov_b32_e32 v0, s1
16771678
; GFX11-NEXT: s_bitcmp1_b32 s6, 0
16781679
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1679-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1680-
; GFX11-NEXT: v_cndmask_b32_e64 v0, -s3, -v0, vcc_lo
1680+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1681+
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
16811682
; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo
16821683
; GFX11-NEXT: s_cselect_b32 s1, s1, s3
16831684
; GFX11-NEXT: s_cselect_b32 s0, s0, s2
1684-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1685-
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_cndmask_b32 v1, s1, v0
1685+
; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, -v0, vcc_lo
16861686
; GFX11-NEXT: v_mov_b32_e32 v0, s0
16871687
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5]
16881688
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)