Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
case PPCISD::STORE_COND:
return "PPCISD::STORE_COND";
case PPCISD::VSX_CMPSEL:
return "PPCISD::VSX_CMPSEL";
}
return nullptr;
}
Expand Down Expand Up @@ -15560,6 +15562,65 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
return true;
}

// Use VSX compare gt/ge/eq instruction to implement select_cc.
//
// \p Op is a scalar floating-point ISD::SELECT_CC node with operands
// (lhs, rhs, tval, fval, condcode). On success the function returns a
// PPCISD::VSX_CMPSEL node with the same operand layout whose condition code is
// one of SETOEQ / SETOGT / SETOGE. An empty SDValue is returned whenever the
// combine does not apply, so the caller can fall through to other combines.
static SDValue combineFloatSelectCC(SDValue Op, const PPCSubtarget &Subtarget,
                                    SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // Use subtraction based lowering if it is finite-math.
  if (DAG.getTarget().Options.NoInfsFPMath || Op->getFlags().hasNoInfs())
    return SDValue();

  // Vector comparison is already implemented in isel.
  if (VT.isVector() || !VT.isFloatingPoint() ||
      (VT == MVT::f128 && !Subtarget.hasP10Vector()) || !Subtarget.hasVSX())
    return SDValue();

  SDValue TrueVal = Op.getOperand(2), FalseVal = Op.getOperand(3);
  SDValue Cond1 = Op.getOperand(0), Cond2 = Op.getOperand(1);
  SDLoc DL(Op);
  unsigned CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Recognize the form after legalizer if cond code is illegal:
  //   select_cc (and (setcc a, b, cc1), (setcc a, b, cc2)), 0, t, f, setne
  if (VT != Cond1.getValueType()) {
    if (!isNullConstant(Cond2) || CC != ISD::SETNE ||
        Cond1.getOpcode() != ISD::AND)
      return SDValue();

    // Read both AND operands before touching Cond1/Cond2; overwriting Cond1
    // first would make the second read come from the wrong node.
    SDValue SetCC1 = Cond1.getOperand(0);
    SDValue SetCC2 = Cond1.getOperand(1);
    if (SetCC1.getOpcode() != ISD::SETCC || SetCC2.getOpcode() != ISD::SETCC ||
        SetCC1.getOperand(0) != SetCC2.getOperand(0) ||
        SetCC1.getOperand(1) != SetCC2.getOperand(1))
      return SDValue();

    // The conjunction of two predicates on the same operands is the bitwise
    // AND of their condition codes.
    CC = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get() &
         cast<CondCodeSDNode>(SetCC2.getOperand(2))->get();
    Cond1 = SetCC1.getOperand(0);
    Cond2 = SetCC1.getOperand(1);

    // The compare must be on the same scalar FP type as the selected values;
    // otherwise (e.g. an integer compare) no VSX_CMPSEL pattern can match.
    if (Cond1.getValueType() != VT)
      return SDValue();
  }

  // The instruction is ordered, so unordered predicates cannot be handled.
  if (CC & ISD::SETUO)
    return SDValue();

  // "Don't care" predicates are undefined on NaN inputs, so they may be
  // treated as their ordered counterparts.
  const bool NaNDontCare = (CC & ISD::SETFALSE2) != 0;
  if (NaNDontCare)
    CC &= ISD::SETO;

  // Use min/max instructions if available.
  if (((Cond1 == TrueVal && Cond2 == FalseVal) ||
       (Cond1 == FalseVal && Cond2 == TrueVal)) &&
      (CC == ISD::SETOLT || CC == ISD::SETOGT))
    return SDValue();

  // Only eq/gt/ge compares exist. Rewrite the remaining ordered predicates:
  //   a <  b  ==>  b >  a     (swap the compared operands)
  //   a <= b  ==>  b >= a     (swap the compared operands)
  //   a != b  ==>  !(a == b)  (swap the selected values)
  // The last rewrite yields TrueVal on NaN inputs where SETONE would yield
  // FalseVal, so it is only applied when the NaN result is "don't care".
  if (CC == ISD::SETOLT || CC == ISD::SETOLE) {
    SDValue Tmp = Cond1;
    Cond1 = Cond2;
    Cond2 = Tmp;
    CC = (CC == ISD::SETOLT) ? ISD::SETOGT : ISD::SETOGE;
  } else if (CC == ISD::SETONE) {
    if (!NaNDontCare)
      return SDValue();
    SDValue Tmp = TrueVal;
    TrueVal = FalseVal;
    FalseVal = Tmp;
    CC = ISD::SETOEQ;
  }

  if (CC == ISD::SETOGE || CC == ISD::SETOGT || CC == ISD::SETOEQ)
    return DAG.getNode(PPCISD::VSX_CMPSEL, DL, VT, Cond1, Cond2, TrueVal,
                       FalseVal,
                       DAG.getCondCode(static_cast<ISD::CondCode>(CC)));
  return SDValue();
}

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -15629,6 +15690,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return CSCC;
[[fallthrough]];
case ISD::SELECT_CC:
if (SDValue V = combineFloatSelectCC(SDValue(N, 0), Subtarget, DCI.DAG))
return V;
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ namespace llvm {
/// XXMFACC = This corresponds to the xxmfacc instruction.
XXMFACC,

/// VSX_CMPSEL = VSX compare gt/ge/eq instruction with selection.
VSX_CMPSEL,

// Constrained conversion from floating point to int
STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCTIWZ,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
[(PPCfaddrtz node:$lhs, node:$rhs),
(PPCstrict_faddrtz node:$lhs, node:$rhs)]>;

// Select-on-compare node for PPCISD::VSX_CMPSEL. It uses the generic
// SDTSelectCC type profile, i.e. operands are (lhs, rhs, tval, fval, condcode).
def PPCvsx_cmpsel : SDNode<"PPCISD::VSX_CMPSEL", SDTSelectCC, []>;

def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrP10.td
Original file line number Diff line number Diff line change
Expand Up @@ -2067,6 +2067,19 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
(v1i128 (VSRAQ v1i128:$VRA,
(XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
(COPY_TO_REGCLASS $VRB, VSRC), 2)))>;

// f128 VSX_CMPSEL: compare with xscmp{eq,ge,gt}qp, then pick with xxsel.
// xxsel computes (XA & ~XC) | (XB & XC), i.e. it returns its SECOND source
// where the mask is set, so the false value goes first and the true value
// second. f128 values live in VRRC, so the result is copied back to VRRC.
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPEQQP $lhs, $rhs)), VRRC)>;
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGEQP $lhs, $rhs)), VRRC)>;
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGTQP $lhs, $rhs)), VRRC)>;
}

class xxevalPattern <dag pattern, bits<8> imm> :
Expand Down
46 changes: 46 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrVSX.td
Original file line number Diff line number Diff line change
Expand Up @@ -2918,6 +2918,39 @@ def : Pat<(PPCstore_scal_int_from_vsr f64:$src, XForm:$dst, 8),
(STXSDX $src, XForm:$dst)>;
def : Pat<(PPCstore_scal_int_from_vsr f128:$src, XForm:$dst, 8),
(STXSDX (COPY_TO_REGCLASS $src, VSFRC), XForm:$dst)>;

// f32 VSX_CMPSEL. Scalar single-precision values are kept in double-precision
// format in the register, so the compare must be the double-precision vector
// compare (xvcmp*dp); xvcmp*sp would reinterpret the two words of that
// doubleword as separate floats. xxsel computes (XA & ~XC) | (XB & XC), i.e.
// it returns its SECOND source where the mask is set, so the false value goes
// first and the true value second.
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
// f64 VSX_CMPSEL using the vector double-precision compares; only the
// doubleword holding the scalar is meaningful in the mask and in the result.
// xxsel computes (XA & ~XC) | (XB & XC), i.e. it returns its SECOND source
// where the mask is set, so the false value goes first and the true value
// second.
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL
            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
            (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
} // HasVSX

// Any big endian VSX subtarget.
Expand Down Expand Up @@ -3933,6 +3966,19 @@ foreach Ty = [v4i32, v4f32, v2i64, v2f64] in {
def : Pat<(store Ty:$rS, XForm:$dst), (STXVX $rS, XForm:$dst)>;
}

// f64 VSX_CMPSEL using the scalar compares xscmp{eq,ge,gt}dp, which produce a
// mask directly. xxsel computes (XA & ~XC) | (XB & XC), i.e. it returns its
// SECOND source where the mask is set, so the false value goes first and the
// true value second.
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPEQDP $lhs, $rhs)), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGEDP $lhs, $rhs)), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGTDP $lhs, $rhs)), VSFRC)>;

def : Pat<(f128 (load DQForm:$src)),
(COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
def : Pat<(f128 (load XForm:$src)),
Expand Down
30 changes: 16 additions & 14 deletions llvm/test/CodeGen/PowerPC/cgp-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,32 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
; CHECK-NEXT: li 7, 7
; CHECK-NEXT: cmpwi 4, 2
; CHECK-NEXT: xsaddsp 0, 0, 0
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: blt 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %bb6
; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: addi 4, 4, -1
; CHECK-NEXT: mtctr 4
; CHECK-NEXT: li 4, 8
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: # %bb11
; CHECK-NEXT: #
; CHECK-NEXT: lfsu 2, 4(3)
; CHECK-NEXT: xsaddsp 1, 2, 2
; CHECK-NEXT: xvcmpgtsp 3, 2, 0
; CHECK-NEXT: fcmpu 0, 2, 0
; CHECK-NEXT: iselgt 7, 4, 7
; CHECK-NEXT: addi 4, 4, 1
; CHECK-NEXT: bdz .LBB0_5
; CHECK-NEXT: .LBB0_3: # %bb11
; CHECK-NEXT: #
; CHECK-NEXT: lfsu 1, 4(3)
; CHECK-NEXT: fcmpu 0, 1, 0
; CHECK-NEXT: ble 0, .LBB0_2
; CHECK-NEXT: # %bb.4:
; CHECK-NEXT: xsaddsp 0, 1, 1
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_5: # %bb8
; CHECK-NEXT: xxsel 1, 1, 0, 3
; CHECK-NEXT: fmr 0, 1
; CHECK-NEXT: bdnz .LBB0_2
; CHECK-NEXT: # %bb.3: # %bb8
; CHECK-NEXT: stw 7, 0(5)
; CHECK-NEXT: stfs 1, 0(6)
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: fmr 1, 0
; CHECK-NEXT: stw 7, 0(5)
; CHECK-NEXT: stfs 0, 0(6)
; CHECK-NEXT: stfs 1, 0(6)
; CHECK-NEXT: blr
bb:
%tmp = load float, ptr %arg, align 4
Expand Down
Loading