Skip to content

Commit 6f576aa

Browse files
committed
Use VSX compare and xxsel to optimize float select_cc
1 parent dff5bb9 commit 6f576aa

File tree

11 files changed

+484
-364
lines changed

11 files changed

+484
-364
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,6 +1830,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
18301830
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
18311831
case PPCISD::STORE_COND:
18321832
return "PPCISD::STORE_COND";
1833+
case PPCISD::VSX_CMPSEL:
1834+
return "PPCISD::VSX_CMPSEL";
18331835
}
18341836
return nullptr;
18351837
}
@@ -15560,6 +15562,65 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
1556015562
return true;
1556115563
}
1556215564

15565+
// Use VSX compare gt/ge/eq instruction to implement select_cc
15566+
static SDValue combineFloatSelectCC(SDValue Op, const PPCSubtarget &Subtarget,
15567+
SelectionDAG &DAG) {
15568+
EVT VT = Op.getValueType();
15569+
15570+
// Use subtraction based lowering if it is finite-math.
15571+
if (DAG.getTarget().Options.NoInfsFPMath || Op->getFlags().hasNoInfs())
15572+
return SDValue();
15573+
15574+
// Vector comparison is already implemented in isel.
15575+
if (VT.isVector() || !VT.isFloatingPoint() ||
15576+
(VT == MVT::f128 && !Subtarget.hasP10Vector()) || !Subtarget.hasVSX())
15577+
return SDValue();
15578+
SDValue TrueVal = Op.getOperand(2), FalseVal = Op.getOperand(3);
15579+
SDValue Cond1 = Op.getOperand(0), Cond2 = Op.getOperand(1);
15580+
SDLoc DL(Op);
15581+
unsigned CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
15582+
15583+
// Recognize the form after legalizer if cond code is illegal.
15584+
if (VT != Cond1.getValueType()) {
15585+
if (!isNullConstant(Cond2) || CC != ISD::SETNE ||
15586+
Cond1.getOpcode() != ISD::AND)
15587+
return SDValue();
15588+
Cond1 = Cond1.getOperand(0);
15589+
Cond2 = Cond1.getOperand(1);
15590+
if (Cond1.getOpcode() != ISD::SETCC || Cond2.getOpcode() != ISD::SETCC ||
15591+
Cond1.getOperand(0) != Cond2.getOperand(0) ||
15592+
Cond1.getOperand(1) != Cond2.getOperand(1))
15593+
return SDValue();
15594+
CC = cast<CondCodeSDNode>(Cond1.getOperand(2))->get() &
15595+
cast<CondCodeSDNode>(Cond2.getOperand(2))->get();
15596+
Cond1 = Cond1.getOperand(0);
15597+
Cond2 = Cond1.getOperand(1);
15598+
}
15599+
15600+
// The instruction is ordered. Treat it as ordered if we don't care order.
15601+
if (CC & ISD::SETUO)
15602+
return SDValue();
15603+
if (CC & ISD::SETFALSE2)
15604+
CC &= ISD::SETO;
15605+
15606+
// Use min/max instructions if available.
15607+
if (((Cond1 == TrueVal && Cond2 == FalseVal) ||
15608+
(Cond1 == FalseVal && Cond2 == TrueVal)) &&
15609+
(CC == ISD::SETOLT || CC == ISD::SETOGT))
15610+
return SDValue();
15611+
15612+
bool Inverse = false;
15613+
if (CC == ISD::SETOLT || CC == ISD::SETOLE || CC == ISD::SETONE) {
15614+
CC = (~CC) & ISD::SETO;
15615+
Inverse = true;
15616+
}
15617+
if (CC == ISD::SETOGE || CC == ISD::SETOGT || CC == ISD::SETOEQ)
15618+
return DAG.getNode(PPCISD::VSX_CMPSEL, DL, VT, Inverse ? Cond2 : Cond1,
15619+
Inverse ? Cond1 : Cond2, TrueVal, FalseVal,
15620+
DAG.getCondCode((ISD::CondCode)CC));
15621+
return SDValue();
15622+
}
15623+
1556315624
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
1556415625
DAGCombinerInfo &DCI) const {
1556515626
SelectionDAG &DAG = DCI.DAG;
@@ -15629,6 +15690,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
1562915690
return CSCC;
1563015691
[[fallthrough]];
1563115692
case ISD::SELECT_CC:
15693+
if (SDValue V = combineFloatSelectCC(SDValue(N, 0), Subtarget, DCI.DAG))
15694+
return V;
1563215695
return DAGCombineTruncBoolExt(N, DCI);
1563315696
case ISD::SINT_TO_FP:
1563415697
case ISD::UINT_TO_FP:

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ namespace llvm {
475475
/// XXMFACC = This corresponds to the xxmfacc instruction.
476476
XXMFACC,
477477

478+
/// VSX_CMPSEL = VSX compare gt/ge/eq instruction with selection.
479+
VSX_CMPSEL,
480+
478481
// Constrained conversion from floating point to int
479482
STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
480483
STRICT_FCTIWZ,

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
193193
[(PPCfaddrtz node:$lhs, node:$rhs),
194194
(PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
195195

196+
// DAG node for PPCISD::VSX_CMPSEL. It reuses the select_cc type profile, so
// its operands are (lhs, rhs, true value, false value, condition code).
// No node properties are attached.
def PPCvsx_cmpsel : SDNode<"PPCISD::VSX_CMPSEL", SDTSelectCC, []>;
197+
196198
def PPCfsel : SDNode<"PPCISD::FSEL",
197199
// Type constraint for fsel.
198200
SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,

llvm/lib/Target/PowerPC/PPCInstrP10.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2067,6 +2067,19 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
20672067
(v1i128 (VSRAQ v1i128:$VRA,
20682068
(XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
20692069
(COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
2070+
2071+
// f128 select_cc through a quad-precision compare feeding xxsel.
// xxsel XT, XA, XB, XC takes XB wherever the mask XC is set, so the value
// chosen when the comparison holds ($tval) has to be the second source.
// f128 values live in VRRC, hence the final register class.
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPEQQP $lhs, $rhs)), VRRC)>;
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGEQP $lhs, $rhs)), VRRC)>;
def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGTQP $lhs, $rhs)), VRRC)>;
20702083
}
20712084

20722085
class xxevalPattern <dag pattern, bits<8> imm> :

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2918,6 +2918,39 @@ def : Pat<(PPCstore_scal_int_from_vsr f64:$src, XForm:$dst, 8),
29182918
(STXSDX $src, XForm:$dst)>;
29192919
def : Pat<(PPCstore_scal_int_from_vsr f128:$src, XForm:$dst, 8),
29202920
(STXSDX (COPY_TO_REGCLASS $src, VSFRC), XForm:$dst)>;
2921+
2922+
// f32 select_cc through a VSX compare feeding xxsel.
// A scalar f32 is kept in double-precision format in the register, so the
// double-precision compares are used; a single-precision vector compare
// would misinterpret the register contents as four SP words.
// xxsel XT, XA, XB, XC takes XB wherever the mask XC is set, so the value
// chosen when the comparison holds ($tval) has to be the second source.
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
2938+
// f64 select_cc through a double-precision vector compare feeding xxsel.
// xxsel XT, XA, XB, XC takes XB wherever the mask XC is set, so the value
// chosen when the comparison holds ($tval) has to be the second source.
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL
              (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
              (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
                         (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
29212954
} // HasVSX
29222955

29232956
// Any big endian VSX subtarget.
@@ -3933,6 +3966,19 @@ foreach Ty = [v4i32, v4f32, v2i64, v2f64] in {
39333966
def : Pat<(store Ty:$rS, XForm:$dst), (STXVX $rS, XForm:$dst)>;
39343967
}
39353968

3969+
// f64 select_cc through a scalar VSX compare (mask result) feeding xxsel.
// xxsel XT, XA, XB, XC takes XB wherever the mask XC is set, so the value
// chosen when the comparison holds ($tval) has to be the second source.
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPEQDP $lhs, $rhs)), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGEDP $lhs, $rhs)), VSFRC)>;
def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
                                   (COPY_TO_REGCLASS $tval, VSRC),
                                   (XSCMPGTDP $lhs, $rhs)), VSFRC)>;
3981+
39363982
def : Pat<(f128 (load DQForm:$src)),
39373983
(COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
39383984
def : Pat<(f128 (load XForm:$src)),

llvm/test/CodeGen/PowerPC/cgp-select.ll

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,32 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
88
; CHECK-NEXT: li 7, 7
99
; CHECK-NEXT: cmpwi 4, 2
1010
; CHECK-NEXT: xsaddsp 0, 0, 0
11-
; CHECK-NEXT: blt 0, .LBB0_5
11+
; CHECK-NEXT: blt 0, .LBB0_4
1212
; CHECK-NEXT: # %bb.1: # %bb6
1313
; CHECK-NEXT: clrldi 4, 4, 32
1414
; CHECK-NEXT: addi 4, 4, -1
1515
; CHECK-NEXT: mtctr 4
1616
; CHECK-NEXT: li 4, 8
17-
; CHECK-NEXT: b .LBB0_3
18-
; CHECK-NEXT: .p2align 5
17+
; CHECK-NEXT: .p2align 4
1918
; CHECK-NEXT: .LBB0_2: # %bb11
2019
; CHECK-NEXT: #
20+
; CHECK-NEXT: lfsu 2, 4(3)
21+
; CHECK-NEXT: xsaddsp 1, 2, 2
22+
; CHECK-NEXT: xvcmpgtsp 3, 2, 0
23+
; CHECK-NEXT: fcmpu 0, 2, 0
2124
; CHECK-NEXT: iselgt 7, 4, 7
2225
; CHECK-NEXT: addi 4, 4, 1
23-
; CHECK-NEXT: bdz .LBB0_5
24-
; CHECK-NEXT: .LBB0_3: # %bb11
25-
; CHECK-NEXT: #
26-
; CHECK-NEXT: lfsu 1, 4(3)
27-
; CHECK-NEXT: fcmpu 0, 1, 0
28-
; CHECK-NEXT: ble 0, .LBB0_2
29-
; CHECK-NEXT: # %bb.4:
30-
; CHECK-NEXT: xsaddsp 0, 1, 1
31-
; CHECK-NEXT: b .LBB0_2
32-
; CHECK-NEXT: .LBB0_5: # %bb8
26+
; CHECK-NEXT: xxsel 1, 1, 0, 3
27+
; CHECK-NEXT: fmr 0, 1
28+
; CHECK-NEXT: bdnz .LBB0_2
29+
; CHECK-NEXT: # %bb.3: # %bb8
30+
; CHECK-NEXT: stw 7, 0(5)
31+
; CHECK-NEXT: stfs 1, 0(6)
32+
; CHECK-NEXT: blr
33+
; CHECK-NEXT: .LBB0_4:
34+
; CHECK-NEXT: fmr 1, 0
3335
; CHECK-NEXT: stw 7, 0(5)
34-
; CHECK-NEXT: stfs 0, 0(6)
36+
; CHECK-NEXT: stfs 1, 0(6)
3537
; CHECK-NEXT: blr
3638
bb:
3739
%tmp = load float, ptr %arg, align 4

0 commit comments

Comments
 (0)