llvm
diff --git a/‎llvm/lib/Target/PowerPC/PPCISelLowering.cpp‎
Lines changed: 63 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCISelLowering.cpp‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPCISelLowering.h‎
Lines changed: 3 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCISelLowering.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPCInstrInfo.td‎
Lines changed: 2 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCInstrInfo.td‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPCInstrP10.td‎
Lines changed: 13 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCInstrP10.td‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎llvm/lib/Target/PowerPC/PPCInstrVSX.td‎
Lines changed: 46 additions & 0 deletions b/‎llvm/lib/Target/PowerPC/PPCInstrVSX.td‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎llvm/test/CodeGen/PowerPC/cgp-select.ll‎
Lines changed: 16 additions & 14 deletions b/‎llvm/test/CodeGen/PowerPC/cgp-select.ll‎
Lines changed: 16 additions & 14 deletions
@@ -1830,6 +1830,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
   case PPCISD::STORE_COND:
     return "PPCISD::STORE_COND";
+  case PPCISD::VSX_CMPSEL:
+    return "PPCISD::VSX_CMPSEL";
   }
   return nullptr;
 }
@@ -15560,6 +15562,65 @@ static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
   return true;
 }
 
+// Use VSX compare gt/ge/eq instruction to implement select_cc. The produced
+// VSX_CMPSEL(lhs, rhs, tval, fval, cc) selects tval when "lhs cc rhs" holds and
+// fval otherwise. Returns an empty SDValue when the combine does not apply.
+static SDValue combineFloatSelectCC(SDValue Op, const PPCSubtarget &Subtarget,
+                                    SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+
+  // Use subtraction based lowering if it is finite-math.
+  if (DAG.getTarget().Options.NoInfsFPMath || Op->getFlags().hasNoInfs())
+    return SDValue();
+
+  // Only f32/f64/f128 have patterns; vector comparison is already in isel.
+  if ((VT != MVT::f32 && VT != MVT::f64 && VT != MVT::f128) ||
+      (VT == MVT::f128 && !Subtarget.hasP10Vector()) || !Subtarget.hasVSX())
+    return SDValue();
+  SDValue TrueVal = Op.getOperand(2), FalseVal = Op.getOperand(3);
+  SDValue Cond1 = Op.getOperand(0), Cond2 = Op.getOperand(1);
+  unsigned CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  // Recognize the form after legalizer if cond code is illegal:
+  //   select_cc (and (setcc a, b, cc1), (setcc a, b, cc2)), 0, t, f, setne
+  if (VT != Cond1.getValueType()) {
+    if (!isNullConstant(Cond2) || CC != ISD::SETNE ||
+        Cond1.getOpcode() != ISD::AND)
+      return SDValue();
+    SDValue SetCC1 = Cond1.getOperand(0), SetCC2 = Cond1.getOperand(1);
+    if (SetCC1.getOpcode() != ISD::SETCC || SetCC2.getOpcode() != ISD::SETCC ||
+        SetCC1.getOperand(0) != SetCC2.getOperand(0) ||
+        SetCC1.getOperand(1) != SetCC2.getOperand(1) ||
+        SetCC1.getOperand(0).getValueType() != VT)
+      return SDValue();
+    CC = cast<CondCodeSDNode>(SetCC1.getOperand(2))->get() &
+         cast<CondCodeSDNode>(SetCC2.getOperand(2))->get();
+    Cond1 = SetCC1.getOperand(0);
+    Cond2 = SetCC1.getOperand(1);
+  }
+
+  // The instruction is ordered. Treat it as ordered if we don't care order.
+  if (CC & ISD::SETUO)
+    return SDValue();
+  CC &= ISD::SETO;
+
+  // Use min/max instructions if available.
+  if (((Cond1 == TrueVal && Cond2 == FalseVal) ||
+       (Cond1 == FalseVal && Cond2 == TrueVal)) &&
+      (CC == ISD::SETOLT || CC == ISD::SETOGT))
+    return SDValue();
+
+  // Only gt/ge/eq exist: a < b is b > a, a <= b is b >= a; anything else bails.
+  bool Swap = CC == ISD::SETOLT || CC == ISD::SETOLE;
+  if (Swap)
+    CC = CC == ISD::SETOLT ? ISD::SETOGT : ISD::SETOGE;
+  if (CC != ISD::SETOGE && CC != ISD::SETOGT && CC != ISD::SETOEQ)
+    return SDValue();
+  return DAG.getNode(PPCISD::VSX_CMPSEL, SDLoc(Op), VT, Swap ? Cond2 : Cond1,
+                     Swap ? Cond1 : Cond2, TrueVal, FalseVal,
+                     DAG.getCondCode((ISD::CondCode)CC));
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -15629,6 +15690,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       return CSCC;
     [[fallthrough]];
   case ISD::SELECT_CC:
+    if (SDValue V = combineFloatSelectCC(SDValue(N, 0), Subtarget, DCI.DAG))
+      return V;
     return DAGCombineTruncBoolExt(N, DCI);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
 
@@ -475,6 +475,9 @@ namespace llvm {
     /// XXMFACC = This corresponds to the xxmfacc instruction.
     XXMFACC,
 
+    /// VSX_CMPSEL = VSX compare gt/ge/eq instruction with selection.
+    VSX_CMPSEL,
+
     // Constrained conversion from floating point to int
     STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
     STRICT_FCTIWZ,
 
@@ -193,6 +193,8 @@ def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
                              [(PPCfaddrtz node:$lhs, node:$rhs),
                               (PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
 
+// Compare-and-select node using the select_cc operand layout:
+//   (lhs, rhs, tval, fval, condcode). It declares no node properties.
+def PPCvsx_cmpsel : SDNode<"PPCISD::VSX_CMPSEL", SDTSelectCC, []>;
+
 def PPCfsel   : SDNode<"PPCISD::FSEL",
    // Type constraint for fsel.
    SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
 
@@ -2067,6 +2067,19 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
             (v1i128 (VSRAQ v1i128:$VRA,
                      (XXPERMDI (COPY_TO_REGCLASS $VRB, VSRC),
                                (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
+
+// VSX_CMPSEL on f128: xscmp{eq,ge,gt}qp produces the select mask for xxsel.
+// xxsel takes its second source (XB) where the mask is set, so $tval is passed
+// as XB and $fval as XA. The f128 result is copied back to VRRC, the class
+// used for f128 values elsewhere in these patterns.
+def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOEQ)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPEQQP $lhs, $rhs)), VRRC)>;
+def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGE)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPGEQP $lhs, $rhs)), VRRC)>;
+def : Pat<(f128 (PPCvsx_cmpsel f128:$lhs, f128:$rhs, f128:$tval, f128:$fval, SETOGT)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPGTQP $lhs, $rhs)), VRRC)>;
 }
 
 class xxevalPattern <dag pattern, bits<8> imm> :
 
@@ -2918,6 +2918,39 @@ def : Pat<(PPCstore_scal_int_from_vsr f64:$src, XForm:$dst, 8),
           (STXSDX $src, XForm:$dst)>;
 def : Pat<(PPCstore_scal_int_from_vsr f128:$src, XForm:$dst, 8),
           (STXSDX (COPY_TO_REGCLASS $src, VSFRC), XForm:$dst)>;
+
+// VSX_CMPSEL on f32/f64 with plain VSX: xvcmp{eq,ge,gt}dp produces the select
+// mask for xxsel. Scalar f32 is held in a VSR in double-precision format, so
+// it is compared with the double-precision compare as well; a single-precision
+// word compare would read the wrong bits. xxsel takes its second source (XB)
+// where the mask is set, so $tval is passed as XB and $fval as XA.
+def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOEQ)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
+def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGE)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
+def : Pat<(f32 (PPCvsx_cmpsel f32:$lhs, f32:$rhs, f32:$tval, f32:$fval, SETOGT)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSSRC)>;
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPEQDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPGEDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
+          (COPY_TO_REGCLASS (XXSEL
+            (COPY_TO_REGCLASS $fval, VSRC), (COPY_TO_REGCLASS $tval, VSRC),
+            (XVCMPGTDP (COPY_TO_REGCLASS $lhs, VSRC),
+                       (COPY_TO_REGCLASS $rhs, VSRC))), VSFRC)>;
 } // HasVSX
 
 // Any big endian VSX subtarget.
@@ -3933,6 +3966,19 @@ foreach Ty = [v4i32, v4f32, v2i64, v2f64] in {
   def : Pat<(store Ty:$rS, XForm:$dst), (STXVX $rS, XForm:$dst)>;
 }
 
+// VSX_CMPSEL on f64 using the scalar xscmp{eq,ge,gt}dp compares to produce the
+// select mask. xxsel takes its second source (XB) where the mask is set, so
+// $tval is passed as XB and $fval as XA.
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOEQ)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPEQDP $lhs, $rhs)), VSFRC)>;
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGE)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPGEDP $lhs, $rhs)), VSFRC)>;
+def : Pat<(f64 (PPCvsx_cmpsel f64:$lhs, f64:$rhs, f64:$tval, f64:$fval, SETOGT)),
+          (COPY_TO_REGCLASS (XXSEL (COPY_TO_REGCLASS $fval, VSRC),
+                                   (COPY_TO_REGCLASS $tval, VSRC),
+                                   (XSCMPGTDP $lhs, $rhs)), VSFRC)>;
+
 def : Pat<(f128 (load DQForm:$src)),
           (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
 def : Pat<(f128 (load XForm:$src)),
 
@@ -8,30 +8,32 @@ define dso_local void @wibble(ptr nocapture readonly %arg, i32 signext %arg1, pt
 ; CHECK-NEXT:    li 7, 7
 ; CHECK-NEXT:    cmpwi 4, 2
 ; CHECK-NEXT:    xsaddsp 0, 0, 0
-; CHECK-NEXT:    blt 0, .LBB0_5
+; CHECK-NEXT:    blt 0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %bb6
 ; CHECK-NEXT:    clrldi 4, 4, 32
 ; CHECK-NEXT:    addi 4, 4, -1
 ; CHECK-NEXT:    mtctr 4
 ; CHECK-NEXT:    li 4, 8
-; CHECK-NEXT:    b .LBB0_3
-; CHECK-NEXT:    .p2align 5
+; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2: # %bb11
 ; CHECK-NEXT:    #
+; CHECK-NEXT:    lfsu 2, 4(3)
+; CHECK-NEXT:    xsaddsp 1, 2, 2
+; CHECK-NEXT:    xvcmpgtsp 3, 2, 0
+; CHECK-NEXT:    fcmpu 0, 2, 0
 ; CHECK-NEXT:    iselgt 7, 4, 7
 ; CHECK-NEXT:    addi 4, 4, 1
-; CHECK-NEXT:    bdz .LBB0_5
-; CHECK-NEXT:  .LBB0_3: # %bb11
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lfsu 1, 4(3)
-; CHECK-NEXT:    fcmpu 0, 1, 0
-; CHECK-NEXT:    ble 0, .LBB0_2
-; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    xsaddsp 0, 1, 1
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_5: # %bb8
+; CHECK-NEXT:    xxsel 1, 1, 0, 3
+; CHECK-NEXT:    fmr 0, 1
+; CHECK-NEXT:    bdnz .LBB0_2
+; CHECK-NEXT:  # %bb.3: # %bb8
+; CHECK-NEXT:    stw 7, 0(5)
+; CHECK-NEXT:    stfs 1, 0(6)
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_4:
+; CHECK-NEXT:    fmr 1, 0
 ; CHECK-NEXT:    stw 7, 0(5)
-; CHECK-NEXT:    stfs 0, 0(6)
+; CHECK-NEXT:    stfs 1, 0(6)
 ; CHECK-NEXT:    blr
 bb:
   %tmp = load float, ptr %arg, align 4