diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 93a48ce2b8c72..e917ef3f5e8c9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -198,6 +198,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setOperationAction(ISD::UADDO, RegVT, Custom);
 
+  // On P10, the default lowering generates better code using the
+  // setbc instruction.
+  if (!Subtarget.hasP10Vector())
+    setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -12041,6 +12046,27 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
   return Res;
 }
 
+/// Custom lowering for ISD::SSUBO. Signed subtraction overflows iff the sign
+/// bit of (RHS ^ LHS) & (Sub ^ LHS) is set; that bit is shifted down to bit 0
+/// and truncated to the overflow result type. Returns {difference, overflow}.
+SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // Use the node's own result type instead of hard-coding i32.
+  EVT VT = Op.getNode()->getValueType(0);
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
+  SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
+  SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
+  SDValue Overflow =
+      DAG.getNode(ISD::SRL, dl, VT, And,
+                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, MVT::i32));
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+  return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
/// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12063,6 +12089,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SSUBO: + return LowerSSUBO(Op, DAG); case ISD::INLINEASM: case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 1fd4b83d6c119..1c63444db427d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1285,6 +1285,7 @@ namespace llvm { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll index fd5f26ba35742..4c11f7f919a3c 100644 --- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll +++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll @@ -129,12 +129,11 @@ entry: define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_ssubo_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sub 5, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 5, 4, 3 +; CHECK-NEXT: sub 4, 3, 4 +; CHECK-NEXT: xor 3, 4, 3 +; CHECK-NEXT: and 3, 5, 3 +; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr entry: %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind diff --git 
a/llvm/test/CodeGen/PowerPC/ssubo-32.ll b/llvm/test/CodeGen/PowerPC/ssubo-32.ll index 7a42007b8a11a..488d1e26fa36e 100644 --- a/llvm/test/CodeGen/PowerPC/ssubo-32.ll +++ b/llvm/test/CodeGen/PowerPC/ssubo-32.ll @@ -6,13 +6,12 @@ define i1 @subovfi_i32(i32 noundef %a, i32 noundef %b, ptr %c) { ; CHECK-LABEL: subovfi_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sub 6, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 6, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: stw 6, 0(5) -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 6, 4, 3 +; CHECK-NEXT: sub 4, 3, 4 +; CHECK-NEXT: xor 3, 4, 3 +; CHECK-NEXT: stw 4, 0(5) +; CHECK-NEXT: and 3, 6, 3 +; CHECK-NEXT: srwi 3, 3, 31 ; CHECK-NEXT: blr entry: %0 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)