diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f69218056fc44..944a1e2e6fa17 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -585,6 +585,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + // Custom handling for PowerPC ucmp instruction + setOperationAction(ISD::UCMP, MVT::i32, Custom); + setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no @@ -12618,6 +12622,33 @@ SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues({Sub, OverflowTrunc}, dl); } +// Lower unsigned 3-way compare producing -1/0/1. +SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue A = DAG.getFreeze(Op.getOperand(0)); + SDValue B = DAG.getFreeze(Op.getOperand(1)); + EVT OpVT = A.getValueType(); // operand type + EVT ResVT = Op.getValueType(); // result type + + // First compute diff = A - B (will become subf). + SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B); + + // Generate B - A using SUBC to capture carry. + SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); + SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A); + SDValue CA0 = SubC.getValue(1); + + // t2 = A - B + CA0 using SUBE. + SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0); + SDValue CA1 = SubE1.getValue(1); + + // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1). + SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1); + + // Extract the first result and truncate to result type if needed + return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12722,6 +12753,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UADDO_CARRY: case ISD::USUBO_CARRY: return LowerADDSUBO_CARRY(Op, DAG); + case ISD::UCMP: + return LowerUCMP(Op, DAG); } } diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 669430550f4e6..59f338782ba43 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1318,6 +1318,7 @@ namespace llvm { SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerToLibCall(const char *LibCallName, SDValue Op, SelectionDAG &DAG) const; SDValue lowerLibCallBasedOnType(const char *LibCallFloatName, diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll index 39f9269997315..4998d87cf397b 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp.ll @@ -6,12 +6,10 @@ define signext i32 @memcmp8(ptr nocapture readonly %buffer1, ptr nocapture reado ; CHECK: # %bb.0: ; CHECK-NEXT: ldbrx 3, 0, 3 ; CHECK-NEXT: ldbrx 4, 0, 4 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 8) @@ -23,11 +21,11 @@ define signext i32 @memcmp4(ptr nocapture readonly %buffer1, ptr nocapture reado ; CHECK: # %bb.0: ; CHECK-NEXT: lwbrx 3, 0, 3 ; CHECK-NEXT: lwbrx 4, 0, 4 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 +; CHECK-NEXT: extsw 3, 3 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(ptr %buffer1, ptr %buffer2, i64 4) ret i32 %call diff --git a/llvm/test/CodeGen/PowerPC/ucmp.ll b/llvm/test/CodeGen/PowerPC/ucmp.ll index d2dff6e7e05c8..4d393dd00e3db 100644 --- a/llvm/test/CodeGen/PowerPC/ucmp.ll +++ b/llvm/test/CodeGen/PowerPC/ucmp.ll @@ -4,12 +4,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_8: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -18,12 +16,10 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind { define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { ; CHECK-LABEL: ucmp_8_16: ; CHECK: # %bb.0: -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: sub 5, 4, 3 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -32,14 +28,10 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind { define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_8_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -48,12 +40,10 @@ define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind { define i8 @ucmp_8_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_8_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) ret i8 %1 @@ -82,14 +72,10 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind { define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ucmp_32_32: ; CHECK: # %bb.0: -; CHECK-NEXT: clrldi 5, 4, 32 -; CHECK-NEXT: clrldi 6, 3, 32 -; CHECK-NEXT: sub 5, 5, 6 -; CHECK-NEXT: cmplw 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: rldic 3, 3, 0, 32 -; CHECK-NEXT: rldicl 5, 5, 1, 63 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -98,12 +84,10 @@ define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind { define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_32_64: ; CHECK: # %bb.0: -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: subc 3, 4, 3 -; CHECK-NEXT: subfe 3, 4, 4 -; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: neg 3, 3 -; CHECK-NEXT: isellt 3, 4, 3 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) ret i32 %1 @@ -112,12 +96,10 @@ define i32 @ucmp_32_64(i64 %x, i64 %y) nounwind { define i64 @ucmp_64_64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: ucmp_64_64: ; CHECK: # %bb.0: -; CHECK-NEXT: subc 5, 4, 3 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, -1 -; CHECK-NEXT: subfe 5, 4, 4 -; CHECK-NEXT: neg 5, 5 -; CHECK-NEXT: isellt 3, 3, 5 +; CHECK-NEXT: subc 6, 4, 3 +; CHECK-NEXT: sub 5, 3, 4 +; CHECK-NEXT: subfe 3, 4, 3 +; CHECK-NEXT: subfe 3, 3, 5 ; CHECK-NEXT: blr %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) ret i64 %1