diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 2fc8ef6a52a52..c0f52e9b5cb16 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7950,6 +7950,7 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { Register Dst = Cmp->getReg(0); LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Cmp->getReg(1)); LLT CmpTy = DstTy.changeElementSize(1); CmpInst::Predicate LTPredicate = Cmp->isSigned() @@ -7959,16 +7960,32 @@ LegalizerHelper::lowerThreewayCompare(MachineInstr &MI) { ? CmpInst::Predicate::ICMP_SGT : CmpInst::Predicate::ICMP_UGT; - auto One = MIRBuilder.buildConstant(DstTy, 1); auto Zero = MIRBuilder.buildConstant(DstTy, 0); auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(), Cmp->getRHSReg()); - auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero); - - auto MinusOne = MIRBuilder.buildConstant(DstTy, -1); auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(), Cmp->getRHSReg()); - MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne); + + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false); + if (TLI.shouldExpandCmpUsingSelects(getApproximateEVTForLLT(SrcTy, Ctx)) || + BC == TargetLowering::UndefinedBooleanContent) { + auto One = MIRBuilder.buildConstant(DstTy, 1); + auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero); + + auto MinusOne = MIRBuilder.buildConstant(DstTy, -1); + MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne); + } else { + if (BC == TargetLowering::ZeroOrNegativeOneBooleanContent) + std::swap(IsGT, IsLT); + // Extend boolean results to DstTy, which is at least i2, before subtracting + // them. + unsigned BoolExtOp = + MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false); + IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT}); + IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT}); + MIRBuilder.buildSub(Dst, IsGT, IsLT); + } MI.eraseFromParent(); return Legalized; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir index 18c4f3c31efa5..ae16e40671785 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir @@ -8,10 +8,10 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2 @@ -31,10 +31,10 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT1]], 2 @@ -61,42 +61,13 @@ body: | ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $w3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ugt), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR2]](<8 x s8>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC]], [[UV]] - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV2]], [[TRUNC1]] - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV4]], [[XOR]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(ult), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s16>), [[UV7:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>) - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[TRUNC2]], [[UV6]] - ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C3]](s8), [[C3]](s8), [[C3]](s8), [[C3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR6]](<8 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s16>), [[UV9:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT4]](<8 x s16>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[UV8]], [[TRUNC3]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[XOR1]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND2]], [[AND3]] - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR1]](<4 x s16>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT5]], 2 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(<4 x s16>) = G_SUB [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[SUB]](<4 x s16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<4 x s32>) = G_SEXT_INREG [[ANYEXT]], 2 ; CHECK-NEXT: $q0 = COPY [[SEXT_INREG]](<4 x s32>) %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 @@ -125,15 +96,15 @@ body: | ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[DEF]](s64), [[DEF]] ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]] ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s32), [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[DEF]](s64), [[DEF]] ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[DEF]](s64), [[DEF]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP3]], [[ICMP4]] + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s32), [[ICMP3]], [[ICMP4]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[SELECT]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT2]](s32), [[C2]], [[SELECT1]] + ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[SELECT1]](s32), [[C2]], [[SELECT2]] ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SELECT3]], 2 ; CHECK-NEXT: $w0 = COPY [[SEXT_INREG]](s32) %0:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/scmp.ll b/llvm/test/CodeGen/AArch64/scmp.ll index be167fde7dbe6..7a73578f43e80 100644 --- a/llvm/test/CodeGen/AArch64/scmp.ll +++ b/llvm/test/CodeGen/AArch64/scmp.ll @@ -84,13 +84,13 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind { ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: cset w9, gt ; CHECK-GI-NEXT: csel w8, w8, w9, eq -; CHECK-GI-NEXT: tst w8, #0x1 -; CHECK-GI-NEXT: cset w8, ne ; CHECK-GI-NEXT: cmp x0, x2 ; CHECK-GI-NEXT: cset w9, lo ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: cset w10, lt ; CHECK-GI-NEXT: csel w9, w9, w10, eq +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: cset w8, ne ; CHECK-GI-NEXT: tst w9, #0x1 ; CHECK-GI-NEXT: csinv w0, w8, wzr, eq ; CHECK-GI-NEXT: ret @@ -132,88 +132,48 @@ define i64 @scmp.64.64(i64 %x, i64 %y) nounwind { } define <8 x i8> @s_v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-SD-LABEL: s_v8i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.8b, v0.8b, v1.8b -; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v8i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.8b, #1 -; CHECK-GI-NEXT: cmgt v3.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.8b, v1.8b, v0.8b -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.8b, v0.8b, v1.8b +; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b +; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret entry: %c = call <8 x i8> @llvm.scmp(<8 x i8> %a, <8 x i8> %b) ret <8 x i8> %c } define <16 x i8> @s_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-SD-LABEL: s_v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: cmgt v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.16b, #1 -; CHECK-GI-NEXT: cmgt v3.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.16b, v0.16b, v1.16b +; CHECK-NEXT: cmgt v0.16b, v1.16b, v0.16b +; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret entry: %c = call <16 x i8> @llvm.scmp(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %c } define <4 x i16> @s_v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-SD-LABEL: s_v4i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: cmgt v0.4h, v1.4h, v0.4h -; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v4i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.4h, #1 -; CHECK-GI-NEXT: cmgt v3.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.4h, v1.4h, v0.4h -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.4h, v0.4h, v1.4h +; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h +; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h +; CHECK-NEXT: ret entry: %c = call <4 x i16> @llvm.scmp(<4 x i16> %a, <4 x i16> %b) ret <4 x i16> %c } define <8 x i16> @s_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: s_v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.8h, v0.8h, v1.8h -; CHECK-SD-NEXT: cmgt v0.8h, v1.8h, v0.8h -; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.8h, #1 -; CHECK-GI-NEXT: cmgt v3.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.8h, v1.8h, v0.8h -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.8h, v0.8h, v1.8h +; CHECK-NEXT: cmgt v0.8h, v1.8h, v0.8h +; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret entry: %c = call <8 x i16> @llvm.scmp(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %c @@ -232,16 +192,12 @@ define <16 x i16> @s_v16i16(<16 x i16> %a, <16 x i16> %b) { ; ; CHECK-GI-LABEL: s_v16i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v4.8h, #1 -; CHECK-GI-NEXT: cmgt v5.8h, v0.8h, v2.8h -; CHECK-GI-NEXT: cmgt v6.8h, v1.8h, v3.8h -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmgt v4.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h ; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h ; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h -; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b -; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h ; CHECK-GI-NEXT: ret entry: %c = call <16 x i16> @llvm.scmp(<16 x i16> %a, <16 x i16> %b) @@ -249,44 +205,24 @@ entry: } define <2 x i32> @s_v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-SD-LABEL: s_v2i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.2s, v0.2s, v1.2s -; CHECK-SD-NEXT: cmgt v0.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v2i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.2s, #1 -; CHECK-GI-NEXT: cmgt v3.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.2s, v0.2s, v1.2s +; CHECK-NEXT: cmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s +; CHECK-NEXT: ret entry: %c = call <2 x i32> @llvm.scmp(<2 x i32> %a, <2 x i32> %b) ret <2 x i32> %c } define <4 x i32> @s_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: s_v4i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v4i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.4s, #1 -; CHECK-GI-NEXT: cmgt v3.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmgt v0.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.4s, v0.4s, v1.4s +; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret entry: %c = call <4 x i32> @llvm.scmp(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %c @@ -305,16 +241,12 @@ define <8 x i32> @s_v8i32(<8 x i32> %a, <8 x i32> %b) { ; ; CHECK-GI-LABEL: s_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v4.4s, #1 -; CHECK-GI-NEXT: cmgt v5.4s, v0.4s, v2.4s -; CHECK-GI-NEXT: cmgt v6.4s, v1.4s, v3.4s -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmgt v4.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: cmgt v5.4s, v1.4s, v3.4s ; CHECK-GI-NEXT: cmgt v0.4s, v2.4s, v0.4s ; CHECK-GI-NEXT: cmgt v1.4s, v3.4s, v1.4s -; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b -; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b +; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s ; CHECK-GI-NEXT: ret entry: %c = call <8 x i32> @llvm.scmp(<8 x i32> %a, <8 x i32> %b) @@ -322,23 +254,12 @@ entry: } define <2 x i64> @s_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: s_v2i64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmgt v2.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: cmgt v0.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: s_v2i64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI16_0 -; CHECK-GI-NEXT: cmgt v2.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: cmgt v0.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: s_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d +; CHECK-NEXT: cmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d +; CHECK-NEXT: ret entry: %c = call <2 x i64> @llvm.scmp(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %c @@ -357,17 +278,12 @@ define <4 x i64> @s_v4i64(<4 x i64> %a, <4 x i64> %b) { ; ; CHECK-GI-LABEL: s_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI17_0 ; CHECK-GI-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-GI-NEXT: cmgt v6.2d, v1.2d, v3.2d -; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmgt v5.2d, v1.2d, v3.2d ; CHECK-GI-NEXT: cmgt v0.2d, v2.2d, v0.2d ; CHECK-GI-NEXT: cmgt v1.2d, v3.2d, v1.2d -; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b -; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b +; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d ; CHECK-GI-NEXT: ret entry: %c = call <4 x i64> @llvm.scmp(<4 x i64> %a, <4 x i64> %b) @@ -392,16 +308,13 @@ define <16 x i8> @signOf_neon_scmp(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> ; CHECK-GI-NEXT: cmgt v5.8h, v1.8h, v3.8h ; CHECK-GI-NEXT: cmgt v0.8h, v2.8h, v0.8h ; CHECK-GI-NEXT: cmgt v1.8h, v3.8h, v1.8h -; CHECK-GI-NEXT: movi v2.16b, #1 -; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b ; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7 +; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b -; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/ucmp.ll b/llvm/test/CodeGen/AArch64/ucmp.ll index 0e4da89fcaebc..ad46e4abc477c 100644 --- a/llvm/test/CodeGen/AArch64/ucmp.ll +++ b/llvm/test/CodeGen/AArch64/ucmp.ll @@ -84,13 +84,13 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: cset w9, hi ; CHECK-GI-NEXT: csel w8, w8, w9, eq -; CHECK-GI-NEXT: tst w8, #0x1 -; CHECK-GI-NEXT: cset w8, ne ; CHECK-GI-NEXT: cmp x0, x2 ; CHECK-GI-NEXT: cset w9, lo ; CHECK-GI-NEXT: cmp x1, x3 ; CHECK-GI-NEXT: cset w10, lo ; CHECK-GI-NEXT: csel w9, w9, w10, eq +; CHECK-GI-NEXT: tst w8, #0x1 +; CHECK-GI-NEXT: cset w8, ne ; CHECK-GI-NEXT: tst w9, #0x1 ; CHECK-GI-NEXT: csinv w0, w8, wzr, eq ; CHECK-GI-NEXT: ret @@ -154,15 +154,15 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) { ; CHECK-GI-NEXT: cmp x8, x9 ; CHECK-GI-NEXT: cset w11, hi ; CHECK-GI-NEXT: csel w10, w10, w11, eq -; CHECK-GI-NEXT: tst w10, #0x1 -; CHECK-GI-NEXT: cset x10, ne ; CHECK-GI-NEXT: cmp x0, x2 ; CHECK-GI-NEXT: cset w11, lo ; CHECK-GI-NEXT: cmp x8, x9 ; CHECK-GI-NEXT: cset w8, lo ; CHECK-GI-NEXT: csel w8, w11, w8, eq +; CHECK-GI-NEXT: tst w10, #0x1 +; CHECK-GI-NEXT: cset x9, ne ; CHECK-GI-NEXT: tst w8, #0x1 -; CHECK-GI-NEXT: csinv x8, x10, xzr, eq +; CHECK-GI-NEXT: csinv x8, x9, xzr, eq ; CHECK-GI-NEXT: fmov d0, x8 ; CHECK-GI-NEXT: ret %1 = call <1 x i64> @llvm.ucmp(<1 x i65> %x, <1 x i65> %y) @@ -170,88 +170,48 @@ define <1 x i64> @ucmp.1.64.65(<1 x i65> %x, <1 x i65> %y) { } define <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-SD-LABEL: u_v8i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.8b, v0.8b, v1.8b -; CHECK-SD-NEXT: cmhi v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: sub v0.8b, v0.8b, v2.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v8i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.8b, #1 -; CHECK-GI-NEXT: cmhi v3.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.8b, v1.8b, v0.8b -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v8i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.8b, v0.8b, v1.8b +; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b +; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ret entry: %c = call <8 x i8> @llvm.ucmp(<8 x i8> %a, <8 x i8> %b) ret <8 x i8> %c } define <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-SD-LABEL: u_v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: cmhi v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: sub v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.16b, #1 -; CHECK-GI-NEXT: cmhi v3.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.16b, v1.16b, v0.16b -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b +; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b +; CHECK-NEXT: ret entry: %c = call <16 x i8> @llvm.ucmp(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %c } define <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) { -; CHECK-SD-LABEL: u_v4i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.4h, v0.4h, v1.4h -; CHECK-SD-NEXT: cmhi v0.4h, v1.4h, v0.4h -; CHECK-SD-NEXT: sub v0.4h, v0.4h, v2.4h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v4i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.4h, #1 -; CHECK-GI-NEXT: cmhi v3.4h, v0.4h, v1.4h -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.4h, v1.4h, v0.4h -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.4h, v0.4h, v1.4h +; CHECK-NEXT: cmhi v0.4h, v1.4h, v0.4h +; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h +; CHECK-NEXT: ret entry: %c = call <4 x i16> @llvm.ucmp(<4 x i16> %a, <4 x i16> %b) ret <4 x i16> %c } define <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: u_v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.8h, v0.8h, v1.8h -; CHECK-SD-NEXT: cmhi v0.8h, v1.8h, v0.8h -; CHECK-SD-NEXT: sub v0.8h, v0.8h, v2.8h -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.8h, #1 -; CHECK-GI-NEXT: cmhi v3.8h, v0.8h, v1.8h -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.8h, v1.8h, v0.8h -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.8h, v0.8h, v1.8h +; CHECK-NEXT: cmhi v0.8h, v1.8h, v0.8h +; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h +; CHECK-NEXT: ret entry: %c = call <8 x i16> @llvm.ucmp(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %c @@ -270,16 +230,12 @@ define <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) { ; ; CHECK-GI-LABEL: u_v16i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v4.8h, #1 -; CHECK-GI-NEXT: cmhi v5.8h, v0.8h, v2.8h -; CHECK-GI-NEXT: cmhi v6.8h, v1.8h, v3.8h -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmhi v4.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h ; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h ; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h -; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b -; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sub v1.8h, v1.8h, v5.8h ; CHECK-GI-NEXT: ret entry: %c = call <16 x i16> @llvm.ucmp(<16 x i16> %a, <16 x i16> %b) @@ -287,44 +243,24 @@ entry: } define <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) { -; CHECK-SD-LABEL: u_v2i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.2s, v0.2s, v1.2s -; CHECK-SD-NEXT: cmhi v0.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: sub v0.2s, v0.2s, v2.2s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v2i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.2s, #1 -; CHECK-GI-NEXT: cmhi v3.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: movi d4, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: and v2.8b, v2.8b, v3.8b -; CHECK-GI-NEXT: bsl v0.8b, v4.8b, v2.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.2s, v0.2s, v1.2s +; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s +; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s +; CHECK-NEXT: ret entry: %c = call <2 x i32> @llvm.ucmp(<2 x i32> %a, <2 x i32> %b) ret <2 x i32> %c } define <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-SD-LABEL: u_v4i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: cmhi v0.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: sub v0.4s, v0.4s, v2.4s -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v4i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v2.4s, #1 -; CHECK-GI-NEXT: cmhi v3.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: cmhi v0.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: and v2.16b, v2.16b, v3.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v1.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ret entry: %c = call <4 x i32> @llvm.ucmp(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %c @@ -343,16 +279,12 @@ define <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) { ; ; CHECK-GI-LABEL: u_v8i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: movi v4.4s, #1 -; CHECK-GI-NEXT: cmhi v5.4s, v0.4s, v2.4s -; CHECK-GI-NEXT: cmhi v6.4s, v1.4s, v3.4s -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmhi v4.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: cmhi v5.4s, v1.4s, v3.4s ; CHECK-GI-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-GI-NEXT: cmhi v1.4s, v3.4s, v1.4s -; CHECK-GI-NEXT: and v5.16b, v4.16b, v5.16b -; CHECK-GI-NEXT: and v4.16b, v4.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v5.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v4.16b +; CHECK-GI-NEXT: sub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sub v1.4s, v1.4s, v5.4s ; CHECK-GI-NEXT: ret entry: %c = call <8 x i32> @llvm.ucmp(<8 x i32> %a, <8 x i32> %b) @@ -360,23 +292,12 @@ entry: } define <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-SD-LABEL: u_v2i64: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cmhi v2.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: cmhi v0.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: sub v0.2d, v0.2d, v2.2d -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: u_v2i64: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI17_0 -; CHECK-GI-NEXT: cmhi v2.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: movi v4.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: cmhi v0.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: and v2.16b, v3.16b, v2.16b -; CHECK-GI-NEXT: bsl v0.16b, v4.16b, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: u_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d +; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d +; CHECK-NEXT: sub v0.2d, v0.2d, v2.2d +; CHECK-NEXT: ret entry: %c = call <2 x i64> @llvm.ucmp(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %c @@ -395,17 +316,12 @@ define <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) { ; ; CHECK-GI-LABEL: u_v4i64: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI18_0 ; CHECK-GI-NEXT: cmhi v4.2d, v0.2d, v2.2d -; CHECK-GI-NEXT: cmhi v6.2d, v1.2d, v3.2d -; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI18_0] -; CHECK-GI-NEXT: movi v7.2d, #0xffffffffffffffff +; CHECK-GI-NEXT: cmhi v5.2d, v1.2d, v3.2d ; CHECK-GI-NEXT: cmhi v0.2d, v2.2d, v0.2d ; CHECK-GI-NEXT: cmhi v1.2d, v3.2d, v1.2d -; CHECK-GI-NEXT: and v4.16b, v5.16b, v4.16b -; CHECK-GI-NEXT: and v5.16b, v5.16b, v6.16b -; CHECK-GI-NEXT: bsl v0.16b, v7.16b, v4.16b -; CHECK-GI-NEXT: bsl v1.16b, v7.16b, v5.16b +; CHECK-GI-NEXT: sub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sub v1.2d, v1.2d, v5.2d ; CHECK-GI-NEXT: ret entry: %c = call <4 x i64> @llvm.ucmp(<4 x i64> %a, <4 x i64> %b) @@ -430,16 +346,13 @@ define <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_ ; CHECK-GI-NEXT: cmhi v5.8h, v1.8h, v3.8h ; CHECK-GI-NEXT: cmhi v0.8h, v2.8h, v0.8h ; CHECK-GI-NEXT: cmhi v1.8h, v3.8h, v1.8h -; CHECK-GI-NEXT: movi v2.16b, #1 -; CHECK-GI-NEXT: movi v3.2d, #0xffffffffffffffff -; CHECK-GI-NEXT: uzp1 v4.16b, v4.16b, v5.16b ; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: shl v1.16b, v4.16b, #7 +; CHECK-GI-NEXT: uzp1 v1.16b, v4.16b, v5.16b ; CHECK-GI-NEXT: shl v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: shl v1.16b, v1.16b, #7 ; CHECK-GI-NEXT: sshr v0.16b, v0.16b, #7 -; CHECK-GI-NEXT: and v1.16b, v2.16b, v1.16b -; CHECK-GI-NEXT: bsl v0.16b, v3.16b, v1.16b +; CHECK-GI-NEXT: sshr v1.16b, v1.16b, #7 +; CHECK-GI-NEXT: sub v0.16b, v0.16b, v1.16b ; CHECK-GI-NEXT: ret entry: %0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir index 5c3d7e5975f1f..4ffca796a4c20 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv32.mir @@ -12,14 +12,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]] - ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 @@ -40,14 +36,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[C]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s32), [[C2]], [[SELECT]] - ; CHECK-NEXT: $x10 = COPY [[SELECT1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $x10 = COPY [[SUB]](s32) ; CHECK-NEXT: PseudoRET implicit $x10 %0:_(s32) = COPY $x10 %1:_(s32) = COPY $x11 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir index ccade88ffae7c..9e60a767c55fe 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-threeway-cmp-rv64.mir @@ -12,20 +12,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32) - ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) @@ -48,20 +41,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C1]](s64) ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY]], 32 ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 32 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s64) = G_ICMP intpred(sgt), [[SEXT_INREG]](s64), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s64), [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s64) = G_ICMP intpred(slt), [[SEXT_INREG]](s64), [[SEXT_INREG1]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s64), [[TRUNC2]], [[SELECT]] - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SELECT1]](s32) - ; CHECK-NEXT: $x10 = COPY [[SEXT]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 32 + ; CHECK-NEXT: $x10 = COPY [[SEXT_INREG2]](s64) ; CHECK-NEXT: PseudoRET implicit $x10 %2:_(s64) = COPY $x10 %0:_(s32) = G_TRUNC %2(s64) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll index 0f2b6281b6f88..4346e04ecda66 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/scmp.ll @@ -5,34 +5,16 @@ define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind { ; RV32I-LABEL: scmp.8.8: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt a1, a0, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: blt a2, a1, .LBB0_3 -; RV32I-NEXT: j .LBB0_4 -; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bge a2, a1, .LBB0_4 -; RV32I-NEXT: .LBB0_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB0_4: +; RV32I-NEXT: slt a2, a1, a0 +; RV32I-NEXT: slt a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.8.8: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt a1, a0, .LBB0_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB0_3 -; RV64I-NEXT: j .LBB0_4 -; RV64I-NEXT: .LBB0_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB0_4 -; RV64I-NEXT: .LBB0_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB0_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.scmp(i8 %x, i8 %y) ret i8 %1 @@ -41,34 +23,16 @@ define i8 @scmp.8.8(i8 signext %x, i8 signext %y) nounwind { define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind { ; RV32I-LABEL: scmp.8.16: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt a1, a0, .LBB1_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: blt a2, a1, .LBB1_3 -; RV32I-NEXT: j .LBB1_4 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bge a2, a1, .LBB1_4 -; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: slt a2, a1, a0 +; RV32I-NEXT: slt a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.8.16: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt a1, a0, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB1_3 -; RV64I-NEXT: j .LBB1_4 -; RV64I-NEXT: .LBB1_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB1_4 -; RV64I-NEXT: .LBB1_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB1_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.scmp(i16 %x, i16 %y) ret i8 %1 @@ -77,35 +41,18 @@ define i8 @scmp.8.16(i16 signext %x, i16 signext %y) nounwind { define i8 @scmp.8.32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: scmp.8.32: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt a1, a0, .LBB2_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: blt a2, a1, .LBB2_3 -; RV32I-NEXT: j .LBB2_4 -; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bge a2, a1, .LBB2_4 -; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB2_4: +; RV32I-NEXT: slt a2, a1, a0 +; RV32I-NEXT: slt a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.8.32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a0 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: sext.w a1, a1 -; RV64I-NEXT: blt a1, a2, .LBB2_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB2_3 -; RV64I-NEXT: j .LBB2_4 -; RV64I-NEXT: .LBB2_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB2_4 -; RV64I-NEXT: .LBB2_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB2_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.scmp(i32 %x, i32 %y) ret i8 %1 @@ -117,42 +64,20 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt a4, a3, a1 -; RV32I-NEXT: bnez a4, .LBB3_3 -; RV32I-NEXT: j .LBB3_4 +; RV32I-NEXT: slt a0, a1, a3 +; RV32I-NEXT: sub a0, a4, a0 +; RV32I-NEXT: ret ; RV32I-NEXT: .LBB3_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: beqz a4, .LBB3_4 -; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: li a4, 1 -; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: beq a1, a3, .LBB3_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a0, a1, a3 -; RV32I-NEXT: bnez a0, .LBB3_7 -; RV32I-NEXT: j .LBB3_8 -; RV32I-NEXT: .LBB3_6: ; RV32I-NEXT: sltu a0, a0, a2 -; RV32I-NEXT: beqz a0, .LBB3_8 -; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: li a4, -1 -; RV32I-NEXT: .LBB3_8: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.8.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt a1, a0, .LBB3_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB3_3 -; RV64I-NEXT: j .LBB3_4 -; RV64I-NEXT: .LBB3_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB3_4 -; RV64I-NEXT: .LBB3_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB3_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.scmp(i64 %x, i64 %y) ret i8 %1 @@ -161,35 +86,18 @@ define i8 @scmp.8.64(i64 %x, i64 %y) nounwind { define i32 @scmp.32.32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: scmp.32.32: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: blt a1, a0, .LBB4_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: blt a2, a1, .LBB4_3 -; RV32I-NEXT: j .LBB4_4 -; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bge a2, a1, .LBB4_4 -; RV32I-NEXT: .LBB4_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB4_4: +; RV32I-NEXT: slt a2, a1, a0 +; RV32I-NEXT: slt a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.32.32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a0 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: sext.w a1, a1 -; RV64I-NEXT: blt a1, a2, .LBB4_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB4_3 -; RV64I-NEXT: j .LBB4_4 -; RV64I-NEXT: .LBB4_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB4_4 -; RV64I-NEXT: .LBB4_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB4_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.scmp(i32 %x, i32 %y) ret i32 %1 @@ -201,42 +109,20 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB5_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt a4, a3, a1 -; RV32I-NEXT: bnez a4, .LBB5_3 -; RV32I-NEXT: j .LBB5_4 +; RV32I-NEXT: slt a0, a1, a3 +; RV32I-NEXT: sub a0, a4, a0 +; RV32I-NEXT: ret ; RV32I-NEXT: .LBB5_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: beqz a4, .LBB5_4 -; RV32I-NEXT: .LBB5_3: -; RV32I-NEXT: li a4, 1 -; RV32I-NEXT: .LBB5_4: -; RV32I-NEXT: beq a1, a3, .LBB5_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a0, a1, a3 -; RV32I-NEXT: bnez a0, .LBB5_7 -; RV32I-NEXT: j .LBB5_8 -; RV32I-NEXT: .LBB5_6: ; RV32I-NEXT: sltu a0, a0, a2 -; RV32I-NEXT: beqz a0, .LBB5_8 -; RV32I-NEXT: .LBB5_7: -; RV32I-NEXT: li a4, -1 -; RV32I-NEXT: .LBB5_8: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.32.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt a1, a0, .LBB5_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB5_3 -; RV64I-NEXT: j .LBB5_4 -; RV64I-NEXT: .LBB5_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB5_4 -; RV64I-NEXT: .LBB5_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB5_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.scmp(i64 %x, i64 %y) ret i32 %1 @@ -245,46 +131,25 @@ define i32 @scmp.32.64(i64 %x, i64 %y) nounwind { define i64 @scmp.64.64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: scmp.64.64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: beq a1, a3, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a0, a3, a1 -; RV32I-NEXT: bnez a0, .LBB6_3 -; RV32I-NEXT: j .LBB6_4 +; RV32I-NEXT: slt a4, a3, a1 +; RV32I-NEXT: slt a1, a1, a3 +; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sltu a0, a2, a4 -; RV32I-NEXT: beqz a0, .LBB6_4 +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: sltu a1, a0, a2 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: .LBB6_4: -; RV32I-NEXT: beq a1, a3, .LBB6_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: slt a1, a1, a3 -; RV32I-NEXT: bnez a1, .LBB6_7 -; RV32I-NEXT: j .LBB6_8 -; RV32I-NEXT: .LBB6_6: -; RV32I-NEXT: sltu a1, a4, a2 -; RV32I-NEXT: beqz a1, .LBB6_8 -; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: li a1, -1 -; RV32I-NEXT: .LBB6_8: +; RV32I-NEXT: sub a0, a4, a1 +; RV32I-NEXT: sltu a1, a4, a1 +; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: scmp.64.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: blt a1, a0, .LBB6_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: blt a2, a1, .LBB6_3 -; RV64I-NEXT: j .LBB6_4 -; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bge a2, a1, .LBB6_4 -; RV64I-NEXT: .LBB6_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB6_4: +; RV64I-NEXT: slt a2, a1, a0 +; RV64I-NEXT: slt a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i64 @llvm.scmp(i64 %x, i64 %y) ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll index e2a95eb974342..c3abf51fd05bc 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/ucmp.ll @@ -5,34 +5,16 @@ define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind { ; RV32I-LABEL: ucmp.8.8: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB0_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB0_3 -; RV32I-NEXT: j .LBB0_4 -; RV32I-NEXT: .LBB0_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB0_4 -; RV32I-NEXT: .LBB0_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB0_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.8.8: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB0_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB0_3 -; RV64I-NEXT: j .LBB0_4 -; RV64I-NEXT: .LBB0_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB0_4 -; RV64I-NEXT: .LBB0_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB0_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.ucmp(i8 %x, i8 %y) ret i8 %1 @@ -41,34 +23,16 @@ define i8 @ucmp.8.8(i8 zeroext %x, i8 zeroext %y) nounwind { define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind { ; RV32I-LABEL: ucmp.8.16: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB1_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB1_3 -; RV32I-NEXT: j .LBB1_4 -; RV32I-NEXT: .LBB1_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB1_4 -; RV32I-NEXT: .LBB1_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB1_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.8.16: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB1_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB1_3 -; RV64I-NEXT: j .LBB1_4 -; RV64I-NEXT: .LBB1_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB1_4 -; RV64I-NEXT: .LBB1_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB1_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.ucmp(i16 %x, i16 %y) ret i8 %1 @@ -77,37 +41,20 @@ define i8 @ucmp.8.16(i16 zeroext %x, i16 zeroext %y) nounwind { define i8 @ucmp.8.32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: ucmp.8.32: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB2_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB2_3 -; RV32I-NEXT: j .LBB2_4 -; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB2_4 -; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB2_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.8.32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srli a2, a2, 32 -; RV64I-NEXT: bltu a2, a1, .LBB2_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a1, a2, .LBB2_3 -; RV64I-NEXT: j .LBB2_4 -; RV64I-NEXT: .LBB2_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a1, a2, .LBB2_4 -; RV64I-NEXT: .LBB2_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB2_4: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.ucmp(i32 %x, i32 %y) ret i8 %1 @@ -119,42 +66,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a4, a3, a1 -; RV32I-NEXT: bnez a4, .LBB3_3 -; RV32I-NEXT: j .LBB3_4 +; RV32I-NEXT: sltu a0, a1, a3 +; RV32I-NEXT: sub a0, a4, a0 +; RV32I-NEXT: ret ; RV32I-NEXT: .LBB3_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: beqz a4, .LBB3_4 -; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: li a4, 1 -; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: beq a1, a3, .LBB3_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: sltu a0, a1, a3 -; RV32I-NEXT: bnez a0, .LBB3_7 -; RV32I-NEXT: j .LBB3_8 -; RV32I-NEXT: .LBB3_6: ; RV32I-NEXT: sltu a0, a0, a2 -; RV32I-NEXT: beqz a0, .LBB3_8 -; RV32I-NEXT: .LBB3_7: -; RV32I-NEXT: li a4, -1 -; RV32I-NEXT: .LBB3_8: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.8.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB3_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB3_3 -; RV64I-NEXT: j .LBB3_4 -; RV64I-NEXT: .LBB3_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB3_4 -; RV64I-NEXT: .LBB3_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB3_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i8 @llvm.ucmp(i64 %x, i64 %y) ret i8 %1 @@ -163,37 +88,20 @@ define i8 @ucmp.8.64(i64 %x, i64 %y) nounwind { define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { ; RV32I-LABEL: ucmp.32.32: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB4_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB4_3 -; RV32I-NEXT: j .LBB4_4 -; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB4_4 -; RV32I-NEXT: .LBB4_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB4_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.32.32: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srli a2, a2, 32 -; RV64I-NEXT: bltu a2, a1, .LBB4_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a1, a2, .LBB4_3 -; RV64I-NEXT: j .LBB4_4 -; RV64I-NEXT: .LBB4_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a1, a2, .LBB4_4 -; RV64I-NEXT: .LBB4_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB4_4: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -202,37 +110,20 @@ define i32 @ucmp.32.32(i32 %x, i32 %y) nounwind { define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind { ; RV32I-LABEL: ucmp.32.32_sext: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB5_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB5_3 -; RV32I-NEXT: j .LBB5_4 -; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB5_4 -; RV32I-NEXT: .LBB5_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB5_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.32.32_sext: ; RV64I: # %bb.0: ; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: slli a2, a1, 32 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: srli a2, a2, 32 -; RV64I-NEXT: bltu a2, a1, .LBB5_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a1, a2, .LBB5_3 -; RV64I-NEXT: j .LBB5_4 -; RV64I-NEXT: .LBB5_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a1, a2, .LBB5_4 -; RV64I-NEXT: .LBB5_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB5_4: +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -241,34 +132,16 @@ define i32 @ucmp.32.32_sext(i32 signext %x, i32 signext %y) nounwind { define i32 @ucmp.32.32_zext(i32 zeroext %x, i32 zeroext %y) nounwind { ; RV32I-LABEL: ucmp.32.32_zext: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a2, a0 -; RV32I-NEXT: bltu a1, a0, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: li a0, 0 -; RV32I-NEXT: bltu a2, a1, .LBB6_3 -; RV32I-NEXT: j .LBB6_4 -; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: bgeu a2, a1, .LBB6_4 -; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: .LBB6_4: +; RV32I-NEXT: sltu a2, a1, a0 +; RV32I-NEXT: sltu a0, a0, a1 +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.32.32_zext: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB6_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB6_3 -; RV64I-NEXT: j .LBB6_4 -; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB6_4 -; RV64I-NEXT: .LBB6_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB6_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.ucmp(i32 %x, i32 %y) ret i32 %1 @@ -280,42 +153,20 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: beq a1, a3, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu a4, a3, a1 -; RV32I-NEXT: bnez a4, .LBB7_3 -; RV32I-NEXT: j .LBB7_4 +; RV32I-NEXT: sltu a0, a1, a3 +; RV32I-NEXT: sub a0, a4, a0 +; RV32I-NEXT: ret ; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT: sltu a4, a2, a0 -; RV32I-NEXT: beqz a4, .LBB7_4 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: li a4, 1 -; RV32I-NEXT: .LBB7_4: -; RV32I-NEXT: beq a1, a3, .LBB7_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: sltu a0, a1, a3 -; RV32I-NEXT: bnez a0, .LBB7_7 -; RV32I-NEXT: j .LBB7_8 -; RV32I-NEXT: .LBB7_6: ; RV32I-NEXT: sltu a0, a0, a2 -; RV32I-NEXT: beqz a0, .LBB7_8 -; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: li a4, -1 -; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: sub a0, a4, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.32.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB7_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB7_3 -; RV64I-NEXT: j .LBB7_4 -; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB7_4 -; RV64I-NEXT: .LBB7_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB7_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i32 @llvm.ucmp(i64 %x, i64 %y) ret i32 %1 @@ -324,46 +175,25 @@ define i32 @ucmp.32.64(i64 %x, i64 %y) nounwind { define i64 @ucmp.64.64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: ucmp.64.64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: beq a1, a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a0, a3, a1 -; RV32I-NEXT: bnez a0, .LBB8_3 -; RV32I-NEXT: j .LBB8_4 +; RV32I-NEXT: sltu a4, a3, a1 +; RV32I-NEXT: sltu a1, a1, a3 +; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sltu a0, a2, a4 -; RV32I-NEXT: beqz a0, .LBB8_4 +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: sltu a1, a0, a2 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: .LBB8_4: -; RV32I-NEXT: beq a1, a3, .LBB8_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: sltu a1, a1, a3 -; RV32I-NEXT: bnez a1, .LBB8_7 -; RV32I-NEXT: j .LBB8_8 -; RV32I-NEXT: .LBB8_6: -; RV32I-NEXT: sltu a1, a4, a2 -; RV32I-NEXT: beqz a1, .LBB8_8 -; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: li a0, -1 -; RV32I-NEXT: li a1, -1 -; RV32I-NEXT: .LBB8_8: +; RV32I-NEXT: sub a0, a4, a1 +; RV32I-NEXT: sltu a1, a4, a1 +; RV32I-NEXT: neg a1, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ucmp.64.64: ; RV64I: # %bb.0: -; RV64I-NEXT: mv a2, a0 -; RV64I-NEXT: bltu a1, a0, .LBB8_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: li a0, 0 -; RV64I-NEXT: bltu a2, a1, .LBB8_3 -; RV64I-NEXT: j .LBB8_4 -; RV64I-NEXT: .LBB8_2: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: bgeu a2, a1, .LBB8_4 -; RV64I-NEXT: .LBB8_3: -; RV64I-NEXT: li a0, -1 -; RV64I-NEXT: .LBB8_4: +; RV64I-NEXT: sltu a2, a1, a0 +; RV64I-NEXT: sltu a0, a0, a1 +; RV64I-NEXT: sub a0, a2, a0 ; RV64I-NEXT: ret %1 = call i64 @llvm.ucmp(i64 %x, i64 %y) ret i64 %1