diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 0232ac421aeda..ed051f295752e 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4353,15 +4353,26 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost( } } - // Treat the icmp in icmp(and, 0) as free, as we can make use of ands. - // FIXME: This can apply to more conditions and add/sub if it can be shown to - // be profitable. + // Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to + // icmp(and, 0) as free, as we can make use of ands, but only if the + // comparison is not unsigned. if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I && - ICmpInst::isEquality(VecPred) && + !CmpInst::isUnsigned(VecPred) && TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) && - match(I->getOperand(1), m_Zero()) && - match(I->getOperand(0), m_And(m_Value(), m_Value()))) - return 0; + match(I->getOperand(0), m_And(m_Value(), m_Value()))) { + if (match(I->getOperand(1), m_Zero())) + return 0; + + // x >= 1 / x < 1 -> x > 0 / x <= 0 + if (match(I->getOperand(1), m_One()) && + (VecPred == CmpInst::ICMP_SLT || VecPred == CmpInst::ICMP_SGE)) + return 0; + + // x <= -1 / x > -1 -> x < 0 / x >= 0 + if (match(I->getOperand(1), m_AllOnes()) && + (VecPred == CmpInst::ICMP_SLE || VecPred == CmpInst::ICMP_SGT)) + return 0; + } // The base case handles scalable vectors fine for now, since it treats the // cost as 1 * legalization cost. 
diff --git a/llvm/test/Analysis/CostModel/AArch64/cmp.ll b/llvm/test/Analysis/CostModel/AArch64/cmp.ll index aba113865af10..16b3913f52028 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cmp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cmp.ll @@ -53,6 +53,14 @@ define void @andcmp() { ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32 = icmp eq i32 %a32, 0 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %a64 = and i64 undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64 = icmp ne i64 %a64, 0 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32ge = icmp sge i32 %a32, 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32le = icmp slt i32 %a32, 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32leneg = icmp sle i32 %a32, -1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32gtneg = icmp sgt i32 %a32, -1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64ge = icmp sge i64 %a64, 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64le = icmp slt i64 %a64, 1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1 ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp eq i128 %a128, 0 ; CHECK-NEXT: Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef @@ -62,7 +70,7 @@ define void @andcmp() { ; CHECK-NEXT: Cost Model: Found costs of 1 for: %av4i32 = and <4 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv4i32 = icmp ne <4 x 
i32> %av4i32, zeroinitializer ; CHECK-NEXT: Cost Model: Found costs of 1 for: %c32not0 = icmp eq i32 %a32, 1 -; CHECK-NEXT: Cost Model: Found costs of 1 for: %c64sle = icmp sle i64 %a64, 0 +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64sle = icmp sle i64 %a64, 0 ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %a8 = and i8 undef, undef @@ -73,6 +81,17 @@ define void @andcmp() { %c32 = icmp eq i32 %a32, 0 %a64 = and i64 undef, undef %c64 = icmp ne i64 %a64, 0 + + %c32ge = icmp sge i32 %a32, 1 + %c32le = icmp slt i32 %a32, 1 + %c32leneg = icmp sle i32 %a32, -1 + %c32gtneg = icmp sgt i32 %a32, -1 + + %c64ge = icmp sge i64 %a64, 1 + %c64le = icmp slt i64 %a64, 1 + %c64leneg = icmp sle i64 %a64, -1 + %c64gtneg = icmp sgt i64 %a64, -1 + %a128 = and i128 undef, undef %c128 = icmp eq i128 %a128, zeroinitializer %av16i8 = and <16 x i8> undef, undef