Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4353,15 +4353,26 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
}
}

// Treat the icmp in icmp(and, 0) as free, as we can make use of ands.
// FIXME: This can apply to more conditions and add/sub if it can be shown to
// be profitable.
// Treat the icmp in icmp(and, 0) or icmp(and, -1/1) when it can be folded to
// icmp(and, 0) as free, as we can make use of ands, but only if the
// comparison is not unsigned.
if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I &&
ICmpInst::isEquality(VecPred) &&
!CmpInst::isUnsigned(VecPred) &&
TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
match(I->getOperand(1), m_Zero()) &&
match(I->getOperand(0), m_And(m_Value(), m_Value())))
return 0;
match(I->getOperand(0), m_And(m_Value(), m_Value()))) {
if (match(I->getOperand(1), m_Zero()))
return 0;

// x >= 1 / x < 1 -> x > 0 / x <= 0
if (match(I->getOperand(1), m_One()) &&
(VecPred == CmpInst::ICMP_SLT || VecPred == CmpInst::ICMP_SGE))
return 0;

// x <= -1 / x > -1 -> x < 0 / x >= 0
if (match(I->getOperand(1), m_AllOnes()) &&
(VecPred == CmpInst::ICMP_SLE || VecPred == CmpInst::ICMP_SGT))
return 0;
}

// The base case handles scalable vectors fine for now, since it treats the
// cost as 1 * legalization cost.
Expand Down
21 changes: 20 additions & 1 deletion llvm/test/Analysis/CostModel/AArch64/cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ define void @andcmp() {
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32 = icmp eq i32 %a32, 0
; CHECK-NEXT: Cost Model: Found costs of 1 for: %a64 = and i64 undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64 = icmp ne i64 %a64, 0
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32ge = icmp sge i32 %a32, 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32le = icmp slt i32 %a32, 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32leneg = icmp sle i32 %a32, -1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c32gtneg = icmp sgt i32 %a32, -1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64ge = icmp sge i64 %a64, 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64le = icmp slt i64 %a64, 1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64leneg = icmp sle i64 %a64, -1
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64gtneg = icmp sgt i64 %a64, -1
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %a128 = and i128 undef, undef
; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %c128 = icmp eq i128 %a128, 0
; CHECK-NEXT: Cost Model: Found costs of 1 for: %av16i8 = and <16 x i8> undef, undef
Expand All @@ -62,7 +70,7 @@ define void @andcmp() {
; CHECK-NEXT: Cost Model: Found costs of 1 for: %av4i32 = and <4 x i32> undef, undef
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cv4i32 = icmp ne <4 x i32> %av4i32, zeroinitializer
; CHECK-NEXT: Cost Model: Found costs of 1 for: %c32not0 = icmp eq i32 %a32, 1
; CHECK-NEXT: Cost Model: Found costs of 1 for: %c64sle = icmp sle i64 %a64, 0
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: %c64sle = icmp sle i64 %a64, 0
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%a8 = and i8 undef, undef
Expand All @@ -73,6 +81,17 @@ define void @andcmp() {
%c32 = icmp eq i32 %a32, 0
%a64 = and i64 undef, undef
%c64 = icmp ne i64 %a64, 0

%c32ge = icmp sge i32 %a32, 1
%c32le = icmp slt i32 %a32, 1
%c32leneg = icmp sle i32 %a32, -1
%c32gtneg = icmp sgt i32 %a32, -1

%c64ge = icmp sge i64 %a64, 1
%c64le = icmp slt i64 %a64, 1
%c64leneg = icmp sle i64 %a64, -1
%c64gtneg = icmp sgt i64 %a64, -1

%a128 = and i128 undef, undef
%c128 = icmp eq i128 %a128, zeroinitializer
%av16i8 = and <16 x i8> undef, undef
Expand Down
Loading