@@ -1650,6 +1650,47 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1650
1650
int ISD = TLI->InstructionOpcodeToISD (Opcode);
1651
1651
assert (ISD && " Invalid opcode" );
1652
1652
1653
+ unsigned ExtraCost = 0 ;
1654
+ if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
1655
+ // Some vector comparison predicates cost extra instructions.
1656
+ if (MTy.isVector () &&
1657
+ !((ST->hasXOP () && (!ST->hasAVX2 () || MTy.is128BitVector ())) ||
1658
+ (ST->hasAVX512 () && 32 <= MTy.getScalarSizeInBits ()) ||
1659
+ ST->hasBWI ())) {
1660
+ switch (cast<CmpInst>(I)->getPredicate ()) {
1661
+ case CmpInst::Predicate::ICMP_NE:
1662
+ // xor(cmpeq(x,y),-1)
1663
+ ExtraCost = 1 ;
1664
+ break ;
1665
+ case CmpInst::Predicate::ICMP_SGE:
1666
+ case CmpInst::Predicate::ICMP_SLE:
1667
+ // xor(cmpgt(x,y),-1)
1668
+ ExtraCost = 1 ;
1669
+ break ;
1670
+ case CmpInst::Predicate::ICMP_ULT:
1671
+ case CmpInst::Predicate::ICMP_UGT:
1672
+ // cmpgt(xor(x,signbit),xor(y,signbit))
1673
+ // xor(cmpeq(pmaxu(x,y),x),-1)
1674
+ ExtraCost = 2 ;
1675
+ break ;
1676
+ case CmpInst::Predicate::ICMP_ULE:
1677
+ case CmpInst::Predicate::ICMP_UGE:
1678
+ if ((ST->hasSSE41 () && MTy.getScalarSizeInBits () == 32 ) ||
1679
+ (ST->hasSSE2 () && MTy.getScalarSizeInBits () < 32 )) {
1680
+ // cmpeq(psubus(x,y),0)
1681
+ // cmpeq(pminu(x,y),x)
1682
+ ExtraCost = 1 ;
1683
+ } else {
1684
+ // xor(cmpgt(xor(x,signbit),xor(y,signbit)),-1)
1685
+ ExtraCost = 3 ;
1686
+ }
1687
+ break ;
1688
+ default :
1689
+ break ;
1690
+ }
1691
+ }
1692
+ }
1693
+
1653
1694
static const CostTblEntry AVX512BWCostTbl[] = {
1654
1695
{ ISD::SETCC, MVT::v32i16, 1 },
1655
1696
{ ISD::SETCC, MVT::v64i8, 1 },
@@ -1738,35 +1779,35 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1738
1779
1739
1780
if (ST->hasBWI ())
1740
1781
if (const auto *Entry = CostTableLookup (AVX512BWCostTbl, ISD, MTy))
1741
- return LT.first * Entry->Cost ;
1782
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1742
1783
1743
1784
if (ST->hasAVX512 ())
1744
1785
if (const auto *Entry = CostTableLookup (AVX512CostTbl, ISD, MTy))
1745
- return LT.first * Entry->Cost ;
1786
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1746
1787
1747
1788
if (ST->hasAVX2 ())
1748
1789
if (const auto *Entry = CostTableLookup (AVX2CostTbl, ISD, MTy))
1749
- return LT.first * Entry->Cost ;
1790
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1750
1791
1751
1792
if (ST->hasAVX ())
1752
1793
if (const auto *Entry = CostTableLookup (AVX1CostTbl, ISD, MTy))
1753
- return LT.first * Entry->Cost ;
1794
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1754
1795
1755
1796
if (ST->hasSSE42 ())
1756
1797
if (const auto *Entry = CostTableLookup (SSE42CostTbl, ISD, MTy))
1757
- return LT.first * Entry->Cost ;
1798
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1758
1799
1759
1800
if (ST->hasSSE41 ())
1760
1801
if (const auto *Entry = CostTableLookup (SSE41CostTbl, ISD, MTy))
1761
- return LT.first * Entry->Cost ;
1802
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1762
1803
1763
1804
if (ST->hasSSE2 ())
1764
1805
if (const auto *Entry = CostTableLookup (SSE2CostTbl, ISD, MTy))
1765
- return LT.first * Entry->Cost ;
1806
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1766
1807
1767
1808
if (ST->hasSSE1 ())
1768
1809
if (const auto *Entry = CostTableLookup (SSE1CostTbl, ISD, MTy))
1769
- return LT.first * Entry->Cost ;
1810
+ return LT.first * (ExtraCost + Entry->Cost ) ;
1770
1811
1771
1812
return BaseT::getCmpSelInstrCost (Opcode, ValTy, CondTy, I);
1772
1813
}
0 commit comments