@@ -43,6 +43,8 @@ class RISCV final : public TargetInfo {
4343  void  scanSectionImpl (InputSectionBase &, Relocs<RelTy>);
4444  template  <class  ELFT > void  scanSection1 (InputSectionBase &);
4545  void  scanSection (InputSectionBase &) override ;
46+   void  writeTableJumpHeader (uint8_t  *buf) const  override ;
47+   void  writeTableJumpEntry (uint8_t  *buf, const  uint64_t  symbol) const  override ;
4648  RelType getDynRel (RelType type) const  override ;
4749  RelExpr getRelExpr (RelType type, const  Symbol &s,
4850                     const  uint8_t  *loc) const  override ;
@@ -75,6 +77,7 @@ class RISCV final : public TargetInfo {
7577#define  INTERNAL_R_RISCV_GPREL_S  257 
7678#define  INTERNAL_R_RISCV_X0REL_I  258 
7779#define  INTERNAL_R_RISCV_X0REL_S  259 
80+ #define  INTERNAL_R_RISCV_TBJAL  260 
7881
7982const  uint64_t  dtpOffset = 0x800 ;
8083
@@ -274,6 +277,20 @@ void RISCV::writePlt(uint8_t *buf, const Symbol &sym,
274277  write32le (buf + 12 , itype (ADDI, 0 , 0 , 0 ));
275278}
276279
280+ void  RISCV::writeTableJumpHeader (uint8_t  *buf) const  {
281+   if  (ctx.arg .is64 )
282+     write64le (buf, ctx.mainPart ->dynamic ->getVA ());
283+   else 
284+     write32le (buf, ctx.mainPart ->dynamic ->getVA ());
285+ }
286+ 
287+ void  RISCV::writeTableJumpEntry (uint8_t  *buf, const  uint64_t  address) const  {
288+   if  (ctx.arg .is64 )
289+     write64le (buf, address);
290+   else 
291+     write32le (buf, address);
292+ }
293+ 
277294RelType RISCV::getDynRel (RelType type) const  {
278295  return  type == ctx.target ->symbolicRel  ? type
279296                                         : static_cast <RelType>(R_RISCV_NONE);
@@ -496,6 +513,9 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
496513    return ;
497514  }
498515
516+   case  INTERNAL_R_RISCV_TBJAL:
517+     return ;
518+ 
499519  case  R_RISCV_ADD8:
500520    *loc += val;
501521    return ;
@@ -745,6 +765,32 @@ void elf::initSymbolAnchors(Ctx &ctx) {
745765  }
746766}
747767
768+ static  bool  relaxTableJump (Ctx &ctx, const  InputSection &sec, size_t  i,
769+                            uint64_t  loc, Relocation &r, uint32_t  &remove) {
770+   if  (!ctx.in .riscvTableJumpSection  ||
771+       !ctx.in .riscvTableJumpSection ->isFinalized )
772+     return  false ;
773+ 
774+   const  uint32_t  jalr = read32le (sec.contentMaybeDecompress ().data () +
775+                                  r.offset  + (r.type  == R_RISCV_JAL ? 0  : 4 ));
776+   const  uint8_t  rd = extractBits (jalr, 11 , 7 );
777+   int  tblEntryIndex = -1 ;
778+   if  (rd == X_X0) {
779+     tblEntryIndex = ctx.in .riscvTableJumpSection ->getCMJTEntryIndex (r.sym );
780+   } else  if  (rd == X_RA) {
781+     tblEntryIndex = ctx.in .riscvTableJumpSection ->getCMJALTEntryIndex (r.sym );
782+   }
783+ 
784+   if  (tblEntryIndex >= 0 ) {
785+     sec.relaxAux ->relocTypes [i] = INTERNAL_R_RISCV_TBJAL;
786+     sec.relaxAux ->writes .push_back (0xA002  |
787+                                    (tblEntryIndex << 2 )); //  cm.jt or cm.jalt
788+     remove = (r.type  == R_RISCV_JAL ? 2  : 6 );
789+     return  true ;
790+   }
791+   return  false ;
792+ }
793+ 
748794//  Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
749795static  void  relaxCall (Ctx &ctx, const  InputSection &sec, size_t  i, uint64_t  loc,
750796                      Relocation &r, uint32_t  &remove) {
@@ -767,6 +813,8 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
767813    sec.relaxAux ->relocTypes [i] = R_RISCV_RVC_JUMP;
768814    sec.relaxAux ->writes .push_back (0x2001 ); //  c.jal
769815    remove = 6 ;
816+   } else  if  (remove >= 6  && relaxTableJump (ctx, sec, i, loc, r, remove)) {
817+     //  relaxTableJump sets remove
770818  } else  if  (remove >= 4  && isInt<21 >(displace)) {
771819    sec.relaxAux ->relocTypes [i] = R_RISCV_JAL;
772820    sec.relaxAux ->writes .push_back (0x6f  | rd << 7 ); //  jal
@@ -890,6 +938,11 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
890938        relaxCall (ctx, sec, i, loc, r, remove);
891939      }
892940      break ;
941+     case  R_RISCV_JAL:
942+       if  (relaxable (relocs, i)) {
943+         relaxTableJump (ctx, sec, i, loc, r, remove);
944+       }
945+       break ;
893946    case  R_RISCV_TPREL_HI20:
894947    case  R_RISCV_TPREL_ADD:
895948    case  R_RISCV_TPREL_LO12_I:
@@ -1144,6 +1197,12 @@ void RISCV::finalizeRelax(int passes) const {
11441197          case  INTERNAL_R_RISCV_X0REL_I:
11451198          case  INTERNAL_R_RISCV_X0REL_S:
11461199            break ;
1200+           case  INTERNAL_R_RISCV_TBJAL:
1201+             assert (ctx.arg .relaxTbljal );
1202+             assert ((aux.writes [writesIdx] & 0xfc03 ) == 0xA002 );
1203+             skip = 2 ;
1204+             write16le (p, aux.writes [writesIdx++]);
1205+             break ;
11471206          case  R_RISCV_RELAX:
11481207            //  Used by relaxTlsLe to indicate the relocation is ignored.
11491208            break ;
@@ -1155,6 +1214,8 @@ void RISCV::finalizeRelax(int passes) const {
11551214            skip = 4 ;
11561215            write32le (p, aux.writes [writesIdx++]);
11571216            break ;
1217+           case  R_RISCV_64:
1218+             break ;
11581219          case  R_RISCV_32:
11591220            //  Used by relaxTlsLe to write a uint32_t then suppress the handling
11601221            //  in relocateAlloc.
@@ -1533,3 +1594,219 @@ template <class ELFT> void RISCV::scanSection1(InputSectionBase &sec) {
15331594void  RISCV::scanSection (InputSectionBase &sec) {
15341595  invokeELFT (scanSection1, sec);
15351596}
1597+ 
1598+ TableJumpSection::TableJumpSection (Ctx &ctx)
1599+     : SyntheticSection(ctx, " .riscv.jvt" 
1600+                        SHF_ALLOC | SHF_EXECINSTR, tableAlign) {}
1601+ 
1602+ void  TableJumpSection::addCMJTEntryCandidate (const  Symbol *symbol,
1603+                                              int  csReduction) {
1604+   addEntry (symbol, CMJTEntryCandidates, csReduction);
1605+ }
1606+ 
1607+ int  TableJumpSection::getCMJTEntryIndex (const  Symbol *symbol) {
1608+   uint32_t  index = getIndex (symbol, maxCMJTEntrySize, finalizedCMJTEntries);
1609+   return  index < finalizedCMJTEntries.size () ? (int )(startCMJTEntryIdx + index)
1610+                                              : -1 ;
1611+ }
1612+ 
1613+ void  TableJumpSection::addCMJALTEntryCandidate (const  Symbol *symbol,
1614+                                                int  csReduction) {
1615+   addEntry (symbol, CMJALTEntryCandidates, csReduction);
1616+ }
1617+ 
1618+ int  TableJumpSection::getCMJALTEntryIndex (const  Symbol *symbol) {
1619+   uint32_t  index = getIndex (symbol, maxCMJALTEntrySize, finalizedCMJALTEntries);
1620+   return  index < finalizedCMJALTEntries.size ()
1621+              ? (int )(startCMJALTEntryIdx + index)
1622+              : -1 ;
1623+ }
1624+ 
1625+ void  TableJumpSection::addEntry (
1626+     const  Symbol *symbol, llvm::DenseMap<const  Symbol *, int > &entriesList,
1627+     int  csReduction) {
1628+   entriesList[symbol] += csReduction;
1629+ }
1630+ 
1631+ uint32_t  TableJumpSection::getIndex (
1632+     const  Symbol *symbol, uint32_t  maxSize,
1633+     SmallVector<llvm::detail::DenseMapPair<const  Symbol *, int >, 0 >
1634+         &entriesList) {
1635+   //  Find this symbol in the ordered list of entries if it exists.
1636+   assert (maxSize >= entriesList.size () &&
1637+          " Finalized vector of entries exceeds maximum" 
1638+   auto  idx = std::find_if (
1639+       entriesList.begin (), entriesList.end (),
1640+       [symbol](llvm::detail::DenseMapPair<const  Symbol *, int > &e) {
1641+         return  e.first  == symbol;
1642+       });
1643+ 
1644+   if  (idx == entriesList.end ())
1645+     return  entriesList.size ();
1646+   return  idx - entriesList.begin ();
1647+ }
1648+ 
1649+ void  TableJumpSection::scanTableJumpEntries (const  InputSection &sec) const  {
1650+   for  (auto  [i, r] : llvm::enumerate (sec.relocations )) {
1651+     Defined *definedSymbol = dyn_cast<Defined>(r.sym );
1652+     if  (!definedSymbol)
1653+       continue ;
1654+     if  (i + 1  == sec.relocs ().size () ||
1655+         sec.relocs ()[i + 1 ].type  != R_RISCV_RELAX)
1656+       continue ;
1657+     switch  (r.type ) {
1658+     case  R_RISCV_JAL:
1659+     case  R_RISCV_CALL:
1660+     case  R_RISCV_CALL_PLT: {
1661+       const  uint32_t  jalr =
1662+           read32le (sec.contentMaybeDecompress ().data () + r.offset  +
1663+                    (r.type  == R_RISCV_JAL ? 0  : 4 ));
1664+       const  uint8_t  rd = extractBits (jalr, 11 , 7 );
1665+ 
1666+       int  csReduction = 6 ;
1667+       if  (sec.relaxAux ->relocTypes [i] == R_RISCV_RVC_JUMP)
1668+         continue ;
1669+       else  if  (sec.relaxAux ->relocTypes [i] == R_RISCV_JAL)
1670+         csReduction = 2 ;
1671+ 
1672+       if  (rd == 0 )
1673+         ctx.in .riscvTableJumpSection ->addCMJTEntryCandidate (r.sym , csReduction);
1674+       else  if  (rd == X_RA)
1675+         ctx.in .riscvTableJumpSection ->addCMJALTEntryCandidate (r.sym ,
1676+                                                               csReduction);
1677+     }
1678+     }
1679+   }
1680+ }
1681+ 
1682+ void  TableJumpSection::finalizeContents () {
1683+   if  (isFinalized)
1684+     return ;
1685+   isFinalized = true ;
1686+ 
1687+   finalizedCMJTEntries = finalizeEntry (CMJTEntryCandidates, maxCMJTEntrySize);
1688+   CMJTEntryCandidates.clear ();
1689+   int32_t  CMJTSizeReduction = getSizeReduction ();
1690+   finalizedCMJALTEntries =
1691+       finalizeEntry (CMJALTEntryCandidates, maxCMJALTEntrySize);
1692+   CMJALTEntryCandidates.clear ();
1693+ 
1694+   if  (!finalizedCMJALTEntries.empty () &&
1695+       getSizeReduction () < CMJTSizeReduction) {
1696+     //  In memory, the cm.jt table occupies the first 0x20 entries.
1697+     //  To be able to use the cm.jalt table which comes afterwards
1698+     //  it is necessary to pad out the cm.jt table.
1699+     //  Remove cm.jalt entries if the code reduction of cm.jalt is
1700+     //  smaller than the size of the padding.
1701+     finalizedCMJALTEntries.clear ();
1702+   }
1703+   //  if table jump still got negative effect, give up.
1704+   if  (getSizeReduction () <= 0 ) {
1705+     warn (" Table Jump Relaxation didn't got any reduction for code size." 
1706+     finalizedCMJTEntries.clear ();
1707+   }
1708+ }
1709+ 
1710+ //  Sort the map in decreasing order of the amount of code reduction provided
1711+ //  by the entries. Drop any entries that can't fit in the map from the tail
1712+ //  end since they provide less code reduction. Drop any entries that cause
1713+ //  an increase in code size (i.e. the reduction from instruction conversion
1714+ //  does not cover the code size gain from adding a table entry).
1715+ SmallVector<llvm::detail::DenseMapPair<const  Symbol *, int >, 0 >
1716+ TableJumpSection::finalizeEntry (llvm::DenseMap<const  Symbol *, int > EntryMap,
1717+                                 uint32_t  maxSize) {
1718+   auto  cmp = [](const  llvm::detail::DenseMapPair<const  Symbol *, int > &p1,
1719+                 const  llvm::detail::DenseMapPair<const  Symbol *, int > &p2) {
1720+     return  p1.second  > p2.second ;
1721+   };
1722+ 
1723+   SmallVector<llvm::detail::DenseMapPair<const  Symbol *, int >, 0 >
1724+       tempEntryVector;
1725+   std::copy (EntryMap.begin (), EntryMap.end (),
1726+             std::back_inserter (tempEntryVector));
1727+   std::sort (tempEntryVector.begin (), tempEntryVector.end (), cmp);
1728+ 
1729+   auto  finalizedVector = tempEntryVector;
1730+ 
1731+   finalizedVector.resize (maxSize);
1732+ 
1733+   //  Drop any items that have a negative effect (i.e. increase code size).
1734+   while  (!finalizedVector.empty ()) {
1735+     if  (finalizedVector.rbegin ()->second  < ctx.arg .wordsize )
1736+       finalizedVector.pop_back ();
1737+     else 
1738+       break ;
1739+   }
1740+   return  finalizedVector;
1741+ }
1742+ 
1743+ size_t  TableJumpSection::getSize () const  {
1744+   if  (isFinalized) {
1745+     if  (!finalizedCMJALTEntries.empty ())
1746+       return  (startCMJALTEntryIdx + finalizedCMJALTEntries.size ()) *
1747+              ctx.arg .wordsize ;
1748+     return  (startCMJTEntryIdx + finalizedCMJTEntries.size ()) * ctx.arg .wordsize ;
1749+   }
1750+ 
1751+   if  (!CMJALTEntryCandidates.empty ())
1752+     return  (startCMJALTEntryIdx + CMJALTEntryCandidates.size ()) *
1753+            ctx.arg .wordsize ;
1754+   return  (startCMJTEntryIdx + CMJTEntryCandidates.size ()) * ctx.arg .wordsize ;
1755+ }
1756+ 
1757+ int32_t  TableJumpSection::getSizeReduction () {
1758+   //  The total reduction in code size is J + JA - JTS - JAE.
1759+   //  Where:
1760+   //  J = number of bytes saved for all the cm.jt instructions emitted
1761+   //  JA = number of bytes saved for all the cm.jalt instructions emitted
1762+   //  JTS = size of the part of the table for cm.jt jumps (i.e. 32 x wordsize)
1763+   //  JAE = number of entries emitted for the cm.jalt jumps x wordsize
1764+ 
1765+   int32_t  sizeReduction = -getSize ();
1766+   for  (auto  entry : finalizedCMJTEntries) {
1767+     sizeReduction += entry.second ;
1768+   }
1769+   for  (auto  entry : finalizedCMJALTEntries) {
1770+     sizeReduction += entry.second ;
1771+   }
1772+   return  sizeReduction;
1773+ }
1774+ 
1775+ void  TableJumpSection::writeTo (uint8_t  *buf) {
1776+   if  (getSizeReduction () <= 0 )
1777+     return ;
1778+   ctx.target ->writeTableJumpHeader (buf);
1779+   writeEntries (buf + startCMJTEntryIdx * ctx.arg .wordsize ,
1780+                finalizedCMJTEntries);
1781+   if  (finalizedCMJALTEntries.size () > 0 ) {
1782+     padWords (buf + ((startCMJTEntryIdx + finalizedCMJTEntries.size ()) *
1783+                     ctx.arg .wordsize ),
1784+              startCMJALTEntryIdx);
1785+     writeEntries (buf + (startCMJALTEntryIdx * ctx.arg .wordsize ),
1786+                  finalizedCMJALTEntries);
1787+   }
1788+ }
1789+ 
1790+ void  TableJumpSection::padWords (uint8_t  *buf, const  uint8_t  maxWordCount) {
1791+   for  (size_t  i = 0 ; i < maxWordCount; ++i) {
1792+     if  (ctx.arg .is64 )
1793+       write64le (buf + i, 0 );
1794+     else 
1795+       write32le (buf + i, 0 );
1796+   }
1797+ }
1798+ 
1799+ void  TableJumpSection::writeEntries (
1800+     uint8_t  *buf,
1801+     SmallVector<llvm::detail::DenseMapPair<const  Symbol *, int >, 0 >
1802+         &entriesList) {
1803+   for  (const  auto  &entry : entriesList) {
1804+     assert (entry.second  > 0 );
1805+     //  Use the symbol from in.symTab to ensure we have the final adjusted
1806+     //  symbol.
1807+     if  (!entry.first ->isDefined ())
1808+       continue ;
1809+     ctx.target ->writeTableJumpEntry (buf, entry.first ->getVA (ctx, 0 ));
1810+     buf += ctx.arg .wordsize ;
1811+   }
1812+ }
0 commit comments