@@ -137,7 +137,11 @@ void jl_dump_llvm_opt_impl(void *s)
137137 **jl_ExecutionEngine->get_dump_llvm_opt_stream () = (JL_STREAM*)s;
138138}
139139
140- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
140+ static int jl_add_to_ee (
141+ orc::ThreadSafeModule &M,
142+ const StringMap<orc::ThreadSafeModule*> &NewExports,
143+ DenseMap<orc::ThreadSafeModule*, int > &Queued,
144+ std::vector<orc::ThreadSafeModule*> &Stack);
141145static void jl_decorate_module (Module &M);
142146static uint64_t getAddressForFunction (StringRef fname);
143147
@@ -229,10 +233,13 @@ static jl_callptr_t _jl_compile_codeinst(
229233 }
230234 }
231235 }
236+ DenseMap<orc::ThreadSafeModule*, int > Queued;
237+ std::vector<orc::ThreadSafeModule*> Stack;
232238 for (auto &def : emitted) {
233239 // Add the results to the execution engine now
234240 orc::ThreadSafeModule &M = std::get<0 >(def.second );
235- jl_add_to_ee (M, NewExports);
241+ jl_add_to_ee (M, NewExports, Queued, Stack);
242+ assert (Queued.empty () && Stack.empty () && !M);
236243 }
237244 ++CompiledCodeinsts;
238245 MaxWorkqueueSize.updateMax (emitted.size ());
@@ -1715,76 +1722,72 @@ static void jl_decorate_module(Module &M) {
17151722#endif
17161723}
17171724
1725+ // Implements Tarjan's SCC (strongly connected components) algorithm, simplified so that a module's 1-based position on Stack doubles as its index, removing the separate count variable
17181726static int jl_add_to_ee (
17191727 orc::ThreadSafeModule &M,
1720- StringMap<orc::ThreadSafeModule*> &NewExports,
1728+ const StringMap<orc::ThreadSafeModule*> &NewExports,
17211729 DenseMap<orc::ThreadSafeModule*, int > &Queued,
1722- std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1723- int depth)
1730+ std::vector<orc::ThreadSafeModule*> &Stack)
17241731{
1725- // DAG-sort (post-dominator) the compile to compute the minimum
1726- // merge-module sets for linkage
1732+ // First check if the TSM is empty (already compiled)
17271733 if (!M)
17281734 return 0 ;
1729- // First check and record if it's on the stack somewhere
1735+ // Next check and record if it is on the stack somewhere
17301736 {
1731- auto &Cycle = Queued[&M];
1732- if (Cycle )
1733- return Cycle ;
1734- ToMerge .push_back ({} );
1735- Cycle = depth ;
1737+ auto &Id = Queued[&M];
1738+ if (Id )
1739+ return Id ;
1740+ Stack .push_back (&M );
1741+ Id = Stack. size () ;
17361742 }
1743+ // Finally work out the SCC
1744+ int depth = Stack.size ();
17371745 int MergeUp = depth;
1738- // Compute the cycle-id
1746+ std::vector<orc::ThreadSafeModule*> Children;
17391747 M.withModuleDo ([&](Module &m) {
17401748 for (auto &F : m.global_objects ()) {
17411749 if (F.isDeclaration () && F.getLinkage () == GlobalValue::ExternalLinkage) {
17421750 auto Callee = NewExports.find (F.getName ());
17431751 if (Callee != NewExports.end ()) {
1744- auto &CM = Callee->second ;
1745- int Down = jl_add_to_ee (*CM, NewExports, Queued, ToMerge, depth + 1 );
1746- assert (Down <= depth);
1747- if (Down && Down < MergeUp)
1748- MergeUp = Down;
1752+ auto *CM = Callee->second ;
1753+ if (*CM && CM != &M) {
1754+ auto Down = Queued.find (CM);
1755+ if (Down != Queued.end ())
1756+ MergeUp = std::min (MergeUp, Down->second );
1757+ else
1758+ Children.push_back (CM);
1759+ }
17491760 }
17501761 }
17511762 }
17521763 });
1753- if (MergeUp == depth) {
1754- // Not in a cycle (or at the top of it)
1755- Queued.erase (&M);
1756- for (auto &CM : ToMerge.at (depth - 1 )) {
1757- assert (Queued.find (CM)->second == depth);
1758- Queued.erase (CM);
1759- jl_merge_module (M, std::move (*CM));
1760- }
1761- jl_ExecutionEngine->addModule (std::move (M));
1762- MergeUp = 0 ;
1764+ assert (MergeUp > 0 );
1765+ for (auto *CM : Children) {
1766+ int Down = jl_add_to_ee (*CM, NewExports, Queued, Stack);
1767+ assert (Down <= (int )Stack.size ());
1768+ if (Down)
1769+ MergeUp = std::min (MergeUp, Down);
17631770 }
1764- else {
1765- // Add our frame(s) to the top of the cycle
1766- Queued[&M] = MergeUp;
1767- auto &Top = ToMerge.at (MergeUp - 1 );
1768- Top.push_back (&M);
1769- for (auto &CM : ToMerge.at (depth - 1 )) {
1770- assert (Queued.find (CM)->second == depth);
1771- Queued[CM] = MergeUp;
1772- Top.push_back (CM);
1771+ if (MergeUp < depth)
1772+ return MergeUp;
1773+ while (1 ) {
1774+ // M is not in a cycle (or is at the top of one):
1775+ // pop each member of its SCC off Stack, erase its Queued entry, and merge every CM other than M itself into M
1776+ orc::ThreadSafeModule *CM = Stack.back ();
1777+ auto it = Queued.find (CM);
1778+ assert (it->second == (int )Stack.size ());
1779+ Queued.erase (it);
1780+ Stack.pop_back ();
1781+ if ((int )Stack.size () < depth) {
1782+ assert (&M == CM);
1783+ break ;
17731784 }
1785+ jl_merge_module (M, std::move (*CM));
17741786 }
1775- ToMerge.pop_back ();
1776- return MergeUp;
1777- }
1778-
1779- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1780- {
1781- DenseMap<orc::ThreadSafeModule*, int > Queued;
1782- std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1783- jl_add_to_ee (M, NewExports, Queued, ToMerge, 1 );
1784- assert (!M);
1787+ jl_ExecutionEngine->addModule (std::move (M));
1788+ return 0 ;
17851789}
17861790
1787-
17881791static uint64_t getAddressForFunction (StringRef fname)
17891792{
17901793 auto addr = jl_ExecutionEngine->getFunctionAddress (fname);
0 commit comments