Skip to content

Commit d1b9ff9

Browse files
committed
convert algorithms to SCC (#47866)
These places in the code can be made either more efficient (O(1)) or more correct by using an approach closer to Tarjan's published algorithm for strongly connected components (SCC). (cherry picked from commit b03439c)
1 parent 62b70e3 commit d1b9ff9

File tree

3 files changed

+111
-163
lines changed

3 files changed

+111
-163
lines changed

src/gf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3421,6 +3421,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
34213421
}
34223422
}
34233423
// then we'll merge those numbers to assign each item in the group the same number
3424+
// (similar to Kosaraju's SCC algorithm?)
34243425
uint32_t groupid = 0;
34253426
uint32_t grouphi = 0;
34263427
for (i = 0; i < len; i++) {

src/jitlayers.cpp

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,11 @@ void jl_dump_llvm_opt_impl(void *s)
137137
**jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s;
138138
}
139139

140-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
140+
static int jl_add_to_ee(
141+
orc::ThreadSafeModule &M,
142+
const StringMap<orc::ThreadSafeModule*> &NewExports,
143+
DenseMap<orc::ThreadSafeModule*, int> &Queued,
144+
std::vector<orc::ThreadSafeModule*> &Stack);
141145
static void jl_decorate_module(Module &M);
142146
static uint64_t getAddressForFunction(StringRef fname);
143147

@@ -229,10 +233,13 @@ static jl_callptr_t _jl_compile_codeinst(
229233
}
230234
}
231235
}
236+
DenseMap<orc::ThreadSafeModule*, int> Queued;
237+
std::vector<orc::ThreadSafeModule*> Stack;
232238
for (auto &def : emitted) {
233239
// Add the results to the execution engine now
234240
orc::ThreadSafeModule &M = std::get<0>(def.second);
235-
jl_add_to_ee(M, NewExports);
241+
jl_add_to_ee(M, NewExports, Queued, Stack);
242+
assert(Queued.empty() && Stack.empty() && !M);
236243
}
237244
++CompiledCodeinsts;
238245
MaxWorkqueueSize.updateMax(emitted.size());
@@ -1715,76 +1722,72 @@ static void jl_decorate_module(Module &M) {
17151722
#endif
17161723
}
17171724

1725+
// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
17181726
static int jl_add_to_ee(
17191727
orc::ThreadSafeModule &M,
1720-
StringMap<orc::ThreadSafeModule*> &NewExports,
1728+
const StringMap<orc::ThreadSafeModule*> &NewExports,
17211729
DenseMap<orc::ThreadSafeModule*, int> &Queued,
1722-
std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1723-
int depth)
1730+
std::vector<orc::ThreadSafeModule*> &Stack)
17241731
{
1725-
// DAG-sort (post-dominator) the compile to compute the minimum
1726-
// merge-module sets for linkage
1732+
// First check if the TSM is empty (already compiled)
17271733
if (!M)
17281734
return 0;
1729-
// First check and record if it's on the stack somewhere
1735+
// Next check and record if it is on the stack somewhere
17301736
{
1731-
auto &Cycle = Queued[&M];
1732-
if (Cycle)
1733-
return Cycle;
1734-
ToMerge.push_back({});
1735-
Cycle = depth;
1737+
auto &Id = Queued[&M];
1738+
if (Id)
1739+
return Id;
1740+
Stack.push_back(&M);
1741+
Id = Stack.size();
17361742
}
1743+
// Finally work out the SCC
1744+
int depth = Stack.size();
17371745
int MergeUp = depth;
1738-
// Compute the cycle-id
1746+
std::vector<orc::ThreadSafeModule*> Children;
17391747
M.withModuleDo([&](Module &m) {
17401748
for (auto &F : m.global_objects()) {
17411749
if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
17421750
auto Callee = NewExports.find(F.getName());
17431751
if (Callee != NewExports.end()) {
1744-
auto &CM = Callee->second;
1745-
int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1);
1746-
assert(Down <= depth);
1747-
if (Down && Down < MergeUp)
1748-
MergeUp = Down;
1752+
auto *CM = Callee->second;
1753+
if (*CM && CM != &M) {
1754+
auto Down = Queued.find(CM);
1755+
if (Down != Queued.end())
1756+
MergeUp = std::min(MergeUp, Down->second);
1757+
else
1758+
Children.push_back(CM);
1759+
}
17491760
}
17501761
}
17511762
}
17521763
});
1753-
if (MergeUp == depth) {
1754-
// Not in a cycle (or at the top of it)
1755-
Queued.erase(&M);
1756-
for (auto &CM : ToMerge.at(depth - 1)) {
1757-
assert(Queued.find(CM)->second == depth);
1758-
Queued.erase(CM);
1759-
jl_merge_module(M, std::move(*CM));
1760-
}
1761-
jl_ExecutionEngine->addModule(std::move(M));
1762-
MergeUp = 0;
1764+
assert(MergeUp > 0);
1765+
for (auto *CM : Children) {
1766+
int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack);
1767+
assert(Down <= (int)Stack.size());
1768+
if (Down)
1769+
MergeUp = std::min(MergeUp, Down);
17631770
}
1764-
else {
1765-
// Add our frame(s) to the top of the cycle
1766-
Queued[&M] = MergeUp;
1767-
auto &Top = ToMerge.at(MergeUp - 1);
1768-
Top.push_back(&M);
1769-
for (auto &CM : ToMerge.at(depth - 1)) {
1770-
assert(Queued.find(CM)->second == depth);
1771-
Queued[CM] = MergeUp;
1772-
Top.push_back(CM);
1771+
if (MergeUp < depth)
1772+
return MergeUp;
1773+
while (1) {
1774+
// Not in a cycle (or at the top of it)
1775+
// remove SCC state and merge every CM from the cycle into M
1776+
orc::ThreadSafeModule *CM = Stack.back();
1777+
auto it = Queued.find(CM);
1778+
assert(it->second == (int)Stack.size());
1779+
Queued.erase(it);
1780+
Stack.pop_back();
1781+
if ((int)Stack.size() < depth) {
1782+
assert(&M == CM);
1783+
break;
17731784
}
1785+
jl_merge_module(M, std::move(*CM));
17741786
}
1775-
ToMerge.pop_back();
1776-
return MergeUp;
1777-
}
1778-
1779-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1780-
{
1781-
DenseMap<orc::ThreadSafeModule*, int> Queued;
1782-
std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1783-
jl_add_to_ee(M, NewExports, Queued, ToMerge, 1);
1784-
assert(!M);
1787+
jl_ExecutionEngine->addModule(std::move(M));
1788+
return 0;
17851789
}
17861790

1787-
17881791
static uint64_t getAddressForFunction(StringRef fname)
17891792
{
17901793
auto addr = jl_ExecutionEngine->getFunctionAddress(fname);

0 commit comments

Comments
 (0)