Skip to content

Commit 2ac0d34

Browse files
committed
optbarriers: preserve SCCs in processes
1 parent 8760555 commit 2ac0d34

File tree

1 file changed

+256
-16
lines changed

1 file changed

+256
-16
lines changed

passes/cmds/optbarriers.cc

Lines changed: 256 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "kernel/yosys.h"
2121
#include "kernel/sigtools.h"
22+
#include <algorithm>
2223

2324
USING_YOSYS_NAMESPACE
2425
PRIVATE_NAMESPACE_BEGIN
@@ -29,6 +30,121 @@ struct overloaded : Ts... { using Ts::operator()...; };
2930
template<class... Ts>
3031
overloaded(Ts...) -> overloaded<Ts...>;
3132

33+
// This computes a graph of assignment dependencies in a process, which is used
34+
// to preserve SCCs in the process which are useful for DFF inference
35+
struct ProcessDependencyWorker {
36+
ProcessDependencyWorker(const RTLIL::Process& proc) {
37+
add_process(proc);
38+
}
39+
40+
void add_process(const RTLIL::Process& proc) {
41+
add_caserule(proc.root_case);
42+
43+
for (const auto* sync : proc.syncs)
44+
add_syncrule(*sync);
45+
}
46+
47+
void add_syncrule(const RTLIL::SyncRule& sync) {
48+
for (const auto& sigsig : sync.actions)
49+
add_sigsig(sigsig);
50+
}
51+
52+
void add_caserule(const RTLIL::CaseRule& caserule) {
53+
for (const auto& sigsig : caserule.actions)
54+
add_sigsig(sigsig);
55+
56+
for (const auto* rule : caserule.switches)
57+
add_switchrule(*rule);
58+
}
59+
60+
void add_switchrule(const RTLIL::SwitchRule& switchrule) {
61+
for (const auto* rule : switchrule.cases)
62+
add_caserule(*rule);
63+
}
64+
65+
void add_sigsig(const RTLIL::SigSig& sigsig) {
66+
for (int i = 0; i < GetSize(sigsig.first); i++) {
67+
const auto lhs = sigsig.first[i], rhs = sigsig.second[i];
68+
if (rhs.is_wire())
69+
dependencies[lhs].emplace(rhs);
70+
}
71+
}
72+
73+
// Returns the set of nodes that appear in an SCC with this bit
74+
pool<SigBit> scc_nodes(const SigBit bit) {
75+
pool<SigBit> scc_nodes;
76+
77+
// This uses a DFS to iterate through the graph stopping when it detects
78+
// SCCs
79+
struct StackElem {
80+
const SigBit lhs;
81+
pool<SigBit>::const_iterator current_rhs;
82+
const pool<SigBit>::const_iterator end;
83+
84+
StackElem(
85+
const SigBit lhs,
86+
pool<SigBit>::const_iterator current_rhs,
87+
const pool<SigBit>::const_iterator end
88+
) : lhs{lhs}, current_rhs{current_rhs}, end{end} {}
89+
};
90+
std::vector<StackElem> node_stack;
91+
92+
// Try to add a new node to the stack. Returns false if it is already
93+
// in the stack (we have found an SCC) or doesn't exist in the dependency
94+
// map (has no children), otherwise true
95+
const auto try_add_node = [&](const SigBit node) {
96+
const auto stack_it = std::find_if(
97+
node_stack.cbegin(), node_stack.cend(),
98+
[&](const auto& elem){ return elem.lhs == node; }
99+
);
100+
101+
if (stack_it != node_stack.cend())
102+
return false;
103+
104+
const auto it = dependencies.find(node);
105+
106+
if (it == dependencies.end())
107+
return false;
108+
109+
node_stack.emplace_back(node, it->second.begin(), it->second.end());
110+
return true;
111+
};
112+
113+
try_add_node(bit);
114+
115+
while (!node_stack.empty()) {
116+
auto& top = node_stack.back();
117+
118+
// If we have explored all children of this node backtrack
119+
if (top.current_rhs == top.end) {
120+
node_stack.pop_back();
121+
if (!node_stack.empty())
122+
++node_stack.back().current_rhs;
123+
continue;
124+
}
125+
126+
// Not yet at an SCC so try to add this top node to the stack. If
127+
// it doesn't form an SCC and has children, carry on (with the new top node)
128+
if (try_add_node(*top.current_rhs))
129+
continue;
130+
131+
// We have found an SCC or a node without children. If it loops back
132+
// to the starting bit, add the whole stack as it is all in the SCC
133+
// being searched for
134+
if (*top.current_rhs == bit)
135+
for (const auto& elem : node_stack)
136+
scc_nodes.emplace(elem.lhs);
137+
138+
// Increment the iterator to keep going
139+
++top.current_rhs;
140+
}
141+
142+
return scc_nodes;
143+
}
144+
145+
dict<SigBit, pool<SigBit>> dependencies;
146+
};
147+
32148
struct OptBarriersPass : public Pass {
33149
OptBarriersPass() : Pass("optbarriers", "insert optimization barriers") {}
34150

@@ -139,7 +255,7 @@ struct OptBarriersPass : public Pass {
139255
// Add a wire to drive if one does not already exist
140256
auto* new_wire = new_wires.at(chunk.wire, nullptr);
141257
if (!new_wire) {
142-
new_wire = module->addWire(NEW_ID, GetSize(chunk.wire));
258+
new_wire = module->addWire(NEW_ID_SUFFIX(chunk.wire->name.str()), GetSize(chunk.wire));
143259
new_wires.emplace(chunk.wire, new_wire);
144260
}
145261

@@ -156,29 +272,153 @@ struct OptBarriersPass : public Pass {
156272
return new_output;
157273
};
158274

159-
// Rewrite cell outputs
160-
if (!nocells_mode)
161-
for (auto* cell : module->cells())
162-
if (cell->type != ID($barrier))
163-
for (const auto& [name, sig] : cell->connections())
164-
if (cell->output(name))
165-
cell->setPort(name, rewrite_sigspec(sig));
275+
// Rewrite processes. It is not as simple as changing all LHS
276+
// expressions to drive barriers if required, as this prevents
277+
// proc passes from optimizing self feedback which is important to
278+
// prevent false comb paths when generating FFs. We only care about
279+
// the assignments/connections within processes, and want to maintain
280+
// the property that if a bit that is to be rewritten to use a
281+
// barrier can be assigned transitively to itself, the value that is
282+
// assigned should be the value before the barrier.
283+
//
284+
// To do this we first enumerate the assignment dependency graph
285+
// for the process - marking which bits drive any other bit. We
286+
// then look for strongly connected components containing barrier
287+
// bits. These correspond to potential paths where the bit can be
288+
// driven by itself and so should see the pre-barrier value. For
289+
// each of the bits on this path we want to construct a parallel
290+
// version that appears in all the same process assignments but is
291+
// driven originally by the pre-barrier value.
292+
//
293+
// For example, suppose the following set of assignments appears
294+
// somewhere in the process and we want to add a barrier to b:
295+
// a <- b
296+
// b <- a
297+
// There is a path from b to itself through a, so we add wire b\b to
298+
// represent the pre-barrier version of b and a\b to represent
299+
// the version of a that sees a pre-barrier version of b. We then
300+
// correspondingly add these paths to the assignments and a barrier:
301+
// {a\b, a} <- {b\b, b}
302+
// b\b <- a\b
303+
// b\b -$barrier> b
304+
//
305+
// This has retained that b\b is transitively driven by itself, but
306+
// a is still driven by b, the post-barrier version of b
307+
if (!noprocs_mode)
308+
for (const auto& proc : module->processes) {
309+
// A map from each bit driven by the original process to the
310+
// set of variants required for it. If a bit doesn't appear in
311+
// variants it is only needed in the original form it appears
312+
// in the process.
313+
//
314+
// To get bit a\b from the above example you would index
315+
// variants[a][b]
316+
dict<SigBit, dict<SigBit, SigBit>> variants;
317+
318+
{
319+
// We want to minimize the number of wires we have to create
320+
// for each bit in variants, so for variants[a][b] we create
321+
// a unique wire with size GetSize(a.wire) for each tuple
322+
// of a.wire, b.wire, a.offset - b.offset rather than for
323+
// every pair (a, b).
324+
using IdxTuple = std::tuple<Wire*, Wire*, int>;
325+
dict<IdxTuple, Wire*> variant_wires;
326+
327+
// Collect all assignment dependencies in the process
328+
ProcessDependencyWorker dep_worker(*proc.second);
329+
330+
// Enumerate driven bits that need barriers added
331+
for (const auto& [variant_bit, _] : dep_worker.dependencies) {
332+
if (skip(variant_bit))
333+
continue;
334+
335+
// Collect the bits that are in an SCC with this bit and
336+
// thus need to have variants constructed that see the
337+
// pre-barrier value
338+
for (const auto& lhs_bit : dep_worker.scc_nodes(variant_bit)) {
339+
// Don't add a new wire for the variant_bit itself as this
340+
// should be driven by a wire generated by rewrite_sigspec below
341+
if (lhs_bit == variant_bit)
342+
continue;
343+
344+
const int offset = lhs_bit.offset - variant_bit.offset;
345+
const IdxTuple idx{lhs_bit.wire, variant_bit.wire, offset};
346+
auto it = variant_wires.find(idx);
347+
348+
// Create a new wire to represent this offset combination
349+
// if needed
350+
if (it == variant_wires.end()) {
351+
const auto name = NEW_ID_SUFFIX(lhs_bit.wire->name.str());
352+
auto* new_wire = module->addWire(name, GetSize(lhs_bit.wire));
353+
it = variant_wires.emplace(idx, new_wire).first;
354+
}
355+
356+
variants[lhs_bit].emplace(variant_bit, SigBit(it->second, lhs_bit.offset));
357+
}
358+
359+
// Even if a bit doesn't appear in any SCCs we want to rewrite it if it
360+
// isn't skipped
361+
variants[variant_bit].emplace(variant_bit, rewrite_sigspec(variant_bit));
362+
}
363+
}
364+
365+
const auto variant = [&](const SigBit a, const SigBit b) {
366+
const auto it1 = variants.find(a);
367+
// There are no variants of a required, so a definitely isn't
368+
// transitively driven by b
369+
if (it1 == variants.end())
370+
return a;
371+
372+
const auto it2 = it1->second.find(b);
373+
// a isn't transitively driven by b
374+
if (it2 == it1->second.end())
375+
return a;
376+
377+
// a can be transitively driven by b so return the variant
378+
// of a that sees pre-barrier b
379+
return it2->second;
380+
};
166381

167-
// Rewrite connections in processes
168-
if (!noprocs_mode) {
169382
const auto proc_rewriter = overloaded{
170-
// Don't do anything for input sigspecs
383+
// Don't do anything for input sigspecs, these are not connections
171384
[&](const SigSpec&) {},
172-
// Rewrite connections to drive barrier if needed
173-
[&](SigSpec& lhs, const SigSpec&) {
174-
lhs = rewrite_sigspec(lhs);
385+
// Rewrite connections to drive barrier and see pre-barrier
386+
// values if needed
387+
[&](SigSpec& lhs, SigSpec& rhs) {
388+
for (int i = 0; i < GetSize(lhs); i++) {
389+
const auto lhs_bit = lhs[i], rhs_bit = rhs[i];
390+
391+
for (const auto& [variant_bit, variant_lhs_bit] : variants[lhs_bit]) {
392+
// For the existing connections, update the lhs to be pre-barrier
393+
// variant_lhs_bit and rhs to be the variant of itself that sees
394+
// the pre-barrier version of variant_bit
395+
if (variant_bit == lhs_bit) {
396+
lhs[i] = variant_lhs_bit;
397+
rhs[i] = variant(rhs_bit, variant_bit);
398+
continue;
399+
}
400+
401+
// For new variants of lhs_bit that we need to exist, set the
402+
// rhs bit to the variant of rhs_bit that sees the pre_barrier
403+
// version of variant_bit
404+
lhs.append(variant_lhs_bit);
405+
rhs.append(variant(rhs_bit, variant_bit));
406+
}
407+
}
175408
}
176409
};
177410

178-
for (auto& proc : module->processes)
179-
proc.second->rewrite_sigspecs2(proc_rewriter);
411+
proc.second->rewrite_sigspecs2(proc_rewriter);
180412
}
181413

414+
// Rewrite cell outputs
415+
if (!nocells_mode)
416+
for (auto* cell : module->cells())
417+
if (cell->type != ID($barrier))
418+
for (const auto& [name, sig] : cell->connections())
419+
if (cell->output(name))
420+
cell->setPort(name, rewrite_sigspec(sig));
421+
182422
// Add all the scheduled barriers. To minimize the number of cells,
183423
// first construct a sigspec of all bits, then sort and unify before
184424
// creating barriers

0 commit comments

Comments
 (0)