1919
2020#include " kernel/yosys.h"
2121#include " kernel/sigtools.h"
22+ #include < algorithm>
2223
2324USING_YOSYS_NAMESPACE
2425PRIVATE_NAMESPACE_BEGIN
@@ -29,6 +30,121 @@ struct overloaded : Ts... { using Ts::operator()...; };
2930template <class ... Ts>
3031overloaded (Ts...) -> overloaded<Ts...>;
3132
33+ // This computes a graph of assignment dependencies in a process, which is used
34+ // to preserve SCCs in the process which are useful for DFF inference
35+ struct ProcessDependencyWorker {
36+ ProcessDependencyWorker (const RTLIL::Process& proc) {
37+ add_process (proc);
38+ }
39+
40+ void add_process (const RTLIL::Process& proc) {
41+ add_caserule (proc.root_case );
42+
43+ for (const auto * sync : proc.syncs )
44+ add_syncrule (*sync);
45+ }
46+
47+ void add_syncrule (const RTLIL::SyncRule& sync) {
48+ for (const auto & sigsig : sync.actions )
49+ add_sigsig (sigsig);
50+ }
51+
52+ void add_caserule (const RTLIL::CaseRule& caserule) {
53+ for (const auto & sigsig : caserule.actions )
54+ add_sigsig (sigsig);
55+
56+ for (const auto * rule : caserule.switches )
57+ add_switchrule (*rule);
58+ }
59+
60+ void add_switchrule (const RTLIL::SwitchRule& switchrule) {
61+ for (const auto * rule : switchrule.cases )
62+ add_caserule (*rule);
63+ }
64+
65+ void add_sigsig (const RTLIL::SigSig& sigsig) {
66+ for (int i = 0 ; i < GetSize (sigsig.first ); i++) {
67+ const auto lhs = sigsig.first [i], rhs = sigsig.second [i];
68+ if (rhs.is_wire ())
69+ dependencies[lhs].emplace (rhs);
70+ }
71+ }
72+
73+ // Returns the set of nodes that appear in an SCC with this bit
74+ pool<SigBit> scc_nodes (const SigBit bit) {
75+ pool<SigBit> scc_nodes;
76+
77+ // This uses a DFS to iterate through the graph stopping when it detects
78+ // SCCs
79+ struct StackElem {
80+ const SigBit lhs;
81+ pool<SigBit>::const_iterator current_rhs;
82+ const pool<SigBit>::const_iterator end;
83+
84+ StackElem (
85+ const SigBit lhs,
86+ pool<SigBit>::const_iterator current_rhs,
87+ const pool<SigBit>::const_iterator end
88+ ) : lhs{lhs}, current_rhs{current_rhs}, end{end} {}
89+ };
90+ std::vector<StackElem> node_stack;
91+
92+ // Try to add a new node to the stack. Returns false if it is already
93+ // in the stack (we have found an SCC) or doesn't exist in the dependency
94+ // map (has no children), otherwise true
95+ const auto try_add_node = [&](const SigBit node) {
96+ const auto stack_it = std::find_if (
97+ node_stack.cbegin (), node_stack.cend (),
98+ [&](const auto & elem){ return elem.lhs == node; }
99+ );
100+
101+ if (stack_it != node_stack.cend ())
102+ return false ;
103+
104+ const auto it = dependencies.find (node);
105+
106+ if (it == dependencies.end ())
107+ return false ;
108+
109+ node_stack.emplace_back (node, it->second .begin (), it->second .end ());
110+ return true ;
111+ };
112+
113+ try_add_node (bit);
114+
115+ while (!node_stack.empty ()) {
116+ auto & top = node_stack.back ();
117+
118+ // If we have explored all children of this node backtrack
119+ if (top.current_rhs == top.end ) {
120+ node_stack.pop_back ();
121+ if (!node_stack.empty ())
122+ ++node_stack.back ().current_rhs ;
123+ continue ;
124+ }
125+
126+ // Not yet at an SCC so try to add this top node to the stack. If
127+ // it doesn't form an SCC and has children, carry on (with the new top node)
128+ if (try_add_node (*top.current_rhs ))
129+ continue ;
130+
131+ // We have found an SCC or a node without children. If it loops back
132+ // to the starting bit, add the whole stack as it is all in the SCC
133+ // being searched for
134+ if (*top.current_rhs == bit)
135+ for (const auto & elem : node_stack)
136+ scc_nodes.emplace (elem.lhs );
137+
138+ // Increment the iterator to keep going
139+ ++top.current_rhs ;
140+ }
141+
142+ return scc_nodes;
143+ }
144+
145+ dict<SigBit, pool<SigBit>> dependencies;
146+ };
147+
32148struct OptBarriersPass : public Pass {
33149 OptBarriersPass () : Pass(" optbarriers" , " insert optimization barriers" ) {}
34150
@@ -139,7 +255,7 @@ struct OptBarriersPass : public Pass {
139255 // Add a wire to drive if one does not already exist
140256 auto * new_wire = new_wires.at (chunk.wire , nullptr );
141257 if (!new_wire) {
142- new_wire = module ->addWire (NEW_ID , GetSize (chunk.wire ));
258+ new_wire = module ->addWire (NEW_ID_SUFFIX (chunk. wire -> name . str ()) , GetSize (chunk.wire ));
143259 new_wires.emplace (chunk.wire , new_wire);
144260 }
145261
@@ -156,29 +272,153 @@ struct OptBarriersPass : public Pass {
156272 return new_output;
157273 };
158274
159- // Rewrite cell outputs
160- if (!nocells_mode)
161- for (auto * cell : module ->cells ())
162- if (cell->type != ID ($barrier))
163- for (const auto & [name, sig] : cell->connections ())
164- if (cell->output (name))
165- cell->setPort (name, rewrite_sigspec (sig));
275+ // Rewrite processes. It is not as simple as changing all LHS
276+ // expressions to drive barriers if required, as this prevents
277+ // proc passes from optimizing self feedback which is important to
278+ // prevent false comb paths when generating FFs. We only care about
279+ // the assignments/connections within processes, and want to maintain
280+ // the property that if a bit that is to be rewritten to use a
281+ // barrier can be assigned transitively to itself, the value that is
282+ // assigned should be the value before the barrier.
283+ //
284+ // To do this we first enumerate the assignment dependency graph
285+ // for the process - marking which bits drive any other bit. We
286+ // then look for strongly connected components containing barrier
287+ // bits. These correspond to potential paths where the bit can be
288+ // driven by itself and so should see the pre-barrier value. For
289+ // each of the bits on this path we want to construct a parallel
290+ // version that appears in all the same process assignments but is
291+ // driven originally by the pre-barrier value.
292+ //
293+ // For example, suppose the following set of assignments appears
294+ // somewhere in the process and we want to add a barrier to b:
295+ // a <- b
296+ // b <- a
297+ // There is a path from b to itself through a, so we add wire b\b to
298+ // represent the pre-barrier version of b and a\b to represent
299+ // the version of a that sees a pre-barrier version of b. We then
300+ // correspondingly add these paths to the assignments and a buffer:
301+ // {a\b, a} <- {b\b, b}
302+ // b\b <- a\b
303+ // b\b -buf> b
304+ //
305+ // This has retained that b\b is transitively driven by itself, but
306+ // a is still driven by b, the post-barrier version of b
307+ if (!noprocs_mode)
308+ for (const auto & proc : module ->processes ) {
309+ // A map from each bit driven by the original process to the
310+ // set of variants required for it. If a bit doesn't appear in
311+ // variants it is only needed in the original form it appears
312+ // in the process.
313+ //
314+ // To get bit a\b from the above example you would index
315+ // variants[a][b]
316+ dict<SigBit, dict<SigBit, SigBit>> variants;
317+
318+ {
319+ // We want to minimize the number of wires we have to create
320+ // for each bit in variants, so for variants[a][b] we create
321+ // a unique wire with size GetSize(a.wire) for each tuple
322+ // of a.wire, b.wire, a.offset - b.offset rather than for
323+ // every pair (a, b).
324+ using IdxTuple = std::tuple<Wire*, Wire*, int >;
325+ dict<IdxTuple, Wire*> variant_wires;
326+
327+ // Collect all assignment dependencies in the process
328+ ProcessDependencyWorker dep_worker (*proc.second );
329+
330+ // Enumerate driven bits that need barriers added
331+ for (const auto & [variant_bit, _] : dep_worker.dependencies ) {
332+ if (skip (variant_bit))
333+ continue ;
334+
335+ // Collect the bits that are in an SCC with this bit and
336+ // thus need to have variants constructed that see the
337+ // pre-barrier value
338+ for (const auto & lhs_bit : dep_worker.scc_nodes (variant_bit)) {
339+ // Don't add a new wire for the variant_bit itself as this
340+ // should be driven by a wire generated by rewrite_sigspec below
341+ if (lhs_bit == variant_bit)
342+ continue ;
343+
344+ const int offset = lhs_bit.offset - variant_bit.offset ;
345+ const IdxTuple idx{lhs_bit.wire , variant_bit.wire , offset};
346+ auto it = variant_wires.find (idx);
347+
348+ // Create a new wire to represent this offset combination
349+ // if needed
350+ if (it == variant_wires.end ()) {
351+ const auto name = NEW_ID_SUFFIX (lhs_bit.wire ->name .str ());
352+ auto * new_wire = module ->addWire (name, GetSize (lhs_bit.wire ));
353+ it = variant_wires.emplace (idx, new_wire).first ;
354+ }
355+
356+ variants[lhs_bit].emplace (variant_bit, SigBit (it->second , lhs_bit.offset ));
357+ }
358+
359+ // Even if a bit doesn't appear in any SCCs we want to rewrite it if it
360+ // isn't skipped
361+ variants[variant_bit].emplace (variant_bit, rewrite_sigspec (variant_bit));
362+ }
363+ }
364+
365+ const auto variant = [&](const SigBit a, const SigBit b) {
366+ const auto it1 = variants.find (a);
367+ // There are no variants of a required, so a definitely isn't
368+ // transitively driven by b
369+ if (it1 == variants.end ())
370+ return a;
371+
372+ const auto it2 = it1->second .find (b);
373+ // a isn't transitively driven by b
374+ if (it2 == it1->second .end ())
375+ return a;
376+
377+ // a can be transitively driven by b so return the variant
378+ // of a that sees pre-barrier b
379+ return it2->second ;
380+ };
166381
167- // Rewrite connections in processes
168- if (!noprocs_mode) {
169382 const auto proc_rewriter = overloaded{
170- // Don't do anything for input sigspecs
383+ // Don't do anything for input sigspecs, these are not connections
171384 [&](const SigSpec&) {},
172- // Rewrite connections to drive barrier if needed
173- [&](SigSpec& lhs, const SigSpec&) {
174- lhs = rewrite_sigspec (lhs);
385+ // Rewrite connections to drive barrier and see pre-barrier
386+ // values if needed
387+ [&](SigSpec& lhs, SigSpec& rhs) {
388+ for (int i = 0 ; i < GetSize (lhs); i++) {
389+ const auto lhs_bit = lhs[i], rhs_bit = rhs[i];
390+
391+ for (const auto & [variant_bit, variant_lhs_bit] : variants[lhs_bit]) {
392+ // For the existing connections, update the lhs to be pre-barrier
393+ // variant_lhs_bit and rhs to be the variant of itself that sees
394+ // the pre-barrier version of variant_bit
395+ if (variant_bit == lhs_bit) {
396+ lhs[i] = variant_lhs_bit;
397+ rhs[i] = variant (rhs_bit, variant_bit);
398+ continue ;
399+ }
400+
401+ // For new variants of lhs_bit that we need to exist, set the
402+ // rhs bit to the variant of rhs_bit that sees the pre_barrier
403+ // version of variant_bit
404+ lhs.append (variant_lhs_bit);
405+ rhs.append (variant (rhs_bit, variant_bit));
406+ }
407+ }
175408 }
176409 };
177410
178- for (auto & proc : module ->processes )
179- proc.second ->rewrite_sigspecs2 (proc_rewriter);
411+ proc.second ->rewrite_sigspecs2 (proc_rewriter);
180412 }
181413
414+ // Rewrite cell outputs
415+ if (!nocells_mode)
416+ for (auto * cell : module ->cells ())
417+ if (cell->type != ID ($barrier))
418+ for (const auto & [name, sig] : cell->connections ())
419+ if (cell->output (name))
420+ cell->setPort (name, rewrite_sigspec (sig));
421+
182422 // Add all the scheduled barriers. To minimize the number of cells,
183423 // first construct a sigspec of all bits, then sort and unify before
184424 // creating barriers
0 commit comments