Skip to content

Commit f6e435f

Browse files
authored
Merge pull request #4524 from YosysHQ/emil/hashlib-interface
Neater hashing interface
2 parents 7a362f1 + 026e9da commit f6e435f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1105
-744
lines changed

backends/cxxrtl/cxxrtl_backend.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct Scheduler {
4747
struct Vertex {
4848
T *data;
4949
Vertex *prev, *next;
50-
pool<Vertex*, hash_ptr_ops> preds, succs;
50+
pool<Vertex*> preds, succs;
5151

5252
Vertex() : data(NULL), prev(this), next(this) {}
5353
Vertex(T *data) : data(data), prev(NULL), next(NULL) {}
@@ -300,10 +300,10 @@ struct FlowGraph {
300300
};
301301

302302
std::vector<Node*> nodes;
303-
dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses;
304-
dict<Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_comb_defs, node_sync_defs, node_uses;
303+
dict<const RTLIL::Wire*, pool<Node*>> wire_comb_defs, wire_sync_defs, wire_uses;
304+
dict<Node*, pool<const RTLIL::Wire*>> node_comb_defs, node_sync_defs, node_uses;
305305
dict<const RTLIL::Wire*, bool> wire_def_inlinable;
306-
dict<const RTLIL::Wire*, dict<Node*, bool, hash_ptr_ops>> wire_use_inlinable;
306+
dict<const RTLIL::Wire*, dict<Node*, bool>> wire_use_inlinable;
307307
dict<RTLIL::SigBit, bool> bit_has_state;
308308

309309
~FlowGraph()
@@ -365,7 +365,7 @@ struct FlowGraph {
365365
return false;
366366
}
367367

368-
bool is_inlinable(const RTLIL::Wire *wire, const pool<Node*, hash_ptr_ops> &nodes) const
368+
bool is_inlinable(const RTLIL::Wire *wire, const pool<Node*> &nodes) const
369369
{
370370
// Can the wire be inlined, knowing that the given nodes are reachable?
371371
if (nodes.size() != 1)
@@ -3080,7 +3080,7 @@ struct CxxrtlWorker {
30803080
// without feedback arcs can generally be evaluated in a single pass, i.e. it always requires only
30813081
// a single delta cycle.
30823082
Scheduler<FlowGraph::Node> scheduler;
3083-
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_vertex_map;
3083+
dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*> node_vertex_map;
30843084
for (auto node : flow.nodes)
30853085
node_vertex_map[node] = scheduler.add(node);
30863086
for (auto node_comb_def : flow.node_comb_defs) {
@@ -3095,7 +3095,7 @@ struct CxxrtlWorker {
30953095

30963096
// Find out whether the order includes any feedback arcs.
30973097
std::vector<FlowGraph::Node*> node_order;
3098-
pool<FlowGraph::Node*, hash_ptr_ops> evaluated_nodes;
3098+
pool<FlowGraph::Node*> evaluated_nodes;
30993099
pool<const RTLIL::Wire*> feedback_wires;
31003100
for (auto vertex : scheduler.schedule()) {
31013101
auto node = vertex->data;
@@ -3139,7 +3139,7 @@ struct CxxrtlWorker {
31393139
}
31403140

31413141
// Discover nodes reachable from primary outputs (i.e. members) and collect reachable wire users.
3142-
pool<FlowGraph::Node*, hash_ptr_ops> worklist;
3142+
pool<FlowGraph::Node*> worklist;
31433143
for (auto node : flow.nodes) {
31443144
if (node->type == FlowGraph::Node::Type::CELL_EVAL && !is_internal_cell(node->cell->type))
31453145
worklist.insert(node); // node evaluates a submodule
@@ -3159,8 +3159,8 @@ struct CxxrtlWorker {
31593159
worklist.insert(node); // node drives public wires
31603160
}
31613161
}
3162-
dict<const RTLIL::Wire*, pool<FlowGraph::Node*, hash_ptr_ops>> live_wires;
3163-
pool<FlowGraph::Node*, hash_ptr_ops> live_nodes;
3162+
dict<const RTLIL::Wire*, pool<FlowGraph::Node*>> live_wires;
3163+
pool<FlowGraph::Node*> live_nodes;
31643164
while (!worklist.empty()) {
31653165
auto node = worklist.pop();
31663166
live_nodes.insert(node);
@@ -3290,15 +3290,15 @@ struct CxxrtlWorker {
32903290

32913291
// Discover nodes reachable from primary outputs (i.e. outlines) up until primary inputs (i.e. members)
32923292
// and collect reachable wire users.
3293-
pool<FlowGraph::Node*, hash_ptr_ops> worklist;
3293+
pool<FlowGraph::Node*> worklist;
32943294
for (auto node : flow.nodes) {
32953295
if (flow.node_comb_defs.count(node))
32963296
for (auto wire : flow.node_comb_defs[node])
32973297
if (debug_wire_types[wire].is_outline())
32983298
worklist.insert(node); // node drives outline
32993299
}
3300-
dict<const RTLIL::Wire*, pool<FlowGraph::Node*, hash_ptr_ops>> debug_live_wires;
3301-
pool<FlowGraph::Node*, hash_ptr_ops> debug_live_nodes;
3300+
dict<const RTLIL::Wire*, pool<FlowGraph::Node*>> debug_live_wires;
3301+
pool<FlowGraph::Node*> debug_live_nodes;
33023302
while (!worklist.empty()) {
33033303
auto node = worklist.pop();
33043304
debug_live_nodes.insert(node);
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
Hashing and associative data structures in Yosys
2+
------------------------------------------------
3+
4+
Container classes based on hashing
5+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6+
7+
Yosys uses ``dict<K, T>`` and ``pool<T>`` as main container classes.
8+
``dict<K, T>`` is essentially a replacement for ``std::unordered_map<K, T>``
9+
and ``pool<T>`` is a replacement for ``std::unordered_set<T>``.
10+
The main characteristics are:
11+
12+
* ``dict<K, T>`` and ``pool<T>`` are about 2x faster than the std containers
13+
(though this claim hasn't been verified for over 10 years)
14+
15+
* references to elements in a ``dict<K, T>`` or ``pool<T>`` are invalidated by
16+
insert and remove operations (similar to ``std::vector<T>`` on ``push_back()``).
17+
18+
* some iterators are invalidated by ``erase()``. specifically, iterators
19+
that have not passed the erased element yet are invalidated. (``erase()``
20+
itself returns a valid iterator to the next element.)
21+
22+
* no iterators are invalidated by ``insert()``. elements are inserted at
23+
``begin()``. i.e. only a new iterator that starts at ``begin()`` will see the
24+
inserted elements.
25+
26+
* the method ``.count(key, iterator)`` is like ``.count(key)`` but only
27+
considers elements that can be reached via the iterator.
28+
29+
* iterators can be compared. ``it1 < it2`` means that the position of ``it2``
30+
can be reached via ``it1`` but not vice versa.
31+
32+
* the method ``.sort()`` can be used to sort the elements in the container.
33+
the container stays sorted until elements are added or removed.
34+
35+
* ``dict<K, T>`` and ``pool<T>`` will have the same order of iteration across
36+
all compilers, standard libraries and architectures.
37+
38+
In addition to ``dict<K, T>`` and ``pool<T>`` there is also an ``idict<K>`` that
39+
creates a bijective map from ``K`` to the integers. For example:
40+
41+
::
42+
43+
idict<string, 42> si;
44+
log("%d\n", si("hello")); // will print 42
45+
log("%d\n", si("world")); // will print 43
46+
log("%d\n", si.at("world")); // will print 43
47+
log("%d\n", si.at("dummy")); // will throw exception
48+
log("%s\n", si[42].c_str()); // will print hello
49+
log("%s\n", si[43].c_str()); // will print world
50+
log("%s\n", si[44].c_str()); // will throw exception
51+
52+
It is not possible to remove elements from an idict.
53+
54+
Finally ``mfp<K>`` implements a merge-find set data structure (aka. disjoint-set
55+
or union-find) over the type ``K`` ("mfp" = merge-find-promote).
56+
57+
The hash function
58+
~~~~~~~~~~~~~~~~~
59+
60+
The hash function generally used in Yosys is the XOR version of DJB2:
61+
62+
::
63+
64+
state = ((state << 5) + state) ^ value
65+
66+
This is an old-school hash designed to hash ASCII characters. Yosys doesn't hash
67+
a lot of ASCII text, but it still happens to be a local optimum due to factors
68+
described later.
69+
70+
Hash function quality is multi-faceted and highly dependent on what is being
71+
hashed. Yosys isn't concerned by any cryptographic qualities, instead the goal
72+
is minimizing total hashing collision risk given the data patterns within Yosys.
73+
In general, a good hash function typically folds values into a state accumulator
74+
with a mathematical function that is fast to compute and has some beneficial
75+
properties. One of these is the avalanche property, which demands that a small
76+
change such as flipping a bit or incrementing by one in the input produces a
77+
large, unpredictable change in the output. Additionally, the bit independence
78+
criterion states that any pair of output bits should change independently when
79+
any single input bit is inverted. These properties are important for avoiding
80+
hash collision on data patterns like the hash of a sequence not colliding with
81+
its permutation, not losing from the state the information added by hashing
82+
preceding elements, etc.
83+
84+
DJB2 lacks these properties. Instead, since Yosys hashes large numbers of data
85+
structures composed of incrementing integer IDs, Yosys abuses the predictability
86+
of DJB2 to get fewer hash collisions, with the regular nature of the hashes
87+
surviving through the interaction with the "modulo prime" operations in the
88+
associative data structures. For example, some of the most common objects in Yosys are
89+
interned ``IdString``\ s of incrementing indices or ``SigBit``\ s with bit
90+
offsets into a wire (represented by its unique ``IdString`` name) as the typical
91+
case. This is what makes DJB2 a local optimum. Additionally, the ADD version of
92+
DJB2 (like above but with addition instead of XOR) is used to this end for some
93+
types, abandoning the general pattern of folding values into a state value.
94+
95+
Making a type hashable
96+
~~~~~~~~~~~~~~~~~~~~~~
97+
98+
Let's first take a look at the external interface on a simplified level.
99+
Generally, to get the hash for ``T obj``, you would call the utility function
100+
``run_hash<T>(const T& obj)``, corresponding to ``hash_top_ops<T>::hash(obj)``,
101+
the default implementation of which is ``hash_ops<T>::hash_into(Hasher(), obj)``.
102+
``Hasher`` is the class actually implementing the hash function, hiding its
103+
initialized internal state, and passing it out on ``hash_t yield()`` with
104+
perhaps some finalization steps.
105+
106+
``hash_ops<T>`` is the star of the show. By default it pulls the ``Hasher h``
107+
through a ``Hasher T::hash_into(Hasher h)`` method. That's the method you have to
108+
implement to make a record (class or struct) type easily hashable with Yosys
109+
hashlib associative data structures.
110+
111+
``hash_ops<T>`` is specialized for built-in types like ``int`` or ``bool`` and
112+
treats pointers the same as integers, so it doesn't dereference pointers. Since
113+
many RTLIL data structures like ``RTLIL::Wire`` carry their own unique index
114+
``Hasher::hash_t hashidx_;``, there are specializations for ``hash_ops<Wire*>``
115+
and others in ``kernel/hashlib.h`` that actually dereference the pointers and
116+
call ``hash_into`` on the instances pointed to.
117+
118+
``hash_ops<T>`` is also specialized for simple compound types like
119+
``std::pair<U, V>`` by calling ``hash_into`` in sequence on its members. For flexible
120+
size containers like ``std::vector<U>`` the size of the container is hashed
121+
first. That is also how implementing hashing for a custom record data type
122+
should be done - unless there is a strong reason to do otherwise, call ``h.eat(m)`` on
123+
the ``Hasher h`` you have received for each member in sequence and ``return
124+
h;``. If you do have a strong reason to do so, look at how
125+
``hash_top_ops<RTLIL::SigBit>`` is implemented in ``kernel/rtlil.h``.
126+
127+
Porting plugins from the legacy interface
128+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
129+
130+
Previously, the interface to implement hashing on custom types was just
131+
``unsigned int T::hash() const``. This meant hashes for members were computed
132+
independently and then ad-hoc combined with the hash function with some xorshift
133+
operations thrown in to mix bits together somewhat. A plugin can stay compatible
134+
with versions both before and after the break by implementing both interfaces
135+
based on the existence and value of ``YS_HASHING_VERSION``.
136+
137+
.. code-block:: cpp
138+
:caption: Example hash compatibility wrapper
139+
:name: hash_plugin_compat
140+
141+
#ifndef YS_HASHING_VERSION
142+
unsigned int T::hash() const {
143+
return mkhash(a, b);
144+
}
145+
#elif YS_HASHING_VERSION == 1
146+
Hasher T::hash_into(Hasher h) const {
147+
h.eat(a);
148+
h.eat(b);
149+
return h;
150+
}
151+
#else
152+
#error "Unsupported hashing interface"
153+
#endif
154+
155+
Feel free to contact Yosys maintainers with related issues.

docs/source/yosys_internals/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@ as reference to implement a similar system in any language.
3939
extending_yosys/index
4040
techmap
4141
verilog
42+
hashing

examples/cxx-api/scopeinfo_example.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ struct ScopeinfoExamplePass : public Pass {
9090

9191
// Shuffle wires so this example produces more interesting outputs
9292
std::sort(wires.begin(), wires.end(), [](Wire *a, Wire *b) {
93-
return mkhash_xorshift(a->name.hash() * 0x2c9277b5) < mkhash_xorshift(b->name.hash() * 0x2c9277b5);
93+
return mkhash_xorshift(run_hash(a->name) * 0x2c9277b5) < mkhash_xorshift(run_hash(b->name) * 0x2c9277b5);
9494
});
9595

9696
ModuleHdlnameIndex index(module);

flake.nix

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,15 @@
1414
};
1515
# TODO: don't override src when ./abc is empty
1616
# which happens when the command used is `nix build` and not `nix build ?submodules=1`
17-
abc-verifier = pkgs.abc-verifier.overrideAttrs(x: y: {src = ./abc;});
17+
abc-verifier = pkgs.abc-verifier;
1818
yosys = pkgs.clangStdenv.mkDerivation {
1919
name = "yosys";
2020
src = ./. ;
21-
buildInputs = with pkgs; [ clang bison flex libffi tcl readline python3 llvmPackages.libcxxClang zlib git pkg-configUpstream llvmPackages.bintools ];
21+
buildInputs = with pkgs; [ clang bison flex libffi tcl readline python3 zlib git pkg-configUpstream llvmPackages.bintools ];
2222
checkInputs = with pkgs; [ gtest ];
2323
propagatedBuildInputs = [ abc-verifier ];
2424
preConfigure = "make config-clang";
25-
checkTarget = "test";
25+
checkTarget = "unit-test";
2626
installPhase = ''
2727
make install PREFIX=$out ABCEXTERNAL=yosys-abc
2828
ln -s ${abc-verifier}/bin/abc $out/bin/yosys-abc
@@ -41,7 +41,7 @@
4141
packages.default = yosys;
4242
defaultPackage = yosys;
4343
devShell = pkgs.mkShell {
44-
buildInputs = with pkgs; [ clang llvmPackages.bintools bison flex libffi tcl readline python3 llvmPackages.libcxxClang zlib git gtest abc-verifier ];
44+
buildInputs = with pkgs; [ clang llvmPackages.bintools gcc bison flex libffi tcl readline python3 zlib git gtest abc-verifier verilog boost python3Packages.boost ];
4545
};
4646
}
4747
);

frontends/ast/ast.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ namespace AST
177177
{
178178
// for dict<> and pool<>
179179
unsigned int hashidx_;
180-
unsigned int hash() const { return hashidx_; }
180+
Hasher hash_into(Hasher h) const { h.eat(hashidx_); return h; }
181181

182182
// this nodes type
183183
AstNodeType type;

frontends/verific/verific.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ RTLIL::SigSpec VerificImporter::operatorInportCase(Instance *inst, const char *p
619619
}
620620
}
621621

622-
RTLIL::SigSpec VerificImporter::operatorOutput(Instance *inst, const pool<Net*, hash_ptr_ops> *any_all_nets)
622+
RTLIL::SigSpec VerificImporter::operatorOutput(Instance *inst, const pool<Net*> *any_all_nets)
623623
{
624624
RTLIL::SigSpec sig;
625625
RTLIL::Wire *dummy_wire = NULL;
@@ -1576,9 +1576,9 @@ void VerificImporter::import_netlist(RTLIL::Design *design, Netlist *nl, std::ma
15761576

15771577
module->fixup_ports();
15781578

1579-
dict<Net*, char, hash_ptr_ops> init_nets;
1580-
pool<Net*, hash_ptr_ops> anyconst_nets, anyseq_nets;
1581-
pool<Net*, hash_ptr_ops> allconst_nets, allseq_nets;
1579+
dict<Net*, char> init_nets;
1580+
pool<Net*> anyconst_nets, anyseq_nets;
1581+
pool<Net*> allconst_nets, allseq_nets;
15821582
any_all_nets.clear();
15831583

15841584
FOREACH_NET_OF_NETLIST(nl, mi, net)
@@ -1841,10 +1841,10 @@ void VerificImporter::import_netlist(RTLIL::Design *design, Netlist *nl, std::ma
18411841
module->connect(net_map_at(net), module->Anyseq(new_verific_id(net)));
18421842

18431843
#ifdef VERIFIC_SYSTEMVERILOG_SUPPORT
1844-
pool<Instance*, hash_ptr_ops> sva_asserts;
1845-
pool<Instance*, hash_ptr_ops> sva_assumes;
1846-
pool<Instance*, hash_ptr_ops> sva_covers;
1847-
pool<Instance*, hash_ptr_ops> sva_triggers;
1844+
pool<Instance*> sva_asserts;
1845+
pool<Instance*> sva_assumes;
1846+
pool<Instance*> sva_covers;
1847+
pool<Instance*> sva_triggers;
18481848
#endif
18491849

18501850
pool<RTLIL::Cell*> past_ffs;

frontends/verific/verific.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ struct VerificImporter
7171

7272
std::map<Verific::Net*, RTLIL::SigBit> net_map;
7373
std::map<Verific::Net*, Verific::Net*> sva_posedge_map;
74-
pool<Verific::Net*, hash_ptr_ops> any_all_nets;
74+
pool<Verific::Net*> any_all_nets;
7575

7676
bool mode_gates, mode_keep, mode_nosva, mode_names, mode_verific;
7777
bool mode_autocover, mode_fullinit;
@@ -89,7 +89,7 @@ struct VerificImporter
8989
RTLIL::SigSpec operatorInput2(Verific::Instance *inst);
9090
RTLIL::SigSpec operatorInport(Verific::Instance *inst, const char *portname);
9191
RTLIL::SigSpec operatorInportCase(Verific::Instance *inst, const char *portname);
92-
RTLIL::SigSpec operatorOutput(Verific::Instance *inst, const pool<Verific::Net*, hash_ptr_ops> *any_all_nets = nullptr);
92+
RTLIL::SigSpec operatorOutput(Verific::Instance *inst, const pool<Verific::Net*> *any_all_nets = nullptr);
9393

9494
bool import_netlist_instance_gates(Verific::Instance *inst, RTLIL::IdString inst_name);
9595
bool import_netlist_instance_cells(Verific::Instance *inst, RTLIL::IdString inst_name);

frontends/verific/verificsva.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ struct VerificSvaImporter
10511051
msg.c_str(), inst->View()->Owner()->Name(), inst->Name()), inst->Linefile());
10521052
}
10531053

1054-
dict<Net*, bool, hash_ptr_ops> check_expression_cache;
1054+
dict<Net*, bool> check_expression_cache;
10551055

10561056
bool check_expression(Net *net, bool raise_error = false)
10571057
{

0 commit comments

Comments
 (0)