@@ -59,4 +59,72 @@ class Num3072
59
59
}
60
60
};
61
61
62
+ /** A class representing MuHash sets
63
+ *
64
+ * MuHash is a hashing algorithm that supports adding set elements in any
65
+ * order but also deleting in any order. As a result, it can maintain a
66
+ * running sum for a set of data as a whole, and add/remove when data
67
+ * is added to or removed from it. A downside of MuHash is that computing
68
+ * an inverse is relatively expensive. This is solved by representing
69
+ * the running value as a fraction, and multiplying added elements into
70
+ * the numerator and removed elements into the denominator. Only when the
71
+ * final hash is desired, a single modular inverse and multiplication is
72
+ * needed to combine the two. The combination is also run on serialization
73
+ * to allow for space-efficient storage on disk.
74
+ *
75
+ * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
76
+ * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
77
+ * all of this is perfectly parallelizable: each thread can process an
78
+ * arbitrary subset of the update operations, allowing them to be
79
+ * efficiently combined later.
80
+ *
81
+ * MuHash does not support checking if an element is already part of the
82
+ * set. Consequently, this class cannot enforce that the data it
83
+ * represents is actually a set: there is no efficient way to do so.
84
+ * It is possible to add elements more than once and also to remove
85
+ * elements that have not been added before. However, this implementation
86
+ * is intended to represent a set of elements.
87
+ *
88
+ * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
89
+ * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
90
+ */
91
+ class MuHash3072
92
+ {
93
+ private:
94
+ static constexpr size_t BYTE_SIZE = 384 ; // 384 bytes == 3072 bits; presumably the byte length of one Num3072 value -- confirm
95
+
96
+ Num3072 m_numerator; // per the class comment: added elements are multiplied into the numerator
97
+ Num3072 m_denominator; // per the class comment: removed elements are multiplied into the denominator
98
+
99
+ Num3072 ToNum3072 (Span<const unsigned char > in); // NOTE(review): presumably maps variable-sized input to a Num3072 element -- confirm in the implementation
100
+
101
+ public:
102
+ /* Construct a MuHash3072 representing the empty set. */
103
+ MuHash3072 () noexcept {};
104
+
105
+ /* Construct a singleton set whose only element is the variable-sized data `in`. */
106
+ explicit MuHash3072 (Span<const unsigned char > in) noexcept ;
107
+
108
+ /* Insert a single piece of data into the set. Returns a MuHash3072& (presumably *this, to allow chaining -- confirm). */
109
+ MuHash3072& Insert (Span<const unsigned char > in) noexcept ;
110
+
111
+ /* Remove a single piece of data from the set. Returns a MuHash3072& (presumably *this, to allow chaining -- confirm). */
112
+ MuHash3072& Remove (Span<const unsigned char > in) noexcept ;
113
+
114
+ /* Multiply by `mul` (resulting in a hash for the union of the sets). */
115
+ MuHash3072& operator *=(const MuHash3072& mul) noexcept ;
116
+
117
+ /* Divide by `div` (resulting in a hash for the difference of the sets). */
118
+ MuHash3072& operator /=(const MuHash3072& div) noexcept ;
119
+
120
+ /* Finalize into a 32-byte hash stored in `out`. Does not change this object's value.
 * NOTE(review): the method is declared non-const, so the internal
 * numerator/denominator representation may still be rewritten -- confirm. */
121
+ void Finalize (uint256& out) noexcept ;
122
+
123
+ SERIALIZE_METHODS (MuHash3072, obj) // reads/writes m_numerator, then m_denominator
124
+ {
125
+ READWRITE (obj.m_numerator );
126
+ READWRITE (obj.m_denominator ); // NOTE(review): both fields are stored as-is, while the class comment says they are combined on serialization -- confirm which is intended
127
+ }
128
+ };
129
+
62
130
#endif // BITCOIN_CRYPTO_MUHASH_H
0 commit comments