Skip to content

Commit adc708c

Browse files
fjahrsipa
andcommitted
crypto: Add MuHash3072 implementation
Co-authored-by: Pieter Wuille <[email protected]>
1 parent 0b4d290 commit adc708c

File tree

3 files changed

+131
-0
lines changed

3 files changed

+131
-0
lines changed

src/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,8 @@ crypto_libbitcoin_crypto_base_a_SOURCES = \
406406
crypto/hmac_sha512.h \
407407
crypto/poly1305.h \
408408
crypto/poly1305.cpp \
409+
crypto/muhash.h \
410+
crypto/muhash.cpp \
409411
crypto/ripemd160.cpp \
410412
crypto/ripemd160.h \
411413
crypto/sha1.cpp \

src/crypto/muhash.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,64 @@ void Num3072::Divide(const Num3072& a)
275275
this->Multiply(inv);
276276
if (this->IsOverflow()) this->FullReduce();
277277
}
278+
279+
/**
 * Derive a Num3072 from an arbitrary byte string.
 *
 * The input is first hashed to a uint256 via CHashWriter::GetSHA256(). That
 * digest seeds a ChaCha20 instance whose first BYTE_SIZE keystream bytes are
 * then decoded, limb by limb, as little-endian integers.
 *
 * @param in  The bytes of the element to map.
 * @return    The Num3072 built from the expanded keystream.
 */
Num3072 MuHash3072::ToNum3072(Span<const unsigned char> in) {
    // Compress the variable-length input down to a fixed-size seed.
    const uint256 seed = (CHashWriter(SER_DISK, 0) << in).GetSHA256();

    // Stretch the seed to the full width of the number.
    unsigned char stream[BYTE_SIZE];
    ChaCha20(seed.data(), seed.size()).Keystream(stream, BYTE_SIZE);

    // Decode the stream into limbs; the limb width is fixed at compile time.
    Num3072 result{};
    for (int idx = 0; idx < LIMBS; ++idx) {
        switch (sizeof(limb_t)) {
        case 4:
            result.limbs[idx] = ReadLE32(stream + 4 * idx);
            break;
        case 8:
            result.limbs[idx] = ReadLE64(stream + 8 * idx);
            break;
        default:
            // Unsupported limb width: leave the value-initialized limb as is.
            break;
        }
    }
    return result;
}
293+
294+
/**
 * Build a MuHash3072 holding exactly one element.
 *
 * The numerator is assigned the Num3072 derived from `in`; the denominator
 * keeps whatever value its default construction gave it.
 */
MuHash3072::MuHash3072(Span<const unsigned char> in) noexcept
{
    this->m_numerator = this->ToNum3072(in);
}
298+
299+
/**
 * Collapse the running fraction and hash it down to 32 bytes.
 *
 * The numerator is divided by the denominator and the denominator is reset to
 * one, so the object keeps representing the same value afterwards. The
 * resulting numerator is written out as little-endian limbs and hashed with
 * CHashWriter::GetSHA256().
 *
 * @param[out] out  Receives the final hash.
 */
void MuHash3072::Finalize(uint256& out) noexcept
{
    m_numerator.Divide(m_denominator);
    m_denominator.SetToOne(); // Needed to keep the MuHash object valid

    // Sized by the class constant rather than a repeated magic number, so this
    // buffer cannot drift from the one used in ToNum3072().
    unsigned char data[BYTE_SIZE];
    for (int i = 0; i < LIMBS; ++i) {
        if (sizeof(limb_t) == 4) {
            WriteLE32(data + i * 4, m_numerator.limbs[i]);
        } else if (sizeof(limb_t) == 8) {
            WriteLE64(data + i * 8, m_numerator.limbs[i]);
        }
    }

    out = (CHashWriter(SER_DISK, 0) << data).GetSHA256();
}
315+
316+
/**
 * Multiply this hash by another one, component-wise on the fraction:
 * (a/b) * (c/d) = (a*c) / (b*d).
 *
 * @param mul  The right-hand operand.
 * @return     A reference to this object.
 */
MuHash3072& MuHash3072::operator*=(const MuHash3072& mul) noexcept
{
    this->m_numerator.Multiply(mul.m_numerator);
    this->m_denominator.Multiply(mul.m_denominator);
    return *this;
}
322+
323+
/**
 * Divide this hash by another one by cross-multiplying the fractions:
 * (a/b) / (c/d) = (a*d) / (b*c). No inverse is computed here.
 *
 * @param div  The divisor.
 * @return     A reference to this object.
 */
MuHash3072& MuHash3072::operator/=(const MuHash3072& div) noexcept
{
    this->m_numerator.Multiply(div.m_denominator);
    this->m_denominator.Multiply(div.m_numerator);
    return *this;
}
329+
330+
/**
 * Add one element: its Num3072 image is multiplied into the numerator.
 *
 * @param in  The bytes of the element to add.
 * @return    A reference to this object, so calls can be chained.
 */
MuHash3072& MuHash3072::Insert(Span<const unsigned char> in) noexcept {
    const Num3072 element = ToNum3072(in);
    m_numerator.Multiply(element);
    return *this;
}
334+
335+
/**
 * Remove one element: its Num3072 image is multiplied into the denominator.
 *
 * Dividing the numerator directly would compute a modular inverse on every
 * removal. Accumulating removed elements in the denominator represents the
 * same fraction and leaves the single division to Finalize().
 *
 * @param in  The bytes of the element to remove.
 * @return    A reference to this object, so calls can be chained.
 */
MuHash3072& MuHash3072::Remove(Span<const unsigned char> in) noexcept {
    m_denominator.Multiply(ToNum3072(in));
    return *this;
}

src/crypto/muhash.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,72 @@ class Num3072
5959
}
6060
};
6161

62+
/** A class representing MuHash sets
63+
*
64+
* MuHash is a hashing algorithm that supports adding set elements in any
65+
* order but also deleting in any order. As a result, it can maintain a
66+
* running sum for a set of data as a whole, and add/remove when data
67+
* is added to or removed from it. A downside of MuHash is that computing
68+
* an inverse is relatively expensive. This is solved by representing
69+
* the running value as a fraction, and multiplying added elements into
70+
* the numerator and removed elements into the denominator. Only when the
71+
* final hash is desired, a single modular inverse and multiplication is
72+
* needed to combine the two. The combination is also run on serialization
73+
* to allow for space-efficient storage on disk.
74+
*
75+
* As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
76+
* in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
77+
* all of this is perfectly parallelizable: each thread can process an
78+
* arbitrary subset of the update operations, allowing them to be
79+
* efficiently combined later.
80+
*
81+
* MuHash does not support checking if an element is already part of the
82+
* set. That is why this class does not enforce the use of a set as the
83+
* data it represents because there is no efficient way to do so.
84+
* It is possible to add elements more than once and also to remove
85+
* elements that have not been added before. However, this implementation
86+
* is intended to represent a set of elements.
87+
*
88+
* See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
89+
* https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
90+
*/
91+
class MuHash3072
92+
{
93+
private:
94+
static constexpr size_t BYTE_SIZE = 384;
95+
96+
Num3072 m_numerator;
97+
Num3072 m_denominator;
98+
99+
Num3072 ToNum3072(Span<const unsigned char> in);
100+
101+
public:
102+
/* The empty set. */
103+
MuHash3072() noexcept {};
104+
105+
/* A singleton with variable sized data in it. */
106+
explicit MuHash3072(Span<const unsigned char> in) noexcept;
107+
108+
/* Insert a single piece of data into the set. */
109+
MuHash3072& Insert(Span<const unsigned char> in) noexcept;
110+
111+
/* Remove a single piece of data from the set. */
112+
MuHash3072& Remove(Span<const unsigned char> in) noexcept;
113+
114+
/* Multiply (resulting in a hash for the union of the sets) */
115+
MuHash3072& operator*=(const MuHash3072& mul) noexcept;
116+
117+
/* Divide (resulting in a hash for the difference of the sets) */
118+
MuHash3072& operator/=(const MuHash3072& div) noexcept;
119+
120+
/* Finalize into a 32-byte hash. Does not change this object's value. */
121+
void Finalize(uint256& out) noexcept;
122+
123+
SERIALIZE_METHODS(MuHash3072, obj)
124+
{
125+
READWRITE(obj.m_numerator);
126+
READWRITE(obj.m_denominator);
127+
}
128+
};
129+
62130
#endif // BITCOIN_CRYPTO_MUHASH_H

0 commit comments

Comments
 (0)