Skip to content

Commit 48c1083

Browse files
committed
Merge #19105: Add Muhash3072 implementation in Python
36ec980 test: Add chacha20 test vectors in muhash (Fabian Jahr) 0e2b400 test: Add basic Python/C++ Muhash implementation parity unit test (Fabian Jahr) b85543c test: Add Python MuHash3072 implementation to test framework (Pieter Wuille) ab30cec test: Move modinv to util and add unit test (Fabian Jahr) Pull request description: This is the second in a [series of pull requests](bitcoin/bitcoin#18000) to implement an Index for UTXO set statistics. This pull request adds a Python implementation of Muhash3072, a homomorphic hashing algorithm to be used for hashing the UTXO set. The Python implementation can then be used to compare behavior with the C++ version. ACKs for top commit: jnewbery: utACK 36ec980 laanwj: Code review ACK 36ec980 Tree-SHA512: a3519c6e11031174f1ae71ecd8bcc7f3be42d7fc9c84c77f2fbea7cfc5ad54fcbe10b55116ad8d9a52ac5d675640eefed3bf260c58a02f2bf3bc0d8ec208baa6
2 parents bab4cce + 36ec980 commit 48c1083

File tree

4 files changed

+144
-16
lines changed

4 files changed

+144
-16
lines changed

test/functional/test_framework/key.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,7 @@
88
anything but tests."""
99
import random
1010

11-
def modinv(a, n):
    """Compute the modular inverse of a modulo n

    See https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Modular_integers.

    For positive n the result lies in range(n). Returns None when a has no
    inverse modulo n (that is, when gcd(a, n) is greater than 1).
    """
    # Reduce a into range(n) first. The remainder sequence below assumes
    # non-negative operands; with a negative a it can terminate on a gcd of
    # -1 and return a value that is not the inverse.
    if n > 0:
        a %= n
    t1, t2 = 0, 1
    r1, r2 = n, a
    while r2 != 0:
        q = r1 // r2
        t1, t2 = t2, t1 - q * t2
        r1, r2 = r2, r1 - q * r2
    # r1 now holds gcd(a, n); an inverse exists only when it is 1.
    if r1 > 1:
        return None
    if t1 < 0:
        t1 += n
    return t1
11+
from .util import modinv
2712

2813
def jacobi_symbol(n, k):
2914
"""Compute the Jacobi symbol of n modulo k
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# Copyright (c) 2020 Pieter Wuille
2+
# Distributed under the MIT software license, see the accompanying
3+
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
"""Native Python MuHash3072 implementation."""
5+
6+
import hashlib
7+
import unittest
8+
9+
from .util import modinv
10+
11+
def rot32(v, bits):
    """Return the 32-bit value v rotated left by bits positions."""
    # Reducing the shift modulo 32 keeps (32 - shift) non-negative, so the
    # right shift below can never raise on a negative shift count.
    shift = bits % 32
    wrapped_high = (v << shift) & 0xffffffff
    wrapped_low = v >> (32 - shift)
    return wrapped_high | wrapped_low
15+
16+
def chacha20_doubleround(s):
    """Run one ChaCha20 double round in place on the 16-word state list s.

    See https://cr.yp.to/chacha/chacha-20080128.pdf and https://tools.ietf.org/html/rfc8439
    """
    # Index quadruples (a, b, c, d) of the eight quarter rounds, in the
    # order they are applied.
    quarter_rounds = (
        (0, 4, 8, 12),
        (1, 5, 9, 13),
        (2, 6, 10, 14),
        (3, 7, 11, 15),
        (0, 5, 10, 15),
        (1, 6, 11, 12),
        (2, 7, 8, 13),
        (3, 4, 9, 14),
    )
    # A quarter round is the same add/xor/rotate step done twice, first with
    # rotations (16, 12) and then with (8, 7).
    rotation_pairs = ((16, 12), (8, 7))

    for a, b, c, d in quarter_rounds:
        for rot_d, rot_b in rotation_pairs:
            s[a] = (s[a] + s[b]) & 0xffffffff
            s[d] = rot32(s[d] ^ s[a], rot_d)
            s[c] = (s[c] + s[d]) & 0xffffffff
            s[b] = rot32(s[b] ^ s[c], rot_b)
39+
40+
def chacha20_32_to_384(key32):
    """Specialized ChaCha20 implementation with 32-byte key, 0 IV, 384-byte output.

    key32 is a sequence of exactly 32 byte values (bytes, bytearray, or a
    list of ints in range(256)). The result is 384 bytes: the output blocks
    for counter values 0 through 5, concatenated in order.

    Raises ValueError if key32 does not contain exactly 32 bytes.
    """
    if len(key32) != 32:
        # Without this check the slicing below would silently zero-pad a
        # short key, or ignore the tail of a long one, and still return data.
        raise ValueError("ChaCha20 key must be 32 bytes, got %d" % len(key32))

    # See RFC 8439 section 2.3 for chacha20 parameters
    CONSTANTS = [0x61707865, 0x3320646e, 0x79622d32, 0x6b206574]

    # The key enters the state as eight little-endian 32-bit words.
    key_words = [int.from_bytes(key32[4 * i:4 * (i + 1)], 'little') for i in range(8)]

    # State words 12..15 start at zero; word 12 is overwritten with the
    # block counter on every iteration below.
    INITIALIZATION_VECTOR = [0] * 4
    init = CONSTANTS + key_words + INITIALIZATION_VECTOR
    out = bytearray()
    for counter in range(6):
        init[12] = counter
        s = init.copy()
        for _ in range(10):
            chacha20_doubleround(s)
        # Add the input state back into the permuted state, word by word,
        # and serialize each sum as 4 little-endian bytes.
        for i in range(16):
            out.extend(((s[i] + init[i]) & 0xffffffff).to_bytes(4, 'little'))
    return bytes(out)
60+
61+
def data_to_num3072(data):
    """Expand the 32-byte array data into a 3072-bit integer.

    The 384 bytes produced by chacha20_32_to_384 (6 ChaCha20 blocks) are
    interpreted as one little-endian number.
    """
    return int.from_bytes(chacha20_32_to_384(data), 'little')
65+
66+
class MuHash3072:
    """Running MuHash3072 computation over a set of elements.

    Inserted elements are multiplied into a numerator and removed elements
    into a denominator, both modulo MODULUS; digest() combines the two.

    See https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html
    """

    MODULUS = 2**3072 - 1103717

    def __init__(self):
        """Start from the state of the empty set."""
        self.numerator = 1
        self.denominator = 1

    def insert(self, data):
        """Add the byte array data to the set."""
        element = data_to_num3072(data)
        self.numerator = self.numerator * element % self.MODULUS

    def remove(self, data):
        """Take the byte array data out of the set."""
        element = data_to_num3072(data)
        self.denominator = self.denominator * element % self.MODULUS

    def digest(self):
        """Return the 32-byte final hash; the object itself is left unchanged."""
        # Divide numerator by denominator in the field: multiply by the
        # denominator's modular inverse.
        inverse = modinv(self.denominator, self.MODULUS)
        combined = self.numerator * inverse % self.MODULUS
        # The 3072-bit result is serialized little-endian before hashing.
        return hashlib.sha256(combined.to_bytes(384, 'little')).digest()
92+
93+
class TestFrameworkMuhash(unittest.TestCase):
    """Unit tests for the Python MuHash3072 and its ChaCha20 helper."""

    def test_muhash(self):
        """Insert two elements, remove a third, and compare the digest."""
        acc = MuHash3072()
        for element in ([0]*32, [1] + [0]*31):
            acc.insert(element)
        acc.remove([2] + [0]*31)
        # This mirrors the result in the C++ MuHash3072 unit test
        expected = "a44e16d5e34d259b349af21c06e65d653915d2e208e4e03f389af750dc0bfdc3"
        self.assertEqual(acc.digest()[::-1].hex(), expected)

    def test_chacha20(self):
        """Compare the first 64 output bytes against published vectors."""
        # Test vectors from https://tools.ietf.org/html/draft-agl-tls-chacha20poly1305-04#section-7
        # Since the nonce is hardcoded to 0 in our function we only use those vectors.
        vectors = [
            ([0]*32, "76b8e0ada0f13d90405d6ae55386bd28bdd219b8a08ded1aa836efcc8b770dc7da41597c5157488d7724e03fb8d84a376a43b8f41518a11cc387b669b2ee6586"),
            ([0]*31 + [1], "4540f05a9f1fb296d7736e7b208e3c96eb4fe1834688d2604f450952ed432d41bbe2a0b6ea7566d2a5d1e7e20d42af2c53d792b1c43fea817e9ad275ae546963"),
        ]
        for key, result in vectors:
            self.assertEqual(chacha20_32_to_384(key)[:64].hex(), result)

test/functional/test_framework/util.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import random
1616
import re
1717
import time
18+
import unittest
1819

1920
from . import coverage
2021
from .authproxy import AuthServiceProxy, JSONRPCException
@@ -625,3 +626,33 @@ def find_vout_for_address(node, txid, addr):
625626
if any([addr == a for a in tx["vout"][i]["scriptPubKey"]["addresses"]]):
626627
return i
627628
raise RuntimeError("Vout not found for address: txid=%s, addr=%s" % (txid, addr))
629+
630+
def modinv(a, n):
    """Compute the modular inverse of a modulo n using the extended Euclidean
    algorithm. See https://en.wikipedia.org/wiki/Extended_Euclidean_algorithm#Modular_integers.

    For positive n the result lies in range(n). Returns None when a has no
    inverse modulo n (that is, when gcd(a, n) is greater than 1).
    """
    # TODO: Change to pow(a, -1, n) available in Python 3.8
    # Reduce a into range(n) first. The remainder sequence below assumes
    # non-negative operands; with a negative a it can terminate on a gcd of
    # -1 and return a value that is not the inverse.
    if n > 0:
        a %= n
    t1, t2 = 0, 1
    r1, r2 = n, a
    while r2 != 0:
        q = r1 // r2
        t1, t2 = t2, t1 - q * t2
        r1, r2 = r2, r1 - q * r2
    # r1 now holds gcd(a, n); an inverse exists only when it is 1.
    if r1 > 1:
        return None
    if t1 < 0:
        t1 += n
    return t1
646+
647+
class TestFrameworkUtil(unittest.TestCase):
    """Unit tests for helpers defined in this module."""

    def test_modinv(self):
        """Check modinv against modular exponentiation for (a, n) pairs."""
        cases = (
            (7, 11),
            (11, 29),
            (90, 13),
            (1891, 3797),
            (6003722857, 77695236973),
        )

        for value, modulus in cases:
            # pow(a, n-2, n) is the inverse of a only when n is prime
            # (Fermat's little theorem), so each modulus above is assumed
            # to be prime.
            expected = pow(value, modulus - 2, modulus)
            self.assertEqual(modinv(value, modulus), expected)

test/functional/test_runner.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@
6969
TEST_FRAMEWORK_MODULES = [
7070
"address",
7171
"blocktools",
72+
"muhash",
7273
"script",
74+
"util",
7375
]
7476

7577
EXTENDED_SCRIPTS = [

0 commit comments

Comments
 (0)