Skip to content

Commit 4a99835

Browse files
authored
Merge pull request #37 from alexander255/unhexlify
Rip out the hex encoding of the binary multiaddr in favour of “really” just storing binary
2 parents 0f19202 + 5e5a8a6 commit 4a99835

File tree

7 files changed: 92 additions (+92) and 152 deletions (-152).

multiaddr/codec.py

Lines changed: 30 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
import base58
22
import base64
3-
import binascii
43
import os
54

65
import idna
76
from netaddr import IPAddress
87
import six
8+
import struct
9+
import varint
910

10-
from .protocols import code_to_varint
1111
from .protocols import P_DNS
1212
from .protocols import P_DNS4
1313
from .protocols import P_DNS6
@@ -57,7 +57,7 @@ def string_to_bytes(string):
5757
while sp:
5858
element = sp.pop(0)
5959
proto = protocol_with_name(element)
60-
bs.append(code_to_varint(proto.code))
60+
bs.append(varint.encode(proto.code))
6161
if proto.size == 0:
6262
continue
6363
if len(sp) < 1:
@@ -71,7 +71,6 @@ def string_to_bytes(string):
7171

7272
def bytes_to_string(buf):
7373
st = [''] # start with empty string so we get a leading slash on join()
74-
buf = binascii.unhexlify(buf)
7574
while buf:
7675
maddr_component = ""
7776
code, num_bytes_read = read_varint_code(buf)
@@ -80,7 +79,7 @@ def bytes_to_string(buf):
8079
maddr_component += proto.name
8180
size = size_for_addr(proto, buf)
8281
if size > 0:
83-
addr = address_bytes_to_string(proto, binascii.hexlify(buf[:size]))
82+
addr = address_bytes_to_string(proto, buf[:size])
8483
if not (proto.path and addr[0] == '/'):
8584
maddr_component += '/'
8685
maddr_component += addr
@@ -89,47 +88,27 @@ def bytes_to_string(buf):
8988
return '/'.join(st)
9089

9190

92-
int_to_hex = None
93-
encode_big_endian_16 = None
94-
95-
9691
def address_string_to_bytes(proto, addr_string):
97-
global int_to_hex
98-
if int_to_hex is None:
99-
from .util import int_to_hex
100-
101-
global encode_big_endian_16
102-
if encode_big_endian_16 is None:
103-
from .util import encode_big_endian_16
104-
10592
if proto.code == P_IP4: # ipv4
10693
try:
107-
ip = IPAddress(addr_string)
108-
if ip.version != 4:
109-
raise ValueError("failed to parse ip4 addr: %s" % addr_string)
110-
return int_to_hex(int(ip), 8)
94+
return IPAddress(addr_string, version=4).packed
11195
except Exception:
11296
raise ValueError("failed to parse ip4 addr: %s" % addr_string)
11397
elif proto.code == P_IP6: # ipv6
11498
try:
115-
ip = IPAddress(addr_string)
116-
if ip.version != 6:
117-
raise ValueError("failed to parse ip6 addr: %s" % addr_string)
118-
return int_to_hex(int(ip), 32)
99+
return IPAddress(addr_string, version=6).packed
119100
except Exception:
120101
raise ValueError("failed to parse ip6 addr: %s" % addr_string)
121102
# tcp udp dccp sctp
122103
elif proto.code in [P_TCP, P_UDP, P_DCCP, P_SCTP]:
123104
try:
124-
ip = int(addr_string)
105+
return struct.pack('>H', int(addr_string, 10))
125106
except ValueError as ex:
126107
raise ValueError("failed to parse %s addr: %s"
127108
% (proto.name, str(ex)))
128-
129-
if ip >= 65536:
109+
except struct.error:
130110
raise ValueError("failed to parse %s addr: %s" %
131111
(proto.name, "greater than 65536"))
132-
return binascii.hexlify(encode_big_endian_16(ip))
133112
elif proto.code == P_ONION:
134113
addr = addr_string.split(":")
135114
if len(addr) != 2:
@@ -143,8 +122,7 @@ def address_string_to_bytes(proto, addr_string):
143122
"failed to parse %s addr: %s not a Tor onion address."
144123
% (proto.name, addr_string))
145124
try:
146-
onion_host_bytes = binascii.hexlify(
147-
base64.b32decode(addr[0].upper()))
125+
onion_host_bytes = base64.b32decode(addr[0].upper())
148126
except Exception as ex:
149127
raise ValueError(
150128
"failed to decode base32 %s addr: %s %s"
@@ -163,8 +141,7 @@ def address_string_to_bytes(proto, addr_string):
163141
raise ValueError("failed to parse %s addr: %s"
164142
% (proto.name, "port less than 1"))
165143

166-
return b''.join([onion_host_bytes,
167-
binascii.hexlify(encode_big_endian_16(port))])
144+
return b''.join([onion_host_bytes, struct.pack('>H', port)])
168145
elif proto.code == P_P2P: # ipfs
169146
# the address is a varint prefixed multihash string representation
170147
try:
@@ -174,74 +151,69 @@ def address_string_to_bytes(proto, addr_string):
174151
except Exception as ex:
175152
raise ValueError("failed to parse p2p addr: %s %s"
176153
% (addr_string, str(ex)))
177-
size = code_to_varint(len(mm))
178-
mm = binascii.hexlify(mm)
179-
if len(mm) < 10:
154+
size = varint.encode(len(mm))
155+
if len(mm) < 5:
180156
# TODO - port go-multihash so we can do this correctly
181157
raise ValueError("invalid P2P multihash: %s" % mm)
182158
return b''.join([size, mm])
183159
elif proto.code == P_UNIX:
184160
addr_string_bytes = fsencode(addr_string)
185-
size = code_to_varint(len(addr_string_bytes))
186-
return b''.join([size, binascii.hexlify(addr_string_bytes)])
161+
size = varint.encode(len(addr_string_bytes))
162+
return b''.join([size, addr_string_bytes])
187163
elif proto.code in (P_DNS, P_DNS4, P_DNS6):
188164
addr_string_bytes = idna.encode(addr_string, uts46=True)
189-
size = code_to_varint(len(addr_string_bytes))
190-
return b''.join([size, binascii.hexlify(addr_string_bytes)])
165+
size = varint.encode(len(addr_string_bytes))
166+
return b''.join([size, addr_string_bytes])
191167
else:
192168
raise ValueError("failed to parse %s addr: unknown" % proto.name)
193169

194170

195-
decode_big_endian_16 = None
171+
packed_net_bytes_to_int = None
196172

197173

198174
def address_bytes_to_string(proto, buf):
199-
global decode_big_endian_16
200-
if decode_big_endian_16 is None:
201-
from .util import decode_big_endian_16
175+
global packed_net_bytes_to_int
176+
if packed_net_bytes_to_int is None:
177+
from .util import packed_net_bytes_to_int
178+
202179
if proto.code == P_IP4:
203-
return str(IPAddress(int(buf, 16), 4).ipv4())
180+
return six.text_type(IPAddress(packed_net_bytes_to_int(buf), 4))
204181
elif proto.code == P_IP6:
205-
return str(IPAddress(int(buf, 16), 6).ipv6())
182+
return six.text_type(IPAddress(packed_net_bytes_to_int(buf), 6))
206183
elif proto.code in [P_TCP, P_UDP, P_DCCP, P_SCTP]:
207-
return str(decode_big_endian_16(binascii.unhexlify(buf)))
184+
if len(buf) != 2:
185+
raise ValueError("Not a uint16")
186+
return six.text_type(struct.unpack('>H', buf)[0])
208187
elif proto.code == P_ONION:
209-
buf = binascii.unhexlify(buf)
210188
addr_bytes, port_bytes = (buf[:-2], buf[-2:])
211189
addr = base64.b32encode(addr_bytes).decode('ascii').lower()
212-
port = str(decode_big_endian_16(port_bytes))
213-
return ':'.join([addr, port])
190+
port = six.text_type(struct.unpack('>H', port_bytes)[0])
191+
return u':'.join([addr, port])
214192
elif proto.code == P_P2P:
215-
buf = binascii.unhexlify(buf)
216193
size, num_bytes_read = read_varint_code(buf)
217194
buf = buf[num_bytes_read:]
218195
if len(buf) != size:
219196
raise ValueError("inconsistent lengths")
220-
return base58.b58encode(buf).decode()
197+
return base58.b58encode(buf).decode('ascii')
221198
elif proto.code == P_UNIX:
222-
buf = binascii.unhexlify(buf)
223199
size, num_bytes_read = read_varint_code(buf)
224200
return fsdecode(buf[num_bytes_read:])
225201
elif proto.code in (P_DNS, P_DNS4, P_DNS6):
226-
buf = binascii.unhexlify(buf)
227202
size, num_bytes_read = read_varint_code(buf)
228203
return idna.decode(buf[num_bytes_read:])
229204
raise ValueError("unknown protocol")
230205

231206

232207
def size_for_addr(proto, buf):
233-
if proto.size > 0:
208+
if proto.size >= 0:
234209
return proto.size // 8
235-
elif proto.size == 0:
236-
return 0
237210
else:
238211
size, num_bytes_read = read_varint_code(buf)
239212
return size + num_bytes_read
240213

241214

242215
def bytes_split(buf):
243216
ret = []
244-
buf = binascii.unhexlify(buf)
245217
while buf:
246218
code, num_bytes_read = read_varint_code(buf)
247219
proto = protocol_with_code(code)

multiaddr/multiaddr.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
# -*- coding: utf-8 -*-
2-
import binascii
32
from copy import copy
43

54
import six
@@ -90,7 +89,7 @@ def to_bytes(self):
9089

9190
def protocols(self):
9291
"""Returns a list of Protocols this Multiaddr includes."""
93-
buf = binascii.unhexlify(self.to_bytes())
92+
buf = self.to_bytes()
9493
protos = []
9594
while buf:
9695
code, num_bytes_read = read_varint_code(buf)

multiaddr/protocols.py

Lines changed: 25 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -118,15 +118,6 @@ def __repr__(self):
118118
)
119119

120120

121-
def code_to_varint(num):
122-
"""Convert an integer to a varint-encoded byte."""
123-
return binascii.hexlify(varint.encode(num))
124-
125-
126-
def varint_to_code(buf):
127-
return varint.decode_bytes(binascii.unhexlify(buf))
128-
129-
130121
def _uvarint(buf):
131122
"""Reads a varint from a bytes buffer and returns the value and # bytes"""
132123
x = 0
@@ -152,31 +143,31 @@ def read_varint_code(buf):
152143

153144
# Protocols is the list of multiaddr protocols supported by this module.
154145
PROTOCOLS = [
155-
Protocol(P_IP4, 32, 'ip4', code_to_varint(P_IP4)),
156-
Protocol(P_TCP, 16, 'tcp', code_to_varint(P_TCP)),
157-
Protocol(P_UDP, 16, 'udp', code_to_varint(P_UDP)),
158-
Protocol(P_DCCP, 16, 'dccp', code_to_varint(P_DCCP)),
159-
Protocol(P_IP6, 128, 'ip6', code_to_varint(P_IP6)),
160-
Protocol(P_IP6ZONE, LENGTH_PREFIXED_VAR_SIZE, 'ip6zone', code_to_varint(P_IP6ZONE)),
161-
Protocol(P_DNS, LENGTH_PREFIXED_VAR_SIZE, 'dns', code_to_varint(P_DNS)),
162-
Protocol(P_DNS4, LENGTH_PREFIXED_VAR_SIZE, 'dns4', code_to_varint(P_DNS4)),
163-
Protocol(P_DNS6, LENGTH_PREFIXED_VAR_SIZE, 'dns6', code_to_varint(P_DNS6)),
164-
Protocol(P_DNSADDR, LENGTH_PREFIXED_VAR_SIZE, 'dnsaddr', code_to_varint(P_DNSADDR)),
165-
Protocol(P_SCTP, 16, 'sctp', code_to_varint(P_SCTP)),
166-
Protocol(P_UDT, 0, 'udt', code_to_varint(P_UDT)),
167-
Protocol(P_UTP, 0, 'utp', code_to_varint(P_UTP)),
168-
Protocol(P_P2P, LENGTH_PREFIXED_VAR_SIZE, 'p2p', code_to_varint(P_P2P)),
169-
Protocol(P_ONION, 96, 'onion', code_to_varint(P_ONION)),
170-
Protocol(P_QUIC, 0, 'quic', code_to_varint(P_QUIC)),
171-
Protocol(P_HTTP, 0, 'http', code_to_varint(P_HTTP)),
172-
Protocol(P_HTTPS, 0, 'https', code_to_varint(P_HTTPS)),
173-
Protocol(P_WS, 0, 'ws', code_to_varint(P_WS)),
174-
Protocol(P_WSS, 0, 'wss', code_to_varint(P_WSS)),
175-
Protocol(P_P2P_WEBSOCKET_STAR, 0, 'p2p-websocket-star', code_to_varint(P_P2P_WEBSOCKET_STAR)),
176-
Protocol(P_P2P_WEBRTC_STAR, 0, 'p2p-webrtc-star', code_to_varint(P_P2P_WEBRTC_STAR)),
177-
Protocol(P_P2P_WEBRTC_DIRECT, 0, 'p2p-webrtc-direct', code_to_varint(P_P2P_WEBRTC_DIRECT)),
178-
Protocol(P_P2P_CIRCUIT, 0, 'p2p-circuit', code_to_varint(P_P2P_CIRCUIT)),
179-
Protocol(P_UNIX, LENGTH_PREFIXED_VAR_SIZE, 'unix', code_to_varint(P_UNIX), path=True),
146+
Protocol(P_IP4, 32, 'ip4', varint.encode(P_IP4)),
147+
Protocol(P_TCP, 16, 'tcp', varint.encode(P_TCP)),
148+
Protocol(P_UDP, 16, 'udp', varint.encode(P_UDP)),
149+
Protocol(P_DCCP, 16, 'dccp', varint.encode(P_DCCP)),
150+
Protocol(P_IP6, 128, 'ip6', varint.encode(P_IP6)),
151+
Protocol(P_IP6ZONE, LENGTH_PREFIXED_VAR_SIZE, 'ip6zone', varint.encode(P_IP6ZONE)),
152+
Protocol(P_DNS, LENGTH_PREFIXED_VAR_SIZE, 'dns', varint.encode(P_DNS)),
153+
Protocol(P_DNS4, LENGTH_PREFIXED_VAR_SIZE, 'dns4', varint.encode(P_DNS4)),
154+
Protocol(P_DNS6, LENGTH_PREFIXED_VAR_SIZE, 'dns6', varint.encode(P_DNS6)),
155+
Protocol(P_DNSADDR, LENGTH_PREFIXED_VAR_SIZE, 'dnsaddr', varint.encode(P_DNSADDR)),
156+
Protocol(P_SCTP, 16, 'sctp', varint.encode(P_SCTP)),
157+
Protocol(P_UDT, 0, 'udt', varint.encode(P_UDT)),
158+
Protocol(P_UTP, 0, 'utp', varint.encode(P_UTP)),
159+
Protocol(P_P2P, LENGTH_PREFIXED_VAR_SIZE, 'p2p', varint.encode(P_P2P)),
160+
Protocol(P_ONION, 96, 'onion', varint.encode(P_ONION)),
161+
Protocol(P_QUIC, 0, 'quic', varint.encode(P_QUIC)),
162+
Protocol(P_HTTP, 0, 'http', varint.encode(P_HTTP)),
163+
Protocol(P_HTTPS, 0, 'https', varint.encode(P_HTTPS)),
164+
Protocol(P_WS, 0, 'ws', varint.encode(P_WS)),
165+
Protocol(P_WSS, 0, 'wss', varint.encode(P_WSS)),
166+
Protocol(P_P2P_WEBSOCKET_STAR, 0, 'p2p-websocket-star', varint.encode(P_P2P_WEBSOCKET_STAR)),
167+
Protocol(P_P2P_WEBRTC_STAR, 0, 'p2p-webrtc-star', varint.encode(P_P2P_WEBRTC_STAR)),
168+
Protocol(P_P2P_WEBRTC_DIRECT, 0, 'p2p-webrtc-direct', varint.encode(P_P2P_WEBRTC_DIRECT)),
169+
Protocol(P_P2P_CIRCUIT, 0, 'p2p-circuit', varint.encode(P_P2P_CIRCUIT)),
170+
Protocol(P_UNIX, LENGTH_PREFIXED_VAR_SIZE, 'unix', varint.encode(P_UNIX), path=True),
180171
]
181172

182173
_names_to_protocols = dict((proto.name, proto) for proto in PROTOCOLS)

multiaddr/util.py

Lines changed: 9 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import binascii
21
import six
32
import struct
43

@@ -11,7 +10,7 @@ def split(ma):
1110
addrs = []
1211
bb = bytes_split(ma.to_bytes())
1312
for addr in bb:
14-
addrs.append(Multiaddr(binascii.hexlify(addr)))
13+
addrs.append(Multiaddr(addr))
1514
return addrs
1615

1716

@@ -22,37 +21,11 @@ def join(multiaddrs):
2221
return Multiaddr(b''.join(bs))
2322

2423

25-
def int_to_hex(i, size):
26-
"""Encode a long value as a hex string, 0-padding to size.
27-
28-
Note that size is the size of the resulting hex string. So, for a 32Byte
29-
int, size should be 64 (two hex characters per byte"."""
30-
f_str = "{0:0%sx}" % size
31-
buf = f_str.format(i).lower()
32-
if six.PY3:
33-
buf = bytes(buf, 'utf-8')
34-
35-
return buf
36-
37-
38-
def encode_big_endian_32(i):
39-
"""Take an int and return big-endian bytes"""
40-
return struct.pack('>I', i)[-4:]
41-
42-
43-
def encode_big_endian_16(i):
44-
"""Take an int and return big-endian bytes"""
45-
return encode_big_endian_32(i)[-2:]
46-
47-
48-
def decode_big_endian_32(b):
49-
"""Take big-endian bytes and return int"""
50-
b = binascii.unhexlify(binascii.hexlify(b).zfill(8))
51-
return struct.unpack('>I', b)[0]
52-
53-
54-
def decode_big_endian_16(b):
55-
ret = decode_big_endian_32(b)
56-
if ret < 0 or ret > 65535:
57-
raise ValueError("Not a uint16")
58-
return ret
24+
if hasattr(int, 'from_bytes'):
25+
def packed_net_bytes_to_int(b):
26+
"""Convert the given big-endian byte-string to an int."""
27+
return int.from_bytes(b, byteorder='big')
28+
else: # PY2
29+
def packed_net_bytes_to_int(b):
30+
"""Convert the given big-endian byte-string to an int."""
31+
return int(b.encode('hex'), 16)

0 commit comments

Comments
 (0)