Skip to content

Commit 79406ef

Browse files
committed
Unify multiaddr parsing to two functions: codec.{bytes,string}_iter
1 parent 45f4220 commit 79406ef

File tree

5 files changed: 72 additions and 147 deletions.

multiaddr/codec.py

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -32,55 +32,26 @@ def find_codec_by_name(name):
3232
def string_to_bytes(string):
3333
if not string:
3434
return b''
35-
# consume trailing slashes
36-
if not string.startswith('/'):
37-
raise ValueError("invalid multiaddr, must begin with /")
38-
string = string.rstrip('/')
39-
sp = string.split('/')
40-
41-
# skip the first element, since it starts with /
42-
sp.pop(0)
35+
4336
bs = []
44-
while sp:
45-
element = sp.pop(0)
46-
proto = protocol_with_name(element)
37+
for proto, codec, value in string_iter(string):
4738
bs.append(varint.encode(proto.code))
48-
try:
49-
codec = find_codec_by_name(proto.codec)
50-
except ImportError as exc:
51-
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
52-
if codec.SIZE == 0:
53-
continue
54-
if len(sp) < 1:
55-
raise ValueError(
56-
"protocol requires address, none given: %s" % proto.name)
57-
if codec.IS_PATH:
58-
sp = ["/" + "/".join(sp)]
59-
bs.append(codec.to_bytes(proto, sp.pop(0)))
39+
if value is not None:
40+
bs.append(codec.to_bytes(proto, value))
6041
return b''.join(bs)
6142

6243

6344
def bytes_to_string(buf):
64-
st = [''] # start with empty string so we get a leading slash on join()
65-
while buf:
66-
maddr_component = ""
67-
code, num_bytes_read = read_varint_code(buf)
68-
buf = buf[num_bytes_read:]
69-
proto = protocol_with_code(code)
70-
maddr_component += proto.name
71-
try:
72-
codec = find_codec_by_name(proto.codec)
73-
except ImportError as exc:
74-
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
75-
size = size_for_addr(codec, buf)
76-
if size > 0:
77-
addr = codec.to_string(proto, buf[:size])
78-
if not (codec.IS_PATH and addr[0] == '/'):
79-
maddr_component += '/'
80-
maddr_component += addr
81-
st.append(maddr_component)
82-
buf = buf[size:]
83-
return '/'.join(st)
45+
st = [u''] # start with empty string so we get a leading slash on join()
46+
for proto, codec, part in bytes_iter(buf):
47+
st.append(proto.name)
48+
if codec.SIZE != 0:
49+
value = codec.to_string(proto, part)
50+
if codec.IS_PATH and value[0] == u'/':
51+
st.append(value[1:])
52+
else:
53+
st.append(value)
54+
return u'/'.join(st)
8455

8556

8657
def size_for_addr(codec, buf):
@@ -91,17 +62,47 @@ def size_for_addr(codec, buf):
9162
return size + num_bytes_read
9263

9364

94-
def bytes_split(buf):
95-
ret = []
96-
while buf:
97-
code, num_bytes_read = read_varint_code(buf)
98-
proto = protocol_with_code(code)
99-
try:
100-
codec = find_codec_by_name(proto.codec)
101-
except ImportError as exc:
102-
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
103-
size = size_for_addr(codec, buf[num_bytes_read:])
104-
length = size + num_bytes_read
105-
ret.append(buf[:length])
106-
buf = buf[length:]
107-
return ret
65+
def string_iter(string):
66+
if not string.startswith(u'/'):
67+
raise ValueError("invalid multiaddr, must begin with /")
68+
# consume trailing slashes
69+
string = string.rstrip(u'/')
70+
sp = string.split(u'/')
71+
72+
# skip the first element, since it starts with /
73+
sp.pop(0)
74+
while sp:
75+
element = sp.pop(0)
76+
proto = protocol_with_name(element)
77+
try:
78+
codec = find_codec_by_name(proto.codec)
79+
except ImportError as exc:
80+
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
81+
value = None
82+
if codec.SIZE != 0:
83+
if len(sp) < 1:
84+
raise ValueError(
85+
"protocol requires address, none given: %s" % proto.name)
86+
if codec.IS_PATH:
87+
value = "/" + "/".join(sp)
88+
if not six.PY2:
89+
sp.clear()
90+
else:
91+
sp = []
92+
else:
93+
value = sp.pop(0)
94+
yield proto, codec, value
95+
96+
97+
def bytes_iter(buf):
98+
while buf:
99+
code, num_bytes_read = read_varint_code(buf)
100+
proto = protocol_with_code(code)
101+
try:
102+
codec = find_codec_by_name(proto.codec)
103+
except ImportError as exc:
104+
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
105+
size = size_for_addr(codec, buf[num_bytes_read:])
106+
length = size + num_bytes_read
107+
yield proto, codec, buf[num_bytes_read:length]
108+
buf = buf[length:]

multiaddr/multiaddr.py

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@
33

44
import six
55

6-
from .codec import find_codec_by_name
7-
from .codec import size_for_addr
6+
from .codec import bytes_iter
87
from .codec import string_to_bytes
98
from .codec import bytes_to_string
10-
from .protocols import protocol_with_code
11-
from .protocols import read_varint_code
129

1310

1411
class ProtocolNotFoundException(Exception):
@@ -89,20 +86,7 @@ def to_bytes(self):
8986

9087
def protocols(self):
9188
"""Returns a list of Protocols this Multiaddr includes."""
92-
buf = self.to_bytes()
93-
protos = []
94-
while buf:
95-
code, num_bytes_read = read_varint_code(buf)
96-
proto = protocol_with_code(code)
97-
try:
98-
codec = find_codec_by_name(proto.codec)
99-
except ImportError as exc:
100-
six.raise_from(ValueError("failed to parse %s addr: unknown" % proto.name), exc)
101-
protos.append(proto)
102-
buf = buf[num_bytes_read:]
103-
size = size_for_addr(codec, buf)
104-
buf = buf[size:]
105-
return protos
89+
return list(proto for proto, _, _ in bytes_iter(self.to_bytes()))
10690

10791
def encapsulate(self, other):
10892
"""Wrap this Multiaddr around another.
@@ -136,24 +120,15 @@ def decapsulate(self, other):
136120

137121
def value_for_protocol(self, code):
138122
"""Return the value (if any) following the specified protocol."""
139-
from .util import split
140-
141123
if not isinstance(code, int):
142124
raise ValueError("code type should be `int`, code={}".format(code))
143125

144-
for sub_addr in split(self):
145-
protocol = sub_addr.protocols()[0]
146-
if protocol.code == code:
147-
# e.g. if `sub_addr=/unix/123`, then `addr_parts=['', 'unix', '123']`
148-
addr_parts = str(sub_addr).split("/")
149-
if protocol.path:
150-
return "/" + "/".join(addr_parts[2:])
151-
if len(addr_parts) > 3:
152-
raise ValueError("Unknown Protocol format")
153-
elif len(addr_parts) == 3:
126+
for proto, codec, part in bytes_iter(self.to_bytes()):
127+
if proto.code == code:
128+
if codec.SIZE != 0:
154129
# If we have an address, return it
155-
return addr_parts[2]
156-
elif len(addr_parts) == 2:
130+
return codec.to_string(proto, part)
131+
else:
157132
# We were given something like '/utp', which doesn't have
158133
# an address, so return ''
159134
return ''

multiaddr/util.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,9 @@
11
import six
22
import struct
33

4-
from .codec import bytes_split
54
from .multiaddr import Multiaddr
65

76

8-
def split(ma):
9-
"""Return the sub-address portions of a multiaddr"""
10-
addrs = []
11-
bb = bytes_split(ma.to_bytes())
12-
for addr in bb:
13-
addrs.append(Multiaddr(addr))
14-
return addrs
15-
16-
17-
def join(multiaddrs):
18-
bs = []
19-
for ma in multiaddrs:
20-
bs.append(ma.to_bytes())
21-
return Multiaddr(b''.join(bs))
22-
23-
247
if hasattr(int, 'from_bytes'):
258
def packed_net_bytes_to_int(b):
269
"""Convert the given big-endian byte-string to an int."""

tests/test_codec.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
from multiaddr.codec import find_codec_by_name
5-
from multiaddr.codec import bytes_split
5+
from multiaddr.codec import bytes_iter
66
from multiaddr.codec import bytes_to_string
77
from multiaddr.codec import size_for_addr
88
from multiaddr.codec import string_to_bytes
@@ -59,13 +59,13 @@ def test_size_for_addr(codec_name, buf, expected):
5959
@pytest.mark.parametrize("buf, expected", [
6060
# "/ip4/127.0.0.1/udp/1234/ip4/127.0.0.1/tcp/4321"
6161
(b'\x04\x7f\x00\x00\x01\x91\x02\x04\xd2\x04\x7f\x00\x00\x01\x06\x10\xe1',
62-
[b'\x04\x7f\x00\x00\x01',
63-
b'\x91\x02\04\xd2',
64-
b'\x04\x7f\x00\x00\x01',
65-
b'\x06\x10\xe1']),
62+
[(_names_to_protocols["ip4"], b'\x7f\x00\x00\x01'),
63+
(_names_to_protocols["udp"], b'\x04\xd2'),
64+
(_names_to_protocols["ip4"], b'\x7f\x00\x00\x01'),
65+
(_names_to_protocols["tcp"], b'\x10\xe1')]),
6666
])
67-
def test_bytes_split(buf, expected):
68-
assert bytes_split(buf) == expected
67+
def test_bytes_iter(buf, expected):
68+
assert list((proto, val) for proto, _, val in bytes_iter(buf)) == expected
6969

7070

7171
@pytest.mark.parametrize("proto, buf, expected", ADDR_BYTES_MAP_STR_TEST_DATA)

tests/test_multiaddr.py

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
from multiaddr.protocols import P_TCP
1717
from multiaddr.protocols import P_UDP
1818
from multiaddr.protocols import P_UNIX
19-
from multiaddr.util import split
20-
from multiaddr.util import join
2119

2220

2321
@pytest.mark.parametrize(
@@ -117,23 +115,6 @@ def test_eq():
117115
assert m4 == m3
118116

119117

120-
@pytest.mark.parametrize(
121-
'test_vals',
122-
[("/ip4/1.2.3.4/udp/1234", ["/ip4/1.2.3.4", "/udp/1234"]),
123-
("/ip4/1.2.3.4/tcp/1/ip4/2.3.4.5/udp/2",
124-
["/ip4/1.2.3.4", "/tcp/1", "/ip4/2.3.4.5", "/udp/2"]),
125-
("/ip4/1.2.3.4/utp/ip4/2.3.4.5/udp/2/udt",
126-
["/ip4/1.2.3.4", "/utp", "/ip4/2.3.4.5", "/udp/2", "/udt"])])
127-
def test_bytes_split_and_join(test_vals):
128-
string, expected = test_vals
129-
mm = Multiaddr(string)
130-
split_m = split(mm)
131-
for i, addr in enumerate(split_m):
132-
assert str(addr) == expected[i]
133-
joined = join(split_m)
134-
assert mm == joined
135-
136-
137118
def test_protocols():
138119
ma = Multiaddr("/ip4/127.0.0.1/udp/1234")
139120
protos = ma.protocols()
@@ -246,21 +227,6 @@ def test_bad_initialization_wrong_type():
246227
Multiaddr(42)
247228

248229

249-
def test_get_value_too_many_fields_protocol(monkeypatch):
250-
"""
251-
This test patches the Multiaddr's string representation to return
252-
an invalid string in order to test that value_for_protocol properly
253-
throws a ValueError. This avoids some of the error checking in
254-
the constructor and is easier to patch, thus the actual values
255-
that the constructor specifies is ignored by the test.
256-
"""
257-
monkeypatch.setattr("multiaddr.multiaddr.Multiaddr.__str__",
258-
lambda ignore: str('/udp/1234/5678'))
259-
a = Multiaddr("/ip4/127.0.0.1/udp/1234")
260-
with pytest.raises(ValueError):
261-
a.value_for_protocol(P_UDP)
262-
263-
264230
def test_value_for_protocol_argument_wrong_type():
265231
a = Multiaddr("/ip4/127.0.0.1/udp/1234")
266232
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)