Skip to content

Commit 2185a3a

Browse files
committed
fix: adding DNSADDR protocol work in progress
1 parent fb4b742 commit 2185a3a

File tree

15 files changed

+1017
-239
lines changed

15 files changed

+1017
-239
lines changed

docs/multiaddr.codecs.rst

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
multiaddr.codecs package
2+
========================
3+
4+
Submodules
5+
----------
6+
7+
multiaddr.codecs.cid module
8+
---------------------------
9+
10+
.. automodule:: multiaddr.codecs.cid
11+
:members:
12+
:undoc-members:
13+
:show-inheritance:
14+
15+
multiaddr.codecs.domain module
16+
------------------------------
17+
18+
.. automodule:: multiaddr.codecs.domain
19+
:members:
20+
:undoc-members:
21+
:show-inheritance:
22+
23+
multiaddr.codecs.fspath module
24+
------------------------------
25+
26+
.. automodule:: multiaddr.codecs.fspath
27+
:members:
28+
:undoc-members:
29+
:show-inheritance:
30+
31+
multiaddr.codecs.ip4 module
32+
---------------------------
33+
34+
.. automodule:: multiaddr.codecs.ip4
35+
:members:
36+
:undoc-members:
37+
:show-inheritance:
38+
39+
multiaddr.codecs.ip6 module
40+
---------------------------
41+
42+
.. automodule:: multiaddr.codecs.ip6
43+
:members:
44+
:undoc-members:
45+
:show-inheritance:
46+
47+
multiaddr.codecs.onion module
48+
-----------------------------
49+
50+
.. automodule:: multiaddr.codecs.onion
51+
:members:
52+
:undoc-members:
53+
:show-inheritance:
54+
55+
multiaddr.codecs.onion3 module
56+
------------------------------
57+
58+
.. automodule:: multiaddr.codecs.onion3
59+
:members:
60+
:undoc-members:
61+
:show-inheritance:
62+
63+
multiaddr.codecs.uint16be module
64+
--------------------------------
65+
66+
.. automodule:: multiaddr.codecs.uint16be
67+
:members:
68+
:undoc-members:
69+
:show-inheritance:
70+
71+
multiaddr.codecs.utf8 module
72+
----------------------------
73+
74+
.. automodule:: multiaddr.codecs.utf8
75+
:members:
76+
:undoc-members:
77+
:show-inheritance:
78+
79+
80+
Module contents
81+
---------------
82+
83+
.. automodule:: multiaddr.codecs
84+
:members:
85+
:undoc-members:
86+
:show-inheritance:

multiaddr/codecs/cid.py

Lines changed: 66 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22

33
import base58
44
import cid
5+
import varint
56

67
from ..codecs import CodecBase
78
from . import LENGTH_PREFIXED_VAR_SIZE
9+
from ..exceptions import BinaryParseError
810

911
SIZE = LENGTH_PREFIXED_VAR_SIZE
1012
IS_PATH = False
@@ -63,82 +65,82 @@
6365
"Qm": [46],
6466
}
6567

66-
PROTO_NAME_TO_CIDv1_CODEC: Dict[str, str] = {
67-
# The "p2p" multiaddr protocol requires all keys to use the "libp2p-key" multicodec
68+
PROTO_NAME_TO_CIDv1_CODEC = {
6869
"p2p": "libp2p-key",
70+
"ipfs": "dag-pb",
6971
}
7072

7173

74+
def _is_binary_cidv0_multihash(buf: bytes) -> bool:
75+
"""Check if the given bytes represent a CIDv0 multihash."""
76+
try:
77+
# CIDv0 is just a base58btc encoded multihash
78+
decoded = base58.b58decode(base58.b58encode(buf).decode("ascii"))
79+
return len(decoded) == len(buf) and decoded == buf
80+
except Exception:
81+
return False
82+
83+
7284
class Codec(CodecBase):
7385
SIZE = SIZE
7486
IS_PATH = IS_PATH
7587

76-
def to_bytes(self, proto, string):
77-
expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name)
78-
79-
if len(string) in CIDv0_PREFIX_TO_LENGTH.get(string[0:2], ()): # CIDv0
80-
# Upgrade the wire (binary) representation of any received CIDv0 string
81-
# to CIDv1 if we can determine which multicodec value to use
82-
if expected_codec:
83-
cid_obj = cid.make_cid(1, expected_codec, base58.b58decode(string))
84-
assert isinstance(cid_obj.buffer, bytes)
85-
return cid_obj.buffer
86-
87-
return base58.b58decode(string)
88-
else: # CIDv1+
89-
parsed = cid.from_string(string)
90-
91-
# Ensure CID has correct codec for protocol
92-
if expected_codec and parsed.codec != expected_codec:
93-
raise ValueError(
94-
'"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
95-
proto.name, expected_codec
96-
)
97-
)
98-
99-
return parsed.buffer
88+
def to_bytes(self, proto, value: str) -> bytes:
    """Convert a CID string to its binary representation.

    A string whose two-character prefix and length match an entry of
    ``CIDv0_PREFIX_TO_LENGTH`` is treated as CIDv0 (a base58btc multihash).
    If the protocol has an entry in ``PROTO_NAME_TO_CIDv1_CODEC`` the
    multihash is wrapped into a CIDv1 with that multicodec; otherwise the
    decoded multihash is returned unchanged. Any other string is parsed as
    CIDv1+, and its multicodec must match the one required by the protocol.

    :param proto: protocol object; only ``proto.name`` is read
    :param value: textual CID
    :returns: the CID bytes, without a length prefix
    :raises ValueError: if *value* is empty, cannot be parsed, or uses a
        multicodec other than the one the protocol requires
    """
    if not value:
        raise ValueError("CID string cannot be empty")

    expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name)

    if len(value) in CIDv0_PREFIX_TO_LENGTH.get(value[0:2], ()):  # CIDv0
        multihash = base58.b58decode(value)
        # Upgrade the wire representation to CIDv1 when the multicodec to
        # use is known for this protocol.
        if expected_codec:
            return cid.make_cid(1, expected_codec, multihash).buffer
        return multihash

    # CIDv1+
    try:
        parsed = cid.from_string(value)
    except ValueError as e:
        raise ValueError(f"Invalid CID: {value}") from e

    # Mirror the check done in to_string so both directions agree.
    if expected_codec and parsed.codec != expected_codec:
        raise ValueError(
            '"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
                proto.name, expected_codec
            )
        )

    # NOTE(review): no varint length is prepended here because to_string in
    # this class consumes the buffer without stripping one; the prefix for
    # LENGTH_PREFIXED_VAR_SIZE values is presumably added by the caller -
    # confirm against the transcoder.
    return parsed.buffer
109+
110+
def to_string(self, proto, buf: bytes) -> str:
    """Convert a binary CID to its string representation.

    A buffer recognised by ``_is_binary_cidv0_multihash`` is rendered as
    base58btc. Anything else is parsed with ``cid.from_bytes``; if the
    protocol requires a multicodec, the parsed CID must use it, and a
    CIDv0-compatible multihash is then still shown as base58btc. All other
    CIDs are rendered as base32.

    :param proto: protocol object; only ``proto.name`` is read
    :param buf: binary CID
    :raises ValueError: if *buf* is empty
    :raises BinaryParseError: for any failure while decoding, including a
        multicodec mismatch
    """
    if not buf:
        raise ValueError("CID buffer cannot be empty")

    expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name)

    try:
        if _is_binary_cidv0_multihash(buf):  # CIDv0
            # "Implementations SHOULD display peer IDs using the first (raw
            # base58btc encoded multihash) format until the second format is
            # widely supported."
            # With or without an expected codec the output is the same.
            return base58.b58encode(buf).decode("ascii")

        # CIDv1+
        parsed = cid.from_bytes(buf)

        # Ensure CID has correct codec for protocol
        if expected_codec and parsed.codec != expected_codec:
            raise ValueError(
                '"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
                    proto.name, expected_codec
                )
            )

        # Same display rule as above, applied to the wrapped multihash.
        if expected_codec and _is_binary_cidv0_multihash(parsed.multihash):
            return base58.b58encode(parsed.multihash).decode("ascii")

        return parsed.encode("base32").decode("ascii")
    except Exception as e:
        raise BinaryParseError(str(e), buf, proto.name, e) from e

multiaddr/codecs/domain.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,47 @@
11
import idna
2-
3-
from ..codecs import CodecBase
2+
from . import CodecBase, LENGTH_PREFIXED_VAR_SIZE
43
from ..exceptions import BinaryParseError
54

6-
SIZE = -1
5+
SIZE = LENGTH_PREFIXED_VAR_SIZE # Variable size for length-prefixed values
76
IS_PATH = False
87

98

109
class Codec(CodecBase):
1110
SIZE = SIZE
1211
IS_PATH = IS_PATH
1312

14-
def to_bytes(self, proto, string):
15-
return string.encode('utf-8')
16-
17-
def to_string(self, proto, buf):
13+
def to_bytes(self, proto, value: str) -> bytes:
    """Convert a domain name to its binary (UTF-8) representation.

    The name is validated with ``idna.encode(..., uts46=True)``; the IDNA
    result is discarded and the original text is stored as UTF-8.

    :param proto: protocol object (not used here)
    :param value: domain name
    :raises ValueError: if *value* is empty or fails IDNA validation
    """
    if not value:
        raise ValueError("Domain name cannot be empty")

    try:
        # Validation only; the encoded form is intentionally not kept.
        idna.encode(value, uts46=True)
    except idna.IDNAError as e:
        # Chain the cause so the underlying IDNA failure stays visible.
        raise ValueError(f"Invalid domain name: {e}") from e

    return value.encode("utf-8")
23+
24+
def to_string(self, proto, buf: bytes) -> str:
    """Convert a binary (UTF-8) domain name to its string representation.

    The decoded text is validated with ``idna.encode(..., uts46=True)`` and
    returned unchanged.

    :param proto: protocol object; only ``proto.name`` is read, for error
        reporting
    :param buf: UTF-8 encoded domain name
    :raises ValueError: if *buf* is empty
    :raises BinaryParseError: if *buf* is not valid UTF-8 or the decoded
        name fails IDNA validation
    """
    if not buf:
        raise ValueError("Domain name buffer cannot be empty")

    try:
        value = buf.decode("utf-8")
        # Validation only; the encoded form is intentionally not kept.
        idna.encode(value, uts46=True)
    except (UnicodeDecodeError, idna.IDNAError) as e:
        # Chain the cause so the original traceback is preserved.
        raise BinaryParseError(
            f"Invalid domain name encoding: {e}", buf, proto.name, e
        ) from e

    return value
2535

2636

2737
def to_bytes(proto, string):
    """Return *string* encoded as UTF-8 after checking it with IDNA.

    ``idna.encode(..., uts46=True)`` is called purely as a validity check;
    whatever it raises propagates to the caller unchanged.
    """
    idna.encode(string, uts46=True)  # result discarded on purpose
    encoded = string.encode("utf-8")
    return encoded
2941

3042

3143
def to_string(proto, buf):
    """Return *buf* decoded from UTF-8 after checking the text with IDNA.

    ``idna.encode(..., uts46=True)`` is called purely as a validity check;
    decoding and validation errors propagate to the caller unchanged.
    """
    decoded = buf.decode("utf-8")
    idna.encode(decoded, uts46=True)  # result discarded on purpose
    return decoded

multiaddr/codecs/fspath.py

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,57 @@
1-
from ..codecs import CodecBase
1+
import os
2+
import urllib.parse
3+
from . import CodecBase, LENGTH_PREFIXED_VAR_SIZE
4+
from ..exceptions import BinaryParseError
25

3-
SIZE = -1
6+
SIZE = LENGTH_PREFIXED_VAR_SIZE
47
IS_PATH = True
58

69

710
class Codec(CodecBase):
811
SIZE = SIZE
912
IS_PATH = IS_PATH
1013

11-
def to_bytes(self, proto, string):
12-
if len(string) == 0:
13-
raise ValueError("{0} value must not be empty".format(proto.name))
14-
# Remove leading slash unless the path is just '/'
15-
if string != '/' and string.startswith('/'):
16-
string = string[1:]
17-
return string.encode('utf-8')
14+
def to_bytes(self, proto, value: str) -> bytes:
    """Convert a filesystem path to its binary (UTF-8) representation.

    Backslashes are replaced with forward slashes, leading and trailing
    slashes are removed, percent-escapes are decoded, and the result is
    encoded as UTF-8.

    :param proto: protocol object (not used here)
    :param value: textual path, possibly percent-encoded
    :raises ValueError: if the path is empty (before or after
        normalization) or contains percent-escapes that do not decode to
        valid UTF-8
    """
    if not value:
        raise ValueError("Path cannot be empty")

    # Normalize separators, then drop leading/trailing slashes.
    normalized = value.replace("\\", "/").strip("/")
    if not normalized:
        raise ValueError("Path cannot be empty after normalization")

    try:
        # Decode strictly: the default errors="replace" would silently turn
        # a malformed escape such as "%ff" into U+FFFD and store that.
        decoded = urllib.parse.unquote(normalized, errors="strict")
    except UnicodeDecodeError as e:
        raise ValueError(f"Invalid percent-encoding in path: {e}") from e

    return decoded.encode("utf-8")
34+
35+
def to_string(self, proto, buf: bytes) -> str:
    """Convert a binary (UTF-8) filesystem path to its string form.

    The buffer is decoded as UTF-8, backslashes are replaced with forward
    slashes, leading and trailing slashes are removed, and the result is
    percent-encoded with ``urllib.parse.quote``.

    :param proto: protocol object; only ``proto.name`` is read, for error
        reporting
    :param buf: UTF-8 encoded path
    :raises ValueError: if the path is empty before or after normalization
    :raises BinaryParseError: if *buf* is not valid UTF-8
    """
    if not buf:
        raise ValueError("Path buffer cannot be empty")

    # Only the decode can raise UnicodeDecodeError, so only it is guarded.
    try:
        decoded = buf.decode("utf-8")
    except UnicodeDecodeError as e:
        # Chain the cause so the original traceback is preserved.
        raise BinaryParseError(
            f"Invalid UTF-8 encoding: {e}", buf, proto.name, e
        ) from e

    # Normalize separators, then drop leading/trailing slashes.
    normalized = decoded.replace("\\", "/").strip("/")
    if not normalized:
        raise ValueError("Path cannot be empty after normalization")

    # URL encode special characters
    return urllib.parse.quote(normalized)

0 commit comments

Comments
 (0)