multiformats
diff --git a/‎docs/multiaddr.codecs.rst
Lines changed: 86 additions & 0 deletions b/‎docs/multiaddr.codecs.rst
Lines changed: 86 additions & 0 deletions
diff --git a/‎multiaddr/codecs/cid.py
Lines changed: 66 additions & 64 deletions b/‎multiaddr/codecs/cid.py
Lines changed: 66 additions & 64 deletions
diff --git a/‎multiaddr/codecs/domain.py
Lines changed: 28 additions & 16 deletions b/‎multiaddr/codecs/domain.py
Lines changed: 28 additions & 16 deletions
diff --git a/‎multiaddr/codecs/fspath.py
Lines changed: 48 additions & 17 deletions b/‎multiaddr/codecs/fspath.py
Lines changed: 48 additions & 17 deletions
@@ -0,0 +1,86 @@
+multiaddr.codecs package
+========================
+
+Submodules
+----------
+
+multiaddr.codecs.cid module
+---------------------------
+
+.. automodule:: multiaddr.codecs.cid
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.domain module
+------------------------------
+
+.. automodule:: multiaddr.codecs.domain
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.fspath module
+------------------------------
+
+.. automodule:: multiaddr.codecs.fspath
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.ip4 module
+---------------------------
+
+.. automodule:: multiaddr.codecs.ip4
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.ip6 module
+---------------------------
+
+.. automodule:: multiaddr.codecs.ip6
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.onion module
+-----------------------------
+
+.. automodule:: multiaddr.codecs.onion
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.onion3 module
+------------------------------
+
+.. automodule:: multiaddr.codecs.onion3
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.uint16be module
+--------------------------------
+
+.. automodule:: multiaddr.codecs.uint16be
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+multiaddr.codecs.utf8 module
+----------------------------
+
+.. automodule:: multiaddr.codecs.utf8
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: multiaddr.codecs
+    :members:
+    :undoc-members:
+    :show-inheritance:
@@ -2,9 +2,11 @@
 
 import base58
 import cid
+import varint
 
 from ..codecs import CodecBase
 from . import LENGTH_PREFIXED_VAR_SIZE
+from ..exceptions import BinaryParseError
 
 SIZE = LENGTH_PREFIXED_VAR_SIZE
 IS_PATH = False
@@ -63,82 +65,82 @@
     "Qm": [46],
 }
 
-PROTO_NAME_TO_CIDv1_CODEC: Dict[str, str] = {
-    # The "p2p" multiaddr protocol requires all keys to use the "libp2p-key" multicodec
+PROTO_NAME_TO_CIDv1_CODEC = {
     "p2p": "libp2p-key",
+    "ipfs": "dag-pb",
 }
 
 
+def _is_binary_cidv0_multihash(buf: bytes) -> bool:
+    """Return True if *buf* has the layout of a CIDv0-compatible multihash.
+
+    A base58 encode/decode round trip reproduces any input, so it cannot
+    distinguish a multihash from arbitrary bytes; the multihash header is
+    inspected instead.  Two layouts are accepted:
+
+    * SHA2-256: code ``0x12``, length ``0x20``, followed by a 32-byte digest
+      (34 bytes in total).
+    * Identity: code ``0x00``, a length byte of at most 42, followed by
+      exactly that many payload bytes.
+    """
+    if len(buf) < 2:
+        # Too short to hold the (hash code, digest length) header.
+        return False
+
+    if buf.startswith(b"\x12\x20") and len(buf) == 34:  # SHA2-256
+        return True
+
+    # Identity hash: the length byte must match the remaining payload size.
+    return buf[0] == 0x00 and buf[1] <= 42 and len(buf) == buf[1] + 2
+
+
 class Codec(CodecBase):
     SIZE = SIZE
     IS_PATH = IS_PATH
 
-    def to_bytes(self, proto, string):
-        expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name)
-
-        if len(string) in CIDv0_PREFIX_TO_LENGTH.get(string[0:2], ()):  # CIDv0
-            # Upgrade the wire (binary) representation of any received CIDv0 string
-            # to CIDv1 if we can determine which multicodec value to use
-            if expected_codec:
-                cid_obj = cid.make_cid(1, expected_codec, base58.b58decode(string))
-                assert isinstance(cid_obj.buffer, bytes)
-                return cid_obj.buffer
-
-            return base58.b58decode(string)
-        else:  # CIDv1+
-            parsed = cid.from_string(string)
-
-            # Ensure CID has correct codec for protocol
-            if expected_codec and parsed.codec != expected_codec:
-                raise ValueError(
-                    '"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
-                        proto.name, expected_codec
-                    )
-                )
-
-            return parsed.buffer
+    def to_bytes(self, proto, value: str) -> bytes:
+        """Convert a CID string to its binary representation.
+
+        The result is the bare multihash (CIDv0) or CID buffer (CIDv1) with
+        no varint length prefix, which is the form ``to_string`` parses.
+
+        :param proto: protocol the value belongs to (not consulted here).
+        :param value: CIDv0 (base58btc multihash) or CIDv1 string.
+        :raises ValueError: if *value* is empty or is not a valid CID.
+        """
+        if not value:
+            raise ValueError("CID string cannot be empty")
+
+        # First try to parse as CIDv0 (base58btc encoded multihash)
+        try:
+            decoded = base58.b58decode(value)
+        except ValueError:
+            # Not base58btc at all, so it can only be a CIDv1 string.
+            decoded = None
+
+        if decoded is not None and _is_binary_cidv0_multihash(decoded):
+            return decoded
+
+        # If not CIDv0, try to parse as CIDv1
+        try:
+            parsed = cid.make_cid(value)
+        except ValueError as exc:
+            # Keep the parser's error attached as the cause for debugging.
+            raise ValueError(f"Invalid CID: {value}") from exc
+        return parsed.buffer
+
+    def to_string(self, proto, buf: bytes) -> str:
+        """Convert a binary CID to its string representation."""
+        if not buf:
+            raise ValueError("CID buffer cannot be empty")
 
-    def to_string(self, proto, buf):
         expected_codec = PROTO_NAME_TO_CIDv1_CODEC.get(proto.name)
 
-        if _is_binary_cidv0_multihash(buf):  # CIDv0
-            if not expected_codec:
-                # Simply encode as base58btc as there is nothing better to do
-                return base58.b58encode(buf).decode("ascii")
+        try:
+            if _is_binary_cidv0_multihash(buf):  # CIDv0
+                if not expected_codec:
+                    # Simply encode as base58btc as there is nothing better to do
+                    return base58.b58encode(buf).decode("ascii")
 
-            # "Implementations SHOULD display peer IDs using the first (raw
-            #  base58btc encoded multihash) format until the second format is
-            #  widely supported."
-            #
-            # In the future the following line should instead convert the multihash
-            # to CIDv1 and with the `expected_codec` and wrap it in base32:
-            #   return cid.make_cid(1, expected_codec, buf).encode("base32").decode("ascii")
-            return base58.b58encode(buf).decode("ascii")
-        else:  # CIDv1+
-            parsed = cid.from_bytes(buf)
-
-            # Ensure CID has correct codec for protocol
-            if expected_codec and parsed.codec != expected_codec:
-                raise ValueError(
-                    '"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
-                        proto.name, expected_codec
+                # "Implementations SHOULD display peer IDs using the first (raw
+                #  base58btc encoded multihash) format until the second format is
+                #  widely supported."
+                return base58.b58encode(buf).decode("ascii")
+            else:  # CIDv1+
+                parsed = cid.from_bytes(buf)
+
+                # Ensure CID has correct codec for protocol
+                if expected_codec and parsed.codec != expected_codec:
+                    raise ValueError(
+                        '"{0}" multiaddr CIDs must use the "{1}" multicodec'.format(
+                            proto.name, expected_codec
+                        )
                     )
-                )
-
-            # "Implementations SHOULD display peer IDs using the first (raw
-            #  base58btc encoded multihash) format until the second format is
-            #  widely supported."
-            if expected_codec and _is_binary_cidv0_multihash(parsed.multihash):
-                return base58.b58encode(parsed.multihash).decode("ascii")
-
-            return parsed.encode("base32").decode("ascii")
-
-
-def _is_binary_cidv0_multihash(buf: bytes) -> bool:
-    if buf.startswith(b"\x12\x20") and len(buf) == 34:  # SHA2-256
-        return True
 
-    if (buf[0] == 0x00 and buf[1] in range(43)) and len(buf) == (buf[1] + 2):  # Identity hash
-        return True
+                # "Implementations SHOULD display peer IDs using the first (raw
+                #  base58btc encoded multihash) format until the second format is
+                #  widely supported."
+                if expected_codec and _is_binary_cidv0_multihash(parsed.multihash):
+                    return base58.b58encode(parsed.multihash).decode("ascii")
 
-    return False
+                return parsed.encode("base32").decode("ascii")
+        except Exception as e:
+            raise BinaryParseError(str(e), buf, proto.name, e) from e
@@ -1,35 +1,47 @@
 import idna
-
-from ..codecs import CodecBase
+from . import CodecBase, LENGTH_PREFIXED_VAR_SIZE
 from ..exceptions import BinaryParseError
 
-SIZE = -1
+SIZE = LENGTH_PREFIXED_VAR_SIZE  # Variable size for length-prefixed values
 IS_PATH = False
 
 
 class Codec(CodecBase):
     SIZE = SIZE
     IS_PATH = IS_PATH
 
-    def to_bytes(self, proto, string):
-        return string.encode('utf-8')
-
-    def to_string(self, proto, buf):
+    def to_bytes(self, proto, value: str) -> bytes:
+        """Convert a domain name to its UTF-8 binary representation.
+
+        The name is checked with ``idna.encode(value, uts46=True)``; the
+        encoded result is discarded and the original string is what gets
+        stored.
+
+        :param proto: protocol the value belongs to (not consulted here).
+        :param value: domain name to encode.
+        :raises ValueError: if *value* is empty or fails IDNA validation.
+        """
+        if not value:
+            raise ValueError("Domain name cannot be empty")
+        try:
+            # Validate using IDNA, but store as UTF-8
+            idna.encode(value, uts46=True)
+        except idna.IDNAError as e:
+            raise ValueError(f"Invalid domain name: {str(e)}") from e
+        return value.encode("utf-8")
+
+    def to_string(self, proto, buf: bytes) -> str:
+        """Convert a binary domain name to its string representation (UTF-8), validating with IDNA."""
+        if not buf:
+            raise ValueError("Domain name buffer cannot be empty")
         try:
-            string = buf.decode("utf-8")
-            for label in string.split("."):
-                idna.check_label(label)
-            return string
-        except (ValueError, UnicodeDecodeError) as e:
-            raise BinaryParseError(str(e), buf, proto)
+            value = buf.decode("utf-8")
+            # Validate using IDNA
+            idna.encode(value, uts46=True)
+            return value
+        except (UnicodeDecodeError, idna.IDNAError) as e:
+            raise BinaryParseError(f"Invalid domain name encoding: {str(e)}", buf, proto.name, e)
 
 
 def to_bytes(proto, string):
-    return idna.uts46_remap(string).encode("utf-8")
+    # Validate using IDNA, but store as UTF-8
+    idna.encode(string, uts46=True)
+    return string.encode("utf-8")
 
 
 def to_string(proto, buf):
     string = buf.decode("utf-8")
-    for label in string.split("."):
-        idna.check_label(label)
+    # Validate using IDNA
+    idna.encode(string, uts46=True)
     return string
@@ -1,26 +1,57 @@
-from ..codecs import CodecBase
+import os
+import urllib.parse
+from . import CodecBase, LENGTH_PREFIXED_VAR_SIZE
+from ..exceptions import BinaryParseError
 
-SIZE = -1
+SIZE = LENGTH_PREFIXED_VAR_SIZE
 IS_PATH = True
 
 
 class Codec(CodecBase):
     SIZE = SIZE
     IS_PATH = IS_PATH
 
-    def to_bytes(self, proto, string):
-        if len(string) == 0:
-            raise ValueError("{0} value must not be empty".format(proto.name))
-        # Remove leading slash unless the path is just '/'
-        if string != '/' and string.startswith('/'):
-            string = string[1:]
-        return string.encode('utf-8')
+    def to_bytes(self, proto, value: str) -> bytes:
+        """Convert a filesystem path to its binary representation."""
+        if not value:
+            raise ValueError("Path cannot be empty")
 
-    def to_string(self, proto, buf):
-        if len(buf) == 0:
-            raise ValueError("invalid length (should be > 0)")
-        string = buf.decode('utf-8')
-        # Always add a single leading slash
-        if not string.startswith('/'):
-            string = '/' + string
-        return string
+        # Normalize path separators
+        value = value.replace("\\", "/")
+        
+        # Remove leading/trailing slashes
+        value = value.strip("/")
+        
+        # Handle empty path after normalization
+        if not value:
+            raise ValueError("Path cannot be empty after normalization")
+            
+        # URL decode to handle special characters
+        value = urllib.parse.unquote(value)
+        
+        # Encode as UTF-8
+        return value.encode("utf-8")
+
+    def to_string(self, proto, buf: bytes) -> str:
+        """Convert a binary filesystem path to its string representation.
+
+        The bytes are decoded as UTF-8, backslashes are turned into forward
+        slashes, leading and trailing slashes are stripped, and the result is
+        percent-encoded with ``urllib.parse.quote``.
+
+        :param proto: protocol the value belongs to; only its ``name`` is
+            used, for error reporting.
+        :param buf: UTF-8 encoded path.
+        :raises ValueError: if *buf* is empty or holds nothing but slashes.
+        :raises BinaryParseError: if *buf* is not valid UTF-8.
+        """
+        if not buf:
+            raise ValueError("Path buffer cannot be empty")
+
+        try:
+            # Decode from UTF-8
+            value = buf.decode("utf-8")
+        except UnicodeDecodeError as e:
+            raise BinaryParseError(
+                f"Invalid UTF-8 encoding: {str(e)}", buf, proto.name, e
+            ) from e
+
+        # Normalize path separators, then remove leading/trailing slashes
+        value = value.replace("\\", "/").strip("/")
+
+        # Handle empty path after normalization
+        if not value:
+            raise ValueError("Path cannot be empty after normalization")
+
+        # URL encode special characters
+        return urllib.parse.quote(value)