multiformats · acul71 · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
@@ -28,12 +28,11 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Set TOXENV
         run: |
-          python_version="${{ matrix.python-version }}"
-          toxenv="${{ matrix.toxenv }}"
-          if [[ "$toxenv" == "docs" ]]; then
-            echo "TOXENV=docs" | tee -a "$GITHUB_ENV"
+          if [[ "${{ matrix.toxenv }}" == "docs" ]]; then
+            echo "TOXENV=docs" >> "$GITHUB_ENV"
           else
-            echo "TOXENV=py${python_version}-${toxenv}" | tr -d '.' | tee -a "$GITHUB_ENV"
+            python_version="${{ matrix.python-version }}"
+            echo "TOXENV=py${python_version//./}-${{ matrix.toxenv }}" >> "$GITHUB_ENV"
           fi
       - run: |
           python -m pip install --upgrade pip
@@ -58,8 +57,7 @@ jobs:
         shell: bash
         run: |
           python_version="${{ matrix.python-version }}"
-          toxenv="${{ matrix.toxenv }}"
-          echo "TOXENV=py${python_version}-${toxenv}" | tr -d '.' | tee -a "$GITHUB_ENV"
+          echo "TOXENV=py${python_version//./}-${{ matrix.toxenv }}" >> "$GITHUB_ENV"
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
@@ -96,6 +96,33 @@ Ready to contribute? Here's how to set up `multibase` for local development.
 
    If you installed pre-commit hooks (step 4), they will run automatically on commit.
 
+Development Workflow Commands
+-------------------------------
+
+The project provides several ``make`` targets to help with development:
+
+* ``make fix`` - Automatically fix formatting and linting issues using ruff.
+  Use this when you want to auto-fix code style issues.
+
+* ``make lint`` - Run all pre-commit hooks on all files to check for code quality
+  issues. This includes YAML/TOML validation, trailing whitespace checks, pyupgrade,
+  ruff linting and formatting, and mypy type checking.
+
+* ``make typecheck`` - Run mypy type checking only. Use this when you want to
+  quickly check for type errors without running all other checks.
+
+* ``make test`` - Run the test suite with pytest using the default Python version.
+  For testing across multiple Python versions, use ``tox`` instead.
+
+* ``make pr`` - Run a complete pre-PR check: clean build artifacts, fix formatting,
+  then run linting, type checking, and tests. This is the recommended command to
+  run before submitting a pull request.
+
+* ``make coverage`` - Run tests with coverage reporting and open the HTML report
+  in your browser.
+
+For a full list of available commands, run ``make help``.
+
 7. Commit your changes and push your branch to GitHub::
 
     $ git add .

diff --git a/README.rst b/README.rst
@@ -61,6 +61,28 @@ Sample Usage
     >>> decode(encode('base2', b'hello world'))
     b'hello world'
 
+    >>> # Using reusable Encoder/Decoder classes
+    >>> from multibase import Encoder, Decoder
+    >>> encoder = Encoder('base64')
+    >>> encoded1 = encoder.encode('data1')
+    >>> encoded2 = encoder.encode('data2')
+
+    >>> decoder = Decoder()
+    >>> decoded = decoder.decode(encoded1)
+
+    >>> # Getting encoding information
+    >>> from multibase import get_encoding_info, list_encodings, is_encoding_supported
+    >>> info = get_encoding_info('base64')
+    >>> print(info.encoding, info.code)
+    base64 b'm'
+    >>> all_encodings = list_encodings()
+    >>> is_encoding_supported('base64')
+    True
+
+    >>> # Decode and also return the detected encoding
+    >>> encoding, data = decode(encoded1, return_encoding=True)
+    >>> print(f'Encoded with {encoding}: {data}')
+
 
 Supported codecs
 ================
@@ -69,14 +91,22 @@ Supported codecs
 * base8
 * base10
 * base16
-* base16
-* base16
+* base16upper
 * base32hex
+* base32hexupper
+* base32hexpad
+* base32hexpadupper
 * base32
+* base32upper
+* base32pad
+* base32padupper
 * base32z
 * base36
 * base36upper
 * base58flickr
 * base58btc
 * base64
+* base64pad
 * base64url
+* base64urlpad
+* base256emoji
diff --git a/multibase/__init__.py b/multibase/__init__.py
@@ -4,4 +4,23 @@
 __email__ = "[email protected]"
 __version__ = "1.0.3"
 
-from .multibase import ENCODINGS, Encoding, decode, encode, get_codec, is_encoded  # noqa: F401
+from .exceptions import (  # noqa: F401
+    DecodingError,
+    InvalidMultibaseStringError,
+    MultibaseError,
+    UnsupportedEncodingError,
+)
+from .multibase import (  # noqa: F401
+    ENCODINGS,
+    ComposedDecoder,
+    Decoder,
+    Encoder,
+    Encoding,
+    decode,
+    encode,
+    get_codec,
+    get_encoding_info,
+    is_encoded,
+    is_encoding_supported,
+    list_encodings,
+)
diff --git a/multibase/converters.py b/multibase/converters.py
@@ -28,17 +28,38 @@ def decode(self, bytes):
 
 
 class Base16StringConverter(BaseStringConverter):
+    def __init__(self, digits):
+        super().__init__(digits)
+        self.uppercase = digits.isupper()
+
     def encode(self, bytes):
-        return ensure_bytes("".join([f"{byte:02x}" for byte in bytes]))
+        result = "".join([f"{byte:02x}" for byte in bytes])
+        if self.uppercase:
+            result = result.upper()
+        return ensure_bytes(result)
+
+    def decode(self, data):
+        # Base16 decoding is case-insensitive; normalize input to the case of our digits
+        if isinstance(data, bytes):
+            data_str = data.decode("utf-8")
+        else:
+            data_str = data
+        # Convert to match our digits case
+        if self.uppercase:
+            data_str = data_str.upper()
+        else:
+            data_str = data_str.lower()
+        return super().decode(data_str.encode("utf-8"))
 
 
 class BaseByteStringConverter:
     ENCODE_GROUP_BYTES = 1
     ENCODING_BITS = 1
     DECODING_BITS = 1
 
-    def __init__(self, digits):
+    def __init__(self, digits, pad=False):
         self.digits = digits
+        self.pad = pad
 
     def _chunk_with_padding(self, iterable, n, fillvalue=None):
         "Collect data into fixed-length chunks or blocks"
@@ -49,9 +70,11 @@ def _chunk_with_padding(self, iterable, n, fillvalue=None):
     def _chunk_without_padding(self, iterable, n):
         return map("".join, zip(*[iter(iterable)] * n))
 
-    def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits):
+    def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits, output_chars):
         buffer = BytesIO(bytes_)
         encoded_bytes = BytesIO()
+        input_length = len(bytes_)
+
         while True:
             byte_ = buffer.read(group_bytes)
             if not byte_:
@@ -67,9 +90,26 @@ def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits):
                 # convert binary representation to an integer
                 encoded_bytes.write(ensure_bytes(self.digits[digit]))
 
-        return encoded_bytes.getvalue()
+        result = encoded_bytes.getvalue()
+
+        # Add padding if needed (RFC 4648)
+        if self.pad:
+            remainder = input_length % group_bytes
+            if remainder > 0:
+                # For partial groups, we need to pad the output
+                # The padding makes the output length a multiple of output_chars
+                chars_produced = len(result)
+                # Calculate padding needed to reach next multiple of output_chars
+                padding_needed = output_chars - (chars_produced % output_chars)
+                result += ensure_bytes("=" * padding_needed)
+
+        return result
 
     def _decode_bytes(self, bytes_, group_bytes, decoding_bits, encoding_bits):
+        # Remove padding if present
+        if self.pad:
+            bytes_ = bytes_.rstrip(b"=")
+
         buffer = BytesIO()
         decoded_bytes = BytesIO()
 
@@ -104,20 +144,118 @@ def decode(self, bytes):
 
 class Base64StringConverter(BaseByteStringConverter):
     def encode(self, bytes):
-        return self._encode_bytes(ensure_bytes(bytes), 3, 8, 6)
+        return self._encode_bytes(ensure_bytes(bytes), 3, 8, 6, 4)
 
     def decode(self, bytes):
         return self._decode_bytes(ensure_bytes(bytes), 4, 6, 8)
 
 
 class Base32StringConverter(BaseByteStringConverter):
     def encode(self, bytes):
-        return self._encode_bytes(ensure_bytes(bytes), 5, 8, 5)
+        return self._encode_bytes(ensure_bytes(bytes), 5, 8, 5, 8)
 
     def decode(self, bytes):
         return self._decode_bytes(ensure_bytes(bytes), 8, 5, 8)
 
 
+class Base256EmojiConverter:
+    """Base256 emoji encoding using 256 unique emoji characters.
+
+    This implementation uses the exact same hardcoded emoji alphabet as the
+    js-multiformats and go-multibase reference implementations to ensure
+    full compatibility. The alphabet is curated from Unicode emoji frequency
+    data, excluding modifier-based emojis (such as flags) that span more
+    than a single code point.
+    """
+
+    # Hardcoded emoji alphabet matching js-multiformats and go-multibase.
+    # Order is significant: the character at index i encodes byte value i.
+    # Source: js-multiformats/src/bases/base256emoji.ts and go-multibase/base256emoji.go
+    _EMOJI_ALPHABET = (
+        "🚀🪐☄🛰🌌"  # Space
+        "🌑🌒🌓🌔🌕🌖🌗🌘"  # Moon
+        "🌍🌏🌎"  # Earth
+        "🐉"  # Dragon
+        "☀"  # Sun
+        "💻🖥💾💿"  # Computer
+        # Rest from Unicode emoji frequency data (most used first)
+        "😂❤😍🤣😊🙏💕😭😘👍"
+        "😅👏😁🔥🥰💔💖💙😢🤔"
+        "😆🙄💪😉☺👌🤗💜😔😎"
+        "😇🌹🤦🎉💞✌✨🤷😱😌"
+        "🌸🙌😋💗💚😏💛🙂💓🤩"
+        "😄😀🖤😃💯🙈👇🎶😒🤭"
+        "❣😜💋👀😪😑💥🙋😞😩"
+        "😡🤪👊🥳😥🤤👉💃😳✋"
+        "😚😝😴🌟😬🙃🍀🌷😻😓"
+        "⭐✅🥺🌈😈🤘💦✔😣🏃"
+        "💐☹🎊💘😠☝😕🌺🎂🌻"
+        "😐🖕💝🙊😹🗣💫💀👑🎵"
+        "🤞😛🔴😤🌼😫⚽🤙☕🏆"
+        "🤫👈😮🙆🍻🍃🐶💁😲🌿"
+        "🧡🎁⚡🌞🎈❌✊👋😰🤨"
+        "😶🤝🚶💰🍓💢🤟🙁🚨💨"
+        "🤬✈🎀🍺🤓😙💟🌱😖👶"
+        "🥴▶➡❓💎💸⬇😨🌚🦋"
+        "😷🕺⚠🙅😟😵👎🤲🤠🤧"
+        "📌🔵💅🧐🐾🍒😗🤑🌊🤯"
+        "🐷☎💧😯💆👆🎤🙇🍑❄"
+        "🌴💣🐸💌📍🥀🤢👅💡💩"
+        "👐📸👻🤐🤮🎼🥵🚩🍎🍊"
+        "👼💍📣🥂"
+    )
+
+    def __init__(self):
+        # Verify alphabet length
+        if len(self._EMOJI_ALPHABET) != 256:
+            raise ValueError(f"EMOJI_ALPHABET must contain exactly 256 characters, got {len(self._EMOJI_ALPHABET)}")
+        # Create mapping from byte value to emoji character
+        self.byte_to_emoji = {i: self._EMOJI_ALPHABET[i] for i in range(256)}
+        # Create reverse mapping from emoji character to byte value
+        # This matches the approach in js-multiformats and go-multibase
+        self.emoji_to_byte = {emoji: byte for byte, emoji in self.byte_to_emoji.items()}
+
+    def encode(self, bytes_) -> bytes:
+        """Encode bytes to emoji string.
+
+        :param bytes_: Bytes to encode
+        :type bytes_: bytes or str
+        :return: UTF-8 encoded emoji string
+        :rtype: bytes
+        """
+        bytes_ = ensure_bytes(bytes_)
+        result = []
+        for byte_val in bytes_:
+            result.append(self.byte_to_emoji[byte_val])
+        return "".join(result).encode("utf-8")
+
+    def decode(self, bytes_) -> bytes:
+        """Decode emoji string to bytes.
+
+        Decodes character-by-character, matching the behavior of js-multiformats
+        and go-multibase reference implementations. Each emoji in the alphabet
+        is a single Unicode code point, so we can safely iterate character by
+        character.
+
+        :param bytes_: UTF-8 encoded emoji string
+        :type bytes_: bytes or str
+        :return: Decoded bytes
+        :rtype: bytes
+        :raises ValueError: if an invalid emoji character is encountered
+        """
+        bytes_ = ensure_bytes(bytes_, "utf8")
+        # Decode UTF-8 to get emoji string
+        emoji_str = bytes_.decode("utf-8")
+        result = bytearray()
+        # Iterating a str yields one code point at a time; every alphabet entry is
+        # a single code point, so each character maps back to exactly one byte.
+        for char in emoji_str:
+            if char not in self.emoji_to_byte:
+                raise ValueError(f"Non-base256emoji character: {char}")
+            result.append(self.emoji_to_byte[char])
+        return bytes(result)
+
+
 class IdentityConverter:
     def encode(self, x):
         return x

diff --git a/multibase/exceptions.py b/multibase/exceptions.py
@@ -0,0 +1,25 @@
+"""Custom exceptions for multibase encoding/decoding errors."""
+
+
+class MultibaseError(ValueError):
+    """Base exception for all multibase errors."""
+
+    pass
+
+
+class UnsupportedEncodingError(MultibaseError):
+    """Raised when an encoding is not supported."""
+
+    pass
+
+
+class InvalidMultibaseStringError(MultibaseError):
+    """Raised when a multibase string is invalid or cannot be decoded."""
+
+    pass
+
+
+class DecodingError(MultibaseError):
+    """Raised when decoding fails."""
+
+    pass