Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Set TOXENV
run: |
python_version="${{ matrix.python-version }}"
toxenv="${{ matrix.toxenv }}"
if [[ "$toxenv" == "docs" ]]; then
echo "TOXENV=docs" | tee -a "$GITHUB_ENV"
if [[ "${{ matrix.toxenv }}" == "docs" ]]; then
echo "TOXENV=docs" >> "$GITHUB_ENV"
else
echo "TOXENV=py${python_version}-${toxenv}" | tr -d '.' | tee -a "$GITHUB_ENV"
python_version="${{ matrix.python-version }}"
echo "TOXENV=py${python_version//./}-${{ matrix.toxenv }}" >> "$GITHUB_ENV"
fi
- run: |
python -m pip install --upgrade pip
Expand All @@ -58,8 +57,7 @@ jobs:
shell: bash
run: |
python_version="${{ matrix.python-version }}"
toxenv="${{ matrix.toxenv }}"
echo "TOXENV=py${python_version}-${toxenv}" | tr -d '.' | tee -a "$GITHUB_ENV"
echo "TOXENV=py${python_version//./}-${{ matrix.toxenv }}" >> "$GITHUB_ENV"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
27 changes: 27 additions & 0 deletions CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,33 @@ Ready to contribute? Here's how to set up `multibase` for local development.

If you installed pre-commit hooks (step 4), they will run automatically on commit.

Development Workflow Commands
-------------------------------

The project provides several ``make`` targets to help with development:

* ``make fix`` - Automatically fix formatting and linting issues using ruff.
Use this when you want to auto-fix code style issues.

* ``make lint`` - Run all pre-commit hooks on all files to check for code quality
issues. This includes YAML/TOML validation, trailing whitespace checks, pyupgrade,
ruff linting and formatting, and mypy type checking.

* ``make typecheck`` - Run mypy type checking only. Use this when you want to
quickly check for type errors without running all other checks.

* ``make test`` - Run the test suite with pytest using the default Python version.
For testing across multiple Python versions, use ``tox`` instead.

* ``make pr`` - Run a complete pre-PR check: clean build artifacts, fix formatting,
run linting, type checking, and tests. This is the recommended command to run
before submitting a pull request.

* ``make coverage`` - Run tests with coverage reporting and open the HTML report
in your browser.

For a full list of available commands, run ``make help``.

7. Commit your changes and push your branch to GitHub::

$ git add .
Expand Down
34 changes: 32 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,28 @@ Sample Usage
>>> decode(encode('base2', b'hello world'))
b'hello world'

>>> # Using reusable Encoder/Decoder classes
>>> from multibase import Encoder, Decoder
>>> encoder = Encoder('base64')
>>> encoded1 = encoder.encode('data1')
>>> encoded2 = encoder.encode('data2')

>>> decoder = Decoder()
>>> decoded = decoder.decode(encoded1)

>>> # Getting encoding information
>>> from multibase import get_encoding_info, list_encodings, is_encoding_supported
>>> info = get_encoding_info('base64')
>>> print(info.encoding, info.code)
base64 b'm'
>>> all_encodings = list_encodings()
>>> is_encoding_supported('base64')
True

>>> # Decode with encoding return
>>> encoding, data = decode(encoded1, return_encoding=True)
>>> print(f'Encoded with {encoding}: {data}')


Supported codecs
================
Expand All @@ -69,14 +91,22 @@ Supported codecs
* base8
* base10
* base16
* base16upper
* base32hex
* base32hexupper
* base32hexpad
* base32hexpadupper
* base32
* base32upper
* base32pad
* base32padupper
* base32z
* base36
* base36upper
* base58flickr
* base58btc
* base64
* base64pad
* base64url
* base64urlpad
* base256emoji
21 changes: 20 additions & 1 deletion multibase/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,23 @@
__email__ = "[email protected]"
__version__ = "1.0.3"

from .multibase import ENCODINGS, Encoding, decode, encode, get_codec, is_encoded # noqa: F401
from .exceptions import ( # noqa: F401
DecodingError,
InvalidMultibaseStringError,
MultibaseError,
UnsupportedEncodingError,
)
from .multibase import ( # noqa: F401
ENCODINGS,
ComposedDecoder,
Decoder,
Encoder,
Encoding,
decode,
encode,
get_codec,
get_encoding_info,
is_encoded,
is_encoding_supported,
list_encodings,
)
150 changes: 144 additions & 6 deletions multibase/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,38 @@ def decode(self, bytes):


class Base16StringConverter(BaseStringConverter):
    """Hexadecimal (base16) converter whose output case follows its alphabet.

    The same class serves both the lower-case and the upper-case base16
    variants: the case is derived once from the ``digits`` alphabet passed to
    the constructor.
    """

    def __init__(self, digits):
        """Store the alphabet and remember whether it is upper case.

        :param digits: alphabet handed on to ``BaseStringConverter``
        """
        super().__init__(digits)
        # isupper() is True only when the alphabet has cased characters and
        # every one of them is upper case (e.g. "0123456789ABCDEF").
        self.uppercase = digits.isupper()

    def encode(self, bytes):
        """Encode a sequence of byte values as two hex digits per byte.

        :param bytes: iterable of integer byte values (e.g. a ``bytes`` object)
        :return: result of ``ensure_bytes`` applied to the hex string, in the
            case selected by the alphabet
        """
        result = "".join([f"{byte:02x}" for byte in bytes])
        if self.uppercase:
            result = result.upper()
        return ensure_bytes(result)

    def decode(self, data):
        """Decode hex text, accepting either letter case.

        The input is normalised to the case of this converter's alphabet
        before being handed to the parent decoder, which makes decoding
        case-insensitive.

        :param data: hex digits as ``bytes`` (UTF-8) or ``str``
        :return: whatever ``BaseStringConverter.decode`` returns for the
            normalised input
        """
        data_str = data.decode("utf-8") if isinstance(data, bytes) else data
        # Fold the input to the alphabet's case so the parent lookup matches.
        data_str = data_str.upper() if self.uppercase else data_str.lower()
        return super().decode(data_str.encode("utf-8"))


class BaseByteStringConverter:
ENCODE_GROUP_BYTES = 1
ENCODING_BITS = 1
DECODING_BITS = 1

def __init__(self, digits):
def __init__(self, digits, pad=False):
self.digits = digits
self.pad = pad

def _chunk_with_padding(self, iterable, n, fillvalue=None):
"Collect data into fixed-length chunks or blocks"
Expand All @@ -49,9 +70,11 @@ def _chunk_with_padding(self, iterable, n, fillvalue=None):
def _chunk_without_padding(self, iterable, n):
return map("".join, zip(*[iter(iterable)] * n))

def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits):
def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits, output_chars):
buffer = BytesIO(bytes_)
encoded_bytes = BytesIO()
input_length = len(bytes_)

while True:
byte_ = buffer.read(group_bytes)
if not byte_:
Expand All @@ -67,9 +90,26 @@ def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits):
# convert binary representation to an integer
encoded_bytes.write(ensure_bytes(self.digits[digit]))

return encoded_bytes.getvalue()
result = encoded_bytes.getvalue()

# Add padding if needed (RFC 4648)
if self.pad:
remainder = input_length % group_bytes
if remainder > 0:
# For partial groups, we need to pad the output
# The padding makes the output length a multiple of output_chars
chars_produced = len(result)
# Calculate padding needed to reach next multiple of output_chars
padding_needed = output_chars - (chars_produced % output_chars)
result += ensure_bytes("=" * padding_needed)

return result

def _decode_bytes(self, bytes_, group_bytes, decoding_bits, encoding_bits):
# Remove padding if present
if self.pad:
bytes_ = bytes_.rstrip(b"=")

buffer = BytesIO()
decoded_bytes = BytesIO()

Expand Down Expand Up @@ -104,20 +144,118 @@ def decode(self, bytes):

class Base64StringConverter(BaseByteStringConverter):
    """Base64 converter: 3 input bytes map to 4 output characters of 6 bits."""

    def encode(self, bytes):
        """Encode ``bytes`` with the base64 grouping.

        :param bytes: data to encode; normalised with ``ensure_bytes`` first
        :return: the value produced by ``_encode_bytes``
        """
        # output_chars=4 is the width of a full encoded group; _encode_bytes
        # uses it to work out "=" padding when the converter has pad enabled.
        return self._encode_bytes(
            ensure_bytes(bytes),
            group_bytes=3,
            encoding_bits=8,
            decoding_bits=6,
            output_chars=4,
        )

    def decode(self, bytes):
        """Decode base64 text back to the original data.

        :param bytes: encoded data; normalised with ``ensure_bytes`` first
        :return: the value produced by ``_decode_bytes``
        """
        return self._decode_bytes(
            ensure_bytes(bytes),
            group_bytes=4,
            decoding_bits=6,
            encoding_bits=8,
        )


class Base32StringConverter(BaseByteStringConverter):
    """Base32 converter: 5 input bytes map to 8 output characters of 5 bits."""

    def encode(self, bytes):
        """Encode ``bytes`` with the base32 grouping.

        :param bytes: data to encode; normalised with ``ensure_bytes`` first
        :return: the value produced by ``_encode_bytes``
        """
        # output_chars=8 is the width of a full encoded group; _encode_bytes
        # uses it to work out "=" padding when the converter has pad enabled.
        return self._encode_bytes(
            ensure_bytes(bytes),
            group_bytes=5,
            encoding_bits=8,
            decoding_bits=5,
            output_chars=8,
        )

    def decode(self, bytes):
        """Decode base32 text back to the original data.

        :param bytes: encoded data; normalised with ``ensure_bytes`` first
        :return: the value produced by ``_decode_bytes``
        """
        return self._decode_bytes(
            ensure_bytes(bytes),
            group_bytes=8,
            decoding_bits=5,
            encoding_bits=8,
        )


class Base256EmojiConverter:
"""Base256 emoji encoding using 256 unique emoji characters.

This implementation uses the exact same hardcoded emoji alphabet as
js-multiformats and go-multibase reference implementations to ensure
full compatibility. The alphabet is curated from Unicode emoji frequency
data, excluding modifier-based emojis (such as flags) that are bigger
than one single code point.
"""

# Hardcoded emoji alphabet matching js-multiformats and go-multibase
# This is the exact same alphabet used in reference implementations
# Source: js-multiformats/src/bases/base256emoji.ts and go-multibase/base256emoji.go
_EMOJI_ALPHABET = (
"🚀🪐☄🛰🌌" # Space
"🌑🌒🌓🌔🌕🌖🌗🌘" # Moon
"🌍🌏🌎" # Earth
"🐉" # Dragon
"☀" # Sun
"💻🖥💾💿" # Computer
# Rest from Unicode emoji frequency data (most used first)
"😂❤😍🤣😊🙏💕😭😘👍"
"😅👏😁🔥🥰💔💖💙😢🤔"
"😆🙄💪😉☺👌🤗💜😔😎"
"😇🌹🤦🎉💞✌✨🤷😱😌"
"🌸🙌😋💗💚😏💛🙂💓🤩"
"😄😀🖤😃💯🙈👇🎶😒🤭"
"❣😜💋👀😪😑💥🙋😞😩"
"😡🤪👊🥳😥🤤👉💃😳✋"
"😚😝😴🌟😬🙃🍀🌷😻😓"
"⭐✅🥺🌈😈🤘💦✔😣🏃"
"💐☹🎊💘😠☝😕🌺🎂🌻"
"😐🖕💝🙊😹🗣💫💀👑🎵"
"🤞😛🔴😤🌼😫⚽🤙☕🏆"
"🤫👈😮🙆🍻🍃🐶💁😲🌿"
"🧡🎁⚡🌞🎈❌✊👋😰🤨"
"😶🤝🚶💰🍓💢🤟🙁🚨💨"
"🤬✈🎀🍺🤓😙💟🌱😖👶"
"🥴▶➡❓💎💸⬇😨🌚🦋"
"😷🕺⚠🙅😟😵👎🤲🤠🤧"
"📌🔵💅🧐🐾🍒😗🤑🌊🤯"
"🐷☎💧😯💆👆🎤🙇🍑❄"
"🌴💣🐸💌📍🥀🤢👅💡💩"
"👐📸👻🤐🤮🎼🥵🚩🍎🍊"
"👼💍📣🥂"
)

def __init__(self):
# Verify alphabet length
if len(self._EMOJI_ALPHABET) != 256:
raise ValueError(f"EMOJI_ALPHABET must contain exactly 256 characters, got {len(self._EMOJI_ALPHABET)}")
# Create mapping from byte value to emoji character
self.byte_to_emoji = {i: self._EMOJI_ALPHABET[i] for i in range(256)}
# Create reverse mapping from emoji character to byte value
# This matches the approach in js-multiformats and go-multibase
self.emoji_to_byte = {emoji: byte for byte, emoji in self.byte_to_emoji.items()}

def encode(self, bytes_) -> bytes:
    """Translate each input byte to its emoji and return the UTF-8 text.

    :param bytes_: data to encode; passed through ``ensure_bytes`` first
    :type bytes_: bytes or str
    :return: UTF-8 encoding of the concatenated emoji characters
    :rtype: bytes
    """
    raw = ensure_bytes(bytes_)
    table = self.byte_to_emoji
    # One table lookup per byte; the pieces are joined once at the end.
    symbols = [table[value] for value in raw]
    return "".join(symbols).encode("utf-8")

def decode(self, bytes_) -> bytes:
    """Translate UTF-8 emoji text back to the bytes it represents.

    The text is walked one Unicode code point at a time and each character
    is looked up in the reverse table built by ``__init__``.

    :param bytes_: UTF-8 encoded emoji string; passed through
        ``ensure_bytes`` first
    :type bytes_: bytes or str
    :return: Decoded bytes
    :rtype: bytes
    :raises ValueError: if a character is not part of the emoji alphabet
    """
    text = ensure_bytes(bytes_, "utf8").decode("utf-8")
    lookup = self.emoji_to_byte
    decoded = bytearray()
    for symbol in text:
        # Table values are ints in 0..255, so None reliably means "missing".
        value = lookup.get(symbol)
        if value is None:
            raise ValueError(f"Non-base256emoji character: {symbol}")
        decoded.append(value)
    return bytes(decoded)


class IdentityConverter:
def encode(self, x):
return x
Expand Down
25 changes: 25 additions & 0 deletions multibase/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Custom exceptions for multibase encoding/decoding errors."""


class MultibaseError(ValueError):
    """Root of the multibase exception hierarchy.

    Derives from ``ValueError``, so handlers written for ``ValueError``
    also catch every multibase error.
    """


class UnsupportedEncodingError(MultibaseError):
    """Signals that a requested encoding is not supported."""


class InvalidMultibaseStringError(MultibaseError):
    """Signals that a multibase string is invalid or cannot be decoded."""


class DecodingError(MultibaseError):
    """Signals that a decoding operation failed."""
Loading