JarbasHiveMind · JarbasAl · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025
diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v1
         with:
-          python-version: 3.8
+          python-version: "3.10"  # quoted: a bare 3.10 is read by YAML as the float 3.1
       - name: Install Build Tools
         run: |
           python -m pip install build wheel

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -0,0 +1,70 @@
+# Runs the unit tests on pull requests to dev, pushes to master, and manual
+# dispatch; runs are skipped when only files matching paths-ignore changed.
+name: Run UnitTests
+on:
+  pull_request:
+    branches:
+      - dev
+    paths-ignore:
+      - 'hivemind_bus_client/version.py'
+      - 'examples/**'
+      - '.github/**'
+      - '.gitignore'
+      - 'LICENSE'
+      - 'CHANGELOG.md'
+      - 'MANIFEST.in'
+      - 'readme.md'
+      - 'scripts/**'
+  push:
+    branches:
+      - master
+    paths-ignore:
+      - 'hivemind_bus_client/version.py'
+      - 'requirements/**'
+      - 'examples/**'
+      - '.github/**'
+      - '.gitignore'
+      - 'LICENSE'
+      - 'CHANGELOG.md'
+      - 'MANIFEST.in'
+      - 'readme.md'
+      - 'scripts/**'
+  workflow_dispatch:
+
+jobs:
+  unit_tests:
+    strategy:
+      matrix:
+        # Quoted so every entry stays a string: a bare 3.10 would be read as
+        # the float 3.1, and the upload condition below compares against '3.9'.
+        python-version: ["3.9", "3.10", "3.11"]
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install System Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y python3-dev swig
+          python -m pip install build wheel
+      - name: Install repo
+        run: |
+          pip install -e .
+      # Installed explicitly so the test step does not depend on the package's
+      # own requirements providing pytest and pytest-cov.
+      - name: Install test tools
+        run: |
+          python -m pip install pytest pytest-cov
+      - name: Run unittests
+        run: |
+          pytest --cov=hivemind_bus_client --cov-report xml test/unittests
+      - name: Upload coverage
+        # Only one matrix entry uploads, so coverage is reported once per run.
+        if: "${{ matrix.python-version == '3.9' }}"
+        env:
+          CODECOV_TOKEN: ${{secrets.CODECOV_TOKEN}}
+        uses: codecov/codecov-action@v2
diff --git a/hivemind_bus_client/encodings/__init__.py b/hivemind_bus_client/encodings/__init__.py
@@ -0,0 +1,4 @@
+from hivemind_bus_client.encodings.z85b import Z85B
+from hivemind_bus_client.encodings.z85p import Z85P
+from hivemind_bus_client.encodings.b91 import B91
+from hivemind_bus_client.encodings.b100p import B100P
diff --git a/hivemind_bus_client/encodings/b100p.py b/hivemind_bus_client/encodings/b100p.py
@@ -0,0 +1,84 @@
+from typing import Union
+
+
+class B100P:
+    """
+    Emoji-based byte encoding with an explicit padding prefix.
+
+    Each input byte is written as the 4-byte sequence ``F0 9F xx yy``. Before
+    encoding, the input is zero-padded to a multiple of 4 bytes, and the number
+    of padding bytes (0-3) is stored as the first byte of the output so that
+    `decode` can strip the padding again.
+    """
+
+    @classmethod
+    def encode(cls, data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
+        """
+        Encode data into the padded emoji representation.
+
+        Args:
+            data (Union[str, bytes]): The input to encode. A string is first converted to bytes.
+            encoding (str): The encoding to use if `data` is provided as a string. Default is 'utf-8'.
+
+        Returns:
+            bytes: One padding-size byte followed by 4 bytes per (padded) input byte.
+        """
+        if isinstance(data, str):
+            data = data.encode(encoding)
+
+        padding = (4 - len(data) % 4) % 4  # Padding to make the length a multiple of 4
+        # Build a new bytes object instead of using `+=`, which would extend a
+        # caller-supplied bytearray in place.
+        data = bytes(data) + b'\x00' * padding
+
+        # The first byte indicates how many padding bytes were added
+        encoded_data = bytearray([padding])
+        for b in data:
+            shifted = b + 55
+            encoded_data += bytes((240, 159, shifted // 64 + 143, shifted % 64 + 128))
+
+        return bytes(encoded_data)
+
+    @classmethod
+    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
+        """
+        Decode the padded emoji representation back into the original bytes.
+
+        Args:
+            encoded_data (Union[str, bytes]): The emoji-encoded byte sequence or string to be decoded.
+            encoding (str): The encoding to use if `encoded_data` is provided as a string. Default is 'utf-8'.
+
+        Returns:
+            bytes: The decoded bytes with the padding removed. Empty input yields empty output.
+
+        Raises:
+            ValueError: If the input length is not 1 + a multiple of 4, the padding size is
+                greater than 3, or a 4-byte group is not a sequence produced by `encode`.
+        """
+        if isinstance(encoded_data, str):
+            encoded_data = encoded_data.encode(encoding)
+
+        if len(encoded_data) == 0:
+            return b''
+
+        # Ensure the length of data is divisible by 4 (with 1 extra byte for padding size)
+        if len(encoded_data) % 4 != 1:
+            raise ValueError('Invalid data length, should be divisible by 4 with 1 extra byte for padding indicator.')
+
+        padding = encoded_data[0]  # Read the padding size from the first byte
+        if padding > 3:  # a byte value is never negative, so only the upper bound is checked
+            raise ValueError('Padding size must be between 0 and 3.')
+
+        out = bytearray()
+        for i in range(1, len(encoded_data), 4):
+            lead, marker, high, low = encoded_data[i:i + 4]
+            value = (high - 143) * 64 + (low - 128) - 55
+            # Reject groups `encode` cannot produce instead of silently
+            # decoding them to arbitrary bytes.
+            if (lead, marker) != (240, 159) or not 128 <= low <= 191 or not 0 <= value <= 255:
+                raise ValueError(f'Invalid emoji encoding at byte offset {i}.')
+            out.append(value)
+
+        # Return decoded bytes, removing the indicated padding
+        decoded = bytes(out)
+        return decoded[:-padding] if padding else decoded
diff --git a/hivemind_bus_client/encodings/b91.py b/hivemind_bus_client/encodings/b91.py
@@ -0,0 +1,107 @@
+from typing import Union
+
+
+class B91:
+    """Base91 codec mapping binary data onto a 91-symbol printable alphabet."""
+
+    # Symbol for each digit value 0-90; the position in the list is the value.
+    ALPHABET = list(
+        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        'abcdefghijklmnopqrstuvwxyz'
+        '0123456789'
+        '!#$%&()*+,./:;<=>?@[]^_`{|}~"'
+    )
+
+    # Reverse lookup: symbol -> digit value.
+    DECODE_TABLE = dict(zip(ALPHABET, range(len(ALPHABET))))
+
+    @classmethod
+    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
+        """
+        Turn Base91 text back into the bytes it represents.
+
+        Args:
+            encoded_data (Union[str, bytes]): Base91 text. `bytes` input is first decoded to `str`.
+            encoding (str): The encoding used to decode `encoded_data` when it is `bytes`. Default is 'utf-8'.
+
+        Returns:
+            bytes: The decoded binary data.
+
+        Raises:
+            ValueError: If the input contains a character outside the Base91 alphabet.
+        """
+        text = encoded_data.decode(encoding) if isinstance(encoded_data, bytes) else encoded_data
+
+        lookup = cls.DECODE_TABLE
+        decoded = bytearray()
+        acc = 0  # bit accumulator, least-significant bits are emitted first
+        nbits = 0  # number of valid bits currently held in `acc`
+        first = None  # digit of a symbol pair that is still waiting for its partner
+
+        for symbol in text:
+            digit = lookup.get(symbol)
+            if digit is None:
+                raise ValueError(f"Invalid Base91 character: {symbol}")
+            if first is None:
+                first = digit
+                continue
+
+            # Two symbols form one value that carries 13 or 14 bits.
+            pair = first + digit * 91
+            first = None
+            acc |= pair << nbits
+            nbits += 13 if (pair & 8191) > 88 else 14
+            while nbits >= 8:
+                decoded.append(acc & 255)
+                acc >>= 8
+                nbits -= 8
+
+        # A trailing unpaired symbol contributes one final byte.
+        if first is not None:
+            decoded.append((acc | first << nbits) & 255)
+
+        return bytes(decoded)
+
+    @classmethod
+    def encode(cls, data: Union[bytes, str], encoding: str = "utf-8") -> bytes:
+        """
+        Produce the Base91 representation of binary data.
+
+        Args:
+            data (Union[bytes, str]): Data to encode. `str` input is first encoded to bytes.
+            encoding (str): The encoding applied to a `str` input and to the returned text. Default is 'utf-8'.
+
+        Returns:
+            bytes: The Base91 text, encoded with `encoding`.
+        """
+        raw = data.encode(encoding) if isinstance(data, str) else data
+
+        alphabet = cls.ALPHABET
+        symbols = []
+        acc = 0
+        nbits = 0
+
+        for octet in raw:
+            acc |= octet << nbits
+            nbits += 8
+            if nbits <= 13:
+                continue
+
+            # Take 13 bits, or 14 when the 13-bit value is 88 or less.
+            width = 13
+            chunk = acc & 8191
+            if chunk <= 88:
+                width = 14
+                chunk = acc & 16383
+            acc >>= width
+            nbits -= width
+            high, low = divmod(chunk, 91)
+            symbols += (alphabet[low], alphabet[high])
+
+        # Flush what is left: one symbol, plus a second when it is needed.
+        if nbits:
+            symbols.append(alphabet[acc % 91])
+            if nbits > 7 or acc > 90:
+                symbols.append(alphabet[acc // 91])
+
+        return ''.join(symbols).encode(encoding)
diff --git a/hivemind_bus_client/encodings/z85b.py b/hivemind_bus_client/encodings/z85b.py
@@ -0,0 +1,103 @@
+"""
+Python implementation of the Z85b base-85 encoding.
+
+Z85b is a variation of the ZMQ RFC 32 Z85 base-85 encoding with the following differences:
+1. Little-endian encoding (to facilitate alignment with lower byte indices).
+2. No requirement for a multiple of 4/5 length.
+3. `Z85B.decode()` eliminates whitespace from the input.
+4. `Z85B.decode()` raises a clear exception if invalid characters are encountered.
+
+This file is a derivative work of https://gist.github.com/minrk/6357188?permalink_comment_id=2366506#gistcomment-2366506
+
+Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
+Distributed under the terms of the New BSD License.
+"""
+import re
+import struct
+from typing import Union
+
+from hivemind_bus_client.exceptions import Z85DecodeError
+
+
+class Z85B:
+    """Encoder/decoder for Z85b; see the module docstring for how it differs from Z85."""
+
+    # Symbol table: the byte at index i is the character for base-85 digit i
+    Z85CHARS = bytearray(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#")
+
+    # Reverse table: character byte -> base-85 digit
+    Z85MAP = dict(zip(Z85CHARS, range(len(Z85CHARS))))
+
+    # Place values 85**0 .. 85**4, least-significant digit first
+    _85s = [85 ** exponent for exponent in range(5)]
+
+    # Indexed by len(data) % 4: zero bytes appended before encoding (and symbols trimmed afterwards)
+    _E_PADDING = [0, 3, 2, 1]
+    # Indexed by len(symbols) % 5: bytes trimmed from the decoded output
+    _D_PADDING = [0, 4, 3, 2, 1]
+
+    @classmethod
+    def encode(cls, data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
+        """
+        Convert raw data to its Z85b text form.
+
+        Args:
+            data (Union[str, bytes]): Input data to encode.
+            encoding (str): The encoding to use if `data` is provided as a string. Default is 'utf-8'.
+
+        Returns:
+            bytes: Z85b-encoded bytes.
+        """
+        raw = bytearray(data.encode(encoding) if isinstance(data, str) else data)
+        pad = cls._E_PADDING[len(raw) % 4]
+        raw.extend(bytes(pad))
+
+        # Read the padded buffer as little-endian 32-bit words
+        words = struct.unpack(f'<{len(raw) // 4}I', raw)
+
+        # Each word becomes five symbols, least-significant base-85 digit first
+        symbols = bytearray(
+            cls.Z85CHARS[(word // weight) % 85]
+            for word in words
+            for weight in cls._85s
+        )
+
+        # Drop the symbols that only represent the zero padding
+        return bytes(symbols[:-pad]) if pad else bytes(symbols)
+
+    @classmethod
+    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
+        """
+        Convert Z85b text back to raw bytes, ignoring any whitespace in the input.
+
+        Args:
+            encoded_data (Union[str, bytes]): Z85b-encoded data.
+            encoding (str): The encoding to use if `encoded_data` is provided as a string. Default is 'utf-8'.
+
+        Returns:
+            bytes: Decoded raw bytes.
+
+        Raises:
+            Z85DecodeError: If invalid characters are encountered during decoding.
+        """
+        if not isinstance(encoded_data, bytes):
+            encoded_data = encoded_data.encode(encoding)
+        symbols = bytearray(re.sub(rb'\s+', b'', encoded_data))
+        pad = cls._D_PADDING[len(symbols) % 5]
+
+        words = []
+        for start in range(0, len(symbols), 5):
+            word = 0
+            # The final group may hold fewer than five symbols; zip stops at the shorter one
+            for weight, code in zip(cls._85s, symbols[start:start + 5]):
+                try:
+                    word += cls.Z85MAP[code] * weight
+                except KeyError as err:
+                    raise Z85DecodeError(f"Invalid byte code: {err.args[0]!r}")
+            words.append(word)
+
+        # Write the words back out as little-endian 32-bit integers
+        decoded = struct.pack(f'<{len(words)}I', *words)
+
+        # Trim the bytes that correspond to the missing symbols of a short final group
+        return decoded[:-pad] if pad else decoded