@@ -28,17 +28,38 @@ def decode(self, bytes):
2828
2929
class Base16StringConverter(BaseStringConverter):
    """Hexadecimal converter that honours the letter case of its alphabet.

    The case is taken from ``digits.isupper()`` at construction time:
    encoding emits hex digits in that case, and decoding folds the input
    to that case before delegating to the parent decoder.
    """

    def __init__(self, digits):
        super().__init__(digits)
        # True when every cased character of the alphabet is upper case.
        self.uppercase = digits.isupper()

    def encode(self, bytes):
        """Return the two-hex-digits-per-item encoding of *bytes*.

        Each item is formatted with ``02x``; the text is upper-cased
        afterwards when the alphabet is upper case.
        """
        hex_text = "".join(f"{byte:02x}" for byte in bytes)
        return ensure_bytes(hex_text.upper() if self.uppercase else hex_text)

    def decode(self, data):
        """Decode hex *data* (``bytes`` or ``str``), ignoring letter case.

        The input is folded to the alphabet's case so that mixed-case
        input is accepted, then handed to the parent ``decode`` as
        UTF-8 bytes.
        """
        text = data.decode("utf-8") if isinstance(data, bytes) else data
        folded = text.upper() if self.uppercase else text.lower()
        return super().decode(folded.encode("utf-8"))
3353
3454
3555class BaseByteStringConverter :
3656 ENCODE_GROUP_BYTES = 1
3757 ENCODING_BITS = 1
3858 DECODING_BITS = 1
3959
40- def __init__ (self , digits ):
60+ def __init__ (self , digits , pad = False ):
4161 self .digits = digits
62+ self .pad = pad
4263
4364 def _chunk_with_padding (self , iterable , n , fillvalue = None ):
4465 "Collect data into fixed-length chunks or blocks"
@@ -49,9 +70,11 @@ def _chunk_with_padding(self, iterable, n, fillvalue=None):
4970 def _chunk_without_padding (self , iterable , n ):
5071 return map ("" .join , zip (* [iter (iterable )] * n ))
5172
52- def _encode_bytes (self , bytes_ , group_bytes , encoding_bits , decoding_bits ):
73+ def _encode_bytes (self , bytes_ , group_bytes , encoding_bits , decoding_bits , output_chars ):
5374 buffer = BytesIO (bytes_ )
5475 encoded_bytes = BytesIO ()
76+ input_length = len (bytes_ )
77+
5578 while True :
5679 byte_ = buffer .read (group_bytes )
5780 if not byte_ :
@@ -67,9 +90,29 @@ def _encode_bytes(self, bytes_, group_bytes, encoding_bits, decoding_bits):
6790 # convert binary representation to an integer
6891 encoded_bytes .write (ensure_bytes (self .digits [digit ]))
6992
70- return encoded_bytes .getvalue ()
93+ result = encoded_bytes .getvalue ()
94+
95+ # Add padding if needed
96+ if self .pad :
97+ remainder = input_length % group_bytes
98+ if remainder > 0 :
99+ # For partial groups, we need to pad the output
100+ # The padding makes the output length a multiple of output_chars
101+ actual_output_len = len (result )
102+ # Calculate padding needed to reach next multiple of output_chars
103+ padding_needed = (output_chars - (actual_output_len % output_chars )) % output_chars
104+ if padding_needed == 0 and actual_output_len % output_chars != 0 :
105+ # If we're not at a multiple, pad to the next multiple
106+ padding_needed = output_chars - (actual_output_len % output_chars )
107+ result += ensure_bytes ("=" * padding_needed )
108+
109+ return result
71110
72111 def _decode_bytes (self , bytes_ , group_bytes , decoding_bits , encoding_bits ):
112+ # Remove padding if present
113+ if self .pad :
114+ bytes_ = bytes_ .rstrip (b"=" )
115+
73116 buffer = BytesIO ()
74117 decoded_bytes = BytesIO ()
75118
@@ -104,20 +147,132 @@ def decode(self, bytes):
104147
class Base64StringConverter(BaseByteStringConverter):
    """Base64 front end for the shared bit-regrouping encoder/decoder."""

    def encode(self, bytes):
        """Encode *bytes*: 3-byte groups, 8-bit input units, 6-bit output units, 4 symbols per group."""
        raw = ensure_bytes(bytes)
        return self._encode_bytes(raw, 3, 8, 6, 4)

    def decode(self, bytes):
        """Decode *bytes*: 4-symbol groups, 6-bit input units, 8-bit output units."""
        encoded = ensure_bytes(bytes)
        return self._decode_bytes(encoded, 4, 6, 8)
111154
112155
class Base32StringConverter(BaseByteStringConverter):
    """Base32 front end for the shared bit-regrouping encoder/decoder."""

    def encode(self, bytes):
        """Encode *bytes*: 5-byte groups, 8-bit input units, 5-bit output units, 8 symbols per group."""
        raw = ensure_bytes(bytes)
        return self._encode_bytes(raw, 5, 8, 5, 8)

    def decode(self, bytes):
        """Decode *bytes*: 8-symbol groups, 5-bit input units, 8-bit output units."""
        encoded = ensure_bytes(bytes)
        return self._decode_bytes(encoded, 8, 5, 8)
119162
120163
class Base256EmojiConverter:
    """Base256 emoji encoding: one emoji code point per byte value.

    The alphabet is generated locally from two Unicode ranges, not copied
    from a published table.
    NOTE(review): output is presumably not interoperable with other
    base256emoji implementations unless their alphabet happens to match;
    confirm against the multibase specification before relying on it.
    """

    # Code-point ranges (start inclusive, stop exclusive) that make up the
    # alphabet, in byte-value order: 80 emoticons, then 176 pictographs.
    _ALPHABET_RANGES = ((0x1F600, 0x1F650), (0x1F300, 0x1F3B0))

    def _get_emoji_chars(self):
        """Return the 256-character alphabet; index ``i`` encodes byte ``i``."""
        return "".join(
            chr(code_point)
            for start, stop in self._ALPHABET_RANGES
            for code_point in range(start, stop)
        )

    def __init__(self):
        """Build the alphabet and both lookup tables.

        :raises ValueError: if the alphabet does not consist of exactly
            256 distinct characters.
        """
        self.EMOJI_CHARS = self._get_emoji_chars()
        if len(self.EMOJI_CHARS) != 256:
            raise ValueError(f"EMOJI_CHARS must contain exactly 256 characters, got {len(self.EMOJI_CHARS)}")
        # Byte value -> emoji.
        self.byte_to_emoji = dict(enumerate(self.EMOJI_CHARS))
        # Emoji -> byte value.
        self.emoji_to_byte = {emoji: byte for byte, emoji in self.byte_to_emoji.items()}
        # A repeated character would silently collapse two byte values into
        # one on decode, so reject it up front.
        if len(self.emoji_to_byte) != 256:
            raise ValueError("EMOJI_CHARS must contain 256 unique characters")

    def encode(self, bytes_):
        """Encode *bytes_* as a UTF-8 encoded emoji string, one emoji per byte."""
        raw = ensure_bytes(bytes_)
        return "".join(self.byte_to_emoji[value] for value in raw).encode("utf-8")

    def decode(self, bytes_):
        """Decode a UTF-8 encoded emoji string back to the original bytes.

        :raises ValueError: if a character is not part of the alphabet; the
            message carries its position and up to four characters of context.
        """
        emoji_str = ensure_bytes(bytes_, "utf8").decode("utf-8")
        result = bytearray()
        # Every alphabet entry is a single code point, so one character of
        # input always corresponds to one byte of output.
        for i, symbol in enumerate(emoji_str):
            value = self.emoji_to_byte.get(symbol)
            if value is None:
                raise ValueError(f"Invalid emoji character at position {i}: {emoji_str[i:i + 4]}")
            result.append(value)
        return bytes(result)
274+
275+
121276class IdentityConverter :
122277 def encode (self , x ):
123278 return x
0 commit comments