Skip to content

Commit 397ac7a

Browse files
committed
Fixed leading null bytes being dropped by big-integer base codecs
The generic base_encode/base_decode convert the whole input to a single integer, so leading null bytes (high-order zeros) were silently lost: e.g. Base58 encoded b'\x00abc' to 'ZiCa' instead of '1ZiCa', and b'\x00' to an empty string. Per the Base58 spec each leading 0x00 byte maps to a leading charset[0] character. Preserve the leading-zero count on encode and restore it on decode, so values round-trip and match reference implementations.
1 parent 578f57d commit 397ac7a

2 files changed

Lines changed: 15 additions & 2 deletions

File tree

src/codext/base/_base.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,10 @@ def base_encode(input, charset, errors="strict", exc=BaseEncodeError):
130130
while i > 0:
131131
i, c = divmod(i, n)
132132
r = charset[c] + r
133+
# preserve leading zero bytes: big-integer bases such as Base58 map each
134+
# leading null byte of the input to a leading charset[0] character
135+
if not isinstance(input, int):
136+
r = charset[0] * (len(input) - len(input.lstrip("\x00"))) + r
133137
return r
134138

135139

@@ -151,7 +155,8 @@ def base_decode(input, charset, errors="strict", exc=BaseDecodeError):
151155
i = i * n + charset.index(c)
152156
except ValueError:
153157
handle_error("base", errors, exc, decode=True)(c, k, dec(i), "base%d" % n)
154-
return dec(i)
158+
# restore the leading zero bytes encoded as leading charset[0] characters
159+
return chr(0) * (len(input) - len(input.lstrip(charset[0]))) + dec(i)
155160

156161

157162
# base codec factory functions

tests/test_base.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,15 @@ def test_codec_base58(self):
172172
self.assertEqual(codecs.decode(B58, "base58-fl"), STR)
173173
self.assertEqual(codecs.encode(STR, "base58-short-url"), B58)
174174
self.assertEqual(codecs.encode(STR, "base58-url"), B58)
175-
175+
# leading null bytes must be preserved as leading charset[0] ('1')
176+
self.assertEqual(codecs.encode("\x00abc", "base58"), "1ZiCa")
177+
self.assertEqual(codecs.encode("\x00", "base58"), "1")
178+
self.assertEqual(codecs.encode("\x00\x00abc", "base58"), "11ZiCa")
179+
self.assertEqual(codecs.decode("1ZiCa", "base58"), "\x00abc")
180+
self.assertEqual(codecs.decode("11ZiCa", "base58"), "\x00\x00abc")
181+
self.assertEqual(codecs.encode(b("\x00abc"), "base58"), b("1ZiCa"))
182+
self.assertEqual(codecs.decode(b("1ZiCa"), "base58"), b("\x00abc"))
183+
176184
def test_codec_base62(self):
177185
for b62, enc in zip(["CsoB4HQ5gmgMyCenF7E", "M2yLERaFqwqW8MoxPHO"], ["base62", "base62-inv"]):
178186
self.assertEqual(codecs.encode(STR, enc), b62)

0 commit comments

Comments
 (0)