Skip to content

Commit adb384b

Browse files
Address Inada review + refactor test location
1 parent c565d52 commit adb384b

File tree

4 files changed

+23
-52
lines changed

4 files changed

+23
-52
lines changed

Lib/test/multibytecodec_support.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,22 @@ def test_incrementalencoder_del_segfault(self):
282282
with self.assertRaises(AttributeError):
283283
del e.errors
284284

285+
def test_null_terminator(self):
286+
# see gh-101828: a null char that follows encodable text must be emitted
# as its own b'\x00' rather than being consumed as part of a
# multi-character sequence.
287+
if any(enc in self.encoding for enc in ('shift', 'euc_jis')):  # katakana sample only for codec names containing 'shift' or 'euc_jis'
288+
text = "バルーンフルーツ"
289+
else:
290+
text = "Spam"  # every other encoding is exercised with ASCII text
291+
encode_w_null = (text + "\0").encode(self.encoding)  # null encoded together with the text
292+
encode_plus_null = text.encode(self.encoding) + "\0".encode(self.encoding)  # text and null encoded separately, then joined
293+
self.assertTrue(encode_w_null.endswith(b'\x00'))
294+
self.assertEqual(encode_w_null, encode_plus_null)
295+
296+
encode_w_null_2 = (text + "\0" + text + "\0").encode(self.encoding)  # two null-terminated copies encoded in a single call
297+
encode_plus_null_2 = encode_plus_null + encode_plus_null
298+
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)  # both nulls must survive encoding
299+
self.assertEqual(encode_w_null_2, encode_plus_null_2)
300+
285301

286302
class TestBase_Mapping(unittest.TestCase):
287303
pass_enctest = []

Lib/test/test_codecencodings_jp.py

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -106,26 +106,6 @@ class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
106106
b"\x85Gℜ\x85Q = ⟨ሴ⟩"
107107
)
108108

109-
def test_null_terminator(self):
110-
# see gh-101828
111-
cases = (
112-
"バルーンフルーツ",
113-
"ライフアップキノコ",
114-
"テスト",
115-
"'Tis but a scratch!"
116-
)
117-
for case in cases:
118-
with self.subTest(case=case):
119-
encode_w_null = (case + "\0").encode(self.encoding)
120-
encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
121-
self.assertTrue(encode_w_null.endswith(b'\x00'))
122-
self.assertEqual(encode_w_null, encode_plus_null)
123-
124-
encode_w_null_2 = encode_w_null + encode_w_null
125-
encode_plus_null_2 = encode_plus_null + encode_plus_null
126-
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
127-
self.assertEqual(encode_w_null_2, encode_plus_null_2)
128-
129109
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
130110
encoding = 'shift_jisx0213'
131111
tstring = multibytecodec_support.load_teststring('shift_jisx0213')
@@ -142,25 +122,5 @@ class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
142122
b"\x85Gℜ\x85Q = ⟨ሴ⟩"
143123
)
144124

145-
def test_null_terminator(self):
146-
# see gh-101828
147-
cases = (
148-
"バルーンフルーツ",
149-
"ライフアップキノコ",
150-
"テスト",
151-
"'Tis but a scratch!"
152-
)
153-
for case in cases:
154-
with self.subTest(case=case):
155-
encode_w_null = (case + "\0").encode(self.encoding)
156-
encode_plus_null = case.encode(self.encoding) + "\0".encode(self.encoding)
157-
self.assertTrue(encode_w_null.endswith(b'\x00'))
158-
self.assertEqual(encode_w_null, encode_plus_null)
159-
160-
encode_w_null_2 = encode_w_null + encode_w_null
161-
encode_plus_null_2 = encode_plus_null + encode_plus_null
162-
self.assertEqual(encode_w_null_2.count(b'\x00'), 2)
163-
self.assertEqual(encode_w_null_2, encode_plus_null_2)
164-
165125
if __name__ == "__main__":
166126
unittest.main()
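[File path missing from this capture. The hunk below belongs to a separate file, apparently the reST changelog entry for this fix, and is not part of Lib/test/test_codecencodings_jp.py.]

Lines changed: 2 additions & 1 deletion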
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1-
Fix ``'shift_jisx0213'`` and ``'shift_jis_2004'`` codecs truncating null chars
1+
Fix ``'shift_jisx0213'``, ``'shift_jis_2004'``, ``'euc_jisx0213'`` and
2+
``'euc_jis_2004'`` codecs truncating null chars
23
as they were treated as part of multi-character sequences.

Modules/cjkcodecs/_codecs_jp.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,11 @@ ENCODER(euc_jis_2004)
192192
JISX0213_ENCPAIRS);
193193
if (code == DBCINV)
194194
return 1;
195-
} else
195+
}
196+
else if (c2 != 0) {
197+
/* Don't consume null char as part of pair */
196198
insize = 2;
199+
}
197200
}
198201
}
199202
}
@@ -612,17 +615,8 @@ ENCODER(shift_jis_2004)
612615
return 1;
613616
}
614617
else if (ch2 != 0) {
615-
insize = 2;
616-
}
617-
else {
618618
/* Don't consume null char as part of pair */
619-
code = find_pairencmap(
620-
(ucs2_t)c, 0,
621-
jisx0213_pair_encmap,
622-
JISX0213_ENCPAIRS);
623-
if (code == DBCINV) {
624-
return 1;
625-
}
619+
insize = 2;
626620
}
627621
}
628622
}

0 commit comments

Comments
 (0)