diff --git a/Lib/gettext.py b/Lib/gettext.py index 6c11ab2b1eb570..66a279dbcb5d97 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -407,10 +407,21 @@ def _parse(self, fp): elif lastk: self._info[lastk] += '\n' + item if k == 'content-type': - self._charset = v.split('charset=')[1] + try: + self._charset = v.split('charset=')[1] + except IndexError: + raise ValueError( + f"expected 'charset=' in Content-Type metadata in {filename}, got {v!r}" + ) from None elif k == 'plural-forms': v = v.split(';') - plural = v[1].split('plural=')[1] + try: + plural = v[1].split('plural=')[1] + except IndexError: + raise ValueError( + f"expected ';' and 'plural=' in Plural-Forms metadata in {filename}, " + f"got {';'.join(v)!r}" + ) from None self.plural = c2py(plural) # Note: we unconditionally convert both msgids and msgstrs to # Unicode using the character encoding specified in the charset diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 9ad37909a8ec4e..4ea5f8015be299 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -1,6 +1,7 @@ import os import base64 import gettext +import sys import unittest import unittest.mock from functools import partial @@ -664,6 +665,70 @@ def test_ignore_comments_in_headers_issue36239(self): t = gettext.GNUTranslations(fp) self.assertEqual(t.info()["plural-forms"], "nplurals=2; plural=(n != 1);") + @property + def expected_filename(self): + if sys.platform == 'win32': + return r'xx\\LC_MESSAGES\\gettext.mo' + return 'xx/LC_MESSAGES/gettext.mo' + + def test_raise_descriptive_error_for_incorrect_content_type(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Content-Type: text/plain; charste=UTF-8\n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00(\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + 
b'\x00\x00\x00\x00Content-Type: text/plain; charste=UTF-8\n\x00' + ) + with self.assertRaisesRegex( + ValueError, + f"expected 'charset=' in Content-Type metadata in {self.expected_filename}, " + f"got 'text/plain; charste=UTF-8'" + ): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + + def test_raise_descriptive_error_for_incorrect_plural_forms(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Plural-Forms: \n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00\x0f\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00Plural-Forms: \n\x00' + ) + with self.assertRaisesRegex( + ValueError, + f"expected ';' and 'plural=' in Plural-Forms metadata in {self.expected_filename}, got ''", + ): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + + + def test_raise_descriptive_error_for_incorrect_plural_forms_with_semicolon(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Plural-Forms: nplurals=1; prulal=0;\n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00$\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00Plural-Forms: nplurals=1; prulal=0;\n\x00' + + ) + with self.assertRaisesRegex( + ValueError, + f"expected ';' and 'plural=' in Plural-Forms metadata in {self.expected_filename}, " + "got 'nplurals=1; prulal=0;'" + ): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + class UnicodeTranslationsTest(GettextBaseTest): def setUp(self): diff --git a/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst new file mode 100644 index 
00000000000000..edbf05a98d66d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst @@ -0,0 +1,2 @@ +:class:`gettext.GNUTranslations` now raises a descriptive :exc:`ValueError` instead of an :exc:`IndexError` when the +``Content-Type`` or ``Plural-Forms`` metadata of a ``.mo`` file is malformed.