From e148f83c46530b60c56edc370ce73dd107ae1855 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Sat, 9 Dec 2023 12:14:01 +0100 Subject: [PATCH 01/14] gh-56634: Gettext: raise descriptive error on empty plural-forms value --- Lib/gettext.py | 5 ++++- Lib/test/test_gettext.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 62cff81b7b3d49..33a45e5c82708b 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -410,7 +410,10 @@ def _parse(self, fp): self._charset = v.split('charset=')[1] elif k == 'plural-forms': v = v.split(';') - plural = v[1].split('plural=')[1] + try: + plural = v[1].split('plural=')[1] + except IndexError as e: + raise ValueError('invalid plural forms syntax') from e self.plural = c2py(plural) # Note: we unconditionally convert both msgids and msgstrs to # Unicode using the character encoding specified in the charset diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index b2fe3e28c3bec7..2af06ebd089beb 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -624,6 +624,22 @@ def test_ignore_comments_in_headers_issue36239(self): t = gettext.GNUTranslations(fp) self.assertEqual(t.info()["plural-forms"], "nplurals=2; plural=(n != 1);") + def test_raise_descriptive_error_for_incorrect_plural_forms(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Content-Type: text/plain; charset=UTF-8\n" + # "Plural-Forms: \n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x007\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00Content-Type: text/plain; charset=UTF-8\nPlural-Forms: \n\x00' + ) + with self.assertRaisesRegex(ValueError, "invalid plural forms syntax"): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + class UnicodeTranslationsTest(GettextBaseTest): def setUp(self): From c421e4fa9e6a70a7bf12171e73dea6a21b5d3e6f Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Tue, 9 Sep 2025 23:39:59 +0200 Subject: [PATCH 02/14] Add a ValueError for content-type field --- Lib/gettext.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index e6dbbafef9831d..6a5f00bfec25da 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -407,7 +407,10 @@ def _parse(self, fp): elif lastk: self._info[lastk] += '\n' + item if k == 'content-type': - self._charset = v.split('charset=')[1] + try: + self._charset = v.split('charset=')[1] + except IndexError: + raise ValueError(f"invalid content-type syntax: '{v}'. Expected 'charset='.") from None elif k == 'plural-forms': v = v.split(';') try: From 5f56dd638be44de79ef7e9cd4f5805aadecb563d Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Tue, 9 Sep 2025 23:47:30 +0200 Subject: [PATCH 03/14] Add a filename to exception message --- Lib/gettext.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 6a5f00bfec25da..4a527f39356e81 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -410,7 +410,9 @@ def _parse(self, fp): try: self._charset = v.split('charset=')[1] except IndexError: - raise ValueError(f"invalid content-type syntax: '{v}'. Expected 'charset='.") from None + raise ValueError( + f"invalid content-type syntax in '{filename}: '{v}'. Expected 'charset='." + ) from None elif k == 'plural-forms': v = v.split(';') try: From c8ae54e1a1b61ef6cae443cc0d9b59dbde195e7d Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Tue, 9 Sep 2025 23:48:14 +0200 Subject: [PATCH 04/14] Improve ValueError for invalid plural forms --- Lib/gettext.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 4a527f39356e81..8b3758459661aa 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -417,8 +417,10 @@ def _parse(self, fp): v = v.split(';') try: plural = v[1].split('plural=')[1] - except IndexError as e: - raise ValueError('invalid plural forms syntax') from e + except IndexError: + raise ValueError( + f"invalid plural forms syntax in '{filename}': '{v}'. Expected ';' and 'plural='." + ) from None self.plural = c2py(plural) # Note: we unconditionally convert both msgids and msgstrs to # Unicode using the character encoding specified in the charset From 431fd4c87160c33755f90418ccb5246d7d55feba Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Wed, 10 Sep 2025 00:08:04 +0200 Subject: [PATCH 05/14] Add closing quote --- Lib/gettext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 8b3758459661aa..2405bc0cbca72f 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -411,7 +411,7 @@ def _parse(self, fp): self._charset = v.split('charset=')[1] except IndexError: raise ValueError( - f"invalid content-type syntax in '{filename}: '{v}'. Expected 'charset='." + f"invalid content-type syntax in '{filename}': '{v}'. Expected 'charset='." ) from None elif k == 'plural-forms': v = v.split(';') From 5cd3c9f4e17dc45b3802d52481ab58834f639307 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Wed, 10 Sep 2025 00:55:28 +0200 Subject: [PATCH 06/14] Add test for content-type --- Lib/test/test_gettext.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index e0ed6374451b96..39d8e4bb8518e9 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -664,6 +664,21 @@ def test_ignore_comments_in_headers_issue36239(self): t = gettext.GNUTranslations(fp) self.assertEqual(t.info()["plural-forms"], "nplurals=2; plural=(n != 1);") + def test_raise_descriptive_error_for_incorrect_content_type(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Content-Type: text/plain; charste=UTF-8\n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x007\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00Content-Type: text/plain; charste=UTF-8\n\x00' + ) + with self.assertRaisesRegex(ValueError, "invalid content-type syntax"): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + def test_raise_descriptive_error_for_incorrect_plural_forms(self): with open(MOFILE, 'wb') as fp: # below is msgfmt run on such a PO file: From a0807292e4e9bc0b710bfdff7976b18f3d81f32a Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Wed, 24 Sep 2025 00:42:20 +0200 Subject: [PATCH 07/14] Fix incorrect MO file bytes in content type test --- Lib/test/test_gettext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 39d8e4bb8518e9..28c1727071a3a1 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -672,7 +672,7 @@ def test_raise_descriptive_error_for_incorrect_content_type(self): # "Content-Type: text/plain; charste=UTF-8\n" fp.write( b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' - b'\x00\x00\x00\x00\x00\x008\x00\x00\x007\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00(\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00\x00\x00Content-Type: text/plain; charste=UTF-8\n\x00' ) with self.assertRaisesRegex(ValueError, "invalid content-type syntax"): From f6b3a219c68d282e173a8f8986ab8b330de5f378 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 25 Sep 2025 21:16:58 +0200 Subject: [PATCH 08/14] News entry with blurb --- .../next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst diff --git a/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst new file mode 100644 index 00000000000000..2633fb2af291ab --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst @@ -0,0 +1,2 @@ +Gettext now raises descriptive ``ValueError``\s instead of ``IndexError``\s for +incorrect ``.mo`` files metadata. From f69c9ab7ec73734675fb55cc9d9878313ca39400 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 25 Sep 2025 21:57:50 +0200 Subject: [PATCH 09/14] Change error messages --- Lib/gettext.py | 4 ++-- Lib/test/test_gettext.py | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 2405bc0cbca72f..5e43358ab4fbc8 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -411,7 +411,7 @@ def _parse(self, fp): self._charset = v.split('charset=')[1] except IndexError: raise ValueError( - f"invalid content-type syntax in '{filename}': '{v}'. Expected 'charset='." + f"expected 'charset=' in Content-Type metadata in {filename}, got {v!r}" ) from None elif k == 'plural-forms': v = v.split(';') @@ -419,7 +419,7 @@ def _parse(self, fp): plural = v[1].split('plural=')[1] except IndexError: raise ValueError( - f"invalid plural forms syntax in '{filename}': '{v}'. Expected ';' and 'plural='." + f"expected ';' and 'plural=' in Plural-Forms metadata in {filename}, got {v!r}" ) from None self.plural = c2py(plural) # Note: we unconditionally convert both msgids and msgstrs to diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 28c1727071a3a1..69a4929f984c98 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -675,7 +675,10 @@ def test_raise_descriptive_error_for_incorrect_content_type(self): b'\x00\x00\x00\x00\x00\x008\x00\x00\x00(\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00\x00\x00Content-Type: text/plain; charste=UTF-8\n\x00' ) - with self.assertRaisesRegex(ValueError, "invalid content-type syntax"): + with self.assertRaisesRegex( + ValueError, + "expected 'charset=' in Content-Type metadata in gettext.mo, got 'text/plain; charste=UTF-8'" + ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) @@ -691,7 +694,9 @@ def test_raise_descriptive_error_for_incorrect_plural_forms(self): b'\x00\x00\x00\x00\x00\x008\x00\x00\x007\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00\x00\x00Content-Type: text/plain; charset=UTF-8\nPlural-Forms: \n\x00' ) - with self.assertRaisesRegex(ValueError, "invalid plural forms syntax"): + with self.assertRaisesRegex( + ValueError, "expected ';' and 'plural=' in Plural-Forms metadata in gettext.mo, got ''" + ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) From f5121318413037e84b27e9cf8750acb340477fae Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 25 Sep 2025 23:20:53 +0200 Subject: [PATCH 10/14] Improve plural forms error message, add a test --- Lib/gettext.py | 3 ++- Lib/test/test_gettext.py | 32 +++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Lib/gettext.py b/Lib/gettext.py index 5e43358ab4fbc8..66a279dbcb5d97 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -419,7 +419,8 @@ def _parse(self, fp): plural = v[1].split('plural=')[1] except IndexError: raise ValueError( - f"expected ';' and 'plural=' in Plural-Forms metadata in {filename}, got {v!r}" + f"expected ';' and 'plural=' in Plural-Forms metadata in {filename}, " + f"got {';'.join(v)!r}" ) from None self.plural = c2py(plural) # Note: we unconditionally convert both msgids and msgstrs to diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 69a4929f984c98..c03a9f6066ff2b 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -677,7 +677,8 @@ def test_raise_descriptive_error_for_incorrect_content_type(self): ) with self.assertRaisesRegex( ValueError, - "expected 'charset=' in Content-Type metadata in gettext.mo, got 'text/plain; charste=UTF-8'" + "expected 'charset=' in Content-Type metadata in xx/LC_MESSAGES/gettext.mo, " + "got 'text/plain; charste=UTF-8'" ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) @@ -687,15 +688,36 @@ def test_raise_descriptive_error_for_incorrect_plural_forms(self): # below is msgfmt run on such a PO file: # msgid "" # msgstr "" - # "Content-Type: text/plain; charset=UTF-8\n" # "Plural-Forms: \n" fp.write( b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' - b'\x00\x00\x00\x00\x00\x008\x00\x00\x007\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' - b'\x00\x00\x00\x00Content-Type: text/plain; charset=UTF-8\nPlural-Forms: \n\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00\x0f\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00Plural-Forms: \n\x00' ) with self.assertRaisesRegex( - ValueError, "expected ';' and 'plural=' in Plural-Forms metadata in gettext.mo, got ''" + ValueError, + "expected ';' and 'plural=' in Plural-Forms metadata in xx/LC_MESSAGES/gettext.mo, got ''" + ): + with open(MOFILE, 'rb') as fp: + gettext.GNUTranslations(fp) + + + def test_raise_descriptive_error_for_incorrect_plural_forms_with_semicolon(self): + with open(MOFILE, 'wb') as fp: + # below is msgfmt run on such a PO file: + # msgid "" + # msgstr "" + # "Plural-Forms: nplurals=1; prulal=0;\n" + fp.write( + b'\xde\x12\x04\x95\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00\x00\x00$\x00\x00\x00\x03\x00\x00\x00,\x00' + b'\x00\x00\x00\x00\x00\x008\x00\x00\x00$\x00\x00\x009\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00Plural-Forms: nplurals=1; prulal=0;\n\x00' + + ) + with self.assertRaisesRegex( + ValueError, + "expected ';' and 'plural=' in Plural-Forms metadata in xx/LC_MESSAGES/gettext.mo, " + "got 'nplurals=1; prulal=0;'" ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) From 6fff70ce9007eccb5a802f9d43d2a926c373111f Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Fri, 26 Sep 2025 04:14:28 +0200 Subject: [PATCH 11/14] Support Windows in tests --- Lib/test/test_gettext.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index c03a9f6066ff2b..c9509eae05e07f 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -1,6 +1,7 @@ import os import base64 import gettext +import sys import unittest import unittest.mock from functools import partial @@ -664,6 +665,12 @@ def test_ignore_comments_in_headers_issue36239(self): t = gettext.GNUTranslations(fp) self.assertEqual(t.info()["plural-forms"], "nplurals=2; plural=(n != 1);") + @property + def expected_filename(self): + if sys.platform == 'win32': + return 'xx\LC_MESSAGES\gettext.mo' + return 'xx/LC_MESSAGES/gettext.mo' + def test_raise_descriptive_error_for_incorrect_content_type(self): with open(MOFILE, 'wb') as fp: # below is msgfmt run on such a PO file: @@ -677,8 +684,8 @@ def test_raise_descriptive_error_for_incorrect_content_type(self): ) with self.assertRaisesRegex( ValueError, - "expected 'charset=' in Content-Type metadata in xx/LC_MESSAGES/gettext.mo, " - "got 'text/plain; charste=UTF-8'" + f"expected 'charset=' in Content-Type metadata in {self.expected_filename}, " + f"got 'text/plain; charste=UTF-8'" ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) @@ -696,7 +703,7 @@ def test_raise_descriptive_error_for_incorrect_plural_forms(self): ) with self.assertRaisesRegex( ValueError, - "expected ';' and 'plural=' in Plural-Forms metadata in xx/LC_MESSAGES/gettext.mo, got ''" + f"expected ';' and 'plural=' in Plural-Forms metadata in {self.expected_filename}, got ''", ): with open(MOFILE, 'rb') as fp: gettext.GNUTranslations(fp) @@ -716,7 +723,7 @@ def test_raise_descriptive_error_for_incorrect_plural_forms_with_semicolon(self) ) with self.assertRaisesRegex( ValueError, - "expected ';' and 'plural=' in Plural-Forms metadata in xx/LC_MESSAGES/gettext.mo, " + f"expected ';' and 'plural=' in Plural-Forms metadata in {self.expected_filename}, " "got 'nplurals=1; prulal=0;'" ): with open(MOFILE, 'rb') as fp: From 121ec1f6f9da079e7c8fa246cfeab6d11d838c8a Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Fri, 26 Sep 2025 04:50:57 +0200 Subject: [PATCH 12/14] Fix Windows tests: escape backslashes --- Lib/test/test_gettext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index c9509eae05e07f..e07281ba12cd24 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -668,7 +668,7 @@ def test_ignore_comments_in_headers_issue36239(self): @property def expected_filename(self): if sys.platform == 'win32': - return 'xx\LC_MESSAGES\gettext.mo' + return 'xx\\LC_MESSAGES\\gettext.mo' return 'xx/LC_MESSAGES/gettext.mo' def test_raise_descriptive_error_for_incorrect_content_type(self): From 12225ecf22b5b2553f5c3ea6181651ed16bcdbc1 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Fri, 26 Sep 2025 10:37:16 +0200 Subject: [PATCH 13/14] Fix Windows tests: mark path string as raw --- Lib/test/test_gettext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index e07281ba12cd24..4ea5f8015be299 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -668,7 +668,7 @@ def test_ignore_comments_in_headers_issue36239(self): @property def expected_filename(self): if sys.platform == 'win32': - return 'xx\\LC_MESSAGES\\gettext.mo' + return r'xx\\LC_MESSAGES\\gettext.mo' return 'xx/LC_MESSAGES/gettext.mo' def test_raise_descriptive_error_for_incorrect_content_type(self): From 702b84a3fb2f9be80b47eeadc9e3138c64752534 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Wed, 1 Oct 2025 23:34:19 +0200 Subject: [PATCH 14/14] Update Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- .../Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst index 2633fb2af291ab..edbf05a98d66d3 100644 --- a/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst +++ b/Misc/NEWS.d/next/Library/2025-09-25-21-13-59.gh-issue-56634.8tySiu.rst @@ -1,2 +1,2 @@ -Gettext now raises descriptive ``ValueError``\s instead of ``IndexError``\s for -incorrect ``.mo`` files metadata. +:class:`gettext.GNUTranslations` now raises descriptive :exc:`ValueError`\s instead of :exc:`IndexError`\s for +incorrect ``.mo`` file metadata.