From 90f0e3444525a555688d6f8e5ee77a078dce1fee Mon Sep 17 00:00:00 2001 From: stan Date: Sun, 16 Feb 2025 12:59:58 +0000 Subject: [PATCH 1/3] Add error if BOM on first line of po file --- Tools/i18n/msgfmt.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index 3f731e941eafe7..a37711de3a4fa2 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -116,6 +116,15 @@ def make(filename, outfile): print(msg, file=sys.stderr) sys.exit(1) + # Check for UTF-8 BOM + if lines[0].startswith(b'\xef\xbb\xbf'): + print( + f"The file {infile} starts with a UTF-8 BOM which is not allowed in .po files.\n" + "Please save the file without a BOM and try again.", + file=sys.stderr + ) + sys.exit(1) + section = msgctxt = None fuzzy = 0 From 2455bffeb96ad972068bf60d95a2c442b044f794 Mon Sep 17 00:00:00 2001 From: stan Date: Mon, 17 Feb 2025 08:52:51 +0000 Subject: [PATCH 2/3] Apply Serhiy suggestions --- Lib/test/test_tools/test_msgfmt.py | 8 ++++++++ Tools/i18n/msgfmt.py | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py index e3e3035c4f4395..a6073b8be03073 100644 --- a/Lib/test/test_tools/test_msgfmt.py +++ b/Lib/test/test_tools/test_msgfmt.py @@ -39,6 +39,14 @@ def test_compilation(self): self.assertDictEqual(actual._catalog, expected._catalog) + def test_po_with_bom(self): + with temp_cwd(): + Path('bom.po').write_bytes(b'\xef\xbb\xbfmsgid "Python"\nmsgstr "Pioton"\n') + + res = assert_python_failure(msgfmt, 'bom.po') + err = res.err.decode('utf-8') + self.assertIn('The file bom.po starts with a UTF-8 BOM', err) + def test_invalid_msgid_plural(self): with temp_cwd(): Path('invalid.po').write_text('''\ diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index a37711de3a4fa2..fcf41532d8fb11 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -35,6 +35,8 @@ __version__ = "1.2" +import codecs + MESSAGES = {} @@ -117,7 
+119,7 @@ def make(filename, outfile): sys.exit(1) # Check for UTF-8 BOM - if lines[0].startswith(b'\xef\xbb\xbf'): + if lines[0].startswith(codecs.BOM_UTF8): print( f"The file {infile} starts with a UTF-8 BOM which is not allowed in .po files.\n" "Please save the file without a BOM and try again.", From 839efce95f7ab0a70a350bb366c17bcc5264473d Mon Sep 17 00:00:00 2001 From: stan Date: Mon, 17 Feb 2025 09:21:21 +0000 Subject: [PATCH 3/3] Apply Serhiy suggestions --- Tools/i18n/msgfmt.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index fcf41532d8fb11..f005c4e7b5b79e 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -32,10 +32,10 @@ import struct import array from email.parser import HeaderParser +import codecs __version__ = "1.2" -import codecs MESSAGES = {} @@ -118,7 +118,6 @@ def make(filename, outfile): print(msg, file=sys.stderr) sys.exit(1) - # Check for UTF-8 BOM if lines[0].startswith(codecs.BOM_UTF8): print( f"The file {infile} starts with a UTF-8 BOM which is not allowed in .po files.\n"