python · s-ball · Dec 2, 2018 · Dec 2, 2018 · Dec 2, 2018 · Dec 4, 2018
diff --git a/Lib/test/test_tools/msgfmt_data/.gitattributes b/Lib/test/test_tools/msgfmt_data/.gitattributes
@@ -0,0 +1,2 @@
+file1_fr.po	eol=crlf
+file2_fr.po	eol=lf
diff --git a/Lib/test/test_tools/msgfmt_data/file12_fr.mo b/Lib/test/test_tools/msgfmt_data/file12_fr.mo
diff --git a/Lib/test/test_tools/msgfmt_data/file1_fr.mo b/Lib/test/test_tools/msgfmt_data/file1_fr.mo
diff --git a/Lib/test/test_tools/msgfmt_data/file1_fr.po b/Lib/test/test_tools/msgfmt_data/file1_fr.po
@@ -0,0 +1,29 @@
+# French translations for python package.
+# Copyright (C) 2018 THE python\'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the python package.
+# s-ball <[email protected]>, 2018.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: python 3.8\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2018-11-30 23:46+0100\n"
+"PO-Revision-Date: 2018-11-30 23:47+0100\n"
+"Last-Translator: s-ball <[email protected]>\n"
+"Language-Team: French\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+#: file1.py:6
+msgid "Hello!"
+msgstr "Bonjour !"
+
+#: file1.py:7
+#, python-brace-format
+msgid "{n} horse"
+msgid_plural "{n} horses"
+msgstr[0] "{n} cheval"
+msgstr[1] "{n} chevaux"
diff --git a/Lib/test/test_tools/msgfmt_data/file2_fr.mo b/Lib/test/test_tools/msgfmt_data/file2_fr.mo
diff --git a/Lib/test/test_tools/msgfmt_data/file2_fr.po b/Lib/test/test_tools/msgfmt_data/file2_fr.po
@@ -0,0 +1,26 @@
+# French translations for python package.
+# Copyright (C) 2018 THE python'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the python package.
+# s-ball <[email protected]>, 2018.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: python 3.8\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2018-11-30 23:57+0100\n"
+"PO-Revision-Date: 2018-11-30 23:57+0100\n"
+"Last-Translator: s-ball <[email protected]>\n"
+"Language-Team: French\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+#: file2.py:6
+msgid "It's over."
+msgstr "C'est terminé."
+
+#: file2.py:7
+msgid "Bye..."
+msgstr "Au revoir ..."
diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py
@@ -1,5 +1,7 @@
 """Tests for the Tools/i18n/msgfmt.py tool."""
-
+import filecmp
+import os
+import shutil
 import sys
 import unittest
 from gettext import GNUTranslations
@@ -91,6 +93,7 @@ def test_generic_syntax_error(self):
             err = res.err.decode('utf-8')
             self.assertIn('Syntax error', err)
 
+
 class CLITest(unittest.TestCase):
 
     def test_help(self):
@@ -121,6 +124,52 @@ def test_nonexistent_file(self):
         assert_python_failure(msgfmt, 'nonexistent.po')
 
 
+class Test_multi_input(unittest.TestCase):
+    """Tests for the issue https://github.com/python/cpython/issues/79516
+        msgfmt.py shall accept multiple input files
+    """
+
+    def test_no_outputfile(self):
+        """Test script without -o option - 1 single file"""
+        with temp_cwd(None):
+            shutil.copy(data_dir / 'file2_fr.po', '.')
+            assert_python_ok(msgfmt, 'file2_fr.po')
+            self.assertTrue(
+                filecmp.cmp(data_dir / 'file2_fr.mo', 'file2_fr.mo'),
+                'Wrong compiled file2_fr.mo')
+
+    def test_both_with_outputfile(self):
+        """Test script with -o option and 2 input files
+
+        The current behaviour is to merge entries having distinct ids
+        and keep last one if the same id occurs in multiple files.
+
+        Here the first file has Windows endings (crlf) while the second has
+        Unix endings (lf).
+        """
+        with temp_cwd(None):
+            assert_python_ok(msgfmt, '-o', 'file12.mo',
+                             data_dir / 'file1_fr.po',
+                             data_dir / 'file2_fr.po')
+            self.assertTrue(
+                filecmp.cmp(data_dir / 'file12_fr.mo', 'file12.mo'),
+                'Wrong compiled file12.mo')
+
+    def test_both_without_outputfile(self):
+        """Test script without -o option and 2 input files"""
+
+        with temp_cwd(None):
+            shutil.copy(data_dir /'file1_fr.po', '.')
+            shutil.copy(data_dir /'file2_fr.po', '.')
+            assert_python_ok(msgfmt, 'file1_fr.po', 'file2_fr.po')
+            self.assertTrue(
+                filecmp.cmp(data_dir / 'file1_fr.mo', 'file1_fr.mo'),
+                'Wrong compiled file1_fr.mo')
+            self.assertTrue(
+                filecmp.cmp(data_dir / 'file2_fr.mo', 'file2_fr.mo'),
+                'Wrong compiled file2_fr.mo')
+
+
 def update_catalog_snapshots():
     for po_file in data_dir.glob('*.po'):
         mo_file = po_file.with_suffix('.mo')

diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-12-05-20-46-10.bpo-35335.qtIUBx.rst b/Misc/NEWS.d/next/Tools-Demos/2018-12-05-20-46-10.bpo-35335.qtIUBx.rst
@@ -0,0 +1,2 @@
+:program:`msgfmt.py` is now able to merge several po files into a single compiled mo
+file. When an entry exists in more than one input file, the last file wins.
diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py
@@ -6,9 +6,9 @@
 This program converts a textual Uniforum-style message catalog (.po file) into
 a binary GNU catalog (.mo file).  This is essentially the same function as the
 GNU msgfmt program, however, it is a simpler implementation.  Currently it
-does not handle plural forms but it does handle message contexts.
+handles plural forms and message contexts, but does not generate a hash table.
 
-Usage: msgfmt.py [OPTIONS] filename.po
+Usage: msgfmt.py [OPTIONS] filename.po [filename.po ...]
 
 Options:
     -o file
@@ -23,6 +23,14 @@
     -V
     --version
         Display version information and exit.
+
+If more than one input file is given, and if an output file is passed with
+the -o option, then all the input files are merged. If a key is repeated (as
+is common for the "" key of the header), the one from the last file is used.
+
+If more than one input file is given, and no -o option is present, then
+every input file is compiled into its corresponding mo file (same name with
+mo replacing po).
 """
 
 import os
@@ -47,29 +55,27 @@ def usage(code, msg=''):
     sys.exit(code)
 
 
-def add(ctxt, id, str, fuzzy):
+def add(ctxt, id, str, fuzzy, messages):
     "Add a non-fuzzy translation to the dictionary."
-    global MESSAGES
     if not fuzzy and str:
         if ctxt is None:
-            MESSAGES[id] = str
+            messages[id] = str
         else:
-            MESSAGES[b"%b\x04%b" % (ctxt, id)] = str
+            messages[b"%b\x04%b" % (ctxt, id)] = str
 
 
-def generate():
+def generate(messages):
     "Return the generated output."
-    global MESSAGES
     # the keys are sorted in the .mo file
-    keys = sorted(MESSAGES.keys())
+    keys = sorted(messages.keys())
     offsets = []
     ids = strs = b''
     for id in keys:
         # For each string, we need size and file offset.  Each string is NUL
         # terminated; the NUL does not count into the size.
-        offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
+        offsets.append((len(ids), len(id), len(strs), len(messages[id])))
         ids += id + b'\0'
-        strs += MESSAGES[id] + b'\0'
+        strs += messages[id] + b'\0'
     output = ''
     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
     # the keys start right after the index tables.
@@ -98,18 +104,44 @@ def generate():
     return output
 
 
-def make(filename, outfile):
-    ID = 1
-    STR = 2
-    CTXT = 3
+def make(filenames, outfile):
+    """ Compile one or several po file(s).
+
+    filenames is a string or an iterable of strings naming the input file(s).
+    outfile is a string for the name of the output file or None.
+
+    If it is not None, the output file receives a merge of the input files.
+    If it is None, then filenames must be a string and the name of the output
+    file is obtained by replacing the po extension with mo.
+    Both ways are for compatibility reasons with previous behaviour.
+    """
+    messages = {}
+    if isinstance(filenames, str):
+        infile, outfile = get_names(filenames, outfile)
+        process(infile, messages)
+    elif outfile is None:
+        raise TypeError("outfile cannot be None with more than one infile")
+    else:
+        for filename in filenames:
+            infile, _ = get_names(filename, outfile)
+            process(infile, messages)
+    output = generate(messages)
+    writefile(outfile, output)
 
+def get_names(filename, outfile):
     # Compute .mo name from .po name and arguments
     if filename.endswith('.po'):
         infile = filename
     else:
         infile = filename + '.po'
     if outfile is None:
         outfile = os.path.splitext(infile)[0] + '.mo'
+    return infile, outfile
+
+def process(infile, messages):
+    ID = 1
+    STR = 2
+    CTXT = 3
 
     try:
         with open(infile, 'rb') as f:
@@ -140,7 +172,7 @@ def make(filename, outfile):
         lno += 1
         # If we get a comment line after a msgstr, this is a new entry
         if l[0] == '#' and section == STR:
-            add(msgctxt, msgid, msgstr, fuzzy)
+            add(msgctxt, msgid, msgstr, fuzzy, messages)
             section = msgctxt = None
             fuzzy = 0
         # Record a fuzzy mark
@@ -152,13 +184,13 @@ def make(filename, outfile):
         # Now we are in a msgid or msgctxt section, output previous section
         if l.startswith('msgctxt'):
             if section == STR:
-                add(msgctxt, msgid, msgstr, fuzzy)
+                add(msgctxt, msgid, msgstr, fuzzy, messages)
             section = CTXT
             l = l[7:]
             msgctxt = b''
         elif l.startswith('msgid') and not l.startswith('msgid_plural'):
             if section == STR:
-                add(msgctxt, msgid, msgstr, fuzzy)
+                add(msgctxt, msgid, msgstr, fuzzy, messages)
                 if not msgid:
                     # See whether there is an encoding declaration
                     p = HeaderParser()
@@ -213,21 +245,19 @@ def make(filename, outfile):
             sys.exit(1)
     # Add last entry
     if section == STR:
-        add(msgctxt, msgid, msgstr, fuzzy)
-
-    # Compute output
-    output = generate()
+        add(msgctxt, msgid, msgstr, fuzzy, messages)
 
+def writefile(outfile, output):
     try:
         with open(outfile,"wb") as f:
             f.write(output)
     except IOError as msg:
         print(msg, file=sys.stderr)
 
 
-def main():
+def main(argv):
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
+        opts, args = getopt.getopt(argv, 'hVo:',
                                    ['help', 'version', 'output-file='])
     except getopt.error as msg:
         usage(1, msg)
@@ -247,10 +277,12 @@ def main():
         print('No input file given', file=sys.stderr)
         print("Try `msgfmt --help' for more information.", file=sys.stderr)
         return
-
-    for filename in args:
-        make(filename, outfile)
+    if outfile is None:
+        for filename in args:
+            make(filename, None)
+    else:
+        make(args, outfile)
 
 
 if __name__ == '__main__':
-    main()
+    main(sys.argv[1:])
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		file1_fr.po eol=crlf
		file2_fr.po eol=lf
Copy link Member merwok Mar 3, 2025 • edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Could you add a note somewhere (in tests or in a readme file here) explaining how to recreate the mo files? And what do you think of naming the files `file1_fr_crlf.po` and `file1_fr_lf.po`? Copy link Author s-ball Mar 3, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. The mo files are simple recreated by the `update_catalog_snapshots` function triggered by passing the argument `--snapshot-update` to the test. Exactly the way the other .mo files of the `msgfmt_data` are re-created. Do you really think it deserves a special message? Anyway I agree with you for the other point the file names should make the eol mode explicit. Copy link Member merwok Mar 3, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Ah, I hadn’t seen the function / argument in the test! Good that it exists. But maybe this is making my point? someone else looking at the test data files (python dev or redistributor – they care about source files) may not find the answer quickly. Copy link Member merwok Mar 4, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Adding one comment in the test_msgfmt.py file like `# regenerate files in Lib/test/test_tools/msgfmt_data` for the benefit of people grepping would make me satisfied!
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		:program:`msgfmt.py` is now able to merge more than one single po file into a compiled mo
s-ball marked this conversation as resolved. Outdated Show resolved Hide resolved
		file. When an entry exists in more than one input file, the last file wins.