From 3ae7cbdad531a5a9e10c75a4fde1e8ead7cb56a2 Mon Sep 17 00:00:00 2001
From: Pavel Bozin
Date: Mon, 9 Dec 2024 20:09:33 +0300
Subject: [PATCH 01/21] Add definitions for MessageConcatenation and MessageMerge classes in pybabel

* Define the MessageConcatenation class to mimic the functionality of GNU gettext's msgcat
* Define the MessageMerge class to mimic the functionality of GNU gettext's msgmerge
* Implement placeholders for the main interface functions
---
Review note: added the missing comma after 'previous' in
MessageMerge.boolean_options; without it the adjacent string literals were
concatenated into 'previousproperties-input', so neither option was treated
as a boolean flag. Hunk line counts are unchanged.

 babel/messages/frontend.py | 198 +++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)

diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py
index 9017ec5a8..0758ecc19 100644
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -852,6 +852,200 @@ def run(self):
             return
 
 
+class MessageConcatenation(CommandMixin):
+    description = 'concatenates and merges the specified PO files'
+    user_options = [
+        ('input-files', None, ''),
+        ('files-from=', 'f', ''),
+        ('directory=', 'D', ''),
+        ('output-file=', 'o', ''),
+        ('less-than=', '<', ''),
+        ('more-than=', '>', ''),
+        ('unique', 'u', ''),
+        ('properties-input', 'P', ''),
+        ('stringtable-input', None, ''),
+        ('to-code=','t', ''),
+        ('use-first', None, ''),
+        ('lang=', None, ''),
+        ('color=', None, ''),
+        ('style=', None, ''),
+        ('no-escape', 'e', ''),
+        ('escape', 'E', ''),
+        ('force-po', None, ''),
+        ('indent', 'i', ''),
+        ('no-location', None, ''),
+        ('add-location', 'n', ''),
+        ('strict', None, ''),
+        ('properties-output', None, ''),
+        ('stringtable-output', None, ''),
+        ('width=', 'w', ''),
+        ('no-wrap', None, ''),
+        ('sort-output', 's', ''),
+        ('sort-by-file', 'F', ''),
+    ]
+
+    as_args='input-files'
+
+    boolean_options = [
+        'unique',
+        'properties-input',
+        'stringtable-input',
+        'use-first',
+        'no-escape',
+        'escape',
+        'force-po',
+        'indent',
+        'no-location',
+        'add-location',
+        'strict',
+        'properties-output',
+        'stringtable-output',
+        'no-wrap',
+        'sort-output',
+        'sort-by-file',
+    ]
+
+    option_choices = {
+        'color': ('always', 'never', 'auto', 'html'),
+    }
+
+    def initialize_options(self):
+        self.input_files = None
+        self.files_from = None
+        self.directory = None
+        self.output_file = None
+        self.less_than = None
+        self.more_than = None
+        self.unique = None
+        self.properties_input = None
+        self.stringtable_input = None
+        self.to_code = None
+        self.use_first = None
+        self.lang = None
+        self.color = None
+        self.color = None
+        self.style = None
+        self.no_escape = None
+        self.escape = None
+        self.force_po = None
+        self.indent = None
+        self.no_location = None
+        self.add_location = None
+        self.strict = None
+        self.properties_output = None
+        self.stringtable_output = None
+        self.width = None
+        self.no_wrap = None
+        self.sort_output = None
+        self.sort_by_file = None
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        pass
+
+
+class MessageMerge(CommandMixin):
+    description='combines two Uniforum-style PO files into one'
+    user_options=[
+        ('input-files', None, ''),
+        ('directory=', 'D', ''),
+        ('compendium=', 'C', ''),
+        ('update', 'U', ''),
+        ('output-file=', 'o', ''),
+        ('backup=', None, ''),
+        ('suffix=', None, ''),
+        ('multi-domain', 'm', ''),
+        ('for-msgfmt', None, ''),
+        ('no-fuzzy-matching', 'N', ''),
+        ('previous', None, ''),
+        ('properties-input', 'P', ''),
+        ('stringtable-input', None, ''),
+        ('lang=', None, ''),
+        ('color=', None, ''),
+        ('style=', None, ''),
+        ('no-escape', 'e', ''),
+        ('escape', 'E', ''),
+        ('force-po', None, ''),
+        ('indent', 'i', ''),
+        ('no-location', None, ''),
+        ('add-location', 'n', ''),
+        ('strict', None, ''),
+        ('properties-output', None, ''),
+        ('stringtable-output', None, ''),
+        ('width=', 'w', ''),
+        ('no-wrap', None, ''),
+        ('sort-output', 's', ''),
+        ('sort-by-file', 'F', ''),
+    ]
+
+    as_args='input-files'
+
+    boolean_options = [
+        'update',
+        'multi-domain',
+        'for-msgfmt',
+        'no-fuzzy-matching',
+        'previous',
+        'properties-input',
+        'stringtable-input',
+        'no-escape',
+        'escape',
+        'force-po',
+        'indent',
+        'no-location',
+        'add-location',
+        'strict',
+        'properties-output',
+        'stringtable-output',
+        'no-wrap',
+        'sort-output',
+        'sort-by-file',
+    ]
+
+    option_choices = {
+        'color': ('always', 'never', 'auto', 'html'),
+    }
+
+    def initialize_options(self):
+        self.input_files = None
+        self.directory = None
+        self.compendium = None
+        self.update = None
+        self.output_file = None
+        self.backup = None
+        self.suffix = None
+        self.multi_domain = None
+        self.for_msgfmt = None
+        self.no_fuzzy_matching = None
+        self.previous = None
+        self.properties_input = None
+        self.stringtable_input = None
+        self.lang = None
+        self.color = None
+        self.style = None
+        self.no_escape = None
+        self.escape = None
+        self.force_po = None
+        self.indent = None
+        self.no_location = None
+        self.add_location = None
+        self.strict = None
+        self.properties_output = None
+        self.stringtable_output = None
+        self.width = None
+        self.no_wrap = None
+        self.sort_output = None
+        self.sort_by_file = None
+
+    def finalize_options(self):
+        pass
+
+    def run(self):
+        pass
+
+
 class CommandLineInterface:
     """Command-line interface.
 
@@ -866,6 +1060,8 @@ class CommandLineInterface: 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', + 'msgcat': 'concatenates and merges the specified PO files', + 'msgmerge': 'combines two Uniforum-style PO files into one', } command_classes = { @@ -873,6 +1069,8 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, + 'msgcat': MessageConcatenation, + 'msgmerge': MessageMerge, } log = None # Replaced on instance level From 246671a90ce58abc478c981afc4e3a6f284c964c Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:18:25 +0300 Subject: [PATCH 02/21] Implement basic logic for concatenating catalogs * Add validation for main msgcat options - input_files, output_file * Temporarily set use_first option to true to avoid handling cases with different translations for the same messages --- babel/messages/frontend.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 0758ecc19..9c2be9d8e 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -940,10 +940,37 @@ def initialize_options(self): self.sort_by_file = None def finalize_options(self): - pass + if not self.input_files: + raise OptionError('you must specify the input files') + if not self.output_file: + raise OptionError('you must specify the output file') + + # временно всегда используется первый перевод + if self.use_first is None: + self.use_first = True def run(self): - pass + catalog = Catalog(fuzzy=False) + + for filenum, filename in enumerate(self.input_files): + with open(filename, 'r') as pofile: + template = read_po(pofile) + + if filenum == 0: + catalog.update(template) + continue + + for message in template: + if not message.id: + continue + + if message.id in catalog and 
catalog[message.id].string != message.string and not self.use_first: + raise NotImplementedError() + + catalog[message.id] = message + + with open(self.output_file, 'wb') as outfile: + write_po(outfile, catalog) class MessageMerge(CommandMixin): From 9a217e2d8ab6adaf7724b08f6aaef84f6a3e6e2c Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:32:07 +0300 Subject: [PATCH 03/21] Add options: unique, less-than, more-than, no-wrap, and width * Implement options unique, less-than, and more-than, and validate their dependencies with each other. * These options specify which messages to include in the output file. * Implement and validate options no-wrap and width. * Create a helper function _prepare that collects data on message occurrences across different catalogs. * Mark options that are already implemented # --- babel/messages/frontend.py | 64 ++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 9c2be9d8e..e6ac46710 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -20,6 +20,7 @@ import sys import tempfile import warnings +from collections import OrderedDict, defaultdict from configparser import RawConfigParser from io import StringIO from typing import BinaryIO, Iterable, Literal @@ -910,20 +911,20 @@ class MessageConcatenation(CommandMixin): } def initialize_options(self): - self.input_files = None + self.input_files = None # self.files_from = None self.directory = None - self.output_file = None - self.less_than = None - self.more_than = None - self.unique = None + self.output_file = None # + self.less_than = None # + self.more_than = 0 # + self.unique = False # self.properties_input = None self.stringtable_input = None self.to_code = None - self.use_first = None + # временно всегда используется первый перевод + self.use_first = True #~ self.lang = None self.color = None - self.color = None self.style = None 
self.no_escape = None self.escape = None @@ -934,8 +935,8 @@ def initialize_options(self): self.strict = None self.properties_output = None self.stringtable_output = None - self.width = None - self.no_wrap = None + self.width = None # + self.no_wrap = None # self.sort_output = None self.sort_by_file = None @@ -945,21 +946,44 @@ def finalize_options(self): if not self.output_file: raise OptionError('you must specify the output file') - # временно всегда используется первый перевод + if self.unique is None: + self.unique = False if self.use_first is None: self.use_first = True + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.more_than is None: + self.more_than = 0 + else: + self.more_than = int(self.more_than) + if self.less_than is not None: + self.less_than = int(self.less_than) + if self.unique: + self.less_than = 2 + + def _prepare(self): + self.message_count = defaultdict(int) + + for filename in self.input_files: + with open(filename, 'r') as pofile: + template = read_po(pofile) + for message in template: + self.message_count[message.id] += 1 + def run(self): catalog = Catalog(fuzzy=False) + self._prepare() - for filenum, filename in enumerate(self.input_files): + for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) - if filenum == 0: - catalog.update(template) - continue - for message in template: if not message.id: continue @@ -967,10 +991,16 @@ def run(self): if message.id in catalog and catalog[message.id].string != message.string and not self.use_first: raise NotImplementedError() - catalog[message.id] = message + message_count = self.message_count[message.id] + if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): + catalog[message.id] = message with open(self.output_file, 'wb') as 
outfile: - write_po(outfile, catalog) + write_po( + outfile, + catalog, + width=self.width + ) class MessageMerge(CommandMixin): From a1bf8d41ccaae2dc4d2db59384f0030aad8d6a27 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:32:34 +0300 Subject: [PATCH 04/21] Implement basic msgmerge logic for working with a compendium * Implement basic functionality of msgmerge * Use and validate the main options: input-files and output-file * Use and validate options: no-wrap and width * Use and validate options: sort-output and sort-by-file, both in msgmerge and msgcat * In the basic version of working with a compendium, a translation for a message is taken from the compendium only if the resulting catalog lacks a translation. --- babel/messages/frontend.py | 79 ++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 12 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index e6ac46710..52a735f56 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -937,8 +937,8 @@ def initialize_options(self): self.stringtable_output = None self.width = None # self.no_wrap = None # - self.sort_output = None - self.sort_by_file = None + self.sort_output = False # + self.sort_by_file = False # def finalize_options(self): if not self.input_files: @@ -967,6 +967,11 @@ def finalize_options(self): if self.unique: self.less_than = 2 + if self.sort_output is None: + self.sort_output = False + if self.sort_by_file is None: + self.sort_by_file = True + def _prepare(self): self.message_count = defaultdict(int) @@ -999,7 +1004,9 @@ def run(self): write_po( outfile, catalog, - width=self.width + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, ) @@ -1066,11 +1073,11 @@ class MessageMerge(CommandMixin): } def initialize_options(self): - self.input_files = None + self.input_files = None # self.directory = None - self.compendium = None + self.compendium = None #~ self.update = None - 
self.output_file = None + self.output_file = None # self.backup = None self.suffix = None self.multi_domain = None @@ -1091,16 +1098,64 @@ def initialize_options(self): self.strict = None self.properties_output = None self.stringtable_output = None - self.width = None - self.no_wrap = None - self.sort_output = None - self.sort_by_file = None + self.width = None # + self.no_wrap = None # + self.sort_output = False # + self.sort_by_file = False # def finalize_options(self): - pass + if len(self.input_files) != 2: + raise OptionError('must be two po files') + if not self.output_file: + raise OptionError('you must specify the output file') + + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.sort_output is None: + self.sort_output = False + if self.sort_by_file is None: + self.sort_by_file = True def run(self): - pass + def_file, ref_file = self.input_files + with open(def_file, 'r') as pofile: + def_catalog = read_po(pofile) + + with open(ref_file, 'r') as pofile: + ref_catalog = read_po(pofile) + + ref_catalog.mime_headers = def_catalog.mime_headers + ref_catalog.header_comment = def_catalog.header_comment + + for message in def_catalog: + if not message.id: + continue + if message.id in ref_catalog: + ref_catalog[message.id].string = message.string + else: + ref_catalog.obsolete[message.id] = message + + if self.compendium: + with open(self.compendium, 'r') as pofile: + compendium_catalog = read_po(pofile) + for message in compendium_catalog: + if message.id in ref_catalog and not ref_catalog[message.id].string: + ref_catalog[message.id].string = message.string + + ref_catalog.fuzzy = False + with open(self.output_file, 'wb') as outfile: + write_po( + outfile, + ref_catalog, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + ) class 
CommandLineInterface: From a5458fb0eaee388b8b65ee05ae59b899d146de83 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:33:10 +0300 Subject: [PATCH 05/21] Write tests for msgcat * Create basic tests to verify the functionality of msgcat, specifically the concatenation of catalogs, merging of message flags, locations, etc. * Remove the validation of options sort-output, sort-by-file, unique, use-first, as they are initialized in the function initialize_options. --- babel/messages/frontend.py | 13 +- tests/messages/test_frontend.py | 213 +++++++++++++++++++++++++++++++- 2 files changed, 213 insertions(+), 13 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 52a735f56..199e75760 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -891,7 +891,6 @@ class MessageConcatenation(CommandMixin): 'unique', 'properties-input', 'stringtable-input', - 'use-first', 'no-escape', 'escape', 'force-po', @@ -946,11 +945,6 @@ def finalize_options(self): if not self.output_file: raise OptionError('you must specify the output file') - if self.unique is None: - self.unique = False - if self.use_first is None: - self.use_first = True - if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") if not self.no_wrap and not self.width: @@ -967,11 +961,6 @@ def finalize_options(self): if self.unique: self.less_than = 2 - if self.sort_output is None: - self.sort_output = False - if self.sort_by_file is None: - self.sort_by_file = True - def _prepare(self): self.message_count = defaultdict(int) @@ -982,7 +971,7 @@ def _prepare(self): self.message_count[message.id] += 1 def run(self): - catalog = Catalog(fuzzy=False) + catalog = Catalog() self._prepare() for filename in self.input_files: diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index b05f9f683..d34fed1a8 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ 
-27,7 +27,7 @@ from babel import __version__ as VERSION from babel.dates import format_datetime -from babel.messages import Catalog, extract, frontend +from babel.messages import Catalog, extract, frontend, pofile from babel.messages.frontend import ( BaseError, CommandLineInterface, @@ -715,6 +715,217 @@ def test_supports_width(self): assert expected_content == actual_content +class ConcatanationMessagesTestCase(unittest.TestCase): + + def setUp(self): + self.olddir = os.getcwd() + os.chdir(data_dir) + + self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + self.cmd = frontend.MessageConcatenation(self.dist) + self.cmd.initialize_options() + + self.temp1 = f'{i18n_dir}/msgcat_temp1.po' + self.temp2 = f'{i18n_dir}/msgcat_temp2.po' + self.output_file = f'{i18n_dir}/msgcat.po' + + with open(self.temp1, 'wb') as file: + catalog = Catalog() + catalog.add('other1', string='Other 1', locations=[('simple.py', 1)], flags=['flag1000']) + catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) + catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) + catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) + pofile.write_po(file, catalog) + + with open(self.temp2, 'wb') as file: + catalog = Catalog() + catalog.add('other3', string='Other 3', locations=[('hard.py', 1)]) + catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) + catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) + catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + pofile.write_po(file, catalog) + + def tearDown(self): + for file in [self.temp1, self.temp2, self.output_file]: + if os.path.isfile(file): + os.unlink(file) + + def test_no_input_files(self): + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def test_no_output_file(self): + self.cmd.input_files = ['project/i18n/messages.pot'] + with 
pytest.raises(OptionError): + self.cmd.finalize_options() + + @freeze_time("1994-11-11") + def test_default(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + +#: hard.py:1000 simple.py:1000 +#, flag2, flag3 +msgid "almost_same" +msgstr "Almost same" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_unique(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.unique = True + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. 
+# FIRST AUTHOR , 1994. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + self.cmd.less_than = 2 + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_more_than(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.more_than = 1 + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + +#: hard.py:1000 simple.py:1000 +#, flag2, flag3 +msgid "almost_same" +msgstr "Almost same" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + class CommandLineInterfaceTestCase(unittest.TestCase): def setUp(self): From 60201079c975cda0b92a62ed5df5ab947593aa95 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:33:34 +0300 Subject: [PATCH 06/21] Write tests for msgmerge * Create basic tests to verify the functionality of msgmerge, specifically the merging of messages and their integration with a compendium. * Remove the definition of sort-output and sort-by-file, and add an additional check for input-files. 
--- babel/messages/frontend.py | 8 +- tests/messages/test_frontend.py | 148 ++++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 6 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 199e75760..6854160c4 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1093,7 +1093,7 @@ def initialize_options(self): self.sort_by_file = False # def finalize_options(self): - if len(self.input_files) != 2: + if not self.input_files or len(self.input_files) != 2: raise OptionError('must be two po files') if not self.output_file: raise OptionError('you must specify the output file') @@ -1105,11 +1105,6 @@ def finalize_options(self): elif self.width is not None: self.width = int(self.width) - if self.sort_output is None: - self.sort_output = False - if self.sort_by_file is None: - self.sort_by_file = True - def run(self): def_file, ref_file = self.input_files with open(def_file, 'r') as pofile: @@ -1132,6 +1127,7 @@ def run(self): if self.compendium: with open(self.compendium, 'r') as pofile: compendium_catalog = read_po(pofile) + for message in compendium_catalog: if message.id in ref_catalog and not ref_catalog[message.id].string: ref_catalog[message.id].string = message.string diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index d34fed1a8..e4c4385de 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -926,6 +926,154 @@ def test_more_than(self): assert expected_content == actual_content +class MergeMessagesTestCase(unittest.TestCase): + + @freeze_time("1994-11-11") + def setUp(self): + self.olddir = os.getcwd() + os.chdir(data_dir) + + self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + self.cmd = frontend.MessageMerge(self.dist) + self.cmd.initialize_options() + + self.temp_def = f'{i18n_dir}/msgmerge_def.po' + self.temp_ref = f'{i18n_dir}/msgmerge_ref.pot' + self.compendium = f'{i18n_dir}/compenidum.po' + self.output_file = 
f'{i18n_dir}/msgmerge.po' + + with open(self.temp_ref, 'wb') as file: + catalog = Catalog() + for word in ['word1', 'word2', 'word3', 'word4']: + catalog.add(word) + pofile.write_po(file, catalog) + + with open(self.temp_def, 'wb') as file: + catalog = Catalog() + catalog.add('word1', string='Word 1') + catalog.add('word2', string='Word 2') + catalog.add('word3') + pofile.write_po(file, catalog) + + with open(self.compendium, 'wb') as file: + catalog = Catalog() + catalog.add('word4', string='Word 4') + catalog.add('word5', string='Word 5') + pofile.write_po(file, catalog) + + def tearDown(self): + for file in [self.temp_def, self.temp_ref, self.compendium, self.output_file]: + if os.path.isfile(file): + os.unlink(file) + + def test_no_input_files(self): + with pytest.raises(OptionError): + self.cmd.finalize_options() + + with pytest.raises(OptionError): + self.cmd.input_files = ['1'] + self.cmd.finalize_options() + + with pytest.raises(OptionError): + self.cmd.input_files = ['1', '2', '3'] + self.cmd.finalize_options() + + def test_no_output_file(self): + self.cmd.input_files = ['1', '2'] + with pytest.raises(OptionError): + self.cmd.finalize_options() + + @freeze_time("1994-11-11") + def test_default(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +#, fuzzy +msgid "word4" +msgstr "Word 2" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_compenidum(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = self.compendium + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "Word 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + class CommandLineInterfaceTestCase(unittest.TestCase): def setUp(self): From cb71c9370edd57f172262ae32a8c316f42a30351 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:05 +0300 Subject: [PATCH 07/21] Add options update, backup, and c_overwrite for a different compendium handling logic * Implement `update` to update the source file instead of writing to the current output file * Implement `backup` to save a backup of the source file before making any updates * Implement `c_overwrite` to use a new mode of handling the compendium, where translations from the compendium overwrite messages in the output file --- babel/messages/frontend.py | 56 +++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 6854160c4..ced138ec4 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1005,9 +1005,10 @@ class MessageMerge(CommandMixin): ('input-files', None, ''), ('directory=', 'D', ''), ('compendium=', 'C', ''), + ('c-overwrite', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), - ('backup=', None, ''), + ('backup', None, ''), ('suffix=', None, ''), ('multi-domain', 'm', ''), ('for-msgfmt', None, ''), @@ -1055,6 +1056,8 @@ class MessageMerge(CommandMixin): 'no-wrap', 'sort-output', 'sort-by-file', 
+ 'c-overwrite', + 'backup', ] option_choices = { @@ -1065,13 +1068,14 @@ def initialize_options(self): self.input_files = None # self.directory = None self.compendium = None #~ - self.update = None + self.c_overwrite = False # + self.update = None # self.output_file = None # - self.backup = None - self.suffix = None + self.backup = False # + self.suffix = '~' # self.multi_domain = None self.for_msgfmt = None - self.no_fuzzy_matching = None + self.no_fuzzy_matching = None # self.previous = None self.properties_input = None self.stringtable_input = None @@ -1095,8 +1099,8 @@ def initialize_options(self): def finalize_options(self): if not self.input_files or len(self.input_files) != 2: raise OptionError('must be two po files') - if not self.output_file: - raise OptionError('you must specify the output file') + if not self.output_file and not self.update: + raise OptionError('you must specify the output file or update existing') if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") @@ -1107,36 +1111,38 @@ def finalize_options(self): def run(self): def_file, ref_file = self.input_files - with open(def_file, 'r') as pofile: - def_catalog = read_po(pofile) + if self.update and self.backup: + shutil.copy(def_file, def_file + self.suffix) + + with open(def_file, 'r') as pofile: + catalog = read_po(pofile) with open(ref_file, 'r') as pofile: ref_catalog = read_po(pofile) - - ref_catalog.mime_headers = def_catalog.mime_headers - ref_catalog.header_comment = def_catalog.header_comment - - for message in def_catalog: - if not message.id: - continue - if message.id in ref_catalog: - ref_catalog[message.id].string = message.string - else: - ref_catalog.obsolete[message.id] = message + catalog.update( + ref_catalog, + no_fuzzy_matching=self.no_fuzzy_matching + ) if self.compendium: with open(self.compendium, 'r') as pofile: compendium_catalog = read_po(pofile) for message in compendium_catalog: - if message.id in ref_catalog and 
not ref_catalog[message.id].string: - ref_catalog[message.id].string = message.string + current = catalog[message.id] + if message.id in catalog and (not current.string or current.fuzzy or self.c_overwrite): + if self.c_overwrite and not current.fuzzy and current.string: + catalog.obsolete[message.id] = current.clone() - ref_catalog.fuzzy = False - with open(self.output_file, 'wb') as outfile: + current.string = message.string + current.flags = [flag for flag in current.flags if flag != 'fuzzy'] + current.auto_comments.append(self.compendium) + + output_path = def_file if self.update else self.output_file + with open(output_path, 'wb') as outfile: write_po( outfile, - ref_catalog, + catalog, width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, From 99ac98781b0fa6760e35fa5a6b5ea0272fe8e456 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:20 +0300 Subject: [PATCH 08/21] Add test for msgmerge compendium overwrite mode with no comments * Implement a test for `msgmerge` that validates the new mode where compendium entries overwrite messages in the output PO file. * Include the `no_compendium_comment` option to ensure comments about translations sourced from the compendium are not included. * Utilize the `no-location` option to exclude location comments from the output. 
--- babel/messages/frontend.py | 12 +++++- tests/messages/test_frontend.py | 72 ++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index ced138ec4..9ffd3d460 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1006,6 +1006,7 @@ class MessageMerge(CommandMixin): ('directory=', 'D', ''), ('compendium=', 'C', ''), ('c-overwrite', '', ''), + ('no-compendium-comment', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), ('backup', None, ''), @@ -1058,6 +1059,7 @@ class MessageMerge(CommandMixin): 'sort-by-file', 'c-overwrite', 'backup', + 'no-compendium-comment', ] option_choices = { @@ -1067,8 +1069,11 @@ class MessageMerge(CommandMixin): def initialize_options(self): self.input_files = None # self.directory = None + self.compendium = None #~ self.c_overwrite = False # + self.no_compendium_comment = None # + self.update = None # self.output_file = None # self.backup = False # @@ -1086,7 +1091,7 @@ def initialize_options(self): self.escape = None self.force_po = None self.indent = None - self.no_location = None + self.no_location = None # self.add_location = None self.strict = None self.properties_output = None @@ -1136,13 +1141,16 @@ def run(self): current.string = message.string current.flags = [flag for flag in current.flags if flag != 'fuzzy'] - current.auto_comments.append(self.compendium) + + if not self.no_compendium_comment: + current.auto_comments.append(self.compendium) output_path = def_file if self.update else self.output_file with open(output_path, 'wb') as outfile: write_po( outfile, catalog, + no_location=self.no_location, width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index e4c4385de..6f79149c1 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -957,6 +957,8 @@ def setUp(self): 
with open(self.compendium, 'wb') as file: catalog = Catalog() + catalog.add('word1', string='Comp Word 1') + catalog.add('word2', string='Comp Word 2') catalog.add('word4', string='Word 4') catalog.add('word5', string='Word 5') pofile.write_po(file, catalog) @@ -983,10 +985,19 @@ def test_no_output_file(self): with pytest.raises(OptionError): self.cmd.finalize_options() + self.cmd.output_file = '2' + self.cmd.finalize_options() + + self.cmd.output_file = None + self.cmd.update = True + self.cmd.finalize_options() + + @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file + self.cmd.no_fuzzy_matching = True self.cmd.finalize_options() self.cmd.run() @@ -996,6 +1007,7 @@ def test_default(self): # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. # +#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -1018,9 +1030,8 @@ def test_default(self): msgid "word3" msgstr "" -#, fuzzy msgid "word4" -msgstr "Word 2" +msgstr "" """ @@ -1033,6 +1044,8 @@ def test_compenidum(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file self.cmd.compendium = self.compendium + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True self.cmd.finalize_options() self.cmd.run() @@ -1042,6 +1055,7 @@ def test_compenidum(self): # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. 
# +#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -1073,6 +1087,60 @@ def test_compenidum(self): actual_content = f.read() assert expected_content == actual_content + @freeze_time("1994-11-11") + def test_compenidum(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = self.compendium + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.c_overwrite = True + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Comp Word 1" + +msgid "word2" +msgstr "Comp Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "Word 4" + +#~ msgid "word1" +#~ msgstr "Word 1" + +#~ msgid "word2" +#~ msgstr "Word 2" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content class CommandLineInterfaceTestCase(unittest.TestCase): From 7228cea9c941f8dd5bb5b985547234690d2427e7 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:50 +0300 Subject: [PATCH 09/21] Refactor test for msgmerge with compendium-overwrite option * Implemented a helper function `_get_expected` to standardize the expected PO file structure. 
* Renamed the option `c-overwrite` to `compendium-overwrite` --- babel/messages/frontend.py | 10 +- tests/messages/test_frontend.py | 263 +++++++++++++++----------------- 2 files changed, 129 insertions(+), 144 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 9ffd3d460..4078f54c3 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1005,7 +1005,7 @@ class MessageMerge(CommandMixin): ('input-files', None, ''), ('directory=', 'D', ''), ('compendium=', 'C', ''), - ('c-overwrite', '', ''), + ('compendium-overwrite', '', ''), ('no-compendium-comment', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), @@ -1057,7 +1057,7 @@ class MessageMerge(CommandMixin): 'no-wrap', 'sort-output', 'sort-by-file', - 'c-overwrite', + 'compendium-overwrite', 'backup', 'no-compendium-comment', ] @@ -1071,7 +1071,7 @@ def initialize_options(self): self.directory = None self.compendium = None #~ - self.c_overwrite = False # + self.compendium_overwrite = False # self.no_compendium_comment = None # self.update = None # @@ -1135,8 +1135,8 @@ def run(self): for message in compendium_catalog: current = catalog[message.id] - if message.id in catalog and (not current.string or current.fuzzy or self.c_overwrite): - if self.c_overwrite and not current.fuzzy and current.string: + if message.id in catalog and (not current.string or current.fuzzy or self.compendium_overwrite): + if self.compendium_overwrite and not current.fuzzy and current.string: catalog.obsolete[message.id] = current.clone() current.string = message.string diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 6f79149c1..d760b90c2 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -750,25 +750,9 @@ def tearDown(self): if os.path.isfile(file): os.unlink(file) - def test_no_input_files(self): - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_no_output_file(self): - 
self.cmd.input_files = ['project/i18n/messages.pot'] - with pytest.raises(OptionError): - self.cmd.finalize_options() - - @freeze_time("1994-11-11") - def test_default(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - - self.cmd.finalize_options() - self.cmd.run() - + def _get_expected(self, messages): date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. + return fr"""# Translations template for PROJECT. # Copyright (C) 1994 ORGANIZATION # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. @@ -787,7 +771,26 @@ def test_default(self): "Content-Transfer-Encoding: 8bit\n" "Generated-By: Babel {VERSION}\n" -#: simple.py:1 +""" + messages + + def test_no_input_files(self): + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def test_no_output_file(self): + self.cmd.input_files = ['project/i18n/messages.pot'] + with pytest.raises(OptionError): + self.cmd.finalize_options() + + @freeze_time("1994-11-11") + def test_default(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + + self.cmd.finalize_options() + self.cmd.run() + + expected_content = self._get_expected(fr"""#: simple.py:1 #, flag1000 msgid "other1" msgstr "Other 1" @@ -814,10 +817,10 @@ def test_default(self): msgid "other4" msgstr "Other 4" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content @freeze_time("1994-11-11") @@ -829,27 +832,7 @@ def test_unique(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. 
-# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#: simple.py:1 + expected_content = self._get_expected(fr"""#: simple.py:1 #, flag1000 msgid "other1" msgstr "Other 1" @@ -866,7 +849,7 @@ def test_unique(self): msgid "other4" msgstr "Other 4" -""" +""") with open(self.output_file, 'r') as f: actual_content = f.read() @@ -889,27 +872,7 @@ def test_more_than(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#: hard.py:100 simple.py:100 + expected_content = self._get_expected(fr"""#: hard.py:100 simple.py:100 #, flag1, flag1.2, flag4 msgid "same" msgstr "Same" @@ -919,10 +882,10 @@ def test_more_than(self): msgid "almost_same" msgstr "Almost same" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content @@ -964,10 +927,40 @@ def setUp(self): pofile.write_po(file, catalog) def tearDown(self): - for file in [self.temp_def, self.temp_ref, self.compendium, self.output_file]: - if os.path.isfile(file): + for file in [ + self.temp_def, + self.temp_def + '~', + self.temp_def + '.bac', + self.temp_ref, + self.compendium, + self.output_file + ]: + if os.path.exists(file) and os.path.isfile(file): os.unlink(file) + def _get_expected(self, messages): + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + return fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +""" + messages + def test_no_input_files(self): with pytest.raises(OptionError): self.cmd.finalize_options() @@ -1001,27 +994,7 @@ def test_default(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Word 1" msgid "word2" @@ -1033,10 +1006,10 @@ def test_default(self): msgid "word4" msgstr "" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content @freeze_time("1994-11-11") @@ -1049,27 +1022,7 @@ def test_compenidum(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. 
-# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Word 1" msgid "word2" @@ -1081,44 +1034,24 @@ def test_compenidum(self): msgid "word4" msgstr "Word 4" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content @freeze_time("1994-11-11") - def test_compenidum(self): + def test_compenidum_overwrite(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file self.cmd.compendium = self.compendium self.cmd.no_fuzzy_matching = True self.cmd.no_compendium_comment = True - self.cmd.c_overwrite = True + self.cmd.compendium_overwrite = True self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Comp Word 1" msgid "word2" @@ -1136,12 +1069,64 @@ def test_compenidum(self): #~ msgid "word2" #~ msgstr "Word 2" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content + def test_update(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.update = True + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + expected_content = self._get_expected(fr"""msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "" + +""") + + with open(self.temp_def, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + def test_update_backup(self): + with open(self.temp_def, 'r') as f: + before_content = f.read() + + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.update = True + self.cmd.backup = True + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + assert os.path.exists(self.temp_def + '~') + with open(self.temp_def + '~', 'r') as f: + actual_content = f.read() + assert before_content == actual_content + + os.unlink(self.temp_def) + shutil.move(self.temp_def + '~', self.temp_def) + self.cmd.suffix = '.bac' + self.cmd.run() + + assert os.path.exists(self.temp_def + '.bac') + with open(self.temp_def + '.bac', 'r') as f: + actual_content = f.read() + assert before_content == actual_content + + class 
CommandLineInterfaceTestCase(unittest.TestCase): def setUp(self): From 2dababc66042cc77d7c109f70fb7d7252e4d1555 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:35:04 +0300 Subject: [PATCH 10/21] Create a catalog without fuzzy by default, remove add-location * Mark the catalog as fuzzy after msgcat and msgmerge if there is at least one fuzzy message * Remove add-location as it's unnecessary --- babel/messages/frontend.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 4078f54c3..fb89ae868 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -875,7 +875,6 @@ class MessageConcatenation(CommandMixin): ('force-po', None, ''), ('indent', 'i', ''), ('no-location', None, ''), - ('add-location', 'n', ''), ('strict', None, ''), ('properties-output', None, ''), ('stringtable-output', None, ''), @@ -896,7 +895,6 @@ class MessageConcatenation(CommandMixin): 'force-po', 'indent', 'no-location', - 'add-location', 'strict', 'properties-output', 'stringtable-output', @@ -929,8 +927,7 @@ def initialize_options(self): self.escape = None self.force_po = None self.indent = None - self.no_location = None - self.add_location = None + self.no_location = None # self.strict = None self.properties_output = None self.stringtable_output = None @@ -971,12 +968,14 @@ def _prepare(self): self.message_count[message.id] += 1 def run(self): - catalog = Catalog() + catalog = Catalog(fuzzy=False) self._prepare() for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) + if catalog.locale is None: + catalog.locale = template.locale for message in template: if not message.id: @@ -989,6 +988,7 @@ def run(self): if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): catalog[message.id] = message + catalog.fuzzy = any(message.fuzzy for message in catalog) with open(self.output_file, 
'wb') as outfile: write_po( outfile, @@ -996,6 +996,7 @@ def run(self): width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, + no_location=self.no_location, ) @@ -1025,7 +1026,6 @@ class MessageMerge(CommandMixin): ('force-po', None, ''), ('indent', 'i', ''), ('no-location', None, ''), - ('add-location', 'n', ''), ('strict', None, ''), ('properties-output', None, ''), ('stringtable-output', None, ''), @@ -1050,7 +1050,6 @@ class MessageMerge(CommandMixin): 'force-po', 'indent', 'no-location', - 'add-location', 'strict', 'properties-output', 'stringtable-output', @@ -1092,7 +1091,6 @@ def initialize_options(self): self.force_po = None self.indent = None self.no_location = None # - self.add_location = None self.strict = None self.properties_output = None self.stringtable_output = None @@ -1140,11 +1138,13 @@ def run(self): catalog.obsolete[message.id] = current.clone() current.string = message.string - current.flags = [flag for flag in current.flags if flag != 'fuzzy'] + if current.fuzzy: + current.flags.remove('fuzzy') if not self.no_compendium_comment: current.auto_comments.append(self.compendium) + catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file with open(output_path, 'wb') as outfile: write_po( From 888cdd06a2136e256c85bb777b9c516c2a50209d Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:35:18 +0300 Subject: [PATCH 11/21] Add tests for using msgcat with plural message forms --- tests/messages/test_frontend.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index d760b90c2..457722e59 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -735,6 +735,7 @@ def setUp(self): catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 
'flag1.2']) catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals'), locations=[('simple.py', 2000)]) pofile.write_po(file, catalog) with open(self.temp2, 'wb') as file: @@ -743,6 +744,7 @@ def setUp(self): catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) pofile.write_po(file, catalog) def tearDown(self): @@ -757,7 +759,6 @@ def _get_expected(self, messages): # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. # -#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -809,6 +810,12 @@ def test_default(self): msgid "almost_same" msgstr "Almost same" +#: hard.py:2000 simple.py:2000 +msgid "plural" +msgid_plural "plurals" +msgstr[0] "Plural" +msgstr[1] "Plurals" + #: hard.py:1 msgid "other3" msgstr "Other 3" @@ -882,6 +889,12 @@ def test_more_than(self): msgid "almost_same" msgstr "Almost same" +#: hard.py:2000 simple.py:2000 +msgid "plural" +msgid_plural "plurals" +msgstr[0] "Plural" +msgstr[1] "Plurals" + """) with open(self.output_file, 'r') as f: From 5202291b40469eecfa4f27bdadd0159dd5d9a8d5 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:35:28 +0300 Subject: [PATCH 12/21] Rename msgmerge to merge and msgcat to concat --- babel/messages/frontend.py | 12 ++++++------ tests/messages/test_frontend.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index fb89ae868..a03da61f7 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -853,7 +853,7 @@ def run(self): return -class 
MessageConcatenation(CommandMixin): +class ConcatenationCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ ('input-files', None, ''), @@ -1000,7 +1000,7 @@ def run(self): ) -class MessageMerge(CommandMixin): +class MergeCatalog(CommandMixin): description='combines two Uniforum-style PO files into one' user_options=[ ('input-files', None, ''), @@ -1171,8 +1171,8 @@ class CommandLineInterface: 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', - 'msgcat': 'concatenates and merges the specified PO files', - 'msgmerge': 'combines two Uniforum-style PO files into one', + 'concat': 'concatenates and merges the specified PO files', + 'merge': 'combines two Uniforum-style PO files into one', } command_classes = { @@ -1180,8 +1180,8 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, - 'msgcat': MessageConcatenation, - 'msgmerge': MessageMerge, + 'concat': ConcatenationCatalog, + 'merge': MergeCatalog, } log = None # Replaced on instance level diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 457722e59..018db15bf 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -715,14 +715,14 @@ def test_supports_width(self): assert expected_content == actual_content -class ConcatanationMessagesTestCase(unittest.TestCase): +class ConcatanationCatalogTestCase(unittest.TestCase): def setUp(self): self.olddir = os.getcwd() os.chdir(data_dir) self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.MessageConcatenation(self.dist) + self.cmd = frontend.ConcatenationCatalog(self.dist) self.cmd.initialize_options() self.temp1 = f'{i18n_dir}/msgcat_temp1.po' @@ -902,7 +902,7 @@ def test_more_than(self): assert expected_content == actual_content -class 
MergeMessagesTestCase(unittest.TestCase): +class MergeCatalogTestCase(unittest.TestCase): @freeze_time("1994-11-11") def setUp(self): @@ -910,7 +910,7 @@ def setUp(self): os.chdir(data_dir) self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.MessageMerge(self.dist) + self.cmd = frontend.MergeCatalog(self.dist) self.cmd.initialize_options() self.temp_def = f'{i18n_dir}/msgmerge_def.po' From efe2502f8d93a4df2eef87f01fee6a09078322f1 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 22:00:00 +0300 Subject: [PATCH 13/21] Add description to all options --- babel/messages/frontend.py | 120 ++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index a03da61f7..06710793f 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -856,32 +856,38 @@ def run(self): class ConcatenationCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ - ('input-files', None, ''), - ('files-from=', 'f', ''), - ('directory=', 'D', ''), - ('output-file=', 'o', ''), - ('less-than=', '<', ''), - ('more-than=', '>', ''), - ('unique', 'u', ''), - ('properties-input', 'P', ''), - ('stringtable-input', None, ''), - ('to-code=','t', ''), - ('use-first', None, ''), - ('lang=', None, ''), - ('color=', None, ''), - ('style=', None, ''), - ('no-escape', 'e', ''), - ('escape', 'E', ''), - ('force-po', None, ''), - ('indent', 'i', ''), - ('no-location', None, ''), - ('strict', None, ''), - ('properties-output', None, ''), - ('stringtable-output', None, ''), - ('width=', 'w', ''), - ('no-wrap', None, ''), - ('sort-output', 's', ''), - ('sort-by-file', 'F', ''), + ('input-files', None, 'input files'), + ('files-from=', 'f', 'get list of input files from FILE'), + ('directory=', 'D', 'add DIRECTORY to list for input files search' + 'If input file is -, standard input is read.'), + ('output-file=', 'o',
'write output to specified file'), + ('less-than=', '<', 'print messages with less than this many' + 'definitions, defaults to infinite if not set'), + ('more-than=', '>', 'print messages with more than this many' + 'definitions, defaults to 0 if not set'), + ('unique', 'u', 'shorthand for --less-than=2, requests' + 'that only unique messages be printed'), + ('properties-input', 'P', 'input files are in Java .properties syntax'), + ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), + ('to-code=','t', 'encoding for output'), + ('use-first', None, 'use first available translation for each' + 'message, don\'t merge several translations'), + ('lang=', None, 'set 'Language' field in the header entry'), + ('color=', None, 'use colors and other text attributes always'), + ('style=', None, 'specify CSS style rule file for --color'), + ('no-escape', 'e', 'do not use C escapes in output (default)'), + ('escape', 'E', 'use C escapes in output, no extended chars'), + ('force-po', None, 'write PO file even if empty'), + ('indent', 'i', 'write the .po file using indented style'), + ('no-location', None, 'do not write \'#: filename:line\' lines'), + ('strict', None, 'write out strict Uniforum conforming .po file'), + ('properties-output', None, 'write out a Java .properties file'), + ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), + ('width=', 'w', 'set output page width'), + ('no-wrap', None, 'do not break long message lines, longer than' + 'the output page width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), ] as_args='input-files' @@ -918,7 +924,7 @@ def initialize_options(self): self.properties_input = None self.stringtable_input = None self.to_code = None - # временно всегда используется первый перевод + # the first translation is always used temporarily self.use_first = True #~ self.lang = None self.color = None @@ -1003,36 +1009,38 
@@ def run(self): class MergeCatalog(CommandMixin): description='combines two Uniforum-style PO files into one' user_options=[ - ('input-files', None, ''), - ('directory=', 'D', ''), - ('compendium=', 'C', ''), - ('compendium-overwrite', '', ''), + ('input-files', None, 'def.po ref.pot'), + ('directory=', 'D', 'add DIRECTORY to list for input files search'), + ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), + ('compendium-overwrite', '', 'overwrite mode of compendium'), ('no-compendium-comment', '', ''), - ('update', 'U', ''), - ('output-file=', 'o', ''), - ('backup', None, ''), - ('suffix=', None, ''), - ('multi-domain', 'm', ''), - ('for-msgfmt', None, ''), - ('no-fuzzy-matching', 'N', ''), - ('previous', None, ''), - ('properties-input', 'P', ''), - ('stringtable-input', None, ''), - ('lang=', None, ''), - ('color=', None, ''), - ('style=', None, ''), - ('no-escape', 'e', ''), - ('escape', 'E', ''), - ('force-po', None, ''), - ('indent', 'i', ''), - ('no-location', None, ''), - ('strict', None, ''), - ('properties-output', None, ''), - ('stringtable-output', None, ''), - ('width=', 'w', ''), - ('no-wrap', None, ''), - ('sort-output', 's', ''), - ('sort-by-file', 'F', ''), + ('update', 'U', 'pdate def.po, do nothing if def.po already up to date'), + ('output-file=', 'o', 'write output to specified file, the results are written' + 'to standard output if no output file is specified'), + ('backup', None, 'make a backup of def.po'), + ('suffix=', None, 'override the usual backup suffix'), + ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), + ('for-msgfmt', None, 'produce output for 'msgfmt', not for a translator'), + ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), + ('previous', None, 'keep previous msgids of translated messages'), + ('properties-input', 'P', 'input files are in Java .properties syntax'), + ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings 
syntax'), + ('lang=', None, 'set 'Language' field in the header entry'), + ('color=', None, 'use colors and other text attributes always'), + ('style=', None, 'specify CSS style rule file for --color'), + ('no-escape', 'e', 'do not use C escapes in output (default)'), + ('escape', 'E', 'use C escapes in output, no extended chars'), + ('force-po', None, 'write PO file even if empty'), + ('indent', 'i', 'indented output style'), + ('no-location', None, 'suppress \'#: filename:line\' lines'), + ('strict', None, 'strict Uniforum output style'), + ('properties-output', None, 'write out a Java .properties file'), + ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), + ('width=', 'w', 'set output page width'), + ('no-wrap', None, 'do not break long message lines, longer' + 'than the output page width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), ] as_args='input-files' From 4cbe604b8be43b3555113db4b39da90606af7e2a Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 2 Mar 2025 17:33:37 +0300 Subject: [PATCH 14/21] Ability to specify multiple compendiums --- babel/messages/frontend.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 73cd6d62f..0b97bcf08 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -872,7 +872,7 @@ class ConcatenationCatalog(CommandMixin): ('to-code=','t', 'encoding for output'), ('use-first', None, 'use first available translation for each' 'message, don\'t merge several translations'), - ('lang=', None, 'set 'Language' field in the header entry'), + ('lang=', None, 'set \'Language\' field in the header entry'), ('color=', None, 'use colors and other text attributes always'), ('style=', None, 'specify CSS style rule file for --color'), ('no-escape', 'e', 'do not use C escapes in output (default)'), @@ -1020,12 
+1020,12 @@ class MergeCatalog(CommandMixin): ('backup', None, 'make a backup of def.po'), ('suffix=', None, 'override the usual backup suffix'), ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), - ('for-msgfmt', None, 'produce output for 'msgfmt', not for a translator'), + ('for-msgfmt', None, 'produce output for \'msgfmt\', not for a translator'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), ('previous', None, 'keep previous msgids of translated messages'), ('properties-input', 'P', 'input files are in Java .properties syntax'), ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('lang=', None, 'set 'Language' field in the header entry'), + ('lang=', None, 'set \'Language\' field in the header entry'), ('color=', None, 'use colors and other text attributes always'), ('style=', None, 'specify CSS style rule file for --color'), ('no-escape', 'e', 'do not use C escapes in output (default)'), @@ -1043,7 +1043,11 @@ class MergeCatalog(CommandMixin): ('sort-by-file', 'F', 'sort output by file location'), ] - as_args='input-files' + as_args = 'input-files' + + multiple_value_options = ( + 'compendium' + ) boolean_options = [ 'update', @@ -1120,6 +1124,13 @@ def finalize_options(self): elif self.width is not None: self.width = int(self.width) + def _get_message_from_compendium(self, compendium): + for file_path in compendium: + with open(file_path, 'r') as pofile: + catalog = read_po(pofile) + for message in catalog: + yield message, file_path + def run(self): def_file, ref_file = self.input_files @@ -1136,10 +1147,7 @@ def run(self): ) if self.compendium: - with open(self.compendium, 'r') as pofile: - compendium_catalog = read_po(pofile) - - for message in compendium_catalog: + for message, compendium_path in self._get_message_from_compendium(self.compendium): current = catalog[message.id] if message.id in catalog and (not current.string or current.fuzzy or self.compendium_overwrite): if 
self.compendium_overwrite and not current.fuzzy and current.string: @@ -1150,7 +1158,7 @@ def run(self): current.flags.remove('fuzzy') if not self.no_compendium_comment: - current.auto_comments.append(self.compendium) + current.auto_comments.append(compendium_path) catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file From 8568e902c10bb5f1c4fafc610318663478b68611 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 2 Mar 2025 20:00:25 +0300 Subject: [PATCH 15/21] Marking conflicting messages * Update _prepare function in ConcatenateCatalog to check conflicting messages and to not parse po-files twice * Add _conflicts field in Catalog to mark conflicts * Update tests --- babel/messages/catalog.py | 24 ++++++++- babel/messages/frontend.py | 51 +++++++++++-------- babel/messages/pofile.py | 41 +++++++++++++++- tests/messages/test_frontend.py | 86 +++++++++++++++++++++++++++++---- 4 files changed, 171 insertions(+), 31 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index de96ea576..71b0e78c1 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -11,13 +11,14 @@ import datetime import re +import os from collections.abc import Iterable, Iterator from copy import copy from difflib import SequenceMatcher from email import message_from_string from heapq import nlargest from string import Formatter -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict from babel import __version__ as VERSION from babel.core import Locale, UnknownLocaleError @@ -338,6 +339,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') return str(s) +class ConflictInfo(TypedDict): + message: Message + file_name: str + project: str + version: str + + class Catalog: """Representation of a message catalog.""" @@ -381,6 +389,7 @@ def __init__( self.locale = locale self._header_comment = header_comment self._messages: dict[str | 
tuple[str, str], Message] = {} + self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {} self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -747,6 +756,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: f"Expected sequence but got {type(message.string)}" self._messages[key] = message + def add_conflict(self, message: Message, file_name: str, project: str, version: str): + key = message.id + if key not in self._conflicts: + self._conflicts[key] = [] + + self._conflicts[key].append({ + 'message': message, + 'file_name': file_name, + 'project': project, + 'version': version, + }) + message.flags |= {'fuzzy'} + def add( self, id: _MessageID, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 0b97bcf08..5bdc5bfd4 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -20,7 +20,7 @@ import sys import tempfile import warnings -from collections import OrderedDict, defaultdict +from collections import defaultdict from configparser import RawConfigParser from io import StringIO from typing import BinaryIO, Iterable, Literal @@ -28,7 +28,7 @@ from babel import Locale, localedata from babel import __version__ as VERSION from babel.core import UnknownLocaleError -from babel.messages.catalog import DEFAULT_HEADER, Catalog +from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo from babel.messages.extract import ( DEFAULT_KEYWORDS, DEFAULT_MAPPING, @@ -925,7 +925,7 @@ def initialize_options(self): self.stringtable_input = None self.to_code = None # the first translation is always used temporarily - self.use_first = True #~ + self.use_first = False #~ self.lang = None self.color = None self.style = None @@ -965,36 +965,49 @@ def finalize_options(self): self.less_than = 2 def _prepare(self): - self.message_count = defaultdict(int) + templates: list[tuple[str, Catalog]] = [] + message_info = {} for filename in self.input_files: with open(filename, 'r') as pofile: 
template = read_po(pofile) for message in template: - self.message_count[message.id] += 1 + if message.id not in message_info: + message_info[message.id] = { + 'count': 0, + 'strings': set(), + } + message_info[message.id]['count'] += 1 + message_info[message.id]['strings'].add(message.string if isinstance(message.string, str) else tuple(message.string)) + templates.append((filename, template, )) + + return templates, message_info def run(self): catalog = Catalog(fuzzy=False) - self._prepare() + templates, message_info = self._prepare() - for filename in self.input_files: - with open(filename, 'r') as pofile: - template = read_po(pofile) - if catalog.locale is None: - catalog.locale = template.locale + for path, template in templates: + if catalog.locale is None: + catalog.locale = template.locale - for message in template: - if not message.id: - continue + for message in template: + if not message.id: + continue + + count = message_info[message.id]['count'] + diff_string_count = len(message_info[message.id]['strings']) + if count <= self.more_than or (self.less_than is not None and count >= self.less_than): + continue - if message.id in catalog and catalog[message.id].string != message.string and not self.use_first: - raise NotImplementedError() + if count > 1 and not self.use_first and diff_string_count > 1: + file_name = os.path.basename(path) + catalog.add_conflict(message, file_name, template.project, template.version) - message_count = self.message_count[message.id] - if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): - catalog[message.id] = message + catalog[message.id] = message catalog.fuzzy = any(message.fuzzy for message in catalog) + with open(self.output_file, 'wb') as outfile: write_po( outfile, diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 3afdd6061..67c3416b6 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -16,7 +16,7 @@ from typing import TYPE_CHECKING, 
Literal from babel.core import Locale -from babel.messages.catalog import Catalog, Message +from babel.messages.catalog import Catalog, Message, ConflictInfo from babel.util import TextWrapper, _cmp if TYPE_CHECKING: @@ -349,6 +349,9 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: if not line: continue if line.startswith('#'): + if line[1:].startswith('-'): + self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts') + if line[1:].startswith('~'): self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) else: @@ -642,6 +645,37 @@ def _format_comment(comment, prefix=''): for line in comment_wrapper.wrap(comment): yield f"#{prefix} {line.strip()}\n" + def _format_conflict_comment(file, project, version, prefix=''): + comment = f"#-#-#-#-# {file} ({project} {version}) #-#-#-#-#" + yield f"{normalize(comment, prefix=prefix, width=width)}\n" + + def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], prefix=''): + for conflict in conflicts: + message = conflict['message'] + if message.context: + yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n" + + if isinstance(key, (list, tuple)): + yield f"{prefix}msgid {normalize(key[0], prefix=prefix, width=width)}\n" + yield f"{prefix}msgid_plural {normalize(key[1], prefix=prefix, width=width)}\n" + else: + yield f"{prefix}msgid {normalize(key, prefix=prefix, width=width)}\n" + yield f"{prefix}msgstr {normalize('', prefix=prefix, width=width)}\n" + + for conflict in conflicts: + message = conflict['message'] + yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + if isinstance(key, (list, tuple)): + for idx in range(catalog.num_plurals): + try: + string = message.string[idx] + except IndexError: + string = '' + yield f"{prefix}msgstr[{idx:d}] 
{normalize(string, prefix=prefix, width=width)}\n" + else: + yield f"{normalize(message.string, prefix=prefix, width=width)}\n" + def _format_message(message, prefix=''): if isinstance(message.id, (list, tuple)): if message.context: @@ -711,7 +745,10 @@ def _format_message(message, prefix=''): norm_previous_id = normalize(message.previous_id[1], width=width) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|') - yield from _format_message(message) + if len(conflicts := catalog._conflicts.get(message.id, [])) > 0: + yield from _format_conflict(message.id, conflicts) + else: + yield from _format_message(message) yield '\n' if not ignore_obsolete: diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 99f0b3d08..72738ecc0 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -715,14 +715,14 @@ def test_supports_width(self): assert expected_content == actual_content -class ConcatanationCatalogTestCase(unittest.TestCase): +class ConcatanateCatalogTestCase(unittest.TestCase): def setUp(self): self.olddir = os.getcwd() os.chdir(data_dir) self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.ConcatenationCatalog(self.dist) + self.cmd = frontend.ConcatenateCatalog(self.dist) self.cmd.initialize_options() self.temp1 = f'{i18n_dir}/msgcat_temp1.po' @@ -752,13 +752,13 @@ def tearDown(self): if os.path.isfile(file): os.unlink(file) - def _get_expected(self, messages): + def _get_expected(self, messages, fuzzy=False): date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') return fr"""# Translations template for PROJECT. # Copyright (C) 1994 ORGANIZATION # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. 
-# +#{'\n#, fuzzy' if fuzzy else ''} msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -805,6 +805,64 @@ def test_default(self): msgid "same" msgstr "Same" +#: hard.py:1000 simple.py:1000 +#, flag2, flag3, fuzzy +msgid "almost_same" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +"Almost same" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +"A bit same" + +#: hard.py:2000 simple.py:2000 +#, fuzzy +msgid "plural" +msgid_plural "plurals" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals other" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""", fuzzy=True) + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_use_first(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.use_first = True + + self.cmd.finalize_options() + self.cmd.run() + + expected_content = self._get_expected(fr"""#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + #: hard.py:1000 simple.py:1000 #, flag2, flag3 msgid "almost_same" @@ -885,17 +943,27 @@ def test_more_than(self): msgstr "Same" #: hard.py:1000 simple.py:1000 -#, flag2, flag3 +#, flag2, flag3, fuzzy msgid "almost_same" -msgstr "Almost same" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +"Almost same" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +"A bit same" #: hard.py:2000 simple.py:2000 +#, fuzzy msgid "plural" msgid_plural "plurals" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" msgstr[0] "Plural" msgstr[1] "Plurals" 
+"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals other" -""") +""", fuzzy=True) with open(self.output_file, 'r') as f: actual_content = f.read() @@ -1029,7 +1097,7 @@ def test_default(self): def test_compenidum(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file - self.cmd.compendium = self.compendium + self.cmd.compendium = [self.compendium,] self.cmd.no_fuzzy_matching = True self.cmd.no_compendium_comment = True self.cmd.finalize_options() @@ -1057,7 +1125,7 @@ def test_compenidum(self): def test_compenidum_overwrite(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file - self.cmd.compendium = self.compendium + self.cmd.compendium = [self.compendium,] self.cmd.no_fuzzy_matching = True self.cmd.no_compendium_comment = True self.cmd.compendium_overwrite = True From 3f37414c8f4352ba5baad9b356236cad56a2caff Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 23 Mar 2025 18:43:23 +0300 Subject: [PATCH 16/21] Fix PR issues * Delete unused options * Fix multiline options comments * Replace backup logic in MergeCatalog * Rename to ConcatenateCatalog --- babel/messages/frontend.py | 182 +++++++++---------------------------- 1 file changed, 45 insertions(+), 137 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 5bdc5bfd4..28a6dc777 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -853,94 +853,51 @@ def run(self): return -class ConcatenationCatalog(CommandMixin): +class ConcatenateCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ ('input-files', None, 'input files'), - ('files-from=', 'f', 'get list of input files from FILE'), - ('directory=', 'D', 'add DIRECTORY to list for input files search' - 'If input file is -, standard input is read.'), ('output-file=', 'o', 'write output to specified file'), ('less-than=', 
'<', 'print messages with less than this many' - 'definitions, defaults to infinite if not set'), - ('more-than=', '>', 'print messages with more than this many' + 'definitions, defaults to infinite if not set '), + ('more-than=', '>', 'print messages with more than this many ' 'definitions, defaults to 0 if not set'), - ('unique', 'u', 'shorthand for --less-than=2, requests' + ('unique', 'u', 'shorthand for --less-than=2, requests ' 'that only unique messages be printed'), - ('properties-input', 'P', 'input files are in Java .properties syntax'), - ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('to-code=','t', 'encoding for output'), - ('use-first', None, 'use first available translation for each' + ('use-first', None, 'use first available translation for each ' 'message, don\'t merge several translations'), - ('lang=', None, 'set \'Language\' field in the header entry'), - ('color=', None, 'use colors and other text attributes always'), - ('style=', None, 'specify CSS style rule file for --color'), - ('no-escape', 'e', 'do not use C escapes in output (default)'), - ('escape', 'E', 'use C escapes in output, no extended chars'), - ('force-po', None, 'write PO file even if empty'), - ('indent', 'i', 'write the .po file using indented style'), ('no-location', None, 'do not write \'#: filename:line\' lines'), - ('strict', None, 'write out strict Uniforum conforming .po file'), - ('properties-output', None, 'write out a Java .properties file'), - ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), ('width=', 'w', 'set output page width'), - ('no-wrap', None, 'do not break long message lines, longer than' + ('no-wrap', None, 'do not break long message lines, longer than ' 'the output page width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), ] - as_args='input-files' + as_args = 'input-files' boolean_options = [ 'unique', - 
'properties-input', - 'stringtable-input', - 'no-escape', - 'escape', - 'force-po', - 'indent', + 'use-first', 'no-location', 'strict', - 'properties-output', - 'stringtable-output', 'no-wrap', 'sort-output', 'sort-by-file', ] - option_choices = { - 'color': ('always', 'never', 'auto', 'html'), - } - def initialize_options(self): - self.input_files = None # - self.files_from = None - self.directory = None - self.output_file = None # - self.less_than = None # - self.more_than = 0 # - self.unique = False # - self.properties_input = None - self.stringtable_input = None - self.to_code = None - # the first translation is always used temporarily - self.use_first = False #~ - self.lang = None - self.color = None - self.style = None - self.no_escape = None - self.escape = None - self.force_po = None - self.indent = None - self.no_location = None # - self.strict = None - self.properties_output = None - self.stringtable_output = None - self.width = None # - self.no_wrap = None # - self.sort_output = False # - self.sort_by_file = False # + self.input_files = None + self.output_file = None + self.less_than = None + self.more_than = 0 + self.unique = False + self.use_first = False + self.no_location = None + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False def finalize_options(self): if not self.input_files: @@ -1020,37 +977,21 @@ def run(self): class MergeCatalog(CommandMixin): - description='combines two Uniforum-style PO files into one' + description='updates translation PO file by merging them with updated template POT file with using compendium' user_options=[ - ('input-files', None, 'def.po ref.pot'), - ('directory=', 'D', 'add DIRECTORY to list for input files search'), + ('input-files', None, 'def.po (obsolete translations) ref.pot (actual template)'), ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), ('compendium-overwrite', '', 'overwrite mode of compendium'), 
('no-compendium-comment', '', ''), ('update', 'U', 'pdate def.po, do nothing if def.po already up to date'), - ('output-file=', 'o', 'write output to specified file, the results are written' + ('output-file=', 'o', 'write output to specified file, the results are written ' 'to standard output if no output file is specified'), ('backup', None, 'make a backup of def.po'), ('suffix=', None, 'override the usual backup suffix'), - ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), - ('for-msgfmt', None, 'produce output for \'msgfmt\', not for a translator'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), - ('previous', None, 'keep previous msgids of translated messages'), - ('properties-input', 'P', 'input files are in Java .properties syntax'), - ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('lang=', None, 'set \'Language\' field in the header entry'), - ('color=', None, 'use colors and other text attributes always'), - ('style=', None, 'specify CSS style rule file for --color'), - ('no-escape', 'e', 'do not use C escapes in output (default)'), - ('escape', 'E', 'use C escapes in output, no extended chars'), - ('force-po', None, 'write PO file even if empty'), - ('indent', 'i', 'indented output style'), ('no-location', None, 'suppress \'#: filename:line\' lines'), - ('strict', None, 'strict Uniforum output style'), - ('properties-output', None, 'write out a Java .properties file'), - ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), ('width=', 'w', 'set output page width'), - ('no-wrap', None, 'do not break long message lines, longer' + ('no-wrap', None, 'do not break long message lines, longer ' 'than the output page width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), @@ -1063,66 +1004,32 @@ class MergeCatalog(CommandMixin): ) boolean_options = [ + 'compendium-overwrite', + 
'no-compendium-comment', 'update', - 'multi-domain', - 'for-msgfmt', + 'backup', 'no-fuzzy-matching', - 'previous' - 'properties-input', - 'stringtable-input', - 'no-escape', - 'escape', - 'force-po', - 'indent', 'no-location', - 'strict', - 'properties-output', - 'stringtable-output', 'no-wrap', 'sort-output', 'sort-by-file', - 'compendium-overwrite', - 'backup', - 'no-compendium-comment', ] - option_choices = { - 'color': ('always', 'never', 'auto', 'html'), - } - def initialize_options(self): - self.input_files = None # - self.directory = None - - self.compendium = None #~ - self.compendium_overwrite = False # - self.no_compendium_comment = None # - - self.update = None # - self.output_file = None # - self.backup = False # - self.suffix = '~' # - self.multi_domain = None - self.for_msgfmt = None - self.no_fuzzy_matching = None # - self.previous = None - self.properties_input = None - self.stringtable_input = None - self.lang = None - self.color = None - self.style = None - self.no_escape = None - self.escape = None - self.force_po = None - self.indent = None - self.no_location = None # - self.strict = None - self.properties_output = None - self.stringtable_output = None - self.width = None # - self.no_wrap = None # - self.sort_output = False # - self.sort_by_file = False # + self.input_files = None + self.compendium = None + self.compendium_overwrite = False + self.no_compendium_comment = False + self.update = False + self.output_file = None + self.backup = False + self.suffix = '~' + self.no_fuzzy_matching = False + self.no_location = False + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False def finalize_options(self): if not self.input_files or len(self.input_files) != 2: @@ -1147,9 +1054,6 @@ def _get_message_from_compendium(self, compendium): def run(self): def_file, ref_file = self.input_files - if self.update and self.backup: - shutil.copy(def_file, def_file + self.suffix) - with open(def_file, 'r') as pofile: 
catalog = read_po(pofile) with open(ref_file, 'r') as pofile: @@ -1175,6 +1079,10 @@ def run(self): catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file + + if self.update and self.backup: + shutil.copy(def_file, def_file + self.suffix) + with open(output_path, 'wb') as outfile: write_po( outfile, @@ -1209,7 +1117,7 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, - 'concat': ConcatenationCatalog, + 'concat': ConcatenateCatalog, 'merge': MergeCatalog, } From 1817bce8400d89083a6a26d7fcbeef2c9566c1b8 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 7 Apr 2025 14:08:04 +0300 Subject: [PATCH 17/21] Add info about pybable concat and pybabel merge into docs --- babel/messages/frontend.py | 2 +- docs/cmdline.rst | 89 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 28a6dc777..9623533a8 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -854,7 +854,7 @@ def run(self): class ConcatenateCatalog(CommandMixin): - description = 'concatenates and merges the specified PO files' + description = 'concatenates the specified PO files into single one' user_options = [ ('input-files', None, 'input files'), ('output-file=', 'o', 'write output to specified file'), diff --git a/docs/cmdline.rst b/docs/cmdline.rst index e1328fe8f..e8221dd0e 100644 --- a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -247,3 +247,92 @@ filename of the output file will be:: If neither the ``output_file`` nor the ``locale`` option is set, this command looks for all catalog files in the base directory that match the given domain, and updates each of them. + +concat +====== + +The `concat` command merges multiple PO files into a single one. 
If a message has +different translations in different PO files, the conflicting translations are +marked with a conflict comment:: + #-#-#-#-# (PROJECT VERSION) #-#-#-#-# +and the message itself is marked with a `fuzzy` flag:: + + $ pybabel concat --help + Usage: pybabel concat [options] + + concatenates the specified PO files into single one + + Options: + -h, --help show this help message and exit + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file + --less-than=NUMBER print messages with less than this many + definitions, defaults to infinite if not set + --more-than=NUMBER print messages with more than this many + definitions, defaults to 0 if not set + -u, unique shorthand for --less-than=2, requests + that only unique messages be printed + --use-first use first available translation for each + message, don't merge several translations + --no-location do not write '#: filename:line' lines + -w WIDTH, --width=WIDTH + set output page width + --no-wrap do not break long message lines, longer than + the output page width, into several lines + -s, --sort-output generate sorted output + -F, --sort-by-file sort output by file location + +merge +====== + +The `merge` command allows updating files using a compendium as a translation memory:: + + $ pybabel concat --help + Usage: pybabel merge [options] + + updates translation PO file by merging them with updated template + POT file with using compendium + + Options: + -C COMPENDIUM_FILE, --compendium=COMPENDIUM_FILE + additional library of message translations, may + be specified more than once + --compendium-overwrite + overwrite mode of compendium + --no-compendium-comment + do not add a comment indicating that the message is + taken from the compendium + -U, --update update def.po, do nothing if def.po already up to date, + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file, the results are written + to standard output if no output file is specified + --backup make a 
backup of def.po + --suffix=SUFFIX override the usual backup suffix (default '~') + -N, --no-fuzzy-matching + do not use fuzzy matching + --no-location suppress '#: filename:line' lines + -w WIDTH, --width=WIDTH + set output page width + --no-wrap do not break long message lines, longer + than the output page width, into several lines + -s, --sort-output generate sorted output + -F, --sort-by-file sort output by file location + +The compendium can be used in two modes: +- Default mode: the translations from the compendium are used + only if they are missing in the output file. + +- Compendium overwrite mode: when using the ``compendium-overwrite`` option, translations + from the compendium take priority and replace those in the output file. If a translation + is used from the compendium, a comment noting the source is added. + +The ``input-files`` option includes def.po, a file with obsolete translations, and ref.pot, +the current template file for updating translations. + +The ``compendium`` option can be specified multiple times to use several compendiums. + +The ``backup`` option is used to create a backup copy of the def.po file, which contains +obsolete translations. + +The ``suffix`` option allows you to specify a custom suffix for the backup file. +By default, a standard suffix ``~`` is appended to the backup file's name. From dd44348272edb1d8bd86818f16ea54809cfd0851 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 21 Apr 2025 16:00:27 +0700 Subject: [PATCH 18/21] Add usage documentation for pybabel concat and merge commands * Includes .rst file with detailed use cases and practical examples for pybabel's concat and merge utilities, outlining common scenarios, options, and best practices for managing PO files.
--- docs/concat_merge_usage.rst | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 docs/concat_merge_usage.rst diff --git a/docs/concat_merge_usage.rst b/docs/concat_merge_usage.rst new file mode 100644 index 000000000..03c06d0ea --- /dev/null +++ b/docs/concat_merge_usage.rst @@ -0,0 +1,52 @@ +Usage scenarios +--------------- + +1. Merging Multiple PO Files (`concat`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel concat [options] ` +Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. + +**Example:** + +.. code-block:: shell + + pybabel concat -o merged.po module1.po module2.po module3.po + +**Features:** + +- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. +- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. +- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. +- Output can be sorted alphabetically or by source file (options `-s`, `-F`). + +**Typical Use Case:** + + A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. + + +2. 
Updating Translations with a Template and Compendium (`merge`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel merge [options] def.po ref.pot` +You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). + +**Example:** + +.. code-block:: shell + + pybabel merge -C my-compendium.po --backup def.po ref.pot + +**Features:** + +- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. +- By default, translations from the compendium are used only for new or missing entries in `def.po`. +- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). +- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). +- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). + +**Typical Use Case:** + + After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. 
From 0a6388d8aefb13eb6a9bf4606869b52bcb563531 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 27 Apr 2025 15:08:30 +0300 Subject: [PATCH 19/21] Fix PR issues * Rename file_name to filename * Adding fuzzy flag to message parameterized in 'add_conflict' * Replace usage scenarious to cmdline.rst * Rename to ConcatenateCatalog --- babel/messages/catalog.py | 16 +++++----- babel/messages/frontend.py | 6 ++-- babel/messages/pofile.py | 4 +-- docs/cmdline.rst | 60 ++++++++++++++++++++++++++++++++++--- docs/concat_merge_usage.rst | 52 -------------------------------- 5 files changed, 69 insertions(+), 69 deletions(-) delete mode 100644 docs/concat_merge_usage.rst diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 71b0e78c1..9a75f8fa1 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -13,6 +13,7 @@ import re import os from collections.abc import Iterable, Iterator +from collections import defaultdict from copy import copy from difflib import SequenceMatcher from email import message_from_string @@ -341,7 +342,7 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') class ConflictInfo(TypedDict): message: Message - file_name: str + filename: str project: str version: str @@ -389,7 +390,7 @@ def __init__( self.locale = locale self._header_comment = header_comment self._messages: dict[str | tuple[str, str], Message] = {} - self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {} + self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = defaultdict(list) self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -756,18 +757,17 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: f"Expected sequence but got {type(message.string)}" self._messages[key] = message - def add_conflict(self, message: Message, file_name: str, project: str, version: str): + def add_conflict(self, message: Message, filename: str, project: str, version: str, fuzzy: bool = 
True): key = message.id - if key not in self._conflicts: - self._conflicts[key] = [] - self._conflicts[key].append({ 'message': message, - 'file_name': file_name, + 'filename': filename, 'project': project, 'version': version, }) - message.flags |= {'fuzzy'} + + if fuzzy: + message.flags |= {'fuzzy'} def add( self, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 9623533a8..eabc45dd0 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -958,8 +958,8 @@ def run(self): continue if count > 1 and not self.use_first and diff_string_count > 1: - file_name = os.path.basename(path) - catalog.add_conflict(message, file_name, template.project, template.version) + filename = os.path.basename(path) + catalog.add_conflict(message, filename, template.project, template.version) catalog[message.id] = message @@ -1109,7 +1109,7 @@ class CommandLineInterface: 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', 'concat': 'concatenates and merges the specified PO files', - 'merge': 'combines two Uniforum-style PO files into one', + 'merge': 'combines two PO files into one', } command_classes = { diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index 67c3416b6..ed3066dc2 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -653,7 +653,7 @@ def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], for conflict in conflicts: message = conflict['message'] if message.context: - yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n" if isinstance(key, (list, tuple)): @@ -665,7 +665,7 @@ def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], 
for conflict in conflicts: message = conflict['message'] - yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) if isinstance(key, (list, tuple)): for idx in range(catalog.num_plurals): try: diff --git a/docs/cmdline.rst b/docs/cmdline.rst index e8221dd0e..de28e1ddb 100644 --- a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -326,13 +326,65 @@ The compendium can be used in two modes: from the compendium take priority and replace those in the output file. If a translation is used from the compendium, a comment noting the source is added -The ``input-files`` option includes def.po, a file with obsolete translations, and ref.pot, +The ``input-files`` option accepts exactly two arguments: a file with obsolete translations, and the current template file for updating translations. The ``compendium`` option can be specified multiple times to use several compendiums. The ``backup`` option is used to create a backup copy of the def.po file, which contains -obsolete translations +obsolete translations. -The ``suffix`` option allows you to specify a custom suffix for the backup file -By default, a standard suffix ``~`` is appended to the backup file's name, +The ``suffix`` option allows you to specify a custom suffix for the backup file (defaulting to ``~``). + +pybabel concat and merge usage scenarios +======================================== + +1. Merging Multiple PO Files (`concat`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel concat [options] <input-files>` +Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. + +**Example:** + +..
code-block:: shell + + pybabel concat -o merged.po module1.po module2.po module3.po + +**Features:** + +- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. +- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. +- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. +- Output can be sorted alphabetically or by source file (options `-s`, `-F`). + +**Typical Use Case:** + + A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. + + +2. Updating Translations with a Template and Compendium (`merge`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel merge [options] def.po ref.pot` +You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). + +**Example:** + +.. code-block:: shell + + pybabel merge -C my-compendium.po --backup def.po ref.pot + +**Features:** + +- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. +- By default, translations from the compendium are used only for new or missing entries in `def.po`. +- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). +- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). 
+- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). + +**Typical Use Case:** + + After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. diff --git a/docs/concat_merge_usage.rst b/docs/concat_merge_usage.rst deleted file mode 100644 index 03c06d0ea..000000000 --- a/docs/concat_merge_usage.rst +++ /dev/null @@ -1,52 +0,0 @@ -Usage scenarios ---------------- - -1. Merging Multiple PO Files (`concat`) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Usage:** -`pybabel concat [options] ` -Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. - -**Example:** - -.. code-block:: shell - - pybabel concat -o merged.po module1.po module2.po module3.po - -**Features:** - -- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. -- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. -- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. -- Output can be sorted alphabetically or by source file (options `-s`, `-F`). - -**Typical Use Case:** - - A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. - - -2. 
Updating Translations with a Template and Compendium (`merge`) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Usage:** -`pybabel merge [options] def.po ref.pot` -You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). - -**Example:** - -.. code-block:: shell - - pybabel merge -C my-compendium.po --backup def.po ref.pot - -**Features:** - -- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. -- By default, translations from the compendium are used only for new or missing entries in `def.po`. -- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). -- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). -- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). - -**Typical Use Case:** - - After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. 
From 5828c13413de83e1c846868f37d9c1c642e04088 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 27 Apr 2025 15:19:12 +0300 Subject: [PATCH 20/21] Add '_conflicts' getter in catalog --- babel/messages/catalog.py | 3 +++ babel/messages/pofile.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 9a75f8fa1..e2d9bcb46 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -769,6 +769,9 @@ def add_conflict(self, message: Message, filename: str, project: str, version: s if fuzzy: message.flags |= {'fuzzy'} + def get_conflicts(self, id: _MessageID) -> list[ConflictInfo]: + return self._conflicts.get(id, []) + def add( self, id: _MessageID, diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index ed3066dc2..e6534f7be 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -745,7 +745,7 @@ def _format_message(message, prefix=''): norm_previous_id = normalize(message.previous_id[1], width=width) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|') - if len(conflicts := catalog._conflicts.get(message.id, [])) > 0: + if len(conflicts := catalog.get_conflicts(message.id)) > 0: yield from _format_conflict(message.id, conflicts) else: yield from _format_message(message) From bbba96ef2f9c669d4caa13445a21c187ad204fda Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 27 Apr 2025 15:38:55 +0300 Subject: [PATCH 21/21] Rework tests from unittest to pytest * Add frozen_time fixture to use freeze_time in every test --- tests/messages/test_frontend.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 72738ecc0..a46220e8e 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -48,6 +48,12 @@ from tests.messages.utils import CUSTOM_EXTRACTOR_COOKIE +@pytest.fixture(autouse=True) +def 
frozen_time(): + with freeze_time("1994-11-11"): + yield + + def _po_file(locale): return os.path.join(i18n_dir, locale, 'LC_MESSAGES', 'messages.po') @@ -715,9 +721,9 @@ def test_supports_width(self): assert expected_content == actual_content -class ConcatanateCatalogTestCase(unittest.TestCase): +class TestConcatanateCatalog: - def setUp(self): + def setup_method(self): self.olddir = os.getcwd() os.chdir(data_dir) @@ -747,7 +753,7 @@ def setUp(self): catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) pofile.write_po(file, catalog) - def tearDown(self): + def teardown_method(self): for file in [self.temp1, self.temp2, self.output_file]: if os.path.isfile(file): os.unlink(file) @@ -783,7 +789,6 @@ def test_no_output_file(self): with pytest.raises(OptionError): self.cmd.finalize_options() - @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -840,7 +845,6 @@ def test_default(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_use_first(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -888,7 +892,6 @@ def test_use_first(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_unique(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -928,7 +931,6 @@ def test_unique(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_more_than(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -970,10 +972,9 @@ def test_more_than(self): assert expected_content == actual_content -class MergeCatalogTestCase(unittest.TestCase): +class TestMergeCatalog: - @freeze_time("1994-11-11") - def setUp(self): + def setup_method(self): 
self.olddir = os.getcwd() os.chdir(data_dir) @@ -1007,7 +1008,7 @@ def setUp(self): catalog.add('word5', string='Word 5') pofile.write_po(file, catalog) - def tearDown(self): + def teardown_method(self): for file in [ self.temp_def, self.temp_def + '~', @@ -1066,8 +1067,6 @@ def test_no_output_file(self): self.cmd.update = True self.cmd.finalize_options() - - @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file @@ -1093,7 +1092,6 @@ def test_default(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_compenidum(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file @@ -1121,7 +1119,6 @@ def test_compenidum(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_compenidum_overwrite(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file