diff --git a/.gitignore b/.gitignore index d9ba8ca..e28cd06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.gem .idea/ +*.iml *.lock .ruby-version .DS_Store diff --git a/python_twine/tests/fixtures/formatter_android_plurals.xml b/python_twine/tests/fixtures/formatter_android_plurals.xml new file mode 100644 index 0000000..cccd95b --- /dev/null +++ b/python_twine/tests/fixtures/formatter_android_plurals.xml @@ -0,0 +1,13 @@ + + + + + + %d bookmark + %d bookmarks + + + %d track + %d tracks + + diff --git a/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict b/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict new file mode 100644 index 0000000..38d6a8d --- /dev/null +++ b/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict @@ -0,0 +1,43 @@ + + + + + + + + bookmarks_places + + NSStringLocalizedFormatKey + %#@value@ + value + + NSStringFormatSpecTypeKey + NSStringPluralRuleType + NSStringFormatValueTypeKey + d + one + %d bookmark + other + %d bookmarks + + + + tracks + + NSStringLocalizedFormatKey + %#@value@ + value + + NSStringFormatSpecTypeKey + NSStringPluralRuleType + NSStringFormatValueTypeKey + d + one + %d track + other + %d tracks + + + + + \ No newline at end of file diff --git a/python_twine/tests/test_formatters.py b/python_twine/tests/test_formatters.py index c977591..db308a8 100644 --- a/python_twine/tests/test_formatters.py +++ b/python_twine/tests/test_formatters.py @@ -157,35 +157,19 @@ def test_double_quotes_not_modified(self, formatter): def test_escape_ampersand(self, formatter): """Test ampersand escaping.""" - formatter.set_translation_for_key("key1", "en", "this & that", "Section A") - assert ( - formatter.twine_file.definitions_by_key["key1"].translations["en"] - == "this & that" - ) + assert AndroidFormatter.unescape_value("this & that") == "this & that" def test_escape_less_than(self, formatter): """Test less-than escaping.""" - formatter.set_translation_for_key("key1", "en", "this < that", 
"Section B") - assert ( - formatter.twine_file.definitions_by_key["key1"].translations["en"] - == "this < that" - ) + assert AndroidFormatter.unescape_value("this < that") == "this < that" def test_escape_apostrophe(self, formatter): """Test apostrophe escaping.""" - formatter.set_translation_for_key("key1", "en", "it\\'s complicated", "Section C") - assert ( - formatter.twine_file.definitions_by_key["key1"].translations["en"] - == "it's complicated" - ) + assert AndroidFormatter.unescape_value("it\\'s complicated") == "it's complicated" def test_placeholder_conversion(self, formatter): """Test placeholder conversion from %s to %@.""" - formatter.set_translation_for_key("key1", "en", "value %s", "Section D") - assert ( - formatter.twine_file.definitions_by_key["key1"].translations["en"] - == "value %@" - ) + assert AndroidFormatter.unescape_value("value %s") == "value %@" def test_writer_escape_ampersand(self, formatter): """Test ampersand escaping.""" diff --git a/python_twine/tests/test_formatters_plural.py b/python_twine/tests/test_formatters_plural.py index 51bc249..c88c0ed 100644 --- a/python_twine/tests/test_formatters_plural.py +++ b/python_twine/tests/test_formatters_plural.py @@ -6,7 +6,97 @@ import pytest +from twine.formatters.apple_plural import ApplePluralFormatter from twine.twine_file import TwineFile, TwineDefinition, TwineSection +from twine.formatters.android import AndroidFormatter + +class TestAndroidPluralFormatter: + """Test Android XML formatter with tags.""" + + @pytest.fixture + def formatter(self): + """Create formatter with empty TwineFile.""" + twine_file = TwineFile() + formatter = AndroidFormatter() + formatter.twine_file = twine_file + formatter.options = {"consume_all": True, "consume_comments": True} + return formatter + + @pytest.fixture + def fixtures_dir(self): + """Get fixtures directory path.""" + return Path(__file__).parent / "fixtures" + + @pytest.fixture + def twine_file(self): + # Prepare TwineFile data + twine_file = 
TwineFile() + num_edits_def = TwineDefinition("num_edits") + + twine_file.definitions_by_key["num_edits"] = num_edits_def + twine_file.language_codes = ["en", "de"] + twine_file.sections = [TwineSection("OSM")] + twine_file.sections[0].definitions.append(num_edits_def) + + # Put plural translation + num_edits_def.plural_translations["en"] = { + "one": "%d edit", + "other": "%d edits" + } + num_edits_def.translations["en"] = "%d edits" + + num_edits_def.plural_translations["de"] = { + "zero": "%d Bearbeitungen", + "one": "%d Bearbeitung", + "other": "%d Bearbeitungen" + } + num_edits_def.translations["de"] = "%d Bearbeitungen" + + return twine_file + + def test_read_format(self, formatter, fixtures_dir): + """Test reading Android XML format with tags.""" + fixture_path = fixtures_dir / "formatter_android_plurals.xml" + with open(fixture_path, "r", encoding="utf-8") as f: + formatter.read(f, "en") + + twine_file = formatter.twine_file + + assert "bookmarks_places" in twine_file.definitions_by_key + translations1 = twine_file.definitions_by_key["bookmarks_places"].plural_translations + assert translations1 == {"en": {"one": "%d bookmark", "other": "%d bookmarks"}} + + assert "tracks" in twine_file.definitions_by_key + translations2 = twine_file.definitions_by_key["tracks"].plural_translations + assert translations2 == {"en": {"one": "%d track", "other": "%d tracks"}} + + def test_write_format(self, formatter, twine_file): + formatter.twine_file = twine_file + + en_plural_content = formatter.format_file("en") + assert en_plural_content == """ + + + + + %d edit + %d edits + + +""" + + de_plural_content = formatter.format_file("de") + assert de_plural_content == """ + + + + + %d Bearbeitungen + %d Bearbeitung + %d Bearbeitungen + + +""" class TestTwineFilePlural: @@ -71,3 +161,122 @@ def test_read_plurals(self, fixtures_dir): "many": "%d меток", "other": "%d меток" } + + +class TestApplePluralFormatter: + @pytest.fixture + def formatter(self) -> ApplePluralFormatter: + 
"""Create formatter with empty TwineFile.""" + formatter = ApplePluralFormatter() + formatter.options = {"consume_all": True, "consume_comments": True} + return formatter + + @pytest.fixture + def fixtures_dir(self) -> Path: + """Get fixtures directory path.""" + return Path(__file__).parent / "fixtures" + + @pytest.fixture + def twine_file(self) -> TwineFile: + # Prepare TwineFile data + twine_file = TwineFile() + num_edits_def = TwineDefinition("num_edits") + + twine_file.definitions_by_key["num_edits"] = num_edits_def + twine_file.language_codes = ["en", "de"] + twine_file.sections = [TwineSection("OSM")] + twine_file.sections[0].definitions.append(num_edits_def) + + # Put plural translation + num_edits_def.plural_translations["en"] = { + "one": "%d edit", + "other": "%d edits" + } + num_edits_def.translations["en"] = "%d edits" + + num_edits_def.plural_translations["de"] = { + "zero": "%d Bearbeitungen", + "one": "%d Bearbeitung", + "other": "%d Bearbeitungen" + } + num_edits_def.translations["de"] = "%d Bearbeitungen" + + return twine_file + + def test_read_stringsdict(self, formatter:ApplePluralFormatter, fixtures_dir): + """Test reading Apple .stringsdict plural format.""" + fixture_path = fixtures_dir / "formatter_apple_plurals.stringsdict" + with open(fixture_path, "r", encoding="utf-8") as f: + formatter.read(f, "en") + + twine_file = formatter.twine_file + + assert "bookmarks_places" in twine_file.definitions_by_key + translations1 = twine_file.definitions_by_key["bookmarks_places"].plural_translations + assert translations1 == {"en": {"one": "%d bookmark", "other": "%d bookmarks"}} + + assert "tracks" in twine_file.definitions_by_key + translations2 = twine_file.definitions_by_key["tracks"].plural_translations + assert translations2 == {"en": {"one": "%d track", "other": "%d tracks"}} + + + def test_write_plural_format(self, formatter, twine_file): + formatter.twine_file = twine_file + + en_plural_content = formatter.format_file("en") + assert 
en_plural_content == """ + + + + + + +\tnum_edits +\t +\t\tNSStringLocalizedFormatKey +\t\t%#@value@ +\t\tvalue +\t\t +\t\t\tNSStringFormatSpecTypeKey +\t\t\tNSStringPluralRuleType +\t\t\tNSStringFormatValueTypeKey +\t\t\td +\t\t\tone +\t\t\t%d edit +\t\t\tother +\t\t\t%d edits +\t\t +\t + + +""" + + de_plural_content = formatter.format_file("de") + assert de_plural_content == """ + + + + + + +\tnum_edits +\t +\t\tNSStringLocalizedFormatKey +\t\t%#@value@ +\t\tvalue +\t\t +\t\t\tNSStringFormatSpecTypeKey +\t\t\tNSStringPluralRuleType +\t\t\tNSStringFormatValueTypeKey +\t\t\td +\t\t\tzero +\t\t\t%d Bearbeitungen +\t\t\tone +\t\t\t%d Bearbeitung +\t\t\tother +\t\t\t%d Bearbeitungen +\t\t +\t + + +""" diff --git a/python_twine/twine/cli.py b/python_twine/twine/cli.py index 355a224..4e942ef 100644 --- a/python_twine/twine/cli.py +++ b/python_twine/twine/cli.py @@ -74,6 +74,9 @@ def create_parser() -> argparse.ArgumentParser: ) consume_all.add_argument("twine_file", help="Path to Twine data file") consume_all.add_argument("input_path", help="Input directory path") + consume_all.add_argument( + "-n", "--file-name", help="Input file name (default: format-specific)" + ) CLI._add_common_arguments(consume_all) CLI._add_consume_arguments(consume_all) diff --git a/python_twine/twine/formatters/__init__.py b/python_twine/twine/formatters/__init__.py index a66d3ba..dda6212 100644 --- a/python_twine/twine/formatters/__init__.py +++ b/python_twine/twine/formatters/__init__.py @@ -18,6 +18,16 @@ def flatten(input: List[List[str]]) -> List[str]: flat += group return flat +def find_dict_diff(dict1: dict, dict2: dict): + keys = dict1.keys() | dict2.keys() + for key in sorted(keys): + if key in dict1 and key not in dict2: + yield key, dict1[key], None + elif key not in dict1 and key in dict2: + yield key, None, dict2[key] + elif dict1[key] != dict2[key]: + yield key, dict1[key], dict2[key] + class AbstractFormatter(ABC): """Base class for all format formatters.""" @@ -103,6 +113,56 @@ 
def set_translation_for_key(self, key: str, lang: str, value: str, section_name: if lang not in self.twine_file.language_codes: self.twine_file.add_language_code(lang) + def set_translation_for_key_plural(self, key: str, lang: str, values: Dict[str, str], section_name:Optional[str]): + """ Set plural values translation for a key in a specific language. + This method is similar to set_translation_for_key() but with dict values. + """ + # Normalize newlines + values = {key:val.replace("\n", "\\n") for (key, val) in values.items()} + + if key in self.twine_file.definitions_by_key: + definition = self.twine_file.definitions_by_key[key] + reference = None + + if definition.reference_key: + reference = self.twine_file.definitions_by_key.get( + definition.reference_key + ) + + # Only set if no reference or value differs from reference + if not reference or values != reference.plural_translations.get(lang): + if lang in definition.plural_translations and definition.plural_translations[lang] != values: + for quantity, value_old, value_new in find_dict_diff(definition.plural_translations[lang], values): + msg = (f"Translation '{value_new}' overrides existing translation '{value_old}' " + f"for key '{key}:{quantity}' and lang '{lang}'") + self.add_validation_error(msg) + if lang in definition.plural_translations: + definition.plural_translations[lang].update(values) + else: + definition.plural_translations[lang] = values + if "tags" in self.options: + definition.add_tags(flatten(self.options["tags"])) + + elif self.options.get("consume_all"): + print(f"Adding new definition '{key}' to twine file.", file=twine.stdout) + + current_section = self.get_section_or_create(section_name or "Uncategorized") + + current_definition = TwineDefinition(key) + current_section.definitions.append(current_definition) + if "tags" in self.options: + current_definition.add_tags(flatten(self.options["tags"])) + + self.twine_file.definitions_by_key[key] = current_definition + 
current_definition.plural_translations[lang] = values + + else: + print(f"WARNING: '{key}' not found in twine file.", file=twine.stdout) + + # Add language code if not present + if lang not in self.twine_file.language_codes: + self.twine_file.add_language_code(lang) + def get_section(self, section_name) -> Optional[TwineSection]: # Find or create a section by name return next( @@ -114,7 +174,7 @@ def get_section_or_create(self, section_name) -> TwineSection: if not section: section = TwineSection(section_name) - self.twine_file.sections.insert(0, section) + self.twine_file.sections.append(section) return section diff --git a/python_twine/twine/formatters/android.py b/python_twine/twine/formatters/android.py index 4df002c..3f49992 100644 --- a/python_twine/twine/formatters/android.py +++ b/python_twine/twine/formatters/android.py @@ -6,6 +6,7 @@ import html from typing import Dict, Optional, TextIO from xml.etree import ElementTree as ET +from xml.etree.ElementTree import Element from twine.formatters import AbstractFormatter from twine.formatters.tools import replace_with_filter @@ -16,14 +17,25 @@ ) +def inner_xml(node:Element) -> str: + # Get inner XML (text + nested elements) + # Start with the text before any child element + value = node.text or "" + + # Add each child element's XML and tail + for subelement in node: + value += ET.tostring(subelement, encoding="unicode", method="html") + return value + class AndroidFormatter(AbstractFormatter): """Formatter for Android XML string resources.""" SUPPORTS_PLURAL = True # Language code mappings for Android - LANG_CODES = { + ANDROID_TO_TWINE_LANG_CODES = { "zh": "zh-Hans", + "zh-TW": "zh-Hant", "zh-CN": "zh-Hans", "zh-HK": "zh-Hant", # Legacy language codes @@ -32,6 +44,11 @@ class AndroidFormatter(AbstractFormatter): "ji": "yi", } + TWINE_TO_ANDROID_LANG_CODES = { + "zh-Hans": "zh", + "zh-Hant": "zh-TW", + } + def format_name(self) -> str: return "android" @@ -70,7 +87,7 @@ def determine_language_given_path(self, 
path: str) -> Optional[str]: ) if match: lang = match.group(1).replace("-r", "-") - return self.LANG_CODES.get(lang, lang) + return self.ANDROID_TO_TWINE_LANG_CODES.get(lang, lang) return super().determine_language_given_path(path) @@ -79,38 +96,12 @@ def output_path_for_language(self, lang: str) -> str: if self.twine_file.language_codes and lang == self.twine_file.language_codes[0]: return "values" else: + lang = self.TWINE_TO_ANDROID_LANG_CODES.get(lang, lang) # Convert en-US to values-en-rUS result = f"values-{lang}" result = re.sub(r"-([A-Z])", r"-r\1", result) return result - def set_translation_for_key(self, key: str, lang: str, value: str, section_name: Optional[str]): - """Set translation, handling Android-specific unescaping.""" - # Unescape HTML entities - value = html.unescape(value) - - # Unescape Android escapes - value = value.replace("\\'", "'") - value = value.replace('\\"', '"') - - # Convert placeholders from Android to Twine - value = convert_placeholders_from_android_to_twine(value) - - # Unescape @ signs - value = value.replace("\\@", "@") - - # Unescape \n - value = value.replace("\n\\n", "\n") - - # Convert \u0020 space escapes - def replace_spaces(match): - spaces = match.group(0) - return " " * (len(spaces) // 6) - - value = re.sub(r"(\\u0020)+", replace_spaces, value) - - super().set_translation_for_key(key, lang, value, section_name) - def read(self, io: TextIO, lang: str): """Read Android XML strings file.""" content = io.read() @@ -144,18 +135,33 @@ def read(self, io: TextIO, lang: str): if not key: continue - # Get inner XML (text + nested elements) - # Start with the text before any child element - value = child.text or "" + value = self.unescape_value(inner_xml(child)) + self.set_translation_for_key(key, lang, value, current_section) - # Add each child element's XML and tail - for subelement in child: - value += ET.tostring(subelement, encoding="unicode", method="html") + if comment: + self.set_comment_for_key(key, comment) + comment 
= None + + # Handle plural strings elements: + # + # %d bookmark + # %d bookmarks + # + elif child.tag == "plurals": + key = child.get("name") + if not key: + continue - # Add tail text if any (text after the last child element) - # Note: child.tail is text AFTER the element, not inside + plural_values = {} + for subelement in child: + if subelement.tag == "item": + quantity = subelement.get("quantity") + if not quantity: + continue + plural_values[quantity] = self.unescape_value(inner_xml(subelement)) - self.set_translation_for_key(key, lang, value, current_section) + if plural_values: + self.set_translation_for_key_plural(key, lang, plural_values, current_section) if comment: self.set_comment_for_key(key, comment) @@ -200,6 +206,31 @@ def format_plural_keys(self, key: str, plural_hash: Dict[str, str]) -> str: result += "\n " return result + @staticmethod + def unescape_value(value: str) -> str: + """ Unescape HTML entities """ + value = html.unescape(value) + + # Unescape Android escapes + value = value.replace("\\'", "'") + value = value.replace('\\"', '"') + + # Convert placeholders from Android to Twine + value = convert_placeholders_from_android_to_twine(value) + + # Unescape @ signs + value = value.replace("\\@", "@") + + # Unescape \n + value = value.replace("\n\\n", "\n") + + # Convert \u0020 space escapes + def replace_spaces(match): + spaces = match.group(0) + return " " * (len(spaces) // 6) + + return re.sub(r"(\\u0020)+", replace_spaces, value) + def escape_value(self, value: str) -> str: """ Escape value for Android XML. 
diff --git a/python_twine/twine/formatters/apple_plural.py b/python_twine/twine/formatters/apple_plural.py index 9aa5d50..7f36b76 100644 --- a/python_twine/twine/formatters/apple_plural.py +++ b/python_twine/twine/formatters/apple_plural.py @@ -4,10 +4,11 @@ from typing import Dict, Optional, TextIO from xml.etree import ElementTree as ET +from xml.etree.ElementTree import Element from twine.formatters.apple import AppleFormatter from twine.placeholders import convert_placeholders_from_android_to_twine -from twine.twine_file import TwineDefinition, TwineSection +from twine.twine_file import TwineDefinition class ApplePluralFormatter(AppleFormatter): @@ -82,7 +83,6 @@ def format_plural_keys(self, key: str, plural_hash: Dict[str, str]) -> str: def read(self, io: TextIO, lang: str): """Read Apple .stringsdict file.""" - import twine from twine import TwineError content = io.read() @@ -128,94 +128,27 @@ def read(self, io: TextIO, lang: str): comment_text = None for j in range(i - 1, -1, -1): prev = children[j] - if isinstance(prev, ET.Comment): + # Handle comments (they have a callable tag function) + if callable(prev.tag): comment_text = prev.text.strip() if prev.text else None break elif prev.tag is not None: # Hit another element break # Extract plural hash - plural_hash = {} - - # Find value inside value_container - value_dict = None - value_children = list(value_container) - - for j, inner_key in enumerate(value_children): - if inner_key.tag == "key" and inner_key.text == "value": - if j + 1 < len(value_children): - value_dict = value_children[j + 1] - break - - if value_dict is not None and value_dict.tag == "dict": - # Extract plural entries - plural_children = list(value_dict) - j = 0 - - while j < len(plural_children): - pkey_elem = plural_children[j] - - if pkey_elem.tag == "key": - pkey = pkey_elem.text - - if pkey in TwineDefinition.PLURAL_KEYS: - if j + 1 < len(plural_children): - string_elem = plural_children[j + 1] - - if string_elem.tag == "string": - 
pvalue = string_elem.text or "" - plural_hash[pkey] = pvalue - - j += 1 + plural_hash = self.extract_plural_dict(value_container) if not plural_hash: i += 2 continue # Get or create definition - definition = self.twine_file.definitions_by_key.get(key_name) - - if not definition: - if self.options.get("consume_all"): - print( - f"Adding new plural definition '{key_name}' to twine file.", - file=twine.stdout, - ) - - # Find or create Uncategorized section - current_section = next( - ( - s - for s in self.twine_file.sections - if s.name == "Uncategorized" - ), - None, - ) - - if not current_section: - current_section = TwineSection("Uncategorized") - self.twine_file.sections.insert(0, current_section) - - definition = TwineDefinition(key_name) - current_section.definitions.append(definition) - self.twine_file.definitions_by_key[key_name] = definition - else: - print( - f"WARNING: '{key_name}' not found in twine file (plural).", - file=twine.stdout, - ) - i += 2 - continue - - # Merge plural translations - if lang not in definition.plural_translations: - definition.plural_translations[lang] = {} - - definition.plural_translations[lang].update(plural_hash) - - # Set base translation to 'other' if present - if "other" in plural_hash: - self.set_translation_for_key(key_name, lang, plural_hash["other"]) + if not self.match_default_lang_translation(key_name, lang, plural_hash): + self.set_translation_for_key_plural(key_name, lang, plural_hash, section_name=None) + + # Set base translation to 'other' if present + if "other" in plural_hash: + self.set_translation_for_key(key_name, lang, plural_hash["other"], section_name=None) # Set comment if requested if comment_text and self.options.get("consume_comments"): @@ -227,9 +160,75 @@ def read(self, io: TextIO, lang: str): i += 2 + def extract_plural_dict(self, value_element: Element) -> dict: + """ Parse next XML structure to extract key-value pairs: + + NSStringLocalizedFormatKey + %#@value@ + value + + 
NSStringFormatSpecTypeKey + NSStringPluralRuleType + NSStringFormatValueTypeKey + d + one + %d bookmark + other + %d bookmarks + + + """ + plural_dict = {} + + # Find value inside value_element + value_dict = None + value_children = list(value_element) + + for j, inner_key in enumerate(value_children): + if inner_key.tag == "key" and inner_key.text == "value": + if j + 1 < len(value_children): + value_dict = value_children[j + 1] + break + + if value_dict is not None and value_dict.tag == "dict": + # Extract plural entries + plural_children = list(value_dict) + j = 0 + + while j < len(plural_children): + pkey_elem = plural_children[j] + + if pkey_elem.tag == "key": + pkey = pkey_elem.text + + if pkey in TwineDefinition.PLURAL_KEYS: + if j + 1 < len(plural_children): + string_elem = plural_children[j + 1] + + if string_elem.tag == "string": + pvalue = string_elem.text or "" + plural_dict[pkey] = pvalue + + j += 1 + return plural_dict + def should_include_definition(self, definition, lang: str) -> bool: """Only include plural definitions.""" return ( definition.is_plural() and definition.plural_translation_for_lang(lang) is not None ) + + def match_default_lang_translation(self, key:str, lang:str, value:dict) -> bool: + """ Apple strings file for non-default language (es, de, fr, etc) contains + default value for not translated keys. That's why in Slovenian .strings + file you can find english words. + If `value` matches translation from default language, it means that + this string is not translated. 
+ """ + default_lang = self.twine_file.get_developer_language_code() + if default_lang is None: + return False + if default_lang == lang: + return False + return self.twine_file.definitions_by_key[key].plural_translations[default_lang] == value diff --git a/python_twine/twine/output_processor.py b/python_twine/twine/output_processor.py index 04cf8b7..7dce07c 100644 --- a/python_twine/twine/output_processor.py +++ b/python_twine/twine/output_processor.py @@ -78,6 +78,7 @@ def process(self, language: str) -> TwineFile: """ result = TwineFile() result.language_codes = self.twine_file.language_codes.copy() + fallbacks = self.fallback_languages(language) for section in self.twine_file.sections: new_section = TwineSection(section.name) @@ -99,7 +100,6 @@ def process(self, language: str) -> TwineFile: # Try fallback languages if no translation found if value is None and include_option != "translated": - fallbacks = self.fallback_languages(language) value = definition.translation_for_lang(fallbacks) # Skip if still no value @@ -112,8 +112,10 @@ def process(self, language: str) -> TwineFile: # Handle plural translations if definition.is_plural(): - if language not in new_definition.plural_translations: - new_definition.plural_translations[language] = {} + if language not in new_definition.plural_translations \ + and include_option != "translated": + lng = definition.find_plural_lang_fallback(fallbacks) + new_definition.plural_translations[language] = definition.plural_translation_for_lang(lng) # Ensure 'other' key exists for plurals if "other" not in new_definition.plural_translations[language]: diff --git a/python_twine/twine/runner.py b/python_twine/twine/runner.py index f5cabc3..53e5dd5 100644 --- a/python_twine/twine/runner.py +++ b/python_twine/twine/runner.py @@ -236,6 +236,7 @@ def consume_all_localization_files(self): raise TwineError(f"No files consumed from {input_path}") # Export to Twine. 
+ self.twine_file.optimize_duplicates() self.write_twine_data(self.options["twine_file"]) def find_translation_files(self, input_path: Path, formatter: AbstractFormatter) -> Iterable[Tuple[str, Path]]: diff --git a/python_twine/twine/twine_file.py b/python_twine/twine/twine_file.py index 7d9c0b7..63bccd2 100644 --- a/python_twine/twine/twine_file.py +++ b/python_twine/twine/twine_file.py @@ -3,8 +3,16 @@ """ import re -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Any +FALLBACK_LANGS_MAPPING = { + "zh-CN": "zh-Hans", # Chinese Simplified + "zh-TW": "zh-Hant", # Chinese Taiwan -> Chinese Traditional + "zh-MO": "zh-Hant", # Chinese Macau -> Chinese Traditional + "zh-HK": "zh-Hant", # Chinese Hong Kong -> Chinese Traditional +} + +REGIONAL_LANG_REGEX = re.compile(r"([a-zA-Z]{2})-[a-zA-Z]+") class TwineDefinition: """Represents a single translatable string definition.""" @@ -114,6 +122,13 @@ def translation_for_lang(self, lang: str | List[str]) -> Optional[str]: return None + def find_plural_lang_fallback(self, fallback_langs: List[str]) -> Optional[str]: + """ Find first language from `fallback_langs` which is in plural_translations. """ + return next( + filter(lambda lng: lng in self.plural_translations, + fallback_langs), + None) + def plural_translation_for_lang(self, lang: str) -> Optional[Dict[str, str]]: """ Get plural translations for a language, sorted by PLURAL_KEYS order. @@ -183,6 +198,49 @@ def get_developer_language_code(self) -> Optional[str]: return self.language_codes[0] return None + def optimize_duplicates(self): + """ Some regional languages have common items. Such as 'en-GB' and 'en'. + Deduplication: for each item and each language search the same translations + within fallback languages. Not all languages have fallbacks. 
+ """ + for key, definition in self.definitions_by_key.items(): + definition.translations = {lang:value for (lang, value) in definition.translations.items() \ + if not self.match_fallback_lang(definition.translations, lang, key, value)} + definition.plural_translations = {lang:value for (lang, value) in definition.plural_translations.items() \ + if not self.match_fallback_lang(definition.plural_translations, lang, key, value)} + + def match_fallback_lang(self, translations: dict, lang:str, key:str, value: Any) -> bool: + # TODO: this method is invoked for each key and lang. Optimize: cache all fallback languages in a dict + for fallback_lang in self.fallback_languages(lang): + if translations.get(fallback_lang) == value: + print(f"Warning: key '{key}' in lang '{lang}' matches value from fallback language '{fallback_lang}'") + return True + return False + + def fallback_languages(self, language: str) -> List[str]: + fallbacks = [] + + # Check specific mapping + if language in FALLBACK_LANGS_MAPPING: + fallbacks.append(FALLBACK_LANGS_MAPPING[language]) + + # Regional dialect fallbacks to generic language + # e.g., 'es-MX' -> 'es', 'pt-BR' -> 'pt' + match = REGIONAL_LANG_REGEX.match(language) + if match: + generic_language = match.group(1) + fallbacks.append(generic_language) + + # Remove duplicates while preserving order + seen = set() + result = [] + for lang in fallbacks: + if lang not in seen: + seen.add(lang) + result.append(lang) + + return result + def read(self, path: str): """ Read and parse a Twine file.