diff --git a/.gitignore b/.gitignore
index d9ba8ca..e28cd06 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
*.gem
.idea/
+*.iml
*.lock
.ruby-version
.DS_Store
diff --git a/python_twine/tests/fixtures/formatter_android_plurals.xml b/python_twine/tests/fixtures/formatter_android_plurals.xml
new file mode 100644
index 0000000..cccd95b
--- /dev/null
+++ b/python_twine/tests/fixtures/formatter_android_plurals.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+ - %d bookmark
+ - %d bookmarks
+
+
+ - %d track
+ - %d tracks
+
+
diff --git a/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict b/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict
new file mode 100644
index 0000000..38d6a8d
--- /dev/null
+++ b/python_twine/tests/fixtures/formatter_apple_plurals.stringsdict
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+ bookmarks_places
+
+ NSStringLocalizedFormatKey
+ %#@value@
+ value
+
+ NSStringFormatSpecTypeKey
+ NSStringPluralRuleType
+ NSStringFormatValueTypeKey
+ d
+ one
+ %d bookmark
+ other
+ %d bookmarks
+
+
+
+ tracks
+
+ NSStringLocalizedFormatKey
+ %#@value@
+ value
+
+ NSStringFormatSpecTypeKey
+ NSStringPluralRuleType
+ NSStringFormatValueTypeKey
+ d
+ one
+ %d track
+ other
+ %d tracks
+
+
+
+
+
\ No newline at end of file
diff --git a/python_twine/tests/test_formatters.py b/python_twine/tests/test_formatters.py
index c977591..db308a8 100644
--- a/python_twine/tests/test_formatters.py
+++ b/python_twine/tests/test_formatters.py
@@ -157,35 +157,19 @@ def test_double_quotes_not_modified(self, formatter):
def test_escape_ampersand(self, formatter):
"""Test ampersand escaping."""
- formatter.set_translation_for_key("key1", "en", "this & that", "Section A")
- assert (
- formatter.twine_file.definitions_by_key["key1"].translations["en"]
- == "this & that"
- )
+ assert AndroidFormatter.unescape_value("this & that") == "this & that"
def test_escape_less_than(self, formatter):
"""Test less-than escaping."""
- formatter.set_translation_for_key("key1", "en", "this < that", "Section B")
- assert (
- formatter.twine_file.definitions_by_key["key1"].translations["en"]
- == "this < that"
- )
+ assert AndroidFormatter.unescape_value("this < that") == "this < that"
def test_escape_apostrophe(self, formatter):
"""Test apostrophe escaping."""
- formatter.set_translation_for_key("key1", "en", "it\\'s complicated", "Section C")
- assert (
- formatter.twine_file.definitions_by_key["key1"].translations["en"]
- == "it's complicated"
- )
+ assert AndroidFormatter.unescape_value("it\\'s complicated") == "it's complicated"
def test_placeholder_conversion(self, formatter):
"""Test placeholder conversion from %s to %@."""
- formatter.set_translation_for_key("key1", "en", "value %s", "Section D")
- assert (
- formatter.twine_file.definitions_by_key["key1"].translations["en"]
- == "value %@"
- )
+ assert AndroidFormatter.unescape_value("value %s") == "value %@"
def test_writer_escape_ampersand(self, formatter):
"""Test ampersand escaping."""
diff --git a/python_twine/tests/test_formatters_plural.py b/python_twine/tests/test_formatters_plural.py
index 51bc249..c88c0ed 100644
--- a/python_twine/tests/test_formatters_plural.py
+++ b/python_twine/tests/test_formatters_plural.py
@@ -6,7 +6,97 @@
import pytest
+from twine.formatters.apple_plural import ApplePluralFormatter
from twine.twine_file import TwineFile, TwineDefinition, TwineSection
+from twine.formatters.android import AndroidFormatter
+
+class TestAndroidPluralFormatter:
+    """Test Android XML formatter with <plurals> tags."""
+
+ @pytest.fixture
+ def formatter(self):
+ """Create formatter with empty TwineFile."""
+ twine_file = TwineFile()
+ formatter = AndroidFormatter()
+ formatter.twine_file = twine_file
+ formatter.options = {"consume_all": True, "consume_comments": True}
+ return formatter
+
+ @pytest.fixture
+ def fixtures_dir(self):
+ """Get fixtures directory path."""
+ return Path(__file__).parent / "fixtures"
+
+ @pytest.fixture
+ def twine_file(self):
+ # Prepare TwineFile data
+ twine_file = TwineFile()
+ num_edits_def = TwineDefinition("num_edits")
+
+ twine_file.definitions_by_key["num_edits"] = num_edits_def
+ twine_file.language_codes = ["en", "de"]
+ twine_file.sections = [TwineSection("OSM")]
+ twine_file.sections[0].definitions.append(num_edits_def)
+
+ # Put plural translation
+ num_edits_def.plural_translations["en"] = {
+ "one": "%d edit",
+ "other": "%d edits"
+ }
+ num_edits_def.translations["en"] = "%d edits"
+
+ num_edits_def.plural_translations["de"] = {
+ "zero": "%d Bearbeitungen",
+ "one": "%d Bearbeitung",
+ "other": "%d Bearbeitungen"
+ }
+ num_edits_def.translations["de"] = "%d Bearbeitungen"
+
+ return twine_file
+
+ def test_read_format(self, formatter, fixtures_dir):
+        """Test reading Android XML format with <plurals> tags."""
+ fixture_path = fixtures_dir / "formatter_android_plurals.xml"
+ with open(fixture_path, "r", encoding="utf-8") as f:
+ formatter.read(f, "en")
+
+ twine_file = formatter.twine_file
+
+ assert "bookmarks_places" in twine_file.definitions_by_key
+ translations1 = twine_file.definitions_by_key["bookmarks_places"].plural_translations
+ assert translations1 == {"en": {"one": "%d bookmark", "other": "%d bookmarks"}}
+
+ assert "tracks" in twine_file.definitions_by_key
+ translations2 = twine_file.definitions_by_key["tracks"].plural_translations
+ assert translations2 == {"en": {"one": "%d track", "other": "%d tracks"}}
+
+ def test_write_format(self, formatter, twine_file):
+ formatter.twine_file = twine_file
+
+ en_plural_content = formatter.format_file("en")
+ assert en_plural_content == """
+
+
+
+
+ - %d edit
+ - %d edits
+
+
+"""
+
+ de_plural_content = formatter.format_file("de")
+ assert de_plural_content == """
+
+
+
+
+ - %d Bearbeitungen
+ - %d Bearbeitung
+ - %d Bearbeitungen
+
+
+"""
class TestTwineFilePlural:
@@ -71,3 +161,122 @@ def test_read_plurals(self, fixtures_dir):
"many": "%d меток",
"other": "%d меток"
}
+
+
+class TestApplePluralFormatter:
+ @pytest.fixture
+ def formatter(self) -> ApplePluralFormatter:
+ """Create formatter with empty TwineFile."""
+ formatter = ApplePluralFormatter()
+ formatter.options = {"consume_all": True, "consume_comments": True}
+ return formatter
+
+ @pytest.fixture
+ def fixtures_dir(self) -> Path:
+ """Get fixtures directory path."""
+ return Path(__file__).parent / "fixtures"
+
+ @pytest.fixture
+ def twine_file(self) -> TwineFile:
+ # Prepare TwineFile data
+ twine_file = TwineFile()
+ num_edits_def = TwineDefinition("num_edits")
+
+ twine_file.definitions_by_key["num_edits"] = num_edits_def
+ twine_file.language_codes = ["en", "de"]
+ twine_file.sections = [TwineSection("OSM")]
+ twine_file.sections[0].definitions.append(num_edits_def)
+
+ # Put plural translation
+ num_edits_def.plural_translations["en"] = {
+ "one": "%d edit",
+ "other": "%d edits"
+ }
+ num_edits_def.translations["en"] = "%d edits"
+
+ num_edits_def.plural_translations["de"] = {
+ "zero": "%d Bearbeitungen",
+ "one": "%d Bearbeitung",
+ "other": "%d Bearbeitungen"
+ }
+ num_edits_def.translations["de"] = "%d Bearbeitungen"
+
+ return twine_file
+
+ def test_read_stringsdict(self, formatter:ApplePluralFormatter, fixtures_dir):
+        """Test reading plural entries from an Apple .stringsdict file."""
+ fixture_path = fixtures_dir / "formatter_apple_plurals.stringsdict"
+ with open(fixture_path, "r", encoding="utf-8") as f:
+ formatter.read(f, "en")
+
+ twine_file = formatter.twine_file
+
+ assert "bookmarks_places" in twine_file.definitions_by_key
+ translations1 = twine_file.definitions_by_key["bookmarks_places"].plural_translations
+ assert translations1 == {"en": {"one": "%d bookmark", "other": "%d bookmarks"}}
+
+ assert "tracks" in twine_file.definitions_by_key
+ translations2 = twine_file.definitions_by_key["tracks"].plural_translations
+ assert translations2 == {"en": {"one": "%d track", "other": "%d tracks"}}
+
+
+ def test_write_plural_format(self, formatter, twine_file):
+ formatter.twine_file = twine_file
+
+ en_plural_content = formatter.format_file("en")
+ assert en_plural_content == """
+
+
+
+
+
+
+\tnum_edits
+\t
+\t\tNSStringLocalizedFormatKey
+\t\t%#@value@
+\t\tvalue
+\t\t
+\t\t\tNSStringFormatSpecTypeKey
+\t\t\tNSStringPluralRuleType
+\t\t\tNSStringFormatValueTypeKey
+\t\t\td
+\t\t\tone
+\t\t\t%d edit
+\t\t\tother
+\t\t\t%d edits
+\t\t
+\t
+
+
+"""
+
+ de_plural_content = formatter.format_file("de")
+ assert de_plural_content == """
+
+
+
+
+
+
+\tnum_edits
+\t
+\t\tNSStringLocalizedFormatKey
+\t\t%#@value@
+\t\tvalue
+\t\t
+\t\t\tNSStringFormatSpecTypeKey
+\t\t\tNSStringPluralRuleType
+\t\t\tNSStringFormatValueTypeKey
+\t\t\td
+\t\t\tzero
+\t\t\t%d Bearbeitungen
+\t\t\tone
+\t\t\t%d Bearbeitung
+\t\t\tother
+\t\t\t%d Bearbeitungen
+\t\t
+\t
+
+
+"""
diff --git a/python_twine/twine/cli.py b/python_twine/twine/cli.py
index 355a224..4e942ef 100644
--- a/python_twine/twine/cli.py
+++ b/python_twine/twine/cli.py
@@ -74,6 +74,9 @@ def create_parser() -> argparse.ArgumentParser:
)
consume_all.add_argument("twine_file", help="Path to Twine data file")
consume_all.add_argument("input_path", help="Input directory path")
+ consume_all.add_argument(
+ "-n", "--file-name", help="Input file name (default: format-specific)"
+ )
CLI._add_common_arguments(consume_all)
CLI._add_consume_arguments(consume_all)
diff --git a/python_twine/twine/formatters/__init__.py b/python_twine/twine/formatters/__init__.py
index a66d3ba..dda6212 100644
--- a/python_twine/twine/formatters/__init__.py
+++ b/python_twine/twine/formatters/__init__.py
@@ -18,6 +18,16 @@ def flatten(input: List[List[str]]) -> List[str]:
flat += group
return flat
+def find_dict_diff(dict1: dict, dict2: dict):
+    """Yield the differences between two dicts, ordered by key.
+
+    Each item is a ``(key, value1, value2)`` tuple; ``None`` stands in for the
+    side on which the key is absent. Keys mapped to equal values in both dicts
+    are not reported.
+    """
+    for name in sorted(dict1.keys() | dict2.keys()):
+        in_first = name in dict1
+        in_second = name in dict2
+        if in_first and not in_second:
+            yield name, dict1[name], None
+        elif in_second and not in_first:
+            yield name, None, dict2[name]
+        else:
+            left, right = dict1[name], dict2[name]
+            if left != right:
+                yield name, left, right
+
class AbstractFormatter(ABC):
"""Base class for all format formatters."""
@@ -103,6 +113,56 @@ def set_translation_for_key(self, key: str, lang: str, value: str, section_name:
if lang not in self.twine_file.language_codes:
self.twine_file.add_language_code(lang)
+    def set_translation_for_key_plural(self, key: str, lang: str, values: Dict[str, str], section_name:Optional[str]):
+        """Set plural translations for a key in a specific language.
+
+        Plural counterpart of set_translation_for_key(): `values` maps a plural
+        quantity (e.g. "one", "other") to its translated string.
+
+        A key that is missing from the twine file is added only when the
+        `consume_all` option is set; otherwise a warning is printed. `lang` is
+        registered as a language code of the twine file if it is not known yet.
+
+        Args:
+            key: Key of the definition to update.
+            lang: Language code the translations belong to.
+            values: Mapping of plural quantity to translation.
+            section_name: Section that receives a newly created definition;
+                "Uncategorized" is used when it is None or empty.
+        """
+        # Normalize newlines
+        values = {quantity: text.replace("\n", "\\n") for (quantity, text) in values.items()}
+
+        if key in self.twine_file.definitions_by_key:
+            definition = self.twine_file.definitions_by_key[key]
+            reference = None
+
+            if definition.reference_key:
+                reference = self.twine_file.definitions_by_key.get(
+                    definition.reference_key
+                )
+
+            # Only set if no reference or value differs from reference
+            if not reference or values != reference.plural_translations.get(lang):
+                existing = definition.plural_translations.setdefault(lang, {})
+                for quantity, value_old, value_new in find_dict_diff(existing, values):
+                    # The update() below keeps quantities that are missing from
+                    # `values` and overrides nothing when a quantity is new, so
+                    # only quantities whose text really changes are reported.
+                    if value_old is None or value_new is None:
+                        continue
+                    msg = (f"Translation '{value_new}' overrides existing translation '{value_old}' "
+                           f"for key '{key}:{quantity}' and lang '{lang}'")
+                    self.add_validation_error(msg)
+                existing.update(values)
+                if "tags" in self.options:
+                    definition.add_tags(flatten(self.options["tags"]))
+
+        elif self.options.get("consume_all"):
+            print(f"Adding new definition '{key}' to twine file.", file=twine.stdout)
+
+            current_section = self.get_section_or_create(section_name or "Uncategorized")
+
+            current_definition = TwineDefinition(key)
+            current_section.definitions.append(current_definition)
+            if "tags" in self.options:
+                current_definition.add_tags(flatten(self.options["tags"]))
+
+            self.twine_file.definitions_by_key[key] = current_definition
+            current_definition.plural_translations[lang] = values
+
+        else:
+            print(f"WARNING: '{key}' not found in twine file.", file=twine.stdout)
+
+        # Add language code if not present
+        if lang not in self.twine_file.language_codes:
+            self.twine_file.add_language_code(lang)
+
def get_section(self, section_name) -> Optional[TwineSection]:
# Find or create a section by name
return next(
@@ -114,7 +174,7 @@ def get_section_or_create(self, section_name) -> TwineSection:
if not section:
section = TwineSection(section_name)
- self.twine_file.sections.insert(0, section)
+ self.twine_file.sections.append(section)
return section
diff --git a/python_twine/twine/formatters/android.py b/python_twine/twine/formatters/android.py
index 4df002c..3f49992 100644
--- a/python_twine/twine/formatters/android.py
+++ b/python_twine/twine/formatters/android.py
@@ -6,6 +6,7 @@
import html
from typing import Dict, Optional, TextIO
from xml.etree import ElementTree as ET
+from xml.etree.ElementTree import Element
from twine.formatters import AbstractFormatter
from twine.formatters.tools import replace_with_filter
@@ -16,14 +17,25 @@
)
+def inner_xml(node:Element) -> str:
+    """Return the inner XML of `node` as a string.
+
+    The result is the text that precedes the first child element followed by
+    every child element serialized with ``method="html"``.
+    """
+    pieces = [node.text or ""]
+    pieces.extend(
+        ET.tostring(child, encoding="unicode", method="html") for child in node
+    )
+    return "".join(pieces)
+
class AndroidFormatter(AbstractFormatter):
"""Formatter for Android XML string resources."""
SUPPORTS_PLURAL = True
# Language code mappings for Android
- LANG_CODES = {
+ ANDROID_TO_TWINE_LANG_CODES = {
"zh": "zh-Hans",
+ "zh-TW": "zh-Hant",
"zh-CN": "zh-Hans",
"zh-HK": "zh-Hant",
# Legacy language codes
@@ -32,6 +44,11 @@ class AndroidFormatter(AbstractFormatter):
"ji": "yi",
}
+ TWINE_TO_ANDROID_LANG_CODES = {
+ "zh-Hans": "zh",
+ "zh-Hant": "zh-TW",
+ }
+
def format_name(self) -> str:
return "android"
@@ -70,7 +87,7 @@ def determine_language_given_path(self, path: str) -> Optional[str]:
)
if match:
lang = match.group(1).replace("-r", "-")
- return self.LANG_CODES.get(lang, lang)
+ return self.ANDROID_TO_TWINE_LANG_CODES.get(lang, lang)
return super().determine_language_given_path(path)
@@ -79,38 +96,12 @@ def output_path_for_language(self, lang: str) -> str:
if self.twine_file.language_codes and lang == self.twine_file.language_codes[0]:
return "values"
else:
+ lang = self.TWINE_TO_ANDROID_LANG_CODES.get(lang, lang)
# Convert en-US to values-en-rUS
result = f"values-{lang}"
result = re.sub(r"-([A-Z])", r"-r\1", result)
return result
- def set_translation_for_key(self, key: str, lang: str, value: str, section_name: Optional[str]):
- """Set translation, handling Android-specific unescaping."""
- # Unescape HTML entities
- value = html.unescape(value)
-
- # Unescape Android escapes
- value = value.replace("\\'", "'")
- value = value.replace('\\"', '"')
-
- # Convert placeholders from Android to Twine
- value = convert_placeholders_from_android_to_twine(value)
-
- # Unescape @ signs
- value = value.replace("\\@", "@")
-
- # Unescape \n
- value = value.replace("\n\\n", "\n")
-
- # Convert \u0020 space escapes
- def replace_spaces(match):
- spaces = match.group(0)
- return " " * (len(spaces) // 6)
-
- value = re.sub(r"(\\u0020)+", replace_spaces, value)
-
- super().set_translation_for_key(key, lang, value, section_name)
-
def read(self, io: TextIO, lang: str):
"""Read Android XML strings file."""
content = io.read()
@@ -144,18 +135,33 @@ def read(self, io: TextIO, lang: str):
if not key:
continue
- # Get inner XML (text + nested elements)
- # Start with the text before any child element
- value = child.text or ""
+ value = self.unescape_value(inner_xml(child))
+ self.set_translation_for_key(key, lang, value, current_section)
- # Add each child element's XML and tail
- for subelement in child:
- value += ET.tostring(subelement, encoding="unicode", method="html")
+ if comment:
+ self.set_comment_for_key(key, comment)
+ comment = None
+
+ # Handle plural strings elements:
+            # <plurals name="bookmarks_places">
+            #     <item quantity="one">%d bookmark</item>
+            #     <item quantity="other">%d bookmarks</item>
+            # </plurals>
+ elif child.tag == "plurals":
+ key = child.get("name")
+ if not key:
+ continue
- # Add tail text if any (text after the last child element)
- # Note: child.tail is text AFTER the element, not inside
+ plural_values = {}
+ for subelement in child:
+ if subelement.tag == "item":
+ quantity = subelement.get("quantity")
+ if not quantity:
+ continue
+ plural_values[quantity] = self.unescape_value(inner_xml(subelement))
- self.set_translation_for_key(key, lang, value, current_section)
+ if plural_values:
+ self.set_translation_for_key_plural(key, lang, plural_values, current_section)
if comment:
self.set_comment_for_key(key, comment)
@@ -200,6 +206,31 @@ def format_plural_keys(self, key: str, plural_hash: Dict[str, str]) -> str:
result += "\n "
return result
+    @staticmethod
+    def unescape_value(value: str) -> str:
+        """Convert a raw Android resource string into its Twine form.
+
+        Applied in this order: decode HTML entities, drop the backslash in
+        front of single and double quotes, convert Android placeholders to
+        Twine placeholders, unescape the at sign, collapse a newline that is
+        followed by a literal backslash-n, and turn every backslash-u0020
+        escape into a space.
+        """
+        text = html.unescape(value)
+
+        # Android escapes for quotes
+        text = text.replace("\\'", "'").replace('\\"', '"')
+
+        text = convert_placeholders_from_android_to_twine(text)
+
+        # Escaped at sign, then a real newline followed by a literal \n
+        text = text.replace("\\@", "@").replace("\n\\n", "\n")
+
+        # Each \u0020 escape is 6 characters long and stands for one space.
+        return re.sub(
+            r"(\\u0020)+",
+            lambda run: " " * (len(run.group(0)) // 6),
+            text,
+        )
+
def escape_value(self, value: str) -> str:
"""
Escape value for Android XML.
diff --git a/python_twine/twine/formatters/apple_plural.py b/python_twine/twine/formatters/apple_plural.py
index 9aa5d50..7f36b76 100644
--- a/python_twine/twine/formatters/apple_plural.py
+++ b/python_twine/twine/formatters/apple_plural.py
@@ -4,10 +4,11 @@
from typing import Dict, Optional, TextIO
from xml.etree import ElementTree as ET
+from xml.etree.ElementTree import Element
from twine.formatters.apple import AppleFormatter
from twine.placeholders import convert_placeholders_from_android_to_twine
-from twine.twine_file import TwineDefinition, TwineSection
+from twine.twine_file import TwineDefinition
class ApplePluralFormatter(AppleFormatter):
@@ -82,7 +83,6 @@ def format_plural_keys(self, key: str, plural_hash: Dict[str, str]) -> str:
def read(self, io: TextIO, lang: str):
"""Read Apple .stringsdict file."""
- import twine
from twine import TwineError
content = io.read()
@@ -128,94 +128,27 @@ def read(self, io: TextIO, lang: str):
comment_text = None
for j in range(i - 1, -1, -1):
prev = children[j]
- if isinstance(prev, ET.Comment):
+ # Handle comments (they have a callable tag function)
+ if callable(prev.tag):
comment_text = prev.text.strip() if prev.text else None
break
elif prev.tag is not None: # Hit another element
break
# Extract plural hash
- plural_hash = {}
-
- # Find value inside value_container
- value_dict = None
- value_children = list(value_container)
-
- for j, inner_key in enumerate(value_children):
- if inner_key.tag == "key" and inner_key.text == "value":
- if j + 1 < len(value_children):
- value_dict = value_children[j + 1]
- break
-
- if value_dict is not None and value_dict.tag == "dict":
- # Extract plural entries
- plural_children = list(value_dict)
- j = 0
-
- while j < len(plural_children):
- pkey_elem = plural_children[j]
-
- if pkey_elem.tag == "key":
- pkey = pkey_elem.text
-
- if pkey in TwineDefinition.PLURAL_KEYS:
- if j + 1 < len(plural_children):
- string_elem = plural_children[j + 1]
-
- if string_elem.tag == "string":
- pvalue = string_elem.text or ""
- plural_hash[pkey] = pvalue
-
- j += 1
+ plural_hash = self.extract_plural_dict(value_container)
if not plural_hash:
i += 2
continue
# Get or create definition
- definition = self.twine_file.definitions_by_key.get(key_name)
-
- if not definition:
- if self.options.get("consume_all"):
- print(
- f"Adding new plural definition '{key_name}' to twine file.",
- file=twine.stdout,
- )
-
- # Find or create Uncategorized section
- current_section = next(
- (
- s
- for s in self.twine_file.sections
- if s.name == "Uncategorized"
- ),
- None,
- )
-
- if not current_section:
- current_section = TwineSection("Uncategorized")
- self.twine_file.sections.insert(0, current_section)
-
- definition = TwineDefinition(key_name)
- current_section.definitions.append(definition)
- self.twine_file.definitions_by_key[key_name] = definition
- else:
- print(
- f"WARNING: '{key_name}' not found in twine file (plural).",
- file=twine.stdout,
- )
- i += 2
- continue
-
- # Merge plural translations
- if lang not in definition.plural_translations:
- definition.plural_translations[lang] = {}
-
- definition.plural_translations[lang].update(plural_hash)
-
- # Set base translation to 'other' if present
- if "other" in plural_hash:
- self.set_translation_for_key(key_name, lang, plural_hash["other"])
+ if not self.match_default_lang_translation(key_name, lang, plural_hash):
+ self.set_translation_for_key_plural(key_name, lang, plural_hash, section_name=None)
+
+ # Set base translation to 'other' if present
+ if "other" in plural_hash:
+ self.set_translation_for_key(key_name, lang, plural_hash["other"], section_name=None)
# Set comment if requested
if comment_text and self.options.get("consume_comments"):
@@ -227,9 +160,75 @@ def read(self, io: TextIO, lang: str):
i += 2
+    def extract_plural_dict(self, value_element: Element) -> dict:
+        """Collect the plural forms stored in one .stringsdict entry.
+
+        `value_element` is expected to look like this:
+
+            <dict>
+                <key>NSStringLocalizedFormatKey</key>
+                <string>%#@value@</string>
+                <key>value</key>
+                <dict>
+                    <key>NSStringFormatSpecTypeKey</key>
+                    <string>NSStringPluralRuleType</string>
+                    <key>NSStringFormatValueTypeKey</key>
+                    <string>d</string>
+                    <key>one</key>
+                    <string>%d bookmark</string>
+                    <key>other</key>
+                    <string>%d bookmarks</string>
+                </dict>
+            </dict>
+
+        Returns a mapping of plural quantity to string, e.g.
+        {"one": "%d bookmark", "other": "%d bookmarks"}. The mapping is empty
+        when there is no <dict> after <key>value</key> or when it holds none
+        of the quantities listed in TwineDefinition.PLURAL_KEYS.
+        """
+        entries = list(value_element)
+
+        # The plural rules live in the element right after <key>value</key>.
+        rules = None
+        for position, entry in enumerate(entries):
+            if entry.tag == "key" and entry.text == "value":
+                if position + 1 < len(entries):
+                    rules = entries[position + 1]
+                break
+
+        found = {}
+        if rules is None or rules.tag != "dict":
+            return found
+
+        # Walk adjacent (key, string) pairs and keep the known plural quantities.
+        rule_items = list(rules)
+        for name_elem, text_elem in zip(rule_items, rule_items[1:]):
+            if name_elem.tag != "key":
+                continue
+            quantity = name_elem.text
+            if quantity in TwineDefinition.PLURAL_KEYS and text_elem.tag == "string":
+                found[quantity] = text_elem.text or ""
+        return found
+
def should_include_definition(self, definition, lang: str) -> bool:
"""Only include plural definitions."""
return (
definition.is_plural()
and definition.plural_translation_for_lang(lang) is not None
)
+
+    def match_default_lang_translation(self, key:str, lang:str, value:dict) -> bool:
+        """Tell whether `value` merely repeats the developer-language plurals.
+
+        Apple strings files for a non-default language (es, de, fr, etc) contain
+        the default value for keys that are not translated; that is why English
+        words can show up in e.g. a Slovenian .strings file. A `value` equal to
+        the translation from the default language therefore means that the
+        string is not translated.
+
+        Args:
+            key: Key of the definition being read.
+            lang: Language code of the file being read.
+            value: Plural translations (quantity -> string) read for `key`.
+
+        Returns:
+            True when `value` equals the developer-language plural translations
+            of `key`. False when there is no developer language, when `lang` is
+            the developer language, or when `key` has no developer-language
+            plural translations to compare against.
+        """
+        default_lang = self.twine_file.get_developer_language_code()
+        if default_lang is None or default_lang == lang:
+            return False
+
+        # A key that is not in the twine file yet (or that lacks plurals for the
+        # developer language) has nothing to compare against; indexing the
+        # dicts directly raised KeyError in that case.
+        definition = self.twine_file.definitions_by_key.get(key)
+        if definition is None:
+            return False
+        default_value = definition.plural_translations.get(default_lang)
+        return default_value is not None and default_value == value
diff --git a/python_twine/twine/output_processor.py b/python_twine/twine/output_processor.py
index 04cf8b7..7dce07c 100644
--- a/python_twine/twine/output_processor.py
+++ b/python_twine/twine/output_processor.py
@@ -78,6 +78,7 @@ def process(self, language: str) -> TwineFile:
"""
result = TwineFile()
result.language_codes = self.twine_file.language_codes.copy()
+ fallbacks = self.fallback_languages(language)
for section in self.twine_file.sections:
new_section = TwineSection(section.name)
@@ -99,7 +100,6 @@ def process(self, language: str) -> TwineFile:
# Try fallback languages if no translation found
if value is None and include_option != "translated":
- fallbacks = self.fallback_languages(language)
value = definition.translation_for_lang(fallbacks)
# Skip if still no value
@@ -112,8 +112,10 @@ def process(self, language: str) -> TwineFile:
# Handle plural translations
if definition.is_plural():
- if language not in new_definition.plural_translations:
- new_definition.plural_translations[language] = {}
+ if language not in new_definition.plural_translations \
+ and include_option != "translated":
+ lng = definition.find_plural_lang_fallback(fallbacks)
+ new_definition.plural_translations[language] = definition.plural_translation_for_lang(lng)
# Ensure 'other' key exists for plurals
if "other" not in new_definition.plural_translations[language]:
diff --git a/python_twine/twine/runner.py b/python_twine/twine/runner.py
index f5cabc3..53e5dd5 100644
--- a/python_twine/twine/runner.py
+++ b/python_twine/twine/runner.py
@@ -236,6 +236,7 @@ def consume_all_localization_files(self):
raise TwineError(f"No files consumed from {input_path}")
# Export to Twine.
+ self.twine_file.optimize_duplicates()
self.write_twine_data(self.options["twine_file"])
def find_translation_files(self, input_path: Path, formatter: AbstractFormatter) -> Iterable[Tuple[str, Path]]:
diff --git a/python_twine/twine/twine_file.py b/python_twine/twine/twine_file.py
index 7d9c0b7..63bccd2 100644
--- a/python_twine/twine/twine_file.py
+++ b/python_twine/twine/twine_file.py
@@ -3,8 +3,16 @@
"""
import re
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Any
+FALLBACK_LANGS_MAPPING = {
+ "zh-CN": "zh-Hans", # Chinese Simplified
+ "zh-TW": "zh-Hant", # Chinese Taiwan -> Chinese Traditional
+ "zh-MO": "zh-Hant", # Chinese Macau -> Chinese Traditional
+ "zh-HK": "zh-Hant", # Chinese Hong Kong -> Chinese Traditional
+}
+
+REGIONAL_LANG_REGEX = re.compile(r"([a-zA-Z]{2})-[a-zA-Z]+")
class TwineDefinition:
"""Represents a single translatable string definition."""
@@ -114,6 +122,13 @@ def translation_for_lang(self, lang: str | List[str]) -> Optional[str]:
return None
+    def find_plural_lang_fallback(self, fallback_langs: List[str]) -> Optional[str]:
+        """Return the first language of `fallback_langs` that has an entry in
+        plural_translations, or None when none of them has one.
+        """
+        for candidate in fallback_langs:
+            if candidate in self.plural_translations:
+                return candidate
+        return None
+
def plural_translation_for_lang(self, lang: str) -> Optional[Dict[str, str]]:
"""
Get plural translations for a language, sorted by PLURAL_KEYS order.
@@ -183,6 +198,49 @@ def get_developer_language_code(self) -> Optional[str]:
return self.language_codes[0]
return None
+    def optimize_duplicates(self):
+        """Drop translations that only repeat a fallback language.
+
+        Some regional languages have items in common with another language,
+        such as 'en-GB' and 'en'. For every definition, a regular or plural
+        translation is removed when match_fallback_lang() finds the same value
+        in one of the fallback languages. Not all languages have fallbacks.
+        """
+        for key, definition in self.definitions_by_key.items():
+            singular = definition.translations
+            definition.translations = {
+                lang: text
+                for lang, text in singular.items()
+                if not self.match_fallback_lang(singular, lang, key, text)
+            }
+
+            plural = definition.plural_translations
+            definition.plural_translations = {
+                lang: forms
+                for lang, forms in plural.items()
+                if not self.match_fallback_lang(plural, lang, key, forms)
+            }
+
+    def match_fallback_lang(self, translations: dict, lang:str, key:str, value: Any) -> bool:
+        """Check whether a fallback language of `lang` already holds `value`.
+
+        `translations` maps a language code to its value. On the first fallback
+        language with an equal value a warning is printed and True is returned;
+        False is returned when no fallback language matches.
+        """
+        # TODO: this method is invoked for each key and lang. Optimize: cache all fallback languages in a dict
+        duplicate_of = next(
+            (
+                candidate
+                for candidate in self.fallback_languages(lang)
+                if translations.get(candidate) == value
+            ),
+            None,
+        )
+        if duplicate_of is None:
+            return False
+        print(f"Warning: key '{key}' in lang '{lang}' matches value from fallback language '{duplicate_of}'")
+        return True
+
+    def fallback_languages(self, language: str) -> List[str]:
+        """List the languages to fall back to for `language`.
+
+        The entry from FALLBACK_LANGS_MAPPING (if any) comes first, followed by
+        the generic language of a regional code matched by REGIONAL_LANG_REGEX,
+        e.g., 'es-MX' -> 'es', 'pt-BR' -> 'pt'. The result has no duplicates.
+        """
+        candidates = []
+
+        # Specific mapping takes precedence over the generic language.
+        if language in FALLBACK_LANGS_MAPPING:
+            candidates.append(FALLBACK_LANGS_MAPPING[language])
+
+        regional = REGIONAL_LANG_REGEX.match(language)
+        if regional:
+            candidates.append(regional.group(1))
+
+        # dict keys keep insertion order, so this drops duplicates in place.
+        return list(dict.fromkeys(candidates))
+
def read(self, path: str):
"""
Read and parse a Twine file.