organicmaps · strump · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 10, 2025
diff --git a/python_twine/twine/formatters/__init__.py b/python_twine/twine/formatters/__init__.py
@@ -12,6 +12,14 @@
 from twine.twine_file import TwineFile, TwineDefinition, TwineSection
 from twine.output_processor import OutputProcessor
 
+
+LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"  # regex fragment, e.g. "en" or "en-US"
+# NOTE(review): this fragment was AbstractFormatter.LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE before; confirm nothing still reads it through the class.
+ONLY_LANGUAGE_AND_REGION_REGEX = re.compile(  # fragment anchored with ^...$, case-insensitive; compiled once at import
+    rf"^{LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE}$", re.IGNORECASE
+)
+
+
 def flatten(input: Optional[List[List[str]]]) -> List[str]:
     if input is None:
         return []
@@ -34,7 +42,6 @@ class AbstractFormatter(ABC):
     """Base class for all format formatters."""
 
     SUPPORTS_PLURAL = False
-    LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"
 
     def __init__(self):
         self.twine_file = TwineFile()
@@ -204,15 +211,12 @@ def set_comment_for_key(self, key: str, comment: str):
 
     def determine_language_given_path(self, path: str) -> Optional[str]:
         """Determine the language code from a file path."""
-        only_language_and_region = re.compile(
-            rf"^{self.LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE}$", re.IGNORECASE
-        )
 
         path_obj = Path(path)
         basename = path_obj.stem
 
         # Check if basename is a language code
-        if only_language_and_region.match(basename):
+        if ONLY_LANGUAGE_AND_REGION_REGEX.match(basename):
             return basename
 
         # Check if basename is in known language codes
@@ -222,7 +226,7 @@ def determine_language_given_path(self, path: str) -> Optional[str]:
         # Check path segments in reverse order
         parts = path_obj.parts
         for segment in reversed(parts):
-            if only_language_and_region.match(segment):
+            if ONLY_LANGUAGE_AND_REGION_REGEX.match(segment):
                 return segment
 
         return None

diff --git a/python_twine/twine/formatters/android.py b/python_twine/twine/formatters/android.py
@@ -16,6 +16,11 @@
     number_of_twine_placeholders,
 )
 
+REGEX_CDATA_BRACKET = re.compile(r"<(?!(\/?(\!\[CDATA)))")  # "<" that does not begin "<![CDATA" (or "</![CDATA")
+REGEX_TAG_BRACKET = re.compile(
+    r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)")  # "<" that does not begin one of the listed tags (opening or closing) or CDATA
+REGEX_RESORCE_IDENTIFIER = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)")  # "@" that does not begin @[<package_name>:]<resource_type>/<resource_name>; NOTE(review): "RESORCE" is a typo of "RESOURCE" - rename together with its use site
+
 
 def inner_xml(node:Element) -> str:
     # Get inner XML (text + nested elements)
@@ -277,12 +282,10 @@ def inside_opening_tag(text: str, pos: int) -> bool:
 
         if has_placeholders or self.options.get("escape_all_tags"):
             # Escape all < except <![CDATA
-            angle_bracket_regex = re.compile(r"<(?!(\/?(\!\[CDATA)))")
+            angle_bracket_regex = REGEX_CDATA_BRACKET
         else:
             # Escape < except supported tags
-            angle_bracket_regex = re.compile(
-                r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)"
-            )
+            angle_bracket_regex = REGEX_TAG_BRACKET
 
         def is_non_tag(result:str, i:int):
             if inside_cdata(result, i):
@@ -296,8 +299,7 @@ def is_non_tag(result:str, i:int):
         )
 
         # escape non resource identifier @ signs (http://developer.android.com/guide/topics/resources/accessing-resources.html#ResourcesFromXml)
-        resource_identifier_regex = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)")  # @[<package_name>:]<resource_type>/<resource_name>
-        result = resource_identifier_regex.sub(r"\\@", result)
+        result = REGEX_RESORCE_IDENTIFIER.sub(r"\\@", result)
 
         return result
 

diff --git a/python_twine/twine/formatters/django.py b/python_twine/twine/formatters/django.py
@@ -7,6 +7,10 @@
 
 from twine.formatters import AbstractFormatter
 
+COMMENT_REGEX = re.compile(r'^\s*#\. *"?(.*)"?$')  # "#." comment line; group 1 is the text ((.*) is greedy, so an optional closing quote ends up inside group 1)
+SECTION_REGEX = re.compile(r'^\s*# -{9} (.+) -{9} #$')  # section banner "# --------- <name> --------- #"; group 1 is the name
+KEY_REGEX = re.compile(r'^msgid *"(.*)"$')  # msgid "<key>" line; group 1 is the raw (still escaped) key
+VALUE_REGEX = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)  # msgstr "<value>" line; group 1 is the value
 
 class DjangoFormatter(AbstractFormatter):
     """Formatter for Django .po files."""
@@ -26,10 +30,6 @@ def default_file_name(self) -> str:
 
     def read(self, io: TextIO, lang: str):
         """Read Django .po file."""
-        comment_regex = re.compile(r'^\s*#\. *"?(.*)"?$')
-        section_regex = re.compile(r'^\s*# -{9} (.+) -{9} #$')
-        key_regex = re.compile(r'^msgid *"(.*)"$')
-        value_regex = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)
 
         key = None
         value = None
@@ -38,24 +38,24 @@ def read(self, io: TextIO, lang: str):
 
         for line in io:
             # Extract comment
-            comment_match = comment_regex.match(line)
+            comment_match = COMMENT_REGEX.match(line)
             if comment_match:
                 comment = comment_match.group(1)
                 continue
 
-            section_match = section_regex.match(line)
+            section_match = SECTION_REGEX.match(line)
             if section_match:
                 current_section = section_match.group(1)
                 comment = None
                 continue
 
             # Extract key (msgid)
-            key_match = key_regex.match(line)
+            key_match = KEY_REGEX.match(line)
             if key_match:
                 key = key_match.group(1).replace('\\"', '"')
 
             # Extract value (msgstr)
-            value_match = value_regex.match(line)
+            value_match = VALUE_REGEX.match(line)
             if value_match:
                 # Handle multiline strings
                 value = value_match.group(1)

diff --git a/python_twine/twine/formatters/gettext.py b/python_twine/twine/formatters/gettext.py
@@ -8,6 +8,11 @@
 from twine.formatters import AbstractFormatter
 from twine import __version__
 
+COMMENT_REGEX = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)  # #. "<comment>" line inside a chunk; group 1 is the comment
+SECTION_REGEX = re.compile(r'# SECTION: (.+)$', re.MULTILINE)  # "# SECTION: <name>" line; group 1 is the name
+KEY_REGEX = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)  # msgctxt "<key>" line; the context is read as the key
+VALUE_REGEX = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)  # msgid "<value>" line; the msgid is read as the value
+
 
 class GettextFormatter(AbstractFormatter):
     """Formatter for Gettext .po files."""
@@ -27,10 +32,6 @@ def default_file_name(self) -> str:
 
     def read(self, io: TextIO, lang: str):
         """Read Gettext .po file."""
-        comment_regex = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)
-        section_regex = re.compile(r'# SECTION: (.+)$', re.MULTILINE)
-        key_regex = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)
-        value_regex = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)
 
         # Read file in chunks separated by double newlines
         content = io.read()
@@ -46,22 +47,22 @@ def read(self, io: TextIO, lang: str):
             comment = None
 
             # Extract comment
-            comment_match = comment_regex.search(item)
+            comment_match = COMMENT_REGEX.search(item)
             if comment_match:
                 comment = comment_match.group(1)
 
             # Extract section
-            section_match = section_regex.search(item)
+            section_match = SECTION_REGEX.search(item)
             if section_match:
                 current_sections = section_match.group(1)
 
             # Extract key (msgctxt)
-            key_match = key_regex.search(item)
+            key_match = KEY_REGEX.search(item)
             if key_match:
                 key = key_match.group(1).replace('\\"', '"')
 
             # Extract value (msgid)
-            value_match = value_regex.search(item)
+            value_match = VALUE_REGEX.search(item)
             if value_match:
                 # Handle multiline strings: "string"\n"continuation"
                 value = value_match.group(1)

diff --git a/python_twine/twine/formatters/jquery.py b/python_twine/twine/formatters/jquery.py
@@ -21,6 +21,10 @@ def extension(self) -> str:
     def default_file_name(self) -> str:
         return "localize.json"
 
+    def output_path_for_language(self, lang: str) -> str:
+        """Return the output file name for ``lang``: the language code plus ``.json``."""
+        return f"{lang}.json"
+
     def determine_language_given_path(self, path: str) -> Optional[str]:
         """Extract language from filename like strings-en-US.json."""
         from pathlib import Path

diff --git a/python_twine/twine/output_processor.py b/python_twine/twine/output_processor.py
@@ -3,8 +3,8 @@
 """
 
 import re
-import copy
 from typing import Optional, List, Dict
+
 from twine.twine_file import TwineFile, TwineSection
 
 
@@ -107,7 +107,7 @@ def process(self, language: str) -> TwineFile:
                     continue
 
                 # Create new definition with the translation
-                new_definition = copy.deepcopy(definition)
+                new_definition = definition.copy_lang(language)
                 new_definition.translations[language] = value
 
                 # Handle plural translations

diff --git a/python_twine/twine/placeholders.py b/python_twine/twine/placeholders.py
@@ -17,6 +17,28 @@
     r"%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
 )
 
+TWINE_PLACEHOLDER_REGEX = re.compile(  # "%...@"; group 1 is everything before the "@"
+    r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")@"
+)
+
+PLACEHOLDER_SYNTAX = PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
+# Matches a "%" that follows a non-"%" character and starts neither "%%" nor a placeholder.
+# NOTE(review): group 1 needs a preceding character, so a "%" at index 0 never matches - confirm that is intended.
+SINGLE_PERCENT_REGEX = re.compile(r"([^%])(%)(?!(%|" + PLACEHOLDER_SYNTAX + r"))")
+
+NON_NUMBERED_PLACEHOLDER_REGEX = re.compile(  # group 1 is the placeholder text after the "%"
+    r"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + r")"
+)
+
+ANDROID_PLACEHOLDER_REGEX = re.compile(  # "%...s"; group 1 is everything before the "s"
+    r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")s"
+)
+PYTHON_PLACEHOLDER_REGEX = re.compile(  # Python named placeholder such as %(amount)03d
+    r"%\([a-zA-Z0-9_-]+\)"
+    + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
+    + PLACEHOLDER_TYPES
+)
+
 
 def number_of_twine_placeholders(input_str: str) -> int:
     """Count the number of printf-style placeholders in a string."""
@@ -25,10 +47,7 @@ def number_of_twine_placeholders(input_str: str) -> int:
 
 def convert_twine_string_placeholder(input_str: str) -> str:
     """Convert Twine string placeholder from %@ to %s."""
-    pattern = re.compile(
-        r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")@"
-    )
-    return pattern.sub(r"\1s", input_str)
+    return TWINE_PLACEHOLDER_REGEX.sub(r"\1s", input_str)
 
 
 def convert_placeholders_from_twine_to_android(input_str: str) -> str:
@@ -53,21 +72,13 @@ def convert_placeholders_from_twine_to_android(input_str: str) -> str:
 
     # Got placeholders -> need to double single percent signs
     # % -> %% (but %% -> %%, %d -> %d)
-    placeholder_syntax = (
-        PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
-    )
-    single_percent_regex = re.compile(r"([^%])(%)(?!(%|" + placeholder_syntax + r"))")
-    value = single_percent_regex.sub(r"\1%%", value)
+    value = SINGLE_PERCENT_REGEX.sub(r"\1%%", value)
 
     if num_placeholders < 2:
         return value
 
     # Number placeholders if there are multiple
-    non_numbered_placeholder_regex = re.compile(
-        r"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + r")"
-    )
-
-    non_numbered_matches = non_numbered_placeholder_regex.findall(value)
+    non_numbered_matches = NON_NUMBERED_PLACEHOLDER_REGEX.findall(value)
     num_non_numbered = len(non_numbered_matches)
 
     if num_non_numbered == 0:
@@ -86,17 +97,14 @@ def number_placeholder(match):
         index += 1
         return f"%{index}${match.group(1)}"
 
-    value = non_numbered_placeholder_regex.sub(number_placeholder, value)
+    value = NON_NUMBERED_PLACEHOLDER_REGEX.sub(number_placeholder, value)
 
     return value
 
 
 def convert_placeholders_from_android_to_twine(input_str: str) -> str:
     """Convert Android string placeholders (%s) to Twine format (%@)."""
-    placeholder_regex = re.compile(
-        r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")s"
-    )
-    return placeholder_regex.sub(r"\1@", input_str)
+    return ANDROID_PLACEHOLDER_REGEX.sub(r"\1@", input_str)
 
 
 def convert_placeholders_from_twine_to_flash(input_str: str) -> str:
@@ -132,9 +140,4 @@ def contains_python_specific_placeholder(input_str: str) -> bool:
     Python supports placeholders like %(amount)03d
     See https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
     """
-    pattern = re.compile(
-        r"%\([a-zA-Z0-9_-]+\)"
-        + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
-        + PLACEHOLDER_TYPES
-    )
-    return pattern.search(input_str) is not None
+    return PYTHON_PLACEHOLDER_REGEX.search(input_str) is not None
diff --git a/python_twine/twine/twine_file.py b/python_twine/twine/twine_file.py
@@ -4,6 +4,7 @@
 
 import re
 from typing import Dict, List, Optional, Any
+import copy
 
 FALLBACK_LANGS_MAPPING = {
     "zh-CN": "zh-Hans",  # Chinese Simplified
@@ -158,6 +159,18 @@ def is_plural(self) -> bool:
         """Check if this definition has plural translations."""
         return bool(self.plural_translations)
 
+    def copy_lang(self, lang: str) -> "TwineDefinition":
+        """Return a new definition holding only ``lang``'s translation entries."""
+        new_def = TwineDefinition(self.key)
+        new_def._comment = self._comment
+        new_def.tags = copy.deepcopy(self.tags)
+        if lang in self.translations:
+            new_def.translations[lang] = self.translations[lang]
+        if lang in self.plural_translations:
+            plurals = copy.deepcopy(self.plural_translations[lang])  # like tags: never alias self's data
+            new_def.plural_translations[lang] = plurals
+        return new_def
+
 
 class TwineSection:
     """Represents a section grouping multiple definitions."""