Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions python_twine/twine/formatters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
from twine.twine_file import TwineFile, TwineDefinition, TwineSection
from twine.output_processor import OutputProcessor


# Pattern fragment: a two-letter language code, optionally followed by "-" and
# a two-letter region code (for example "en" or "en-GB").
LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"

# Anchored, case-insensitive matcher for a string that consists of nothing but
# a language code with an optional region. Compiled once at import time so the
# callers do not rebuild it on every lookup.
ONLY_LANGUAGE_AND_REGION_REGEX = re.compile(
    "^" + LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE + "$",
    flags=re.IGNORECASE,
)


def flatten(input: Optional[List[List[str]]]) -> List[str]:
if input is None:
return []
Expand All @@ -34,7 +42,6 @@ class AbstractFormatter(ABC):
"""Base class for all format formatters."""

SUPPORTS_PLURAL = False
LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"

def __init__(self):
self.twine_file = TwineFile()
Expand Down Expand Up @@ -204,15 +211,12 @@ def set_comment_for_key(self, key: str, comment: str):

def determine_language_given_path(self, path: str) -> Optional[str]:
"""Determine the language code from a file path."""
only_language_and_region = re.compile(
rf"^{self.LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE}$", re.IGNORECASE
)

path_obj = Path(path)
basename = path_obj.stem

# Check if basename is a language code
if only_language_and_region.match(basename):
if ONLY_LANGUAGE_AND_REGION_REGEX.match(basename):
return basename

# Check if basename is in known language codes
Expand All @@ -222,7 +226,7 @@ def determine_language_given_path(self, path: str) -> Optional[str]:
# Check path segments in reverse order
parts = path_obj.parts
for segment in reversed(parts):
if only_language_and_region.match(segment):
if ONLY_LANGUAGE_AND_REGION_REGEX.match(segment):
return segment

return None
Expand Down
14 changes: 8 additions & 6 deletions python_twine/twine/formatters/android.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
number_of_twine_placeholders,
)

# Precompiled patterns used when escaping Android string values.

# Matches a "<" unless it is followed by an optional "/" and "![CDATA",
# i.e. every angle bracket except the one that starts a CDATA marker.
REGEX_CDATA_BRACKET = re.compile(r"<(?!(\/?(\!\[CDATA)))")
# Matches a "<" unless it is followed by an optional "/" and one of the listed
# tag names (or "![CDATA") ending at a word boundary, so brackets that open or
# close a listed tag are left alone.
REGEX_TAG_BRACKET = re.compile(
r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)")
# Matches an "@" that does not start a resource reference of the shape given
# in the trailing comment.
# NOTE(review): "RESORCE" looks like a typo for "RESOURCE"; the name is used
# where the substitution is applied, so it must be renamed in both places.
REGEX_RESORCE_IDENTIFIER = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)") # @[<package_name>:]<resource_type>/<resource_name>


def inner_xml(node:Element) -> str:
# Get inner XML (text + nested elements)
Expand Down Expand Up @@ -277,12 +282,10 @@ def inside_opening_tag(text: str, pos: int) -> bool:

if has_placeholders or self.options.get("escape_all_tags"):
# Escape all < except <![CDATA
angle_bracket_regex = re.compile(r"<(?!(\/?(\!\[CDATA)))")
angle_bracket_regex = REGEX_CDATA_BRACKET
else:
# Escape < except supported tags
angle_bracket_regex = re.compile(
r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)"
)
angle_bracket_regex = REGEX_TAG_BRACKET

def is_non_tag(result:str, i:int):
if inside_cdata(result, i):
Expand All @@ -296,8 +299,7 @@ def is_non_tag(result:str, i:int):
)

# escape non resource identifier @ signs (http://developer.android.com/guide/topics/resources/accessing-resources.html#ResourcesFromXml)
resource_identifier_regex = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)") # @[<package_name>:]<resource_type>/<resource_name>
result = resource_identifier_regex.sub(r"\\@", result)
result = REGEX_RESORCE_IDENTIFIER.sub(r"\\@", result)

return result

Expand Down
16 changes: 8 additions & 8 deletions python_twine/twine/formatters/django.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@

from twine.formatters import AbstractFormatter

# Precompiled line patterns for parsing Django .po files.

# Extracted-comment line: optional leading whitespace, "#.", optional spaces,
# an optional opening quote, then the comment text in group 1.
# NOTE(review): "(.*)" is greedy, so the trailing '"?' can only match the empty
# string and a closing quote ends up inside group 1 - confirm this is intended.
COMMENT_REGEX = re.compile(r'^\s*#\. *"?(.*)"?$')
# Section header line of the form "# --------- <name> --------- #" (nine
# dashes on each side); group 1 is the section name.
SECTION_REGEX = re.compile(r'^\s*# -{9} (.+) -{9} #$')
# 'msgid "<text>"' line; group 1 is the text between the outer quotes.
KEY_REGEX = re.compile(r'^msgid *"(.*)"$')
# 'msgstr "<text>"' line; group 1 is the text between the outer quotes.
# re.MULTILINE lets "^" and "$" match at every line boundary of the input.
VALUE_REGEX = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)

class DjangoFormatter(AbstractFormatter):
"""Formatter for Django .po files."""
Expand All @@ -26,10 +30,6 @@ def default_file_name(self) -> str:

def read(self, io: TextIO, lang: str):
"""Read Django .po file."""
comment_regex = re.compile(r'^\s*#\. *"?(.*)"?$')
section_regex = re.compile(r'^\s*# -{9} (.+) -{9} #$')
key_regex = re.compile(r'^msgid *"(.*)"$')
value_regex = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)

key = None
value = None
Expand All @@ -38,24 +38,24 @@ def read(self, io: TextIO, lang: str):

for line in io:
# Extract comment
comment_match = comment_regex.match(line)
comment_match = COMMENT_REGEX.match(line)
if comment_match:
comment = comment_match.group(1)
continue

section_match = section_regex.match(line)
section_match = SECTION_REGEX.match(line)
if section_match:
current_section = section_match.group(1)
comment = None
continue

# Extract key (msgid)
key_match = key_regex.match(line)
key_match = KEY_REGEX.match(line)
if key_match:
key = key_match.group(1).replace('\\"', '"')

# Extract value (msgstr)
value_match = value_regex.match(line)
value_match = VALUE_REGEX.match(line)
if value_match:
# Handle multiline strings
value = value_match.group(1)
Expand Down
17 changes: 9 additions & 8 deletions python_twine/twine/formatters/gettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
from twine.formatters import AbstractFormatter
from twine import __version__

# Precompiled patterns for parsing Gettext .po entries. All of them use
# re.MULTILINE so that "$" matches at the end of any line of the searched text.

# Extracted comment: '#. "<text>"'; group 1 is the text between the quotes.
COMMENT_REGEX = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)
# Section marker: "# SECTION: <name>"; group 1 is the section name.
SECTION_REGEX = re.compile(r'# SECTION: (.+)$', re.MULTILINE)
# 'msgctxt "<text>"'; group 1 is read as the definition key.
KEY_REGEX = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)
# 'msgid "<text>"'; group 1 is read as the first line of the value.
VALUE_REGEX = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)


class GettextFormatter(AbstractFormatter):
"""Formatter for Gettext .po files."""
Expand All @@ -27,10 +32,6 @@ def default_file_name(self) -> str:

def read(self, io: TextIO, lang: str):
"""Read Gettext .po file."""
comment_regex = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)
section_regex = re.compile(r'# SECTION: (.+)$', re.MULTILINE)
key_regex = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)
value_regex = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)

# Read file in chunks separated by double newlines
content = io.read()
Expand All @@ -46,22 +47,22 @@ def read(self, io: TextIO, lang: str):
comment = None

# Extract comment
comment_match = comment_regex.search(item)
comment_match = COMMENT_REGEX.search(item)
if comment_match:
comment = comment_match.group(1)

# Extract section
section_match = section_regex.search(item)
section_match = SECTION_REGEX.search(item)
if section_match:
current_sections = section_match.group(1)

# Extract key (msgctxt)
key_match = key_regex.search(item)
key_match = KEY_REGEX.search(item)
if key_match:
key = key_match.group(1).replace('\\"', '"')

# Extract value (msgid)
value_match = value_regex.search(item)
value_match = VALUE_REGEX.search(item)
if value_match:
# Handle multiline strings: "string"\n"continuation"
value = value_match.group(1)
Expand Down
4 changes: 4 additions & 0 deletions python_twine/twine/formatters/jquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def extension(self) -> str:
def default_file_name(self) -> str:
return "localize.json"

def output_path_for_language(self, lang: str) -> str:
    """Build the per-language output file name: the language code plus ".json"."""
    file_name = "{}.json".format(lang)
    return file_name

def determine_language_given_path(self, path: str) -> Optional[str]:
"""Extract language from filename like strings-en-US.json."""
from pathlib import Path
Expand Down
4 changes: 2 additions & 2 deletions python_twine/twine/output_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"""

import re
import copy
from typing import Optional, List, Dict

from twine.twine_file import TwineFile, TwineSection


Expand Down Expand Up @@ -107,7 +107,7 @@ def process(self, language: str) -> TwineFile:
continue

# Create new definition with the translation
new_definition = copy.deepcopy(definition)
new_definition = definition.copy_lang(language)
new_definition.translations[language] = value

# Handle plural translations
Expand Down
53 changes: 28 additions & 25 deletions python_twine/twine/placeholders.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,28 @@
r"%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
)

# Precompiled placeholder patterns shared by the conversion helpers below.
# They are assembled from the PLACEHOLDER_* fragments defined earlier in this
# module.

# A Twine string placeholder: group 1 is "%" plus the optional
# parameter/flags/width/precision/length part, followed by a literal "@".
TWINE_PLACEHOLDER_REGEX = re.compile(
r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")@"
)

# Everything that may follow the "%" of a complete placeholder.
PLACEHOLDER_SYNTAX = (
PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
)
# A "%" (group 2) preceded by one non-"%" character (group 1) and not followed
# by another "%" or by placeholder syntax. Because group 1 must consume a
# character, a "%" at the very start of the string is not matched.
SINGLE_PERCENT_REGEX = re.compile(r"([^%])(%)(?!(%|" + PLACEHOLDER_SYNTAX + r"))")

# A placeholder built from PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH rather than
# the *_PARAMETER_* fragment; group 1 is everything after the "%".
# NOTE(review): presumably the fragment without the positional "n$" part -
# confirm against its definition.
NON_NUMBERED_PLACEHOLDER_REGEX = re.compile(
"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + ")"
)

# An Android string placeholder: same shape as TWINE_PLACEHOLDER_REGEX but
# terminated by "s" instead of "@".
ANDROID_PLACEHOLDER_REGEX = re.compile(
"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + ")s"
)
# A Python mapping-key placeholder such as "%(amount)03d": "%(" + key + ")"
# followed by the usual placeholder syntax.
PYTHON_PLACEHOLDER_REGEX = re.compile(
r"%\([a-zA-Z0-9_-]+\)"
+ PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
+ PLACEHOLDER_TYPES
)


def number_of_twine_placeholders(input_str: str) -> int:
"""Count the number of printf-style placeholders in a string."""
Expand All @@ -25,10 +47,7 @@ def number_of_twine_placeholders(input_str: str) -> int:

def convert_twine_string_placeholder(input_str: str) -> str:
    """Convert Twine string placeholders from ``%@`` to ``%s``.

    Any parameter/flags/width/precision/length part between the ``%`` and the
    ``@`` is kept unchanged; only the trailing ``@`` is replaced by ``s``.

    Args:
        input_str: Text that may contain Twine string placeholders.

    Returns:
        The text with every matched placeholder rewritten.
    """
    # Use the module-level precompiled pattern instead of rebuilding the same
    # regex on every call (the previous body also left an unreachable second
    # return statement behind).
    return TWINE_PLACEHOLDER_REGEX.sub(r"\1s", input_str)


def convert_placeholders_from_twine_to_android(input_str: str) -> str:
Expand All @@ -53,21 +72,13 @@ def convert_placeholders_from_twine_to_android(input_str: str) -> str:

# Got placeholders -> need to double single percent signs
# % -> %% (but %% -> %%, %d -> %d)
placeholder_syntax = (
PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
)
single_percent_regex = re.compile(r"([^%])(%)(?!(%|" + placeholder_syntax + r"))")
value = single_percent_regex.sub(r"\1%%", value)
value = SINGLE_PERCENT_REGEX.sub(r"\1%%", value)

if num_placeholders < 2:
return value

# Number placeholders if there are multiple
non_numbered_placeholder_regex = re.compile(
r"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + r")"
)

non_numbered_matches = non_numbered_placeholder_regex.findall(value)
non_numbered_matches = NON_NUMBERED_PLACEHOLDER_REGEX.findall(value)
num_non_numbered = len(non_numbered_matches)

if num_non_numbered == 0:
Expand All @@ -86,17 +97,14 @@ def number_placeholder(match):
index += 1
return f"%{index}${match.group(1)}"

value = non_numbered_placeholder_regex.sub(number_placeholder, value)
value = NON_NUMBERED_PLACEHOLDER_REGEX.sub(number_placeholder, value)

return value


def convert_placeholders_from_android_to_twine(input_str: str) -> str:
    """Convert Android string placeholders (``%s``) to Twine format (``%@``).

    Any parameter/flags/width/precision/length part between the ``%`` and the
    ``s`` is kept unchanged; only the trailing ``s`` is replaced by ``@``.

    Args:
        input_str: Text that may contain Android string placeholders.

    Returns:
        The text with every matched placeholder rewritten.
    """
    # Use the module-level precompiled pattern instead of rebuilding the same
    # regex on every call (the previous body also left an unreachable second
    # return statement behind).
    return ANDROID_PLACEHOLDER_REGEX.sub(r"\1@", input_str)


def convert_placeholders_from_twine_to_flash(input_str: str) -> str:
Expand Down Expand Up @@ -132,9 +140,4 @@ def contains_python_specific_placeholder(input_str: str) -> bool:
Python supports placeholders like %(amount)03d
See https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
"""
pattern = re.compile(
r"%\([a-zA-Z0-9_-]+\)"
+ PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
+ PLACEHOLDER_TYPES
)
return pattern.search(input_str) is not None
return PYTHON_PLACEHOLDER_REGEX.search(input_str) is not None
13 changes: 13 additions & 0 deletions python_twine/twine/twine_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import re
from typing import Dict, List, Optional, Any
import copy

FALLBACK_LANGS_MAPPING = {
"zh-CN": "zh-Hans", # Chinese Simplified
Expand Down Expand Up @@ -158,6 +159,18 @@ def is_plural(self) -> bool:
"""Check if this definition has plural translations."""
return bool(self.plural_translations)

def copy_lang(self, lang: str) -> "TwineDefinition":
    """Build a new definition that carries only the data for ``lang``.

    The key and comment are carried over and the tags are deep-copied. The
    regular and plural translations are transferred only when this
    definition has an entry for ``lang``; every other language is left out.

    Args:
        lang: Language code whose translations should be kept.

    Returns:
        A new ``TwineDefinition`` restricted to ``lang``.
    """
    clone = TwineDefinition(self.key)
    clone._comment = self._comment
    clone.tags = copy.deepcopy(self.tags)

    # NOTE(review): the per-language entries are assigned by reference, not
    # copied - confirm no caller mutates a plural entry in place.
    for source, target in (
        (self.translations, clone.translations),
        (self.plural_translations, clone.plural_translations),
    ):
        if lang in source:
            target[lang] = source[lang]

    return clone


class TwineSection:
"""Represents a section grouping multiple definitions."""
Expand Down