Skip to content

Commit 521dd53

Browse files
committed
Moved regular expressions to constants.
copy.deepcopy took a long time to finish; replaced it with a custom TwineDefinition copy method.
1 parent 911d49b commit 521dd53

File tree

7 files changed

+79
-55
lines changed

7 files changed

+79
-55
lines changed

python_twine/twine/formatters/__init__.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212
from twine.twine_file import TwineFile, TwineDefinition, TwineSection
1313
from twine.output_processor import OutputProcessor
1414

15+
16+
LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"
17+
18+
ONLY_LANGUAGE_AND_REGION_REGEX = re.compile(
19+
rf"^{LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE}$", re.IGNORECASE
20+
)
21+
22+
1523
def flatten(input: Optional[List[List[str]]]) -> List[str]:
1624
if input is None:
1725
return []
@@ -34,7 +42,6 @@ class AbstractFormatter(ABC):
3442
"""Base class for all format formatters."""
3543

3644
SUPPORTS_PLURAL = False
37-
LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE = r"[a-z]{2}(?:-[A-Za-z]{2})?"
3845

3946
def __init__(self):
4047
self.twine_file = TwineFile()
@@ -204,15 +211,12 @@ def set_comment_for_key(self, key: str, comment: str):
204211

205212
def determine_language_given_path(self, path: str) -> Optional[str]:
206213
"""Determine the language code from a file path."""
207-
only_language_and_region = re.compile(
208-
rf"^{self.LANGUAGE_CODE_WITH_OPTIONAL_REGION_CODE}$", re.IGNORECASE
209-
)
210214

211215
path_obj = Path(path)
212216
basename = path_obj.stem
213217

214218
# Check if basename is a language code
215-
if only_language_and_region.match(basename):
219+
if ONLY_LANGUAGE_AND_REGION_REGEX.match(basename):
216220
return basename
217221

218222
# Check if basename is in known language codes
@@ -222,7 +226,7 @@ def determine_language_given_path(self, path: str) -> Optional[str]:
222226
# Check path segments in reverse order
223227
parts = path_obj.parts
224228
for segment in reversed(parts):
225-
if only_language_and_region.match(segment):
229+
if ONLY_LANGUAGE_AND_REGION_REGEX.match(segment):
226230
return segment
227231

228232
return None

python_twine/twine/formatters/android.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33
"""
44

55
import re
6+
7+
REGEX_CDATA_BRACKET = re.compile(r"<(?!(\/?(\!\[CDATA)))")
8+
REGEX_TAG_BRACKET = re.compile(
9+
r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)")
10+
REGEX_RESORCE_IDENTIFIER = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)") # @[<package_name>:]<resource_type>/<resource_name>
11+
612
import html
713
from typing import Dict, Optional, TextIO
814
from xml.etree import ElementTree as ET
@@ -277,12 +283,10 @@ def inside_opening_tag(text: str, pos: int) -> bool:
277283

278284
if has_placeholders or self.options.get("escape_all_tags"):
279285
# Escape all < except <![CDATA
280-
angle_bracket_regex = re.compile(r"<(?!(\/?(\!\[CDATA)))")
286+
angle_bracket_regex = REGEX_CDATA_BRACKET
281287
else:
282288
# Escape < except supported tags
283-
angle_bracket_regex = re.compile(
284-
r"<(?!(\/?(b|em|i|cite|dfn|big|small|font|tt|s|strike|del|u|super|sub|ul|li|br|div|span|p|a|\!\[CDATA))\b)"
285-
)
289+
angle_bracket_regex = REGEX_TAG_BRACKET
286290

287291
def is_non_tag(result:str, i:int):
288292
if inside_cdata(result, i):
@@ -296,8 +300,7 @@ def is_non_tag(result:str, i:int):
296300
)
297301

298302
# escape non resource identifier @ signs (http://developer.android.com/guide/topics/resources/accessing-resources.html#ResourcesFromXml)
299-
resource_identifier_regex = re.compile(r"@(?!([a-z\.]+:)?[a-z+]+\/[a-zA-Z_]+)") # @[<package_name>:]<resource_type>/<resource_name>
300-
result = resource_identifier_regex.sub(r"\\@", result)
303+
result = REGEX_RESORCE_IDENTIFIER.sub(r"\\@", result)
301304

302305
return result
303306

python_twine/twine/formatters/django.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
from twine.formatters import AbstractFormatter
99

10+
COMMENT_REGEX = re.compile(r'^\s*#\. *"?(.*)"?$')
11+
SECTION_REGEX = re.compile(r'^\s*# -{9} (.+) -{9} #$')
12+
KEY_REGEX = re.compile(r'^msgid *"(.*)"$')
13+
VALUE_REGEX = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)
1014

1115
class DjangoFormatter(AbstractFormatter):
1216
"""Formatter for Django .po files."""
@@ -26,10 +30,6 @@ def default_file_name(self) -> str:
2630

2731
def read(self, io: TextIO, lang: str):
2832
"""Read Django .po file."""
29-
comment_regex = re.compile(r'^\s*#\. *"?(.*)"?$')
30-
section_regex = re.compile(r'^\s*# -{9} (.+) -{9} #$')
31-
key_regex = re.compile(r'^msgid *"(.*)"$')
32-
value_regex = re.compile(r'^msgstr *"(.*)"$', re.MULTILINE)
3333

3434
key = None
3535
value = None
@@ -38,24 +38,24 @@ def read(self, io: TextIO, lang: str):
3838

3939
for line in io:
4040
# Extract comment
41-
comment_match = comment_regex.match(line)
41+
comment_match = COMMENT_REGEX.match(line)
4242
if comment_match:
4343
comment = comment_match.group(1)
4444
continue
4545

46-
section_match = section_regex.match(line)
46+
section_match = SECTION_REGEX.match(line)
4747
if section_match:
4848
current_section = section_match.group(1)
4949
comment = None
5050
continue
5151

5252
# Extract key (msgid)
53-
key_match = key_regex.match(line)
53+
key_match = KEY_REGEX.match(line)
5454
if key_match:
5555
key = key_match.group(1).replace('\\"', '"')
5656

5757
# Extract value (msgstr)
58-
value_match = value_regex.match(line)
58+
value_match = VALUE_REGEX.match(line)
5959
if value_match:
6060
# Handle multiline strings
6161
value = value_match.group(1)

python_twine/twine/formatters/gettext.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
from twine.formatters import AbstractFormatter
99
from twine import __version__
1010

11+
COMMENT_REGEX = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)
12+
SECTION_REGEX = re.compile(r'# SECTION: (.+)$', re.MULTILINE)
13+
KEY_REGEX = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)
14+
VALUE_REGEX = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)
15+
1116

1217
class GettextFormatter(AbstractFormatter):
1318
"""Formatter for Gettext .po files."""
@@ -27,10 +32,6 @@ def default_file_name(self) -> str:
2732

2833
def read(self, io: TextIO, lang: str):
2934
"""Read Gettext .po file."""
30-
comment_regex = re.compile(r'#\.\s*"(.*)"$', re.MULTILINE)
31-
section_regex = re.compile(r'# SECTION: (.+)$', re.MULTILINE)
32-
key_regex = re.compile(r'msgctxt\s+"(.*)"$', re.MULTILINE)
33-
value_regex = re.compile(r'msgid\s+"(.*)"$', re.MULTILINE)
3435

3536
# Read file in chunks separated by double newlines
3637
content = io.read()
@@ -46,22 +47,22 @@ def read(self, io: TextIO, lang: str):
4647
comment = None
4748

4849
# Extract comment
49-
comment_match = comment_regex.search(item)
50+
comment_match = COMMENT_REGEX.search(item)
5051
if comment_match:
5152
comment = comment_match.group(1)
5253

5354
# Extract section
54-
section_match = section_regex.search(item)
55+
section_match = SECTION_REGEX.search(item)
5556
if section_match:
5657
current_sections = section_match.group(1)
5758

5859
# Extract key (msgctxt)
59-
key_match = key_regex.search(item)
60+
key_match = KEY_REGEX.search(item)
6061
if key_match:
6162
key = key_match.group(1).replace('\\"', '"')
6263

6364
# Extract value (msgid)
64-
value_match = value_regex.search(item)
65+
value_match = VALUE_REGEX.search(item)
6566
if value_match:
6667
# Handle multiline strings: "string"\n"continuation"
6768
value = value_match.group(1)

python_twine/twine/output_processor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
"""
44

55
import re
6-
import copy
76
from typing import Optional, List, Dict
7+
88
from twine.twine_file import TwineFile, TwineSection
99

1010

@@ -107,7 +107,7 @@ def process(self, language: str) -> TwineFile:
107107
continue
108108

109109
# Create new definition with the translation
110-
new_definition = copy.deepcopy(definition)
110+
new_definition = definition.copy_lang(language)
111111
new_definition.translations[language] = value
112112

113113
# Handle plural translations

python_twine/twine/placeholders.py

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,28 @@
1717
r"%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
1818
)
1919

20+
TWINE_PLACEHOLDER_REGEX = re.compile(
21+
r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")@"
22+
)
23+
24+
PLACEHOLDER_SYNTAX = (
25+
PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
26+
)
27+
SINGLE_PERCENT_REGEX = re.compile(r"([^%])(%)(?!(%|" + PLACEHOLDER_SYNTAX + r"))")
28+
29+
NON_NUMBERED_PLACEHOLDER_REGEX = re.compile(
30+
"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + ")"
31+
)
32+
33+
ANDROID_PLACEHOLDER_REGEX = re.compile(
34+
"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + ")s"
35+
)
36+
PYTHON_PLACEHOLDER_REGEX = re.compile(
37+
r"%\([a-zA-Z0-9_-]+\)"
38+
+ PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
39+
+ PLACEHOLDER_TYPES
40+
)
41+
2042

2143
def number_of_twine_placeholders(input_str: str) -> int:
2244
"""Count the number of printf-style placeholders in a string."""
@@ -25,10 +47,7 @@ def number_of_twine_placeholders(input_str: str) -> int:
2547

2648
def convert_twine_string_placeholder(input_str: str) -> str:
2749
"""Convert Twine string placeholder from %@ to %s."""
28-
pattern = re.compile(
29-
r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")@"
30-
)
31-
return pattern.sub(r"\1s", input_str)
50+
return TWINE_PLACEHOLDER_REGEX.sub(r"\1s", input_str)
3251

3352

3453
def convert_placeholders_from_twine_to_android(input_str: str) -> str:
@@ -53,21 +72,13 @@ def convert_placeholders_from_twine_to_android(input_str: str) -> str:
5372

5473
# Got placeholders -> need to double single percent signs
5574
# % -> %% (but %% -> %%, %d -> %d)
56-
placeholder_syntax = (
57-
PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES
58-
)
59-
single_percent_regex = re.compile(r"([^%])(%)(?!(%|" + placeholder_syntax + r"))")
60-
value = single_percent_regex.sub(r"\1%%", value)
75+
value = SINGLE_PERCENT_REGEX.sub(r"\1%%", value)
6176

6277
if num_placeholders < 2:
6378
return value
6479

6580
# Number placeholders if there are multiple
66-
non_numbered_placeholder_regex = re.compile(
67-
r"%(" + PLACEHOLDER_FLAGS_WIDTH_PRECISION_LENGTH + PLACEHOLDER_TYPES + r")"
68-
)
69-
70-
non_numbered_matches = non_numbered_placeholder_regex.findall(value)
81+
non_numbered_matches = NON_NUMBERED_PLACEHOLDER_REGEX.findall(value)
7182
num_non_numbered = len(non_numbered_matches)
7283

7384
if num_non_numbered == 0:
@@ -86,17 +97,14 @@ def number_placeholder(match):
8697
index += 1
8798
return f"%{index}${match.group(1)}"
8899

89-
value = non_numbered_placeholder_regex.sub(number_placeholder, value)
100+
value = NON_NUMBERED_PLACEHOLDER_REGEX.sub(number_placeholder, value)
90101

91102
return value
92103

93104

94105
def convert_placeholders_from_android_to_twine(input_str: str) -> str:
95106
"""Convert Android string placeholders (%s) to Twine format (%@)."""
96-
placeholder_regex = re.compile(
97-
r"(%" + PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH + r")s"
98-
)
99-
return placeholder_regex.sub(r"\1@", input_str)
107+
return ANDROID_PLACEHOLDER_REGEX.sub(r"\1@", input_str)
100108

101109

102110
def convert_placeholders_from_twine_to_flash(input_str: str) -> str:
@@ -132,9 +140,4 @@ def contains_python_specific_placeholder(input_str: str) -> bool:
132140
Python supports placeholders like %(amount)03d
133141
See https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
134142
"""
135-
pattern = re.compile(
136-
r"%\([a-zA-Z0-9_-]+\)"
137-
+ PLACEHOLDER_PARAMETER_FLAGS_WIDTH_PRECISION_LENGTH
138-
+ PLACEHOLDER_TYPES
139-
)
140-
return pattern.search(input_str) is not None
143+
return PYTHON_PLACEHOLDER_REGEX.search(input_str) is not None

python_twine/twine/twine_file.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import re
66
from typing import Dict, List, Optional, Any
7+
import copy
78

89
FALLBACK_LANGS_MAPPING = {
910
"zh-CN": "zh-Hans", # Chinese Simplified
@@ -158,6 +159,18 @@ def is_plural(self) -> bool:
158159
"""Check if this definition has plural translations."""
159160
return bool(self.plural_translations)
160161

162+
def copy_lang(self, lang: str) -> "TwineDefinition":
    """Return a new definition that carries only the data for ``lang``.

    The new definition is built from this definition's key and receives
    its ``_comment`` and a deep copy of its ``tags``. The regular and the
    plural translation for ``lang`` are carried over when present;
    entries for every other language are left out. No other attributes
    are copied by this method.

    The return annotation is quoted because the class name is not yet
    bound while the class body is being executed; a bare name would fail
    on interpreters that evaluate annotations eagerly.

    Args:
        lang: Language code whose translations should be carried over.

    Returns:
        A new ``TwineDefinition`` restricted to ``lang``.
    """
    new_def = TwineDefinition(self.key)
    new_def._comment = self._comment
    # Deep copy so later changes to the copy's tags never reach the source.
    new_def.tags = copy.deepcopy(self.tags)

    if lang in self.translations:
        new_def.translations[lang] = self.translations[lang]

    # NOTE(review): the per-language values are shared by reference, not
    # copied; if plural entries are mutable containers, confirm callers
    # do not modify them in place.
    if lang in self.plural_translations:
        new_def.plural_translations[lang] = self.plural_translations[lang]

    return new_def
173+
161174

162175
class TwineSection:
163176
"""Represents a section grouping multiple definitions."""

0 commit comments

Comments
 (0)