Skip to content

Commit 140e9d1

Browse files
authored
Merge pull request #10 from hypercision/removeBackslash
Add option for removing back slash from input data
2 parents 83789e7 + 58a0993 commit 140e9d1

16 files changed

+491
-78
lines changed

CONTRIBUTING.md

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@ Instructions:
1010

1111
## Testing
1212

13-
### vcrpy
14-
15-
In addition to [`pytest`](https://docs.pytest.org/), we also use the [`vcrpy`](https://vcrpy.readthedocs.io/) library when writing our tests.
16-
1713
### tox
1814

1915
To run the tests, install the project dependencies in a [virtual environment](https://docs.python.org/3/library/venv.html#module-venv)
@@ -41,6 +37,17 @@ pip install "<package_name>"
4137
pip freeze > requirements.txt
4238
```
4339

40+
### vcrpy
41+
42+
In addition to [`pytest`](https://docs.pytest.org/), we also use the [`vcrpy`](https://vcrpy.readthedocs.io/) library when writing our tests.
43+
44+
If you need to update or regenerate a cassette for a test, e.g. [`tests/cassettes/test_translate_missing_messages_without_sorting.yml`](https://github.com/hypercision/i18ntools/blob/main/tests/cassettes/test_translate_missing_messages_without_sorting.yml), then:
45+
46+
- delete the cassette yml file
47+
- update the `os.environ["TRANSLATOR_API_SUBSCRIPTION_KEY"]` line in the test so it is set to a real API key (but do not commit this change)
48+
- run the tests with `tox`. This will regenerate the cassette yml file
49+
- revert the `os.environ["TRANSLATOR_API_SUBSCRIPTION_KEY"]` line in the test so it is no longer a real API key
50+
4451
### Editable installation
4552

4653
Alternatively, you can perform an [editable installation](https://setuptools.pypa.io/en/latest/userguide/development_mode.html)

src/i18ntools/parse_i18n_file.py

Lines changed: 96 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,33 @@
11
#!/usr/bin/env python
22
"""Parses an i18n Java properties file and returns the data as a dictionary.
33
4-
The benefit of this method over using configparser is that the whitespace in
4+
If called with remove_backslashes=False, then the whitespace in
55
multiline values is preserved.
66
7+
If called with remove_backslashes=True, then configparser is used
8+
and the whitespace and backslashes in multiline values are removed.
9+
710
Note that this method does not work properly for multiline translations
811
with an "=" character in them.
9-
10-
See related question: https://stackoverflow.com/questions/76047202
1112
"""
1213

1314
import argparse
15+
import configparser
16+
import tempfile
1417
from pathlib import Path
1518

1619

17-
def parse_i18n_file(file_path):
20+
def parse_i18n_file(file_path, remove_backslashes=False):
1821
"""Parses an i18n Java properties file and returns the data as a dictionary.
1922
2023
Note that this method does not work properly for multiline translations
2124
with an "=" character in them.
2225
2326
Keyword arguments:
2427
file_path -- filepath of the i18n Java properties file to parse
28+
remove_backslashes -- when true, the data returned will not have the
29+
backslashes used in multiline values.
30+
Multiline values will be transformed into single line values.
2531
"""
2632
if not Path(file_path).exists():
2733
raise FileNotFoundError(f"File {file_path} does not exist")
@@ -60,6 +66,82 @@ def parse_i18n_file(file_path):
6066
f"It has at least one duplicate key: {duplicate_keys}"
6167
)
6268

69+
if remove_backslashes:
70+
# Now that we've ensured the file has no duplicate properties, return
71+
# the data as a dictionary with multiline values transformed into
72+
# single line values.
73+
return parse_i18n_file_without_backslashes(file_path)
74+
75+
return data
76+
77+
78+
def convert_properties_to_ini(input_path, ini_path):
    """Reads a properties file and writes it as an .ini file with a [DEFAULT] section
    header to make it compatible with configparser.

    The output file consists of the line "[DEFAULT]" followed by the
    unmodified contents of the input file. Both files are handled as UTF-8.
    Any existing file at ini_path is overwritten.

    Keyword arguments:
    input_path -- filepath of the i18n Java properties file to convert
    ini_path -- filepath of the output .ini file
    """
    with (
        open(input_path, "r", encoding="utf-8") as infile,
        open(ini_path, "w", encoding="utf-8") as outfile,
    ):
        # Add a dummy section header
        outfile.write("[DEFAULT]\n")
        # Pass the file object itself so the lines are streamed across
        # instead of first being collected into a list with readlines().
        outfile.writelines(infile)
93+
94+
95+
def merge_multiline_string(multiline_string: str) -> str:
    """Takes a multiline string as input, removes the backslashes at the
    end of each line, and returns a single line string.

    Each line is trimmed of surrounding whitespace, lines that end up empty
    are dropped, and the remaining lines are joined with a single space.

    Keyword arguments:
    multiline_string -- the input string potentially containing multiple lines
    with backslash continuations.
    """
    # Trim the surrounding whitespace before removing the backslashes so that
    # a continuation backslash followed by stray trailing whitespace is still
    # removed. Then trim again to drop the whitespace that preceded it.
    processed_lines = [
        line.strip().rstrip("\\").rstrip()
        for line in multiline_string.splitlines()
    ]
    # Join the lines into a single string, filtering out any empty lines
    # to avoid leading/trailing spaces
    return " ".join(line for line in processed_lines if line)
112+
113+
114+
def parse_i18n_file_without_backslashes(file_path):
    """Parses an i18n Java properties file and returns the data as a dictionary.
    Multiline values will be transformed into single line values with the
    backslashes removed.

    The properties file is copied to a temporary .ini file with a [DEFAULT]
    section header, parsed with configparser, and each value is passed
    through merge_multiline_string.

    Note that this method does not work properly for multiline translations
    with an "=" character in them.

    Keyword arguments:
    file_path -- filepath of the i18n Java properties file to parse
    """
    # Use RawConfigParser to avoid any interpolation or automatic conversions
    config = configparser.RawConfigParser(empty_lines_in_values=False)
    # Override the optionxform method to prevent lowercase conversion of the keys
    config.optionxform = str  # type: ignore

    # Write the .ini file into a temporary directory rather than reopening an
    # already-open NamedTemporaryFile by name, which is not possible on
    # Windows. The directory and its contents are removed automatically.
    with tempfile.TemporaryDirectory() as temp_dir:
        ini_path = Path(temp_dir) / "messages.ini"
        # Convert the properties file into a temporary .ini file
        convert_properties_to_ini(file_path, ini_path)

        # read_file raises if the file cannot be opened, whereas
        # config.read() silently skips unreadable files.
        with open(ini_path, "r", encoding="utf-8") as ini_file:
            config.read_file(ini_file)

    return {
        key: merge_multiline_string(value)
        for key, value in config["DEFAULT"].items()
    }
64146

65147

@@ -80,8 +162,17 @@ def main():
80162
"Can be specified as a relative or absolute file path."
81163
),
82164
)
165+
parser.add_argument(
166+
"-r",
167+
"--remove_backslashes",
168+
action="store_true",
169+
help=(
170+
"the data returned will not have the "
171+
"backslashes used in multiline values."
172+
),
173+
)
83174
args = parser.parse_args()
84-
result = parse_i18n_file(args.input_file)
175+
result = parse_i18n_file(args.input_file, args.remove_backslashes)
85176
for key, value in result.items():
86177
print("key", key)
87178
print("value", value)

src/i18ntools/translate.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ def translate_file(
109109
output_file_path=None,
110110
input_lang=default_lang,
111111
translator_region=default_region,
112+
remove_backslashes=False,
112113
):
113114
if not Path(input_file_path).exists():
114115
raise FileNotFoundError(f"File {input_file_path} does not exist")
@@ -120,7 +121,7 @@ def translate_file(
120121
output_file_path = get_default_filepath(input_file_path, output_lang)
121122

122123
# Parse the input file into a dictionary
123-
input_data = parse_i18n_file(input_file_path)
124+
input_data = parse_i18n_file(input_file_path, remove_backslashes)
124125

125126
# Open the input file in read mode to read its contents
126127
with open(input_file_path, "r", encoding="utf-8") as f:
@@ -214,9 +215,23 @@ def main():
214215
default=default_region,
215216
help="region of the Azure translator resource. Defaults to eastus2",
216217
)
218+
parser.add_argument(
219+
"-rbs",
220+
"--remove_backslashes",
221+
action="store_true",
222+
help=(
223+
"any backslashes from multiline values in the input file "
224+
"will not be included in the text that gets translated."
225+
),
226+
)
217227
args = parser.parse_args()
218228
translate_file(
219-
args.input_file, args.to, args.output_file, args.from_lang, args.region
229+
args.input_file,
230+
args.to,
231+
args.output_file,
232+
args.from_lang,
233+
args.region,
234+
args.remove_backslashes,
220235
)
221236

222237

src/i18ntools/translate_missing.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def translate_missing_messages(
3030
output_file_path=None,
3131
input_lang=default_lang,
3232
translator_region=default_region,
33+
remove_backslashes=False,
3334
):
3435
if not Path(input_file_path).exists():
3536
raise FileNotFoundError(f"File {input_file_path} does not exist")
@@ -46,8 +47,8 @@ def translate_missing_messages(
4647
raise FileNotFoundError(f"File {output_file_path} does not exist")
4748

4849
# Parse the input file and output file into a dictionary
49-
input_data = parse_i18n_file(input_file_path)
50-
output_data = parse_i18n_file(output_file_path)
50+
input_data = parse_i18n_file(input_file_path, remove_backslashes)
51+
output_data = parse_i18n_file(output_file_path, remove_backslashes)
5152

5253
# Find any i18n messages missing from the output file
5354
# and put those keys and values in the payload_data dictionary
@@ -148,6 +149,15 @@ def main():
148149
default=default_region,
149150
help="region of the Azure translator resource. Defaults to eastus2",
150151
)
152+
parser.add_argument(
153+
"-rbs",
154+
"--remove_backslashes",
155+
action="store_true",
156+
help=(
157+
"any backslashes from multiline values in the input file "
158+
"will not be included in the text that gets translated."
159+
),
160+
)
151161
parser.add_argument(
152162
"-s",
153163
"--sort",
@@ -166,6 +176,7 @@ def main():
166176
args.output_file,
167177
args.from_lang,
168178
args.region,
179+
args.remove_backslashes,
169180
)
170181

171182

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
interactions:
2+
- request:
3+
body: '[{"text": "Property [{0}] of class [{1}] with value [{2}] is less than
4+
minimum value [{3}]"}, {"text": "I want to see you knocking at the door. I wanna
5+
leave you out there waiting in the downpour. Singing that you\u2019re sorry,
6+
dripping on the hall floor."}, {"text": "The customSubmitTS parameter is missing.
7+
It must be present and of type Date."}, {"text": "{0} session removed."}, {"text":
8+
"The trial period has ended for your account and you can no longer use the application."},
9+
{"text": "Instructor is disabled"}, {"text": " Attendance actions made on this
10+
page will also be made for every session in this group."}, {"text": "Errors:
11+
{0}. \\n\\n Sessions successfully removed: {1}"}]'
12+
headers:
13+
Accept:
14+
- '*/*'
15+
Accept-Encoding:
16+
- gzip, deflate
17+
Connection:
18+
- keep-alive
19+
Content-Length:
20+
- '690'
21+
Content-Type:
22+
- application/json
23+
Ocp-Apim-Subscription-Region:
24+
- eastus2
25+
User-Agent:
26+
- python-requests/2.32.5
27+
method: POST
28+
uri: https://api.cognitive.microsofttranslator.com/translate?api-version=3.0&from=en&to=de
29+
response:
30+
body:
31+
string: "[{\"translations\":[{\"text\":\"Die Eigenschaft [{0}] der Klasse [{1}]
32+
mit Wert [{2}] ist kleiner als der Mindestwert [{3}]\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"Ich
33+
will dich an der T\xFCr klopfen sehen. Ich will dich drau\xDFen im Wolkenbruch
34+
warten lassen. Singend, dass es dir leid tut, tropfend auf den Flurboden.\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"Der
35+
customSubmitTS-Parameter fehlt. Es muss anwesend und vom Typ Datum sein.\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"{0}
36+
Sitzung entfernt.\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"Die Testphase
37+
f\xFCr dein Konto ist beendet und du kannst die Anwendung nicht mehr nutzen.\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"Der
38+
Ausbilder ist behindert\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"F\xFCr jede Sitzung in dieser Gruppe werden auch die auf dieser Seite vorgenommenen
39+
Anwesenheitsma\xDFnahmen vorgenommen.\",\"to\":\"de\"}]},{\"translations\":[{\"text\":\"Fehler:
40+
{0}. \\\\n\\\\n Sitzungen erfolgreich entfernt: {1}\",\"to\":\"de\"}]}]"
41+
headers:
42+
Connection:
43+
- keep-alive
44+
Content-Type:
45+
- application/json; charset=utf-8
46+
Date:
47+
- Wed, 14 Jan 2026 20:16:28 GMT
48+
Strict-Transport-Security:
49+
- max-age=31536000; includeSubDomains
50+
Transfer-Encoding:
51+
- chunked
52+
access-control-expose-headers:
53+
- X-RequestId,X-Metered-Usage,X-MT-System
54+
x-content-type-options:
55+
- nosniff
56+
x-envoy-upstream-service-time:
57+
- '438'
58+
x-metered-usage:
59+
- '571'
60+
x-mt-system:
61+
- Microsoft
62+
x-requestid:
63+
- 5e1ef095-341f-4bca-b80b-b07d6089bf28.EUWE.0114T2016
64+
status:
65+
code: 200
66+
message: OK
67+
version: 1

0 commit comments

Comments
 (0)