Skip to content

Commit 89cee16

Browse files
committed
fix: share clean_string; glossary replacement not yet applied for mobile
1 parent 45e796f commit 89cee16

File tree

5 files changed

+101
-87
lines changed

5 files changed

+101
-87
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,8 @@
22
.DS_Store
33

44
# Exclude venv folder
5-
venv
5+
venv
6+
7+
8+
__pycache__
9+
.vscode/

crowdin/generate_android_strings.py

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
from pathlib import Path
88
from colorama import Fore, Style
9+
from generate_shared import load_glossary_dict, clean_string
910

1011
# Variables that should be treated as numeric (using %d)
1112
NUMERIC_VARIABLES = ['count', 'found_count', 'total_count']
@@ -66,20 +67,6 @@ def repl(match):
6667

6768
return re.sub(r'\{([^}]+)\}', repl, text)
6869

69-
def clean_string(text):
70-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
71-
# So we don't want to replace -> with → for instance, we want the crowdin strings to not have those at all.
72-
# We can use standard XML escaped characters for most things (since XLIFF is an XML format) but
73-
# want the following cases escaped in a particular way
74-
text = text.replace("'", r"\'")
75-
text = text.replace(""", "\"")
76-
text = text.replace("\"", "\\\"")
77-
text = text.replace("&lt;b&gt;", "<b>")
78-
text = text.replace("&lt;/b&gt;", "</b>")
79-
text = text.replace("&lt;/br&gt;", "\\n")
80-
text = text.replace("<br/>", "\\n")
81-
text = text.replace("&", "&amp;") # Assume any remaining ampersands are desired
82-
return text.strip() # Strip whitespace
8370

8471
def generate_android_xml(translations, app_name):
8572
sorted_translations = sorted(translations.items())
@@ -93,11 +80,11 @@ def generate_android_xml(translations, app_name):
9380
if isinstance(target, dict): # It's a plural group
9481
result += f' <plurals name="{resname}">\n'
9582
for form, value in target.items():
96-
escaped_value = clean_string(convert_placeholders(value))
83+
escaped_value = clean_string(convert_placeholders(value), True, {}, {})
9784
result += f' <item quantity="{form}">{escaped_value}</item>\n'
9885
result += ' </plurals>\n'
9986
else: # It's a regular string (for these we DON'T want to convert the placeholders)
100-
escaped_target = clean_string(target)
87+
escaped_target = clean_string(target, True, {}, {})
10188
result += f' <string name="{resname}">{escaped_target}</string>\n'
10289

10390
result += '</resources>'
@@ -131,17 +118,10 @@ def convert_xliff_to_android_xml(input_file, output_dir, source_locale, locale,
131118

132119

133120
def convert_non_translatable_strings_to_kotlin(input_file, output_path):
134-
if not os.path.exists(input_file):
135-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
121+
glossary_dict = load_glossary_dict(input_file)
136122

137-
# Process the non-translatable string input
138-
non_translatable_strings_data = {}
139-
with open(input_file, 'r', encoding="utf-8") as file:
140-
non_translatable_strings_data = json.load(file)
141-
142-
entries = non_translatable_strings_data['data']
143-
max_key_length = max(len(entry['data']['note'].upper()) for entry in entries)
144-
app_name = None
123+
max_key_length = max(len(key) for key in glossary_dict)
124+
app_name = glossary_dict['app_name']
145125

146126
# Output the file in the desired format
147127
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -151,17 +131,16 @@ def convert_non_translatable_strings_to_kotlin(input_file, output_path):
151131
file.write('\n')
152132
file.write('// Non-translatable strings for use with the UI\n')
153133
file.write("object NonTranslatableStringConstants {\n")
154-
for entry in entries:
155-
key = entry['data']['note'].upper()
156-
text = entry['data']['text']
134+
for key_lowercase in glossary_dict:
135+
key = key_lowercase.upper()
136+
text = glossary_dict[key_lowercase]
157137
file.write(f' const val {key:<{max_key_length}} = "{text}"\n')
158138

159-
if key == 'APP_NAME':
160-
app_name = text
161-
162139
file.write('}\n')
163140
file.write('\n')
164141

142+
if not app_name:
143+
raise ValueError("could not find app_name in glossary_dict")
165144
return app_name
166145

167146
def convert_all_files(input_directory):

crowdin/generate_desktop_strings.py

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import xml.etree.ElementTree as ET
44
import sys
55
import argparse
6-
import html
76
from pathlib import Path
8-
from colorama import Fore, Style, init
7+
from colorama import Fore, Style
8+
from generate_shared import clean_string, load_glossary_dict
99

1010
# Customizable mapping for output folder hierarchy
1111
# Add entries here to customize the output path for specific locales
@@ -37,6 +37,8 @@
3737
TRANSLATIONS_OUTPUT_DIRECTORY = args.translations_output_directory
3838
NON_TRANSLATABLE_STRINGS_OUTPUT_PATH = args.non_translatable_strings_output_path
3939

40+
clean_string_extra_dict = {'{count}': '#'}
41+
4042
def parse_xliff(file_path):
4143
tree = ET.parse(file_path)
4244
root = tree.getroot()
@@ -69,26 +71,20 @@ def parse_xliff(file_path):
6971

7072
return translations
7173

72-
def clean_string(text):
73-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
74-
# So we don't want to replace -&gt; with → for instance, we want the crowdin strings to not have those at all.
75-
text = html.unescape(text) # Unescape any HTML escaping
76-
return text.strip() # Strip whitespace
7774

78-
def generate_icu_pattern(target):
75+
def generate_icu_pattern(target, glossary_dict):
7976
if isinstance(target, dict): # It's a plural group
8077
pattern_parts = []
8178
for form, value in target.items():
8279
if form in ['zero', 'one', 'two', 'few', 'many', 'other', 'exact', 'fractional']:
83-
# Replace {count} with #
84-
value = clean_string(value.replace('{count}', '#'))
80+
value = clean_string(value, False, glossary_dict, clean_string_extra_dict)
8581
pattern_parts.append(f"{form} [{value}]")
8682

8783
return "{{count, plural, {0}}}".format(" ".join(pattern_parts))
8884
else: # It's a regular string
89-
return clean_string(target)
85+
return clean_string(target, False, glossary_dict, clean_string_extra_dict)
9086

91-
def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code):
87+
def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code, glossary_dict):
9288
if not os.path.exists(input_file):
9389
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
9490

@@ -97,8 +93,9 @@ def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code
9793
sorted_translations = sorted(translations.items())
9894
converted_translations = {}
9995

96+
10097
for resname, target in sorted_translations:
101-
converted_translations[resname] = generate_icu_pattern(target)
98+
converted_translations[resname] = generate_icu_pattern(target, glossary_dict)
10299

103100
# Generate output files
104101
output_locale = LOCALE_PATH_MAPPING.get(locale, LOCALE_PATH_MAPPING.get(locale_two_letter_code, locale_two_letter_code))
@@ -112,16 +109,10 @@ def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code
112109
file.write('\n')
113110
return output_locale
114111

115-
def convert_non_translatable_strings_to_type_script(input_file, output_path, exported_locales, rtl_languages):
116-
if not os.path.exists(input_file):
117-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
118112

119-
# Process the non-translatable string input
120-
non_translatable_strings_data = {}
121-
with open(input_file, 'r', encoding="utf-8") as file:
122-
non_translatable_strings_data = json.load(file)
123113

124-
entries = non_translatable_strings_data['data']
114+
def convert_non_translatable_strings_to_type_script(input_file, output_path, exported_locales, rtl_languages):
115+
glossary_dict = load_glossary_dict(input_file)
125116
rtl_locales = sorted([lang["twoLettersCode"] for lang in rtl_languages])
126117

127118
# Output the file in the desired format
@@ -132,9 +123,8 @@ def convert_non_translatable_strings_to_type_script(input_file, output_path, exp
132123

133124
with open(output_path, 'w', encoding='utf-8') as file:
134125
file.write('export enum LOCALE_DEFAULTS {\n')
135-
for entry in entries:
136-
key = entry['data']['note']
137-
text = entry['data']['text']
126+
for key in glossary_dict:
127+
text = glossary_dict[key]
138128
file.write(f" {key} = '{text}',\n")
139129

140130
file.write('}\n')
@@ -143,7 +133,7 @@ def convert_non_translatable_strings_to_type_script(input_file, output_path, exp
143133
file.write('\n')
144134
file.write(f"export const crowdinLocales = [{joined_exported_locales},\n] as const;\n")
145135
file.write('\n')
146-
file.write(f"export type CrowdinLocale = (typeof crowdinLocales)[number];\n")
136+
file.write("export type CrowdinLocale = (typeof crowdinLocales)[number];\n")
147137
file.write('\n')
148138

149139

@@ -158,6 +148,8 @@ def convert_all_files(input_directory):
158148
with open(project_info_file, 'r', encoding="utf-8") as file:
159149
project_details = json.load(file)
160150

151+
non_translatable_strings_file = os.path.join(input_directory, "_non_translatable_strings.json")
152+
161153
# Extract the language info and sort the target languages alphabetically by locale
162154
source_language = project_details['data']['sourceLanguage']
163155
target_languages = project_details['data']['targetLanguages']
@@ -168,18 +160,20 @@ def convert_all_files(input_directory):
168160
# Convert the XLIFF data to the desired format
169161
print(f"\033[2K{Fore.WHITE}⏳ Converting translations to target format...{Style.RESET_ALL}", end='\r')
170162
exported_locales = []
163+
glossary_dict = load_glossary_dict(non_translatable_strings_file)
164+
171165
for language in [source_language] + target_languages:
172166
lang_locale = language['locale']
173167
lang_two_letter_code = language['twoLettersCode']
174168
print(f"\033[2K{Fore.WHITE}⏳ Converting translations for {lang_locale} to target format...{Style.RESET_ALL}", end='\r')
175169
input_file = os.path.join(input_directory, f"{lang_locale}.xliff")
176-
exported_as = convert_xliff_to_json(input_file, TRANSLATIONS_OUTPUT_DIRECTORY, lang_locale, lang_two_letter_code)
170+
exported_as = convert_xliff_to_json(input_file, TRANSLATIONS_OUTPUT_DIRECTORY, lang_locale, lang_two_letter_code, glossary_dict)
177171
exported_locales.append(exported_as)
178172
print(f"\033[2K{Fore.GREEN}✅ All conversions complete{Style.RESET_ALL}")
179173

180174
# Convert the non-translatable strings to the desired format
181175
print(f"\033[2K{Fore.WHITE}⏳ Generating static strings file...{Style.RESET_ALL}", end='\r')
182-
non_translatable_strings_file = os.path.join(input_directory, "_non_translatable_strings.json")
176+
183177
rtl_languages = [lang for lang in target_languages if lang["textDirection"] == "rtl"]
184178
convert_non_translatable_strings_to_type_script(non_translatable_strings_file, NON_TRANSLATABLE_STRINGS_OUTPUT_PATH, exported_locales, rtl_languages)
185179
print(f"\033[2K{Fore.GREEN}✅ Static string generation complete{Style.RESET_ALL}")

crowdin/generate_ios_strings.py

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import argparse
66
import html
77
from pathlib import Path
8-
from colorama import Fore, Style, init
8+
from colorama import Fore, Style
99
from datetime import datetime
10+
from generate_shared import load_glossary_dict, clean_string
11+
1012

1113
# It seems that Xcode uses different language codes and doesn't support all of the languages we get from Crowdin
1214
# (at least in the variants that Crowdin is specifying them in) so need to map/exclude them in order to build correctly
@@ -54,7 +56,7 @@ def parse_xliff(file_path):
5456
target_language = file_elem.get('target-language')
5557
if target_language is None:
5658
raise ValueError(f"Missing target-language in file: {file_path}")
57-
59+
5860
if target_language in LANGUAGE_MAPPING:
5961
target_language = LANGUAGE_MAPPING[target_language]
6062

@@ -65,7 +67,7 @@ def parse_xliff(file_path):
6567
for trans_unit in group.findall('ns:trans-unit', namespaces=namespace):
6668
if resname is None:
6769
resname = trans_unit.get('resname') or trans_unit.get('id')
68-
70+
6971
target = trans_unit.find('ns:target', namespaces=namespace)
7072
source = trans_unit.find('ns:source', namespaces=namespace)
7173
context_group = trans_unit.find('ns:context-group', namespaces=namespace)
@@ -103,17 +105,11 @@ def parse_xliff(file_path):
103105

104106
return translations, target_language
105107

106-
def clean_string(text):
107-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
108-
# So we don't want to replace -&gt; with → for instance, we want the crowdin strings to not have those at all.
109-
text = html.unescape(text) # Unescape any HTML escaping
110-
return text.strip() # Strip whitespace
111-
112-
def convert_placeholders_for_plurals(resname, translations):
108+
def convert_placeholders_for_plurals(translations):
113109
# Replace {count} with %lld for iOS
114110
converted_translations = {}
115111
for form, value in translations.items():
116-
converted_translations[form] = clean_string(value.replace('{count}', '%lld'))
112+
converted_translations[form] = clean_string(value.replace('{count}', '%lld'), False, {}, {})
117113

118114
return converted_translations
119115

@@ -138,7 +134,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
138134
# then the output will differ from what Xcode generates)
139135
all_languages = [source_language] + target_mapped_languages
140136
sorted_languages = sorted(all_languages, key=lambda x: x['mapped_id'])
141-
137+
142138
for language in sorted_languages:
143139
lang_locale = language['locale']
144140
input_file = os.path.join(input_dir, f"{lang_locale}.xliff")
@@ -152,7 +148,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
152148
raise ValueError(f"Error processing locale {lang_locale}: {str(e)}")
153149

154150
print(f"\033[2K{Fore.WHITE}⏳ Converting translations for {target_language} to target format...{Style.RESET_ALL}", end='\r')
155-
151+
156152
for resname, translation in translations.items():
157153
if resname not in string_catalog["strings"]:
158154
string_catalog["strings"][resname] = {
@@ -161,7 +157,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
161157
}
162158

163159
if isinstance(translation, dict): # It's a plural group
164-
converted_translations = convert_placeholders_for_plurals(resname, translation)
160+
converted_translations = convert_placeholders_for_plurals(translation)
165161

166162
# Check if any of the translations contain '{count}'
167163
contains_count = any('{count}' in value for value in translation.values())
@@ -207,7 +203,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
207203
string_catalog["strings"][resname]["localizations"][target_language] = {
208204
"stringUnit": {
209205
"state": "translated",
210-
"value": clean_string(translation)
206+
"value": clean_string(translation, False, {}, {})
211207
}
212208
}
213209

@@ -225,15 +221,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
225221
json.dump(sorted_string_catalog, f, ensure_ascii=False, indent=2, separators=(',', ' : '))
226222

227223
def convert_non_translatable_strings_to_swift(input_file, output_path):
228-
if not os.path.exists(input_file):
229-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
230-
231-
# Process the non-translatable string input
232-
non_translatable_strings_data = {}
233-
with open(input_file, 'r', encoding="utf-8") as file:
234-
non_translatable_strings_data = json.load(file)
235-
236-
entries = non_translatable_strings_data['data']
224+
glossary_dict = load_glossary_dict(input_file)
237225

238226
# Output the file in the desired format
239227
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -245,9 +233,8 @@ def convert_non_translatable_strings_to_swift(input_file, output_path):
245233
file.write('// stringlint:disable\n')
246234
file.write('\n')
247235
file.write('public enum Constants {\n')
248-
for entry in entries:
249-
key = entry['data']['note']
250-
text = entry['data']['text']
236+
for key in glossary_dict:
237+
text = glossary_dict[key]
251238
file.write(f' public static let {key}: String = "{text}"\n')
252239

253240
file.write('}\n')

crowdin/generate_shared.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import html
2+
import json
3+
import os
4+
5+
def clean_string(text, is_android, glossary_dict, extra_replace_dict):
    """Normalize a Crowdin string for output on a target platform.

    Args:
        text: the raw string pulled from the XLIFF/Crowdin export.
        is_android: when True, apply Android resource escaping; otherwise
            HTML-unescape the string (desktop/iOS behavior).
        glossary_dict: mapping of glossary key -> replacement text; each
            occurrence of "{key}" in the string is substituted.
        extra_replace_dict: extra literal substitutions applied last
            (e.g. {'{count}': '#'} for ICU plural patterns).

    Returns:
        The escaped/unescaped, whitespace-stripped string with all
        glossary and extra replacements applied.
    """
    # Note: any changes done for all platforms most likely need to be done on
    # the crowdin side. So we don't want to replace -&gt; with → for instance;
    # we want the crowdin strings to not have those at all.
    if is_android:
        # We can use standard XML escaped characters for most things (since
        # XLIFF is an XML format) but want the following cases escaped in a
        # particular way (for android only).
        text = text.replace("'", r"\'")
        text = text.replace("&quot;", "\"")
        text = text.replace("\"", "\\\"")
        text = text.replace("&lt;b&gt;", "<b>")
        text = text.replace("&lt;/b&gt;", "</b>")
        text = text.replace("&lt;/br&gt;", "\\n")
        text = text.replace("<br/>", "\\n")
        text = text.replace("&", "&amp;")  # Assume any remaining ampersands are desired
    else:
        text = html.unescape(text)  # Unescape any HTML escaping

    # BUG FIX: the previous version stripped a pre-branch copy (`to_ret`) of
    # the input, discarding every replace above. Strip the processed text.
    result = text.strip()

    # Replace all the defined constants (from crowdin's glossary) in the string
    for glossary_key in glossary_dict:
        result = result.replace("{" + glossary_key + "}", glossary_dict[glossary_key])

    # If extra_replace_dict has keys, replace those too
    for extra_key in extra_replace_dict:
        result = result.replace(extra_key, extra_replace_dict[extra_key])
    return result
33+
34+
35+
def load_glossary_dict(input_file):
    """Read the Crowdin non-translatable strings export and return a dict
    mapping each entry's note (the glossary key) to its text value.

    Raises:
        FileNotFoundError: if `input_file` does not exist.
    """
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")

    # Parse the non-translatable string input (Crowdin JSON export)
    with open(input_file, 'r', encoding="utf-8") as file:
        payload = json.load(file)

    # Each entry carries its key in 'note' and its value in 'text'
    return {
        entry['data']['note']: entry['data']['text']
        for entry in payload['data']
    }

0 commit comments

Comments
 (0)