Skip to content

Commit 89cee16

Browse files
committed
fix: share clean_string; glossary replacement not yet applied for mobile
1 parent 45e796f commit 89cee16

File tree

5 files changed

+101
-87
lines changed

5 files changed

+101
-87
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,8 @@
22
.DS_Store
33

44
# Exclude venv folder
5-
venv
5+
venv
6+
7+
8+
__pycache__
9+
.vscode/

crowdin/generate_android_strings.py

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
from pathlib import Path
88
from colorama import Fore, Style
9+
from generate_shared import load_glossary_dict, clean_string
910

1011
# Variables that should be treated as numeric (using %d)
1112
NUMERIC_VARIABLES = ['count', 'found_count', 'total_count']
@@ -66,20 +67,6 @@ def repl(match):
6667

6768
return re.sub(r'\{([^}]+)\}', repl, text)
6869

69-
def clean_string(text):
70-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
71-
# So we don't want to replace -> with → for instance, we want the crowdin strings to not have those at all.
72-
# We can use standard XML escaped characters for most things (since XLIFF is an XML format) but
73-
# want the following cases escaped in a particular way
74-
text = text.replace("'", r"\'")
75-
text = text.replace(""", "\"")
76-
text = text.replace("\"", "\\\"")
77-
text = text.replace("&lt;b&gt;", "<b>")
78-
text = text.replace("&lt;/b&gt;", "</b>")
79-
text = text.replace("&lt;/br&gt;", "\\n")
80-
text = text.replace("<br/>", "\\n")
81-
text = text.replace("&", "&amp;") # Assume any remaining ampersands are desired
82-
return text.strip() # Strip whitespace
8370

8471
def generate_android_xml(translations, app_name):
8572
sorted_translations = sorted(translations.items())
@@ -93,11 +80,11 @@ def generate_android_xml(translations, app_name):
9380
if isinstance(target, dict): # It's a plural group
9481
result += f' <plurals name="{resname}">\n'
9582
for form, value in target.items():
96-
escaped_value = clean_string(convert_placeholders(value))
83+
escaped_value = clean_string(convert_placeholders(value), True, {}, {})
9784
result += f' <item quantity="{form}">{escaped_value}</item>\n'
9885
result += ' </plurals>\n'
9986
else: # It's a regular string (for these we DON'T want to convert the placeholders)
100-
escaped_target = clean_string(target)
87+
escaped_target = clean_string(target, True, {}, {})
10188
result += f' <string name="{resname}">{escaped_target}</string>\n'
10289

10390
result += '</resources>'
@@ -131,17 +118,10 @@ def convert_xliff_to_android_xml(input_file, output_dir, source_locale, locale,
131118

132119

133120
def convert_non_translatable_strings_to_kotlin(input_file, output_path):
134-
if not os.path.exists(input_file):
135-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
121+
glossary_dict = load_glossary_dict(input_file)
136122

137-
# Process the non-translatable string input
138-
non_translatable_strings_data = {}
139-
with open(input_file, 'r', encoding="utf-8") as file:
140-
non_translatable_strings_data = json.load(file)
141-
142-
entries = non_translatable_strings_data['data']
143-
max_key_length = max(len(entry['data']['note'].upper()) for entry in entries)
144-
app_name = None
123+
max_key_length = max(len(key) for key in glossary_dict)
124+
app_name = glossary_dict['app_name']
145125

146126
# Output the file in the desired format
147127
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -151,17 +131,16 @@ def convert_non_translatable_strings_to_kotlin(input_file, output_path):
151131
file.write('\n')
152132
file.write('// Non-translatable strings for use with the UI\n')
153133
file.write("object NonTranslatableStringConstants {\n")
154-
for entry in entries:
155-
key = entry['data']['note'].upper()
156-
text = entry['data']['text']
134+
for key_lowercase in glossary_dict:
135+
key = key_lowercase.upper()
136+
text = glossary_dict[key_lowercase]
157137
file.write(f' const val {key:<{max_key_length}} = "{text}"\n')
158138

159-
if key == 'APP_NAME':
160-
app_name = text
161-
162139
file.write('}\n')
163140
file.write('\n')
164141

142+
if not app_name:
143+
raise ValueError("could not find app_name in glossary_dict")
165144
return app_name
166145

167146
def convert_all_files(input_directory):

crowdin/generate_desktop_strings.py

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import xml.etree.ElementTree as ET
44
import sys
55
import argparse
6-
import html
76
from pathlib import Path
8-
from colorama import Fore, Style, init
7+
from colorama import Fore, Style
8+
from generate_shared import clean_string, load_glossary_dict
99

1010
# Customizable mapping for output folder hierarchy
1111
# Add entries here to customize the output path for specific locales
@@ -37,6 +37,8 @@
3737
TRANSLATIONS_OUTPUT_DIRECTORY = args.translations_output_directory
3838
NON_TRANSLATABLE_STRINGS_OUTPUT_PATH = args.non_translatable_strings_output_path
3939

40+
clean_string_extra_dict = {'{count}': '#'}
41+
4042
def parse_xliff(file_path):
4143
tree = ET.parse(file_path)
4244
root = tree.getroot()
@@ -69,26 +71,20 @@ def parse_xliff(file_path):
6971

7072
return translations
7173

72-
def clean_string(text):
73-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
74-
# So we don't want to replace -&gt; with → for instance, we want the crowdin strings to not have those at all.
75-
text = html.unescape(text) # Unescape any HTML escaping
76-
return text.strip() # Strip whitespace
7774

78-
def generate_icu_pattern(target):
75+
def generate_icu_pattern(target, glossary_dict):
7976
if isinstance(target, dict): # It's a plural group
8077
pattern_parts = []
8178
for form, value in target.items():
8279
if form in ['zero', 'one', 'two', 'few', 'many', 'other', 'exact', 'fractional']:
83-
# Replace {count} with #
84-
value = clean_string(value.replace('{count}', '#'))
80+
value = clean_string(value, False, glossary_dict, clean_string_extra_dict)
8581
pattern_parts.append(f"{form} [{value}]")
8682

8783
return "{{count, plural, {0}}}".format(" ".join(pattern_parts))
8884
else: # It's a regular string
89-
return clean_string(target)
85+
return clean_string(target, False, glossary_dict, clean_string_extra_dict)
9086

91-
def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code):
87+
def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code, glossary_dict):
9288
if not os.path.exists(input_file):
9389
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
9490

@@ -97,8 +93,9 @@ def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code
9793
sorted_translations = sorted(translations.items())
9894
converted_translations = {}
9995

96+
10097
for resname, target in sorted_translations:
101-
converted_translations[resname] = generate_icu_pattern(target)
98+
converted_translations[resname] = generate_icu_pattern(target, glossary_dict)
10299

103100
# Generate output files
104101
output_locale = LOCALE_PATH_MAPPING.get(locale, LOCALE_PATH_MAPPING.get(locale_two_letter_code, locale_two_letter_code))
@@ -112,16 +109,10 @@ def convert_xliff_to_json(input_file, output_dir, locale, locale_two_letter_code
112109
file.write('\n')
113110
return output_locale
114111

115-
def convert_non_translatable_strings_to_type_script(input_file, output_path, exported_locales, rtl_languages):
116-
if not os.path.exists(input_file):
117-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
118112

119-
# Process the non-translatable string input
120-
non_translatable_strings_data = {}
121-
with open(input_file, 'r', encoding="utf-8") as file:
122-
non_translatable_strings_data = json.load(file)
123113

124-
entries = non_translatable_strings_data['data']
114+
def convert_non_translatable_strings_to_type_script(input_file, output_path, exported_locales, rtl_languages):
115+
glossary_dict = load_glossary_dict(input_file)
125116
rtl_locales = sorted([lang["twoLettersCode"] for lang in rtl_languages])
126117

127118
# Output the file in the desired format
@@ -132,9 +123,8 @@ def convert_non_translatable_strings_to_type_script(input_file, output_path, exp
132123

133124
with open(output_path, 'w', encoding='utf-8') as file:
134125
file.write('export enum LOCALE_DEFAULTS {\n')
135-
for entry in entries:
136-
key = entry['data']['note']
137-
text = entry['data']['text']
126+
for key in glossary_dict:
127+
text = glossary_dict[key]
138128
file.write(f" {key} = '{text}',\n")
139129

140130
file.write('}\n')
@@ -143,7 +133,7 @@ def convert_non_translatable_strings_to_type_script(input_file, output_path, exp
143133
file.write('\n')
144134
file.write(f"export const crowdinLocales = [{joined_exported_locales},\n] as const;\n")
145135
file.write('\n')
146-
file.write(f"export type CrowdinLocale = (typeof crowdinLocales)[number];\n")
136+
file.write("export type CrowdinLocale = (typeof crowdinLocales)[number];\n")
147137
file.write('\n')
148138

149139

@@ -158,6 +148,8 @@ def convert_all_files(input_directory):
158148
with open(project_info_file, 'r', encoding="utf-8") as file:
159149
project_details = json.load(file)
160150

151+
non_translatable_strings_file = os.path.join(input_directory, "_non_translatable_strings.json")
152+
161153
# Extract the language info and sort the target languages alphabetically by locale
162154
source_language = project_details['data']['sourceLanguage']
163155
target_languages = project_details['data']['targetLanguages']
@@ -168,18 +160,20 @@ def convert_all_files(input_directory):
168160
# Convert the XLIFF data to the desired format
169161
print(f"\033[2K{Fore.WHITE}⏳ Converting translations to target format...{Style.RESET_ALL}", end='\r')
170162
exported_locales = []
163+
glossary_dict = load_glossary_dict(non_translatable_strings_file)
164+
171165
for language in [source_language] + target_languages:
172166
lang_locale = language['locale']
173167
lang_two_letter_code = language['twoLettersCode']
174168
print(f"\033[2K{Fore.WHITE}⏳ Converting translations for {lang_locale} to target format...{Style.RESET_ALL}", end='\r')
175169
input_file = os.path.join(input_directory, f"{lang_locale}.xliff")
176-
exported_as = convert_xliff_to_json(input_file, TRANSLATIONS_OUTPUT_DIRECTORY, lang_locale, lang_two_letter_code)
170+
exported_as = convert_xliff_to_json(input_file, TRANSLATIONS_OUTPUT_DIRECTORY, lang_locale, lang_two_letter_code, glossary_dict)
177171
exported_locales.append(exported_as)
178172
print(f"\033[2K{Fore.GREEN}✅ All conversions complete{Style.RESET_ALL}")
179173

180174
# Convert the non-translatable strings to the desired format
181175
print(f"\033[2K{Fore.WHITE}⏳ Generating static strings file...{Style.RESET_ALL}", end='\r')
182-
non_translatable_strings_file = os.path.join(input_directory, "_non_translatable_strings.json")
176+
183177
rtl_languages = [lang for lang in target_languages if lang["textDirection"] == "rtl"]
184178
convert_non_translatable_strings_to_type_script(non_translatable_strings_file, NON_TRANSLATABLE_STRINGS_OUTPUT_PATH, exported_locales, rtl_languages)
185179
print(f"\033[2K{Fore.GREEN}✅ Static string generation complete{Style.RESET_ALL}")

crowdin/generate_ios_strings.py

Lines changed: 14 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import argparse
66
import html
77
from pathlib import Path
8-
from colorama import Fore, Style, init
8+
from colorama import Fore, Style
99
from datetime import datetime
10+
from generate_shared import load_glossary_dict, clean_string
11+
1012

1113
# It seems that Xcode uses different language codes and doesn't support all of the languages we get from Crowdin
1214
# (at least in the variants that Crowdin is specifying them in) so need to map/exclude them in order to build correctly
@@ -54,7 +56,7 @@ def parse_xliff(file_path):
5456
target_language = file_elem.get('target-language')
5557
if target_language is None:
5658
raise ValueError(f"Missing target-language in file: {file_path}")
57-
59+
5860
if target_language in LANGUAGE_MAPPING:
5961
target_language = LANGUAGE_MAPPING[target_language]
6062

@@ -65,7 +67,7 @@ def parse_xliff(file_path):
6567
for trans_unit in group.findall('ns:trans-unit', namespaces=namespace):
6668
if resname is None:
6769
resname = trans_unit.get('resname') or trans_unit.get('id')
68-
70+
6971
target = trans_unit.find('ns:target', namespaces=namespace)
7072
source = trans_unit.find('ns:source', namespaces=namespace)
7173
context_group = trans_unit.find('ns:context-group', namespaces=namespace)
@@ -103,17 +105,11 @@ def parse_xliff(file_path):
103105

104106
return translations, target_language
105107

106-
def clean_string(text):
107-
# Note: any changes done for all platforms needs most likely to be done on crowdin side.
108-
# So we don't want to replace -&gt; with → for instance, we want the crowdin strings to not have those at all.
109-
text = html.unescape(text) # Unescape any HTML escaping
110-
return text.strip() # Strip whitespace
111-
112-
def convert_placeholders_for_plurals(resname, translations):
108+
def convert_placeholders_for_plurals(translations):
113109
# Replace {count} with %lld for iOS
114110
converted_translations = {}
115111
for form, value in translations.items():
116-
converted_translations[form] = clean_string(value.replace('{count}', '%lld'))
112+
converted_translations[form] = clean_string(value.replace('{count}', '%lld'), False, {}, {})
117113

118114
return converted_translations
119115

@@ -138,7 +134,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
138134
# then the output will differ from what Xcode generates)
139135
all_languages = [source_language] + target_mapped_languages
140136
sorted_languages = sorted(all_languages, key=lambda x: x['mapped_id'])
141-
137+
142138
for language in sorted_languages:
143139
lang_locale = language['locale']
144140
input_file = os.path.join(input_dir, f"{lang_locale}.xliff")
@@ -152,7 +148,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
152148
raise ValueError(f"Error processing locale {lang_locale}: {str(e)}")
153149

154150
print(f"\033[2K{Fore.WHITE}⏳ Converting translations for {target_language} to target format...{Style.RESET_ALL}", end='\r')
155-
151+
156152
for resname, translation in translations.items():
157153
if resname not in string_catalog["strings"]:
158154
string_catalog["strings"][resname] = {
@@ -161,7 +157,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
161157
}
162158

163159
if isinstance(translation, dict): # It's a plural group
164-
converted_translations = convert_placeholders_for_plurals(resname, translation)
160+
converted_translations = convert_placeholders_for_plurals(translation)
165161

166162
# Check if any of the translations contain '{count}'
167163
contains_count = any('{count}' in value for value in translation.values())
@@ -207,7 +203,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
207203
string_catalog["strings"][resname]["localizations"][target_language] = {
208204
"stringUnit": {
209205
"state": "translated",
210-
"value": clean_string(translation)
206+
"value": clean_string(translation, False, {}, {})
211207
}
212208
}
213209

@@ -225,15 +221,7 @@ def convert_xliff_to_string_catalog(input_dir, output_dir, source_language, targ
225221
json.dump(sorted_string_catalog, f, ensure_ascii=False, indent=2, separators=(',', ' : '))
226222

227223
def convert_non_translatable_strings_to_swift(input_file, output_path):
228-
if not os.path.exists(input_file):
229-
raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")
230-
231-
# Process the non-translatable string input
232-
non_translatable_strings_data = {}
233-
with open(input_file, 'r', encoding="utf-8") as file:
234-
non_translatable_strings_data = json.load(file)
235-
236-
entries = non_translatable_strings_data['data']
224+
glossary_dict = load_glossary_dict(input_file)
237225

238226
# Output the file in the desired format
239227
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
@@ -245,9 +233,8 @@ def convert_non_translatable_strings_to_swift(input_file, output_path):
245233
file.write('// stringlint:disable\n')
246234
file.write('\n')
247235
file.write('public enum Constants {\n')
248-
for entry in entries:
249-
key = entry['data']['note']
250-
text = entry['data']['text']
236+
for key in glossary_dict:
237+
text = glossary_dict[key]
251238
file.write(f' public static let {key}: String = "{text}"\n')
252239

253240
file.write('}\n')

crowdin/generate_shared.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import html
2+
import json
3+
import os
4+
5+
def clean_string(text, is_android, glossary_dict, extra_replace_dict):
    """Normalize a Crowdin string for output on a target platform.

    Args:
        text: the raw string pulled from the XLIFF/Crowdin export.
        is_android: when True, apply Android resource escaping; otherwise
            HTML-unescape the string (desktop/iOS behavior).
        glossary_dict: mapping of glossary key -> replacement text; each
            occurrence of "{key}" in the string is substituted.
        extra_replace_dict: extra literal substitutions applied last
            (e.g. {'{count}': '#'} for ICU plural patterns).

    Returns:
        The escaped/unescaped, whitespace-stripped string with all
        glossary and extra replacements applied.
    """
    # Note: any changes done for all platforms most likely need to be done on
    # the crowdin side. So we don't want to replace -&gt; with → for instance;
    # we want the crowdin strings to not have those at all.
    if is_android:
        # We can use standard XML escaped characters for most things (since
        # XLIFF is an XML format) but want the following cases escaped in a
        # particular way (for android only).
        text = text.replace("'", r"\'")
        text = text.replace("&quot;", "\"")
        text = text.replace("\"", "\\\"")
        text = text.replace("&lt;b&gt;", "<b>")
        text = text.replace("&lt;/b&gt;", "</b>")
        text = text.replace("&lt;/br&gt;", "\\n")
        text = text.replace("<br/>", "\\n")
        text = text.replace("&", "&amp;")  # Assume any remaining ampersands are desired
    else:
        text = html.unescape(text)  # Unescape any HTML escaping

    # BUG FIX: the previous version stripped a pre-branch copy (`to_ret`) of
    # the input, discarding every replace above. Strip the processed text.
    result = text.strip()

    # Replace all the defined constants (from crowdin's glossary) in the string
    for glossary_key in glossary_dict:
        result = result.replace("{" + glossary_key + "}", glossary_dict[glossary_key])

    # If extra_replace_dict has keys, replace those too
    for extra_key in extra_replace_dict:
        result = result.replace(extra_key, extra_replace_dict[extra_key])
    return result
33+
34+
35+
def load_glossary_dict(input_file):
    """Read the Crowdin non-translatable strings export and return a dict
    mapping each entry's note (the glossary key) to its text value.

    Raises:
        FileNotFoundError: if `input_file` does not exist.
    """
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Could not find '{input_file}' in raw translations directory")

    # Parse the non-translatable string input (Crowdin JSON export)
    with open(input_file, 'r', encoding="utf-8") as file:
        payload = json.load(file)

    # Each entry carries its key in 'note' and its value in 'text'
    return {
        entry['data']['note']: entry['data']['text']
        for entry in payload['data']
    }

0 commit comments

Comments
 (0)