@@ -253,6 +253,73 @@ def save_translations(
253253 raise e
254254
255255
def add_gender_or_base_translations(
    language,
    section,
    path,
    gender_dict,
    label,
    extraSuffix,
    rowNumber,
    translations_dict,
):
    """
    Register the translation entries for one label in one language.

    When `gender_dict` is given, four entries are added through
    `add_translation`: `<path>_male<extraSuffix>`, `<path>_female<extraSuffix>`,
    `<path>_custom<extraSuffix>` and `<path><extraSuffix>`. The last one, which
    carries no gender marker, receives the "other" form and therefore acts as
    the default translation. When `gender_dict` is None but `label` is not, a
    single `<path><extraSuffix>` entry is added with `label` as its value.
    When both are None, nothing is added.

    Args:
        language (str): The language code (e.g., 'fr', 'en'); also the key used
            to select the per-language entry of `translations_dict`
        section (str): The section name
        path (str): The base path for the translation key
        gender_dict (dict or None): Gender-specific values stored under the
            keys "male", "female", "custom" and "other", or None
        label (str or None): The value to use when `gender_dict` is None
        extraSuffix (str): Text appended after the gender marker of every key
            (the visible callers pass "" or "_one")
        rowNumber (int): The row number, forwarded to `add_translation`
        translations_dict (dict): The translations dictionary to update,
            indexed by language
    """
    if gender_dict is None:
        # No gendered forms: fall back to the plain label when there is one.
        if label is not None:
            add_translation(
                language=language,
                section=section,
                path=path + extraSuffix,
                value=label,
                rowNumber=rowNumber,
                translations=translations_dict[language],
            )
        return

    # (key marker, gender_dict key) pairs, in the order they are registered.
    # The "other" translation has no marker: it will be the default one.
    gender_variants = (
        ("_male", "male"),
        ("_female", "female"),
        ("_custom", "custom"),
        ("", "other"),
    )
    for marker, gender in gender_variants:
        add_translation(
            language=language,
            section=section,
            path=path + marker + extraSuffix,
            value=gender_dict[gender],
            rowNumber=rowNumber,
            translations=translations_dict[language],
        )
321+
322+
256323def add_translations_from_excel (
257324 excel_file_path ,
258325 labels_output_folder_path ,
@@ -314,149 +381,48 @@ def add_translations_from_excel(
314381 processed_sections .add (section ) # Mark section as processed
315382
316383 # Add French translations
317- if gender_fr :
318- add_translation (
319- language = "fr" ,
320- section = section ,
321- path = path + "_male" ,
322- value = gender_fr ["male" ],
323- rowNumber = rowNumber ,
324- translations = translations_dict ["fr" ],
325- )
326- add_translation (
327- language = "fr" ,
328- section = section ,
329- path = path + "_female" ,
330- value = gender_fr ["female" ],
331- rowNumber = rowNumber ,
332- translations = translations_dict ["fr" ],
333- )
334- # The "other" translation will be the default one
335- add_translation (
336- language = "fr" ,
337- section = section ,
338- path = path ,
339- value = gender_fr ["other" ],
340- rowNumber = rowNumber ,
341- translations = translations_dict ["fr" ],
342- )
343- elif fr_label is not None :
344- add_translation (
345- language = "fr" ,
346- section = section ,
347- path = path ,
348- value = fr_label ,
349- rowNumber = rowNumber ,
350- translations = translations_dict ["fr" ],
351- )
352-
353- # Add French one person translation for count context if it exists
354- if gender_fr_one :
355- add_translation (
356- language = "fr" ,
357- section = section ,
358- path = path + "_male_one" ,
359- value = gender_fr_one ["male" ],
360- rowNumber = rowNumber ,
361- translations = translations_dict ["fr" ],
362- )
363- add_translation (
364- language = "fr" ,
365- section = section ,
366- path = path + "_female_one" ,
367- value = gender_fr_one ["female" ],
368- rowNumber = rowNumber ,
369- translations = translations_dict ["fr" ],
370- )
371- add_translation (
372- language = "fr" ,
373- section = section ,
374- path = path + "_one" ,
375- value = gender_fr_one ["other" ],
376- rowNumber = rowNumber ,
377- translations = translations_dict ["fr" ],
378- )
379- elif fr_label_one :
380- add_translation (
381- language = "fr" ,
382- section = section ,
383- path = path + "_one" ,
384- value = fr_label_one ,
385- rowNumber = rowNumber ,
386- translations = translations_dict ["fr" ],
387- )
388-
384+ add_gender_or_base_translations (
385+ "fr" ,
386+ section ,
387+ path ,
388+ gender_fr ,
389+ fr_label ,
390+ "" ,
391+ rowNumber ,
392+ translations_dict ,
393+ )
394+ add_gender_or_base_translations (
395+ "fr" ,
396+ section ,
397+ path ,
398+ gender_fr_one ,
399+ fr_label_one ,
400+ "_one" ,
401+ rowNumber ,
402+ translations_dict ,
403+ )
389404 # Add English translations
390- if gender_en :
391- add_translation (
392- language = "en" ,
393- section = section ,
394- path = path + "_male" ,
395- value = gender_en ["male" ],
396- rowNumber = rowNumber ,
397- translations = translations_dict ["en" ],
398- )
399- add_translation (
400- language = "en" ,
401- section = section ,
402- path = path + "_female" ,
403- value = gender_en ["female" ],
404- rowNumber = rowNumber ,
405- translations = translations_dict ["en" ],
406- )
407- add_translation (
408- language = "en" ,
409- section = section ,
410- path = path ,
411- value = gender_en ["other" ],
412- rowNumber = rowNumber ,
413- translations = translations_dict ["en" ],
414- )
415- elif en_label is not None :
416- add_translation (
417- language = "en" ,
418- section = section ,
419- path = path ,
420- value = en_label ,
421- rowNumber = rowNumber ,
422- translations = translations_dict ["en" ],
423- )
405+ add_gender_or_base_translations (
406+ "en" ,
407+ section ,
408+ path ,
409+ gender_en ,
410+ en_label ,
411+ "" ,
412+ rowNumber ,
413+ translations_dict ,
414+ )
415+ add_gender_or_base_translations (
416+ "en" ,
417+ section ,
418+ path ,
419+ gender_en_one ,
420+ en_label_one ,
421+ "_one" ,
422+ rowNumber ,
423+ translations_dict ,
424+ )
424425
425- # Add English one person translation for count context if it exists
426- if gender_en_one :
427- add_translation (
428- language = "en" ,
429- section = section ,
430- path = path + "_male_one" ,
431- value = gender_en_one ["male" ],
432- rowNumber = rowNumber ,
433- translations = translations_dict ["en" ],
434- )
435- add_translation (
436- language = "en" ,
437- section = section ,
438- path = path + "_female_one" ,
439- value = gender_en_one ["female" ],
440- rowNumber = rowNumber ,
441- translations = translations_dict ["en" ],
442- )
443- add_translation (
444- language = "en" ,
445- section = section ,
446- path = path + "_one" ,
447- value = gender_en_one ["other" ],
448- rowNumber = rowNumber ,
449- translations = translations_dict ["en" ],
450- )
451- elif en_label_one :
452- add_translation (
453- language = "en" ,
454- section = section ,
455- path = path + "_one" ,
456- value = en_label_one ,
457- rowNumber = rowNumber ,
458- translations = translations_dict ["en" ],
459- )
460426 rowNumber += 1 # Increment row number
461427
462428 # Save all translations
@@ -474,16 +440,81 @@ def add_translations_from_excel(
474440 raise e
475441
476442
def _strip_matching_quotes(part):
    """Return `part` without its surrounding quotes when it is wrapped in a matching pair."""
    # Require at least two characters so a lone quote character is kept as-is
    # instead of being sliced down to an empty string.
    if len(part) >= 2 and part[0] == part[-1] and part[0] in ('"', "'"):
        return part[1:-1]
    return part


def split_respecting_quotes(text, delimiter="/"):
    """
    Split a string by a delimiter character, but ignore delimiters inside quoted sections.

    A quoted section starts at a single or double quote and runs until the
    next occurrence of the same quote character. A single quote that directly
    follows a letter or a digit is treated as an apostrophe (as in "l'ami") and
    does not start a quoted section, so delimiters after it still split.
    A backslash escapes the next character: the backslash is dropped and the
    character is kept literally, whatever it is.

    After splitting, a part that is entirely wrapped in a matching pair of
    quotes has that pair removed. Quotes found elsewhere in a part are kept.

    Args:
        text (str): The text to split
        delimiter (str): The delimiter character

    Returns:
        list: List of split parts with surrounding quotes removed. An empty
            `text` yields a list holding one empty string.
    """
    parts = []
    buffer = []  # Characters of the part currently being built
    quote_char = None  # Tracks the current quote character (' or ")
    escape = False  # Tracks if the previous character was a backslash

    for char in text:
        if escape:  # Character is escaped, just append
            buffer.append(char)
            escape = False
        elif char == "\\":  # Handle escape character
            escape = True
        elif quote_char:  # We are inside quotes
            if char == quote_char:  # End of the quoted section
                quote_char = None
            buffer.append(char)
        elif char in ('"', "'"):
            # An apostrophe inside a word must not open a quoted section,
            # otherwise "l'ami/l'amie" would never be split.
            is_apostrophe = char == "'" and buffer and buffer[-1].isalnum()
            if not is_apostrophe:
                quote_char = char
            buffer.append(char)
        elif char == delimiter:  # Not inside quotes here: split
            parts.append(_strip_matching_quotes("".join(buffer)))
            buffer = []
        else:
            buffer.append(char)

    # Add the last part (also covers an unterminated quoted section)
    parts.append(_strip_matching_quotes("".join(buffer)))

    return parts
494+
495+
477496def expand_gender (label ):
478497 """
479- Replace all occurrences of {{gender:...}} or {{gender : ...}} (spaces before or after the colon) with the male, female, and other forms.
480- Example: "Étudian{{gender:t/te/t·e}}" -> {"male": "Étudiant", "female": "Étudiante", "other": "Étudiant·e"}
481- If only one part, 'female' is the part, 'male' and 'other' are ''.
482- If only two parts, 'male' is first part, 'female' is second part, 'other' is ''.
483- If three or more parts, only the first three are used: male, female, other.
498+ Replace all occurrences of {{gender:...}} or {{gender : ...}} (spaces before
499+ or after the colon) with the male, female, custom and other forms. It
500+ returns `None` if there are no gender strings in the label.
484501
485- Note: We accept both {{gender:...}}, {{gender :...}}, {{gender: ...}}, and {{gender : ...}}
486- (with spaces before and/or after the colon) because LibreOffice (in French) automatically inserts a space after the colon.
502+ Example: "Étudian{{gender:t/te/t·e}}" -> {"male": "Étudiant", "female":
503+ "Étudiante", "custom": "Étudiant·e", "other": "Étudiant·e"}
504+
505+ If only one part, 'female' is the part; 'male', 'custom' and 'other' are ''.
506+ If only two parts, 'male' is first part, 'female' is second part, 'custom' and
507+ 'other' use the first part.
508+ If three parts, 'male' is first part, 'female' is second part, 'custom' and 'other' are the third part.
509+ If four or more parts, only the first four are used: male, female, custom, other.
510+
511+ Note: We accept both {{gender:...}}, {{gender :...}}, {{gender: ...}}, and
512+ {{gender : ...}} (with spaces before and/or after the colon) because
513+ LibreOffice (in French) automatically inserts a space after the colon.
514+
515+ Handles quoted strings in gender expressions to not split on slashes within quotes.
516+ Example: {{gender:il/elle/iel/"il/elle"}} -> "il" for male, "elle" for female,
517+ "iel" for custom, and "il/elle" for other.
487518 """
488519 if label is None :
489520 return None
@@ -497,25 +528,36 @@ def expand_gender(label):
497528 return None
498529 male_label = label
499530 female_label = label
531+ custom_label = label
500532 other_label = label
501533 for match in matches :
502- parts = match .split ("/" )
503- if len (parts ) >= 3 :
504- male , female , other = parts [0 ], parts [1 ], parts [2 ]
534+ # Use the split_respecting_quotes function to correctly handle quoted strings
535+ parts = split_respecting_quotes (match )
536+
537+ if len (parts ) >= 4 :
538+ male , female , custom , other = parts [0 ], parts [1 ], parts [2 ], parts [3 ]
539+ elif len (parts ) == 3 :
540+ male , female , custom , other = parts [0 ], parts [1 ], parts [2 ], parts [2 ]
505541 elif len (parts ) == 2 :
506- male , female , other = parts [0 ], parts [1 ], ""
542+ male , female , custom , other = parts [0 ], parts [1 ], parts [ 0 ], parts [ 0 ]
507543 elif len (parts ) == 1 :
508- male , female , other = "" , parts [0 ], ""
544+ male , female , custom , other = "" , parts [0 ], "" , ""
509545 else :
510- male , female , other = "" , "" , ""
546+ male , female , custom , other = "" , "" , "" , ""
511547 # Replace all variants of the gender pattern (with or without spaces before and after colon) with the correct gendered string
512548 # Note: We use re.escape to escape any special characters in the match.
513549 # This ensures that the pattern is treated as a literal string.
514550 pattern_exact = r"\{\{gender\s*:\s*" + re .escape (match ) + r"\}\}"
515551 male_label = re .sub (pattern_exact , male , male_label )
516552 female_label = re .sub (pattern_exact , female , female_label )
553+ custom_label = re .sub (pattern_exact , custom , custom_label )
517554 other_label = re .sub (pattern_exact , other , other_label )
518- return {"male" : male_label , "female" : female_label , "other" : other_label }
555+ return {
556+ "male" : male_label ,
557+ "female" : female_label ,
558+ "custom" : custom_label ,
559+ "other" : other_label ,
560+ }
519561
520562
521563def string_to_yaml (str ):
0 commit comments