Skip to content

Commit b66cb84

Browse files
committed
generator: add a fourth option for custom vs other gender string
fixes #1111 This allows adding another gender option, used as the default, for more neutral tones (for example, when the participant prefers not to answer), while the custom option is used when the gender is some other, but specific, custom value. The gender string now supports four parts: male/female/custom/other. If there are only two parts, custom and other will use the first one (same as male). If there are three parts, other and custom will both use the third one. Also add support for quoted gender strings, which allow a '/' character inside the quotes. For example, `{{gender:il/elle/iel/"il/elle"}}` resolves to four parts: "il", "elle", "iel" and "il/elle", respectively. Add tests for all those cases.
1 parent 7f65c2d commit b66cb84

File tree

2 files changed

+332
-251
lines changed

2 files changed

+332
-251
lines changed

packages/evolution-generator/src/scripts/generate_labels.py

Lines changed: 197 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,73 @@ def save_translations(
253253
raise e
254254

255255

256+
def add_gender_or_base_translations(
    language,
    section,
    path,
    gender_dict,
    label,
    extraSuffix,
    rowNumber,
    translations_dict,
):
    """
    Register the translations of one label, gendered or not.

    When gender_dict is provided, four keys are registered through
    add_translation: path + "_male", path + "_female" and path + "_custom"
    for the matching gender forms, plus the bare path for the "other" form,
    which acts as the default. extraSuffix is appended to each of those keys.
    When gender_dict is None but label is not, only the bare key
    (path + extraSuffix) is registered with label as its value. When both are
    None, nothing is added.

    Args:
        language (str): The language code (e.g., 'fr', 'en')
        section (str): The section name
        path (str): The base path for the translation key
        gender_dict (dict or None): Dictionary with the "male", "female",
            "custom" and "other" values, or None
        label (str): The default label value to use if gender_dict is None
        extraSuffix (str): Text appended at the end of every generated key
            (e.g. "" or "_one")
        rowNumber (int): The row number for error reporting
        translations_dict (dict): The translations dictionary to update,
            indexed by language code
    """
    if gender_dict is not None:
        # (key infix, gender_dict key) pairs, in registration order. The
        # "other" form has no infix, so it lands on the default key.
        gender_variants = (
            ("_male", "male"),
            ("_female", "female"),
            ("_custom", "custom"),
            ("", "other"),
        )
        for infix, gender_key in gender_variants:
            add_translation(
                language=language,
                section=section,
                path=path + infix + extraSuffix,
                value=gender_dict[gender_key],
                rowNumber=rowNumber,
                translations=translations_dict[language],
            )
        return

    if label is not None:
        add_translation(
            language=language,
            section=section,
            path=path + extraSuffix,
            value=label,
            rowNumber=rowNumber,
            translations=translations_dict[language],
        )
321+
322+
256323
def add_translations_from_excel(
257324
excel_file_path,
258325
labels_output_folder_path,
@@ -314,149 +381,48 @@ def add_translations_from_excel(
314381
processed_sections.add(section) # Mark section as processed
315382

316383
# Add French translations
317-
if gender_fr:
318-
add_translation(
319-
language="fr",
320-
section=section,
321-
path=path + "_male",
322-
value=gender_fr["male"],
323-
rowNumber=rowNumber,
324-
translations=translations_dict["fr"],
325-
)
326-
add_translation(
327-
language="fr",
328-
section=section,
329-
path=path + "_female",
330-
value=gender_fr["female"],
331-
rowNumber=rowNumber,
332-
translations=translations_dict["fr"],
333-
)
334-
# The "other" translation will be the default one
335-
add_translation(
336-
language="fr",
337-
section=section,
338-
path=path,
339-
value=gender_fr["other"],
340-
rowNumber=rowNumber,
341-
translations=translations_dict["fr"],
342-
)
343-
elif fr_label is not None:
344-
add_translation(
345-
language="fr",
346-
section=section,
347-
path=path,
348-
value=fr_label,
349-
rowNumber=rowNumber,
350-
translations=translations_dict["fr"],
351-
)
352-
353-
# Add French one person translation for count context if it exists
354-
if gender_fr_one:
355-
add_translation(
356-
language="fr",
357-
section=section,
358-
path=path + "_male_one",
359-
value=gender_fr_one["male"],
360-
rowNumber=rowNumber,
361-
translations=translations_dict["fr"],
362-
)
363-
add_translation(
364-
language="fr",
365-
section=section,
366-
path=path + "_female_one",
367-
value=gender_fr_one["female"],
368-
rowNumber=rowNumber,
369-
translations=translations_dict["fr"],
370-
)
371-
add_translation(
372-
language="fr",
373-
section=section,
374-
path=path + "_one",
375-
value=gender_fr_one["other"],
376-
rowNumber=rowNumber,
377-
translations=translations_dict["fr"],
378-
)
379-
elif fr_label_one:
380-
add_translation(
381-
language="fr",
382-
section=section,
383-
path=path + "_one",
384-
value=fr_label_one,
385-
rowNumber=rowNumber,
386-
translations=translations_dict["fr"],
387-
)
388-
384+
add_gender_or_base_translations(
385+
"fr",
386+
section,
387+
path,
388+
gender_fr,
389+
fr_label,
390+
"",
391+
rowNumber,
392+
translations_dict,
393+
)
394+
add_gender_or_base_translations(
395+
"fr",
396+
section,
397+
path,
398+
gender_fr_one,
399+
fr_label_one,
400+
"_one",
401+
rowNumber,
402+
translations_dict,
403+
)
389404
# Add English translations
390-
if gender_en:
391-
add_translation(
392-
language="en",
393-
section=section,
394-
path=path + "_male",
395-
value=gender_en["male"],
396-
rowNumber=rowNumber,
397-
translations=translations_dict["en"],
398-
)
399-
add_translation(
400-
language="en",
401-
section=section,
402-
path=path + "_female",
403-
value=gender_en["female"],
404-
rowNumber=rowNumber,
405-
translations=translations_dict["en"],
406-
)
407-
add_translation(
408-
language="en",
409-
section=section,
410-
path=path,
411-
value=gender_en["other"],
412-
rowNumber=rowNumber,
413-
translations=translations_dict["en"],
414-
)
415-
elif en_label is not None:
416-
add_translation(
417-
language="en",
418-
section=section,
419-
path=path,
420-
value=en_label,
421-
rowNumber=rowNumber,
422-
translations=translations_dict["en"],
423-
)
405+
add_gender_or_base_translations(
406+
"en",
407+
section,
408+
path,
409+
gender_en,
410+
en_label,
411+
"",
412+
rowNumber,
413+
translations_dict,
414+
)
415+
add_gender_or_base_translations(
416+
"en",
417+
section,
418+
path,
419+
gender_en_one,
420+
en_label_one,
421+
"_one",
422+
rowNumber,
423+
translations_dict,
424+
)
424425

425-
# Add English one person translation for count context if it exists
426-
if gender_en_one:
427-
add_translation(
428-
language="en",
429-
section=section,
430-
path=path + "_male_one",
431-
value=gender_en_one["male"],
432-
rowNumber=rowNumber,
433-
translations=translations_dict["en"],
434-
)
435-
add_translation(
436-
language="en",
437-
section=section,
438-
path=path + "_female_one",
439-
value=gender_en_one["female"],
440-
rowNumber=rowNumber,
441-
translations=translations_dict["en"],
442-
)
443-
add_translation(
444-
language="en",
445-
section=section,
446-
path=path + "_one",
447-
value=gender_en_one["other"],
448-
rowNumber=rowNumber,
449-
translations=translations_dict["en"],
450-
)
451-
elif en_label_one:
452-
add_translation(
453-
language="en",
454-
section=section,
455-
path=path + "_one",
456-
value=en_label_one,
457-
rowNumber=rowNumber,
458-
translations=translations_dict["en"],
459-
)
460426
rowNumber += 1 # Increment row number
461427

462428
# Save all translations
@@ -474,16 +440,81 @@ def add_translations_from_excel(
474440
raise e
475441

476442

443+
def _strip_surrounding_quotes(part, first_escaped, last_escaped):
    """Return part without its surrounding quote pair, if it really has one."""
    if (
        len(part) >= 2  # a lone quote character is not a pair
        and not first_escaped  # an escaped quote is literal text,
        and not last_escaped  # not a delimiting quote
        and part[0] in ('"', "'")
        and part[-1] == part[0]
    ):
        return part[1:-1]
    return part


def split_respecting_quotes(text, delimiter="/"):
    """
    Split a string by a delimiter character, but ignore delimiters inside quoted sections.

    Both single and double quotes open a quoted section, which runs until the
    same quote character is met again. A backslash escapes the next character:
    that character is kept literally (the backslash itself is dropped) and is
    never treated as a quote, a delimiter or another escape. A backslash that
    ends the text has nothing to escape and is dropped.

    After splitting, a part that starts and ends with the same unescaped quote
    character has that pair of quotes removed. Quotes that were written with a
    backslash escape, and a part consisting of a single quote character, are
    left untouched.

    Args:
        text (str): The text to split
        delimiter (str): The delimiter character

    Returns:
        list: List of split parts with surrounding quotes removed
    """
    parts = []
    current_chars = []  # characters of the part being built
    quote_char = None  # Tracks the current quote character (' or ")
    escape = False  # Tracks if the previous character was a backslash
    first_escaped = False  # first character of the part came from an escape
    last_escaped = False  # latest character of the part came from an escape

    for char in text:
        if escape:  # character is escaped, just append
            if not current_chars:
                first_escaped = True
            current_chars.append(char)
            last_escaped = True
            escape = False
        elif char == "\\":  # Handle escape character
            escape = True
        elif quote_char is not None:  # We are inside quotes
            if char == quote_char:  # End of the quoted section
                quote_char = None
            current_chars.append(char)
            last_escaped = False
        elif char == '"' or char == "'":  # Starting a quoted section
            quote_char = char
            current_chars.append(char)
            last_escaped = False
        elif char == delimiter:  # Only reached outside quotes: split here
            parts.append(
                _strip_surrounding_quotes(
                    "".join(current_chars), first_escaped, last_escaped
                )
            )
            current_chars = []
            first_escaped = False
            last_escaped = False
        else:
            current_chars.append(char)
            last_escaped = False

    # Add the last part (always present, possibly empty)
    parts.append(
        _strip_surrounding_quotes("".join(current_chars), first_escaped, last_escaped)
    )

    return parts
494+
495+
477496
def expand_gender(label):
478497
"""
479-
Replace all occurrences of {{gender:...}} or {{gender : ...}} (spaces before or after the colon) with the male, female, and other forms.
480-
Example: "Étudian{{gender:t/te/t·e}}" -> {"male": "Étudiant", "female": "Étudiante", "other": "Étudiant·e"}
481-
If only one part, 'female' is the part, 'male' and 'other' are ''.
482-
If only two parts, 'male' is first part, 'female' is second part, 'other' is ''.
483-
If three or more parts, only the first three are used: male, female, other.
498+
Replace all occurrences of {{gender:...}} or {{gender : ...}} (spaces before
499+
or after the colon) with the male, female, custom and other forms. It
500+
returns `None` if there are no gender strings in the label.
484501
485-
Note: We accept both {{gender:...}}, {{gender :...}}, {{gender: ...}}, and {{gender : ...}}
486-
(with spaces before and/or after the colon) because LibreOffice (in French) automatically inserts a space after the colon.
502+
Example: "Étudian{{gender:t/te/t·e}}" -> {"male": "Étudiant", "female":
503+
"Étudiante", "custom": "Étudiant·e", "other": "Étudiant·e"}
504+
505+
If only one part, 'female' is the part; 'male', 'custom' and 'other' are ''.
506+
If only two parts, 'male' is first part, 'female' is second part, 'custom' and
507+
'other' use the first part.
508+
If three parts, 'male' is first part, 'female' is second part, 'custom' and 'other' are the third part.
509+
If four or more parts, only the first four are used: male, female, custom, other.
510+
511+
Note: We accept both {{gender:...}}, {{gender :...}}, {{gender: ...}}, and
512+
{{gender : ...}} (with spaces before and/or after the colon) because
513+
LibreOffice (in French) automatically inserts a space after the colon.
514+
515+
Handles quoted strings in gender expressions to not split on slashes within quotes.
516+
Example: {{gender:il/elle/iel/"il/elle"}} -> "il" for male, "elle" for female,
517+
"iel" for custom, and "il/elle" for other.
487518
"""
488519
if label is None:
489520
return None
@@ -497,25 +528,36 @@ def expand_gender(label):
497528
return None
498529
male_label = label
499530
female_label = label
531+
custom_label = label
500532
other_label = label
501533
for match in matches:
502-
parts = match.split("/")
503-
if len(parts) >= 3:
504-
male, female, other = parts[0], parts[1], parts[2]
534+
# Use the split_respecting_quotes function to correctly handle quoted strings
535+
parts = split_respecting_quotes(match)
536+
537+
if len(parts) >= 4:
538+
male, female, custom, other = parts[0], parts[1], parts[2], parts[3]
539+
elif len(parts) == 3:
540+
male, female, custom, other = parts[0], parts[1], parts[2], parts[2]
505541
elif len(parts) == 2:
506-
male, female, other = parts[0], parts[1], ""
542+
male, female, custom, other = parts[0], parts[1], parts[0], parts[0]
507543
elif len(parts) == 1:
508-
male, female, other = "", parts[0], ""
544+
male, female, custom, other = "", parts[0], "", ""
509545
else:
510-
male, female, other = "", "", ""
546+
male, female, custom, other = "", "", "", ""
511547
# Replace all variants of the gender pattern (with or without spaces before and after colon) with the correct gendered string
512548
# Note: We use re.escape to escape any special characters in the match.
513549
# This ensures that the pattern is treated as a literal string.
514550
pattern_exact = r"\{\{gender\s*:\s*" + re.escape(match) + r"\}\}"
515551
male_label = re.sub(pattern_exact, male, male_label)
516552
female_label = re.sub(pattern_exact, female, female_label)
553+
custom_label = re.sub(pattern_exact, custom, custom_label)
517554
other_label = re.sub(pattern_exact, other, other_label)
518-
return {"male": male_label, "female": female_label, "other": other_label}
555+
return {
556+
"male": male_label,
557+
"female": female_label,
558+
"custom": custom_label,
559+
"other": other_label,
560+
}
519561

520562

521563
def string_to_yaml(str):

0 commit comments

Comments
 (0)