11import re
2+ import unicodedata
3+
# German umlauts and the sharp s are transliterated to their conventional
# two-letter ASCII forms (ö -> oe). This has to happen before generic accent
# stripping, which would otherwise reduce them to the bare base letter (ö -> o).
# Built once at import time so every call reuses the same translation table.
GERMAN_UMLAUTS_MAPPING = str.maketrans({
    'ä': 'ae',
    'ö': 'oe',
    'ü': 'ue',
    'Ä': 'Ae',
    'Ö': 'Oe',
    'Ü': 'Ue',
    'ß': 'ss',
})


def normalize_special_characters(text: str) -> str:
    """Map German umlauts and accented characters to ASCII equivalents.

    German umlauts and ``ß`` are expanded to two letters (``ö`` -> ``oe``,
    ``ß`` -> ``ss``). Every other accented character is reduced to its base
    character by decomposing it and dropping the combining marks
    (``é`` -> ``e``, ``ñ`` -> ``n``).

    Args:
        text: The string to normalize. May be in composed or decomposed
            Unicode form.

    Returns:
        The input with umlauts transliterated and combining marks removed.
        Characters that have no decomposition are returned unchanged.
    """
    # Compose first: the umlaut table only contains precomposed characters,
    # so a decomposed umlaut ('o' + U+0308) would otherwise skip the table
    # and be stripped down to a plain 'o' in the last step.
    text = unicodedata.normalize('NFC', text)

    # Step 1: German-specific transliteration (ö -> oe, ß -> ss, ...).
    text = text.translate(GERMAN_UMLAUTS_MAPPING)

    # Step 2: NFKD splits each remaining accented character into its base
    # character followed by combining marks (é -> e + U+0301).
    decomposed = unicodedata.normalize('NFKD', text)

    # Step 3: drop the combining marks and rejoin the base characters.
    return "".join(c for c in decomposed if not unicodedata.combining(c))
6020
6121
6222def sanitize_name (text ):
@@ -68,5 +28,7 @@ def sanitize_name(text):
6828 clean_name = normalize_special_characters (text )
6929 # Replace non-allowed chars with hyphens
7030 clean_name = re .sub (r'[^a-zA-Z0-9-]+' , '-' , clean_name )
31+ # Collapse runs of consecutive hyphens (e.g. "--") into a single hyphen
32+ clean_name = re .sub (r'-+' , '-' , clean_name )
7133 # Strip leading/trailing hyphens and spaces
7234 return clean_name .strip ('-' )
0 commit comments