44Languages - Human-Language Alphabets and Locales via PyICU.
55"""
66
7- # PYTHON MODULES USED IN HERE
8-
9- # PyICU: human-language alphabets and locales
10-
11-
127from typing import List , Optional
138
14- from icu import Locale , LocaleData
15- from mathics .core .atoms import String
9+ from icu import Collator , Locale , LocaleData
10+ from mathics .core .atoms import Integer , String
1611from mathics .core .builtin import Builtin , Predefined
1712from mathics .core .convert .expression import to_mathics_list
1813from mathics .core .evaluation import Evaluation
1914
# All locales known to the installed ICU, keyed by locale identifier.
available_locales = Locale.getAvailableLocales()

# Map a display-language name to a locale identifier.
# NOTE(review): when several locales report the same display language, the
# entry iterated last silently replaces the earlier ones -- confirm that this
# is the intended choice of locale.
language2locale = {
    locale_obj.getDisplayLanguage(): locale_id
    for locale_id, locale_obj in available_locales.items()
}

# The current value of $Language
LANGUAGE = "English"
2823
24+
def eval_alphabet(language_name: String) -> Optional[List[String]]:
    """
    Return the exemplar characters for ``language_name`` as a Mathics list
    of String elements.

    The name is first looked up in ``language2locale``; when it is not found
    there, the name itself is tried as a locale identifier.

    Returns None when the resulting locale is not in ``available_locales``.
    """
    requested = language_name.value
    locale_id = language2locale.get(requested, requested)
    if locale_id in available_locales:
        # NOTE(review): (0, 0) are presumably "no options" and the standard
        # exemplar set -- confirm against the ICU LocaleData documentation.
        exemplars = LocaleData(locale_id).getExemplarSet(0, 0)
        return to_mathics_list(*exemplars, elements_conversion_fn=String)
    return None
3733
3834
def eval_alphabetic_order(string1: str, string2: str, language_name=LANGUAGE) -> int:
    """
    Order two strings with the collation rules of ``language_name``.

    The locale is obtained from language_to_locale() and the comparison is
    made by an ICU Collator created for that locale.

    Returns:
       1 if string1 sorts before string2,
      -1 if string1 sorts after string2,
       0 if the collator considers them equal.
    """
    icu_locale = Locale(language_to_locale(language_name))
    ordering = Collator.createInstance(icu_locale).compare(string1, string2)
    # The collator reports "string1 first" as a negative number; the result
    # here uses the opposite sign, so flip it while reducing to -1/0/1.
    return (ordering < 0) - (ordering > 0)
53+
54+
def language_to_locale(language_name: str, fallback: str = "en_US") -> str:
    """
    Convert a language name (e.g., "English") to an ICU locale string.

    The name is compared, ignoring case and surrounding whitespace, with the
    English display language of each entry in ``available_locales``. The
    first entry that matches is returned.

    Args:
        language_name (str): Language name in English (e.g., "English", "French").
        fallback (str): Locale string to return if no locale matches.

    Returns:
        str: The matching locale string, or ``fallback`` when there is none.
    """
    # Normalize input
    wanted = language_name.strip().lower()

    # Display names are requested in English.
    # FIXME? Generalize or do better later?
    # The display locale does not change between iterations, so build it once
    # rather than once per candidate locale.
    english = Locale("en")

    for loc_str in available_locales:
        if Locale(loc_str).getDisplayLanguage(english).lower() == wanted:
            return loc_str

    # Could not find exact match, return fallback
    return fallback
80+
81+
3982class Alphabet (Builtin ):
4083 """
4184 Basic lowercase alphabet via <url>:Unicode: https://home.unicode.org/</url> and <url>:PyICU: https://pypi.org/project/PyICU/</url>
@@ -48,7 +91,7 @@ class Alphabet(Builtin):
4891 </dl>
4992
5093 >> Alphabet["Ukrainian"]
51- = {a, ä, b, c, d, e, f, g, h, i, j, k, l, m, n, o, ö, p, q, r, s, ß, t, u, ü, v, w, x, y, z }
94+ = {ʼ, а, б, в, г, д, е, ж, з, и, й, к, л, м, н, о, п, р, с, т, у, ф, х, ц, ч, ш, щ, ь, ю, я, є, і, ї, ґ }
5295
    When no language is specified, "English" is used:
5497 >> Alphabet[]
@@ -81,6 +124,48 @@ def eval(self, alpha: String, evaluation):
81124 return
82125 return alphabet_list
83126
127+
class AlphabeticOrder(Builtin):
    """
    <url>:WMA:https://reference.wolfram.com/language/ref/AlphabeticOrder.html</url>
    <dl>
      <dt>'AlphabeticOrder'[$string_1$, $string_2$]
      <dd>gives 1 if $string_1$ appears before $string_2$ in alphabetical order, -1 if it is after, and 0 if it is identical.

      <dt>'AlphabeticOrder'[$string_1$, $string_2$, $lang$]
      <dd>uses the alphabetical order of the language $lang$.
    </dl>

    >> AlphabeticOrder["apple", "banana"]
     = 1

    >> AlphabeticOrder["parrot", "parrot"]
     = 0

    When words are the same but only differ in case, usually lowercase letters come first:
    >> AlphabeticOrder["A", "a"]
     = -1

    Longer words follow their prefixes:
    >> AlphabeticOrder["Papagayo", "Papa", "Spanish"]
     = -1

    But accented letters usually appear at the end of the alphabet:
    >> AlphabeticOrder["Papá", "Papa", "Spanish"]
     = -1

    >> AlphabeticOrder["Papá", "Papagayo", "Spanish"]
     = 1
    """

    summary_text = "compare strings according to an alphabet"

    # In Mathics builtins the docstring of an eval method is the pattern the
    # method is dispatched on, so the two docstrings below must stay as
    # written.

    # Two-argument form: the language is left to the default of
    # eval_alphabetic_order().
    def eval(self, string1: String, string2: String, evaluation: Evaluation):
        """AlphabeticOrder[string1_String, string2_String]"""
        return Integer(eval_alphabetic_order(string1.value, string2.value))

    # Three-argument form: the language name is given explicitly.
    def eval_with_lang(
        self, string1: String, string2: String, lang: String, evaluation: Evaluation
    ):
        """AlphabeticOrder[string1_String, string2_String, lang_String]"""
        return Integer(
            eval_alphabetic_order(string1.value, string2.value, lang.value)
        )
167+
168+
84169## FIXME: move to mathics-core. Will have to change references to Pymathics`$Language to $Language
85170class Language (Predefined ):
86171 """
@@ -95,15 +180,13 @@ class Language(Predefined):
95180
96181 See the language in effect used for functions like 'Alphabet[]':
97182
98- >> old_language = $Language
99- = ...
100-
    By setting its value, the letters of 'Alphabet[]' are changed:
102184
103185 >> $Language = "German"; Alphabet[]
104186 = ...
105187
106- #> $Language = old_language;
188+ #> $Language = "English"
189+ = English
107190
108191 See also <url>
109192 :Alphabet:
0 commit comments