@@ -21,9 +21,19 @@ def re_from_keys(d: dict) -> str:
2121 )
2222
2323def get_plain_text (char_name : str , char_data : dict , use_unicode : bool ) -> str :
24- """
25- Takes in data about a named character and returns the appropriate
26- plain text representation according to use_unicode
24+ """:param char_name: named character to look up.
25+ :param char_data: translation dictionary.
26+
27+ :returns: if use_unicode is True, then return the standard unicode equivalent
28+ of the name if there is one.
29+
30+ Note that this may sometimes be different than the WL unicode
31+ value. An example of this is DifferentialD.
32+
33+ If use_unicode is False, return char_name if it consists of only
34+ ASCII characters.
35+
36+ Failing the above, return \\[char_name]
2737 """
2838 uni = char_data .get ("unicode-equivalent" )
2939
@@ -33,7 +43,7 @@ def get_plain_text(char_name: str, char_data: dict, use_unicode: bool) -> str:
3343
3444 # If all of the characters in the unicode representation are valid
3545 # ASCII then return the unicode representation
36- elif all (ord (c ) < 127 for c in uni ):
46+ elif all (ord (c ) < 127 for c in uni ):
3747 return uni
3848
3949 return f"\\ [{ char_name } ]"
@@ -49,7 +59,7 @@ def compile_tables(data: dict) -> dict:
4959 # equivalent is equal to its WL unicode representation (i.e. the
5060 # "wl-unicode" field is the same as the "unicode-equivalent" field) then it
5161 # is considered redundant for us, since no conversion is needed.
52- #
62+ #
5363 # As an optimization, we explicitly remove any redundant characters from all
5464 # JSON tables. This makes the tables smaller (therefore easier to load), as
5565 # well as the corresponding regex patterns. This implies that not all
@@ -59,16 +69,16 @@ def compile_tables(data: dict) -> dict:
5969 # `unicode_to_wl_dict`
6070
6171 # Conversion from WL to the fully qualified names
62- wl_to_ascii_dict = {v ["wl-unicode" ]: get_plain_text (k , v , False )
72+ wl_to_ascii_dict = {v ["wl-unicode" ]: get_plain_text (k , v , use_unicode = False )
6373 for k , v in data .items ()}
6474 wl_to_ascii_dict = {k : v for k , v in wl_to_ascii_dict .items () if k != v }
6575 wl_to_ascii_re = re_from_keys (wl_to_ascii_dict )
6676
6777 # Conversion from wl to unicode
6878 # We filter the dictionary after it's first created to remove redundant entries
69- wl_to_unicode_dict = {v ["wl-unicode" ]: get_plain_text (k , v , True )
79+ wl_to_unicode_dict = {v ["wl-unicode" ]: get_plain_text (k , v , use_unicode = True )
7080 for k , v in data .items ()}
71- wl_to_unicode_dict = {k : v for k , v in wl_to_unicode_dict .items ()
81+ wl_to_unicode_dict = {k : v for k , v in wl_to_unicode_dict .items ()
7282 if k != v }
7383 wl_to_unicode_re = re_from_keys (wl_to_unicode_dict )
7484
@@ -78,11 +88,11 @@ def compile_tables(data: dict) -> dict:
7888 for v in data .values ()
7989 if "unicode-equivalent" in v
8090 and v ["has-unicode-inverse" ]}
81- unicode_to_wl_dict = {k : v for k , v in unicode_to_wl_dict .items ()
91+ unicode_to_wl_dict = {k : v for k , v in unicode_to_wl_dict .items ()
8292 if k != v }
8393 unicode_to_wl_re = re_from_keys (unicode_to_wl_dict )
8494
85- # Character ranges of letterlikes
95+ # Unicode string containing all letterlikes values
8696 letterlikes = "" .join (v ["wl-unicode" ] for v in data .values ()
8797 if v ["is-letter-like" ])
8898
0 commit comments