Skip to content

Commit 7d949ef

Browse files
committed
Go over character tables
* Separate entries with a blank line * Remove "unicode-equivalent" when it is the same as "ascii". Because of this, wl_to_unicode() needs to fall back to "ascii" when there is no "unicode-equivalent" but has-unicode-inverse is set. * Add a test that "unicode-equivalent" isn't the same thing as "ascii"
1 parent 9585f48 commit 7d949ef

File tree

7 files changed

+995
-37
lines changed

7 files changed

+995
-37
lines changed

mathics_scanner/characters.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
_wl_to_amstex = _data.get("wl-to-amstex", None)
4848

4949
# Conversion from WL to unicode
50-
_wl_to_unicode = _data.get("wl-to-unicode-dict", {})
50+
_wl_to_unicode = _data.get("wl-to-unicode-dict", _data.get("wl_to_ascii"))
5151
_wl_to_unicode_re = re.compile(_data.get("wl-to-unicode-re", ""))
5252

5353
# Conversion from unicode to WL
@@ -61,6 +61,7 @@
6161
aliased_characters = _data.get("aliased-characters", {})
6262

6363

64+
# Deprecated
6465
def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
6566
"""
6667
The Wolfram Language uses specific Unicode characters to represent Wolfram
@@ -81,9 +82,12 @@ def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:
8182
r = _wl_to_unicode_re if use_unicode else _wl_to_ascii_re
8283
d = _wl_to_unicode if use_unicode else _wl_to_ascii
8384

84-
return r.sub(lambda m: d[m.group(0)], wl_input)
85+
# When use_unicode is False, the lookup below will sometimes check "ascii" twice.
86+
# But this routine should be deprecated.
87+
return r.sub(lambda m: d.get(m.group(0), _wl_to_ascii.get(m.group(0))), wl_input)
8588

8689

90+
# Deprecated
8791
def replace_unicode_with_wl(unicode_input: str) -> str:
8892
"""
8993
The Wolfram Language uses specific Unicode characters to represent Wolfram

0 commit comments

Comments
 (0)