Skip to content

Commit 411abf1

Browse files
committed
Add operator-to-precedence table
version.py: make sure black doesn't reformat. characters.py: tolerate an empty characters.json for now.
1 parent 42c0179 commit 411abf1

File tree

4 files changed

+65
-56
lines changed

4 files changed

+65
-56
lines changed

mathics_scanner/characters.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@
1818
ROOT_DIR = pkg_resources.resource_filename("mathics_scanner", "")
1919

2020
# Load the conversion tables from disk
21-
with open(os.path.join(ROOT_DIR, "data", "characters.json"), "r") as f:
22-
_data = ujson.load(f)
21+
characters_path = os.path.join(ROOT_DIR, "data", "characters.json")
22+
if os.path.exists(characters_path):
23+
with open(characters_path, "r") as f:
24+
_data = ujson.load(f)
25+
else:
26+
_data = {}
2327

2428
# Character ranges of letters
2529
_letters = "a-zA-Z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u0103\u0106\u0107\
@@ -33,25 +37,25 @@
3337
\uf793-\uf79a\uf79c-\uf7a2\uf7a4-\uf7bd\uf800-\uf833\ufb01\ufb02"
3438

3539
# Character ranges of letterlikes
36-
_letterlikes = _data["letterlikes"]
40+
_letterlikes = _data.get("letterlikes", {})
3741

3842
# Conversion from WL to the fully qualified names
39-
_wl_to_ascii = _data["wl-to-ascii-dict"]
40-
_wl_to_ascii_re = re.compile(_data["wl-to-ascii-re"])
43+
_wl_to_ascii = _data.get("wl-to-ascii-dict", {})
44+
_wl_to_ascii_re = re.compile(_data.get("wl-to-ascii-re", ""))
4145

4246
# Conversion from WL to unicode
43-
_wl_to_unicode = _data["wl-to-unicode-dict"]
44-
_wl_to_unicode_re = re.compile(_data["wl-to-unicode-re"])
47+
_wl_to_unicode = _data.get("wl-to-unicode-dict", {})
48+
_wl_to_unicode_re = re.compile(_data.get("wl-to-unicode-re", ""))
4549

4650
# Conversion from unicode to WL
47-
_unicode_to_wl = _data["unicode-to-wl-dict"]
48-
_unicode_to_wl_re = re.compile(_data["unicode-to-wl-re"])
51+
_unicode_to_wl = _data.get("unicode-to-wl-dict", {})
52+
_unicode_to_wl_re = re.compile(_data.get("unicode-to-wl-re", ""))
4953

5054
# All supported named characters
51-
named_characters = _data["named-characters"]
55+
named_characters = _data.get("named-characters", {})
5256

5357
# ESC sequence aliases
54-
aliased_characters = _data["aliased-characters"]
58+
aliased_characters = _data.get("aliased-characters", {})
5559

5660

5761
def replace_wl_with_plain_text(wl_input: str, use_unicode=True) -> str:

mathics_scanner/data/characters.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

mathics_scanner/generate/build_tables.py

Lines changed: 48 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
import os.path as osp
1212
from pathlib import Path
1313

14+
from mathics_scanner.version import __version__
15+
1416

1517
def get_srcdir():
1618
filename = osp.normcase(osp.dirname(osp.abspath(__file__)))
@@ -21,16 +23,6 @@ def read(*rnames):
2123
return open(osp.join(get_srcdir(), *rnames)).read()
2224

2325

24-
# stores __version__ in the current namespace
25-
exec(
26-
compile(
27-
open(Path(get_srcdir()) / ".." / "version.py").read(),
28-
"mathics_scanner/version.py",
29-
"exec",
30-
)
31-
)
32-
33-
3426
def re_from_keys(d: dict) -> str:
3527
"""
3628
Takes dictionary whose keys are all strings and returns a regex that
@@ -73,8 +65,9 @@ def get_plain_text(char_name: str, char_data: dict, use_unicode: bool) -> str:
7365

7466
def compile_tables(data: dict) -> dict:
7567
"""
76-
Compiles the general table into the tables used internally by the library
77-
for fast access
68+
Compiles the general table into the tables used internally by the library.
69+
This facilitates fast access of this information by clients needing this
70+
information.
7871
"""
7972

8073
# Multiple entries in the YAML table are redundant in the following sense:
@@ -91,26 +84,26 @@ def compile_tables(data: dict) -> dict:
9184
# characters that have a unicode inverse are included in
9285
# `unicode_to_wl_dict`
9386

94-
# Conversion from WL to the fully qualified names
95-
wl_to_ascii_dict = {
96-
v["wl-unicode"]: get_plain_text(k, v, use_unicode=False)
87+
# ESC sequence aliases dictionary entry
88+
aliased_characters = {
89+
v["esc-alias"]: v["wl-unicode"] for v in data.values() if "esc-alias" in v
90+
}
91+
92+
# operator-to-precedence dictionary entry
93+
operator_to_precedence = {
94+
v["operator-name"]: v["precedence"]
9795
for k, v in data.items()
98-
if "wl-unicode" in v
96+
if "operator-name" in v and "precedence" in v
9997
}
100-
wl_to_ascii_dict = {k: v for k, v in wl_to_ascii_dict.items() if k != v}
101-
wl_to_ascii_re = re_from_keys(wl_to_ascii_dict)
10298

103-
# Conversion from wl to unicode
104-
# We filter the dictionary after it's first created to redundant entries
105-
wl_to_unicode_dict = {
106-
v["wl-unicode"]: get_plain_text(k, v, use_unicode=True)
99+
# operator-to-unicode dictionary entry
100+
operator_to_unicode = {
101+
v["operator-name"]: v["unicode-equivalent"]
107102
for k, v in data.items()
108-
if "wl-unicode" in v
103+
if "operator-name" in v and "unicode-equivalent" in v
109104
}
110-
wl_to_unicode_dict = {k: v for k, v in wl_to_unicode_dict.items() if k != v}
111-
wl_to_unicode_re = re_from_keys(wl_to_unicode_dict)
112105

113-
# Conversion from unicode to wl
106+
# Conversion from unicode to wl dictionary entry.
114107
# We filter the dictionary after it's first created to remove redundant entries
115108
unicode_to_wl_dict = {
116109
v["unicode-equivalent"]: v["wl-unicode"]
@@ -120,32 +113,20 @@ def compile_tables(data: dict) -> dict:
120113
unicode_to_wl_dict = {k: v for k, v in unicode_to_wl_dict.items() if k != v}
121114
unicode_to_wl_re = re_from_keys(unicode_to_wl_dict)
122115

123-
# Unicode string containing all letterlikes values
116+
# Unicode string containing all letterlikes values dictionary entry
124117
letterlikes = "".join(v["wl-unicode"] for v in data.values() if v["is-letter-like"])
125118

126-
# All supported named characters
119+
# All supported named characters dictionary entry
127120
named_characters = {
128121
k: v["wl-unicode"] for k, v in data.items() if "wl-unicode" in v
129122
}
130123

131-
# Operators with ASCII sequences
124+
# Operators with ASCII sequences list entry
132125
ascii_operators = sorted(
133126
[v["ascii"] for v in data.values() if "operator-name" in v and "ascii" in v]
134127
)
135128

136-
# ESC sequence aliases
137-
aliased_characters = {
138-
v["esc-alias"]: v["wl-unicode"] for v in data.values() if "esc-alias" in v
139-
}
140-
141-
# operator-to-unicode dictionary
142-
operator_to_unicode = {
143-
v["operator-name"]: v["unicode-equivalent"]
144-
for k, v in data.items()
145-
if "operator-name" in v and "unicode-equivalent" in v
146-
}
147-
148-
# ESC sequence aliases
129+
# unicode-equivalent list entry
149130
unicode_operators = sorted(
150131
[
151132
v["unicode-equivalent"]
@@ -154,19 +135,40 @@ def compile_tables(data: dict) -> dict:
154135
]
155136
)
156137

157-
# operator-to-unicode dictionary
138+
# unicode-to-operator dictionary entry
158139
unicode_to_operator = {
159140
v["unicode-equivalent"]: v["operator-name"]
160141
for k, v in data.items()
161142
if "operator-name" in v and "unicode-equivalent" in v
162143
}
144+
# Conversion from WL to the fully qualified names dictionary entry
145+
wl_to_ascii_dict = {
146+
v["wl-unicode"]: get_plain_text(k, v, use_unicode=False)
147+
for k, v in data.items()
148+
if "wl-unicode" in v
149+
}
150+
wl_to_ascii_dict = {k: v for k, v in wl_to_ascii_dict.items() if k != v}
151+
wl_to_ascii_re = re_from_keys(wl_to_ascii_dict)
152+
153+
# Conversion from wl to unicode dictionary entry
154+
# We filter the dictionary after it's first created to remove redundant entries
155+
wl_to_unicode_dict = {
156+
v["wl-unicode"]: get_plain_text(k, v, use_unicode=True)
157+
for k, v in data.items()
158+
if "wl-unicode" in v
159+
}
160+
wl_to_unicode_dict = {k: v for k, v in wl_to_unicode_dict.items() if k != v}
161+
wl_to_unicode_re = re_from_keys(wl_to_unicode_dict)
162+
163163
return {
164164
"aliased-characters": aliased_characters,
165165
"ascii-operators": ascii_operators,
166166
"letterlikes": letterlikes,
167167
"named-characters": named_characters,
168+
"operator-to-precedence": operator_to_precedence,
168169
"operator-to-unicode": operator_to_unicode,
169-
"unicode-operators": unicode_operators,
170+
"unicode-equivalent": unicode_operators,
171+
"unicode-operators": unicode_to_operator,
170172
"unicode-to-operator": unicode_to_operator,
171173
"unicode-to-wl-dict": unicode_to_wl_dict,
172174
"unicode-to-wl-re": unicode_to_wl_re,
@@ -184,7 +186,9 @@ def compile_tables(data: dict) -> dict:
184186
"ascii-operators",
185187
"letterlikes",
186188
"named-characters",
189+
"operator-to-precedence",
187190
"operator-to-unicode",
191+
"unicode-equivalent",
188192
"unicode-operators",
189193
"unicode-to-operator",
190194
"unicode-to-wl-dict",

mathics_scanner/version.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
# This file is suitable for sourcing inside POSIX shell as
55
# well as importing into Python. That's why there is no
66
# space around "=" below.
7+
# fmt: off
78
__version__ = "1.2.1.dev0" # noqa

0 commit comments

Comments
 (0)