Skip to content

Commit 707e165

Browse files
committed
Update docstring; add some table consistency tests
1 parent 4d0def3 commit 707e165

File tree

2 files changed

+31
-10
lines changed

2 files changed

+31
-10
lines changed

mathics_scanner/generate/build_tables.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,19 @@ def re_from_keys(d: dict) -> str:
2121
)
2222

2323
def get_plain_text(char_name: str, char_data: dict, use_unicode: bool) -> str:
24-
"""
25-
Takes in data about a named character and returns the appropriate
26-
plain text representation according to use_unicode
24+
""":param char_name: named character to look up.
25+
:param char_data: translation dictionary.
26+
27+
:returns: if use_unicode is True, then return the standard unicode equivalent
28+
of the name if there is one.
29+
30+
Note that this may sometimes be different than the WL unicode
31+
value. An example of this is DifferentialD.
32+
33+
If use_unicode is False, return char_name if it consists of only
34+
ASCII characters.
35+
36+
Failing the above, return \\[char_name]
2737
"""
2838
uni = char_data.get("unicode-equivalent")
2939

@@ -33,7 +43,7 @@ def get_plain_text(char_name: str, char_data: dict, use_unicode: bool) -> str:
3343

3444
# If all of the characters in the unicode representation are valid
3545
# ASCII then return the unicode representation
36-
elif all(ord(c) < 127 for c in uni):
46+
elif all(ord(c) < 127 for c in uni):
3747
return uni
3848

3949
return f"\\[{char_name}]"
@@ -49,7 +59,7 @@ def compile_tables(data: dict) -> dict:
4959
# equivalent is equal to its WL unicode representation (i.e. the
5060
# "wl-unicode" field is the same as the "unicode-equivalent" field) then it
5161
# is considered redundant for us, since no conversion is needed.
52-
#
62+
#
5363
# As an optimization, we explicitly remove any redundant characters from all
5464
# JSON tables. This makes the tables smaller (therefore easier to load), as
5565
# well as the corresponding regex patterns. This implies that not all
@@ -59,16 +69,16 @@ def compile_tables(data: dict) -> dict:
5969
# `unicode_to_wl_dict`
6070

6171
# Conversion from WL to the fully qualified names
62-
wl_to_ascii_dict = {v["wl-unicode"]: get_plain_text(k, v, False)
72+
wl_to_ascii_dict = {v["wl-unicode"]: get_plain_text(k, v, use_unicode=False)
6373
for k, v in data.items()}
6474
wl_to_ascii_dict = {k: v for k, v in wl_to_ascii_dict.items() if k != v}
6575
wl_to_ascii_re = re_from_keys(wl_to_ascii_dict)
6676

6777
# Conversion from wl to unicode
6878
# We filter the dictionary after it's first created to remove redundant entries
69-
wl_to_unicode_dict = {v["wl-unicode"]: get_plain_text(k, v, True)
79+
wl_to_unicode_dict = {v["wl-unicode"]: get_plain_text(k, v, use_unicode=True)
7080
for k, v in data.items()}
71-
wl_to_unicode_dict = {k: v for k, v in wl_to_unicode_dict.items()
81+
wl_to_unicode_dict = {k: v for k, v in wl_to_unicode_dict.items()
7282
if k != v}
7383
wl_to_unicode_re = re_from_keys(wl_to_unicode_dict)
7484

@@ -78,11 +88,11 @@ def compile_tables(data: dict) -> dict:
7888
for v in data.values()
7989
if "unicode-equivalent" in v
8090
and v["has-unicode-inverse"]}
81-
unicode_to_wl_dict = {k: v for k, v in unicode_to_wl_dict.items()
91+
unicode_to_wl_dict = {k: v for k, v in unicode_to_wl_dict.items()
8292
if k != v}
8393
unicode_to_wl_re = re_from_keys(unicode_to_wl_dict)
8494

85-
# Character ranges of letterlikes
95+
# Unicode string containing all letterlikes values
8696
letterlikes = "".join(v["wl-unicode"] for v in data.values()
8797
if v["is-letter-like"])
8898

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,14 @@ def test_roundtrip():
2828
unicode_to_wl_dict[uni] == wl
2929
), f"key {k} unicode {uni}, {wl_to_unicode[uni]}"
3030

31+
32+
def test_counts():
    """Sanity-check the letter-like and named-character entries of the tables.

    Two checks are made:

    * the number of distinct characters in ``json_data["letterlikes"]`` does
      not exceed the number of keys in ``json_data["named-characters"]``;
    * the keys of ``yaml_data`` are exactly the keys of
      ``json_data["named-characters"]``.
    """
    named_characters_set = set(json_data["named-characters"])

    # "letterlikes" is deduplicated with set() before it is counted.
    letterlikes_len = len(set(json_data["letterlikes"]))
    assert letterlikes_len <= len(
        named_characters_set
    ), "Number of letter-likes should not exceed the number of all named characters"

    assert (
        set(yaml_data) == named_characters_set
    ), "There should be a named character for each WL symbol"

0 commit comments

Comments
 (0)