Skip to content

Commit c758102

Browse files
authored
Merge pull request #53 from Mathics3/ascii-op-to-unicode
Revise to add ascii operator tables
2 parents 8b994d3 + 0734e42 commit c758102

File tree

8 files changed

+93
-34
lines changed

8 files changed

+93
-34
lines changed

.github/workflows/mathics.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,6 @@ jobs:
2929
run: |
3030
# Until next Mathics3/mathics-core release is out...
3131
python -m pip install -e git+https://github.com/Mathics3/mathics-core#egg=Mathics3[full]
32+
(cd src/mathics3 && ./admin-tools/make-op-tables.sh )
3233
# pip install Mathics3[full]
33-
make check-mathics
34+
MATHICS_CHARACTER_ENCODING="ASCII" make check-mathics

admin-tools/make-tables.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
# Create a complete set of tables.
# This just runs build_tables.py in this distribution.
#
# The interpreter can be overridden through the PYTHON environment
# variable; it defaults to "python".
bs=${BASH_SOURCE[0]}
# Quote the expansions so a checkout path containing spaces still works.
mydir=$(dirname "$bs")
PYTHON=${PYTHON:-python}

# Stop if the data directory cannot be entered; otherwise characters.json
# would be written into whatever directory the caller happened to be in.
cd "$mydir/../mathics_scanner/data" || exit 1
# $PYTHON is left unquoted on purpose so that it may carry extra arguments.
$PYTHON ../generate/build_tables.py -o characters.json

mathics_scanner/data/named-characters.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,7 @@ CapitalDelta:
653653

654654
CapitalDifferentialD:
655655
amslatex: "\\CapitalDifferentialD"
656+
ascii: "d"
656657
esc-alias: DD
657658
has-unicode-inverse: true
658659
is-letter-like: true
@@ -1787,6 +1788,8 @@ DifferentialD:
17871788
has-unicode-inverse: true
17881789
# This can't be letter-like because it is used in derivatives as a function
17891790
is-letter-like: false
1791+
# TODO: This should be a prefix operator
1792+
operator-name: DifferentialD
17901793
unicode-equivalent: "\U0001D451"
17911794
unicode-equivalent-name: MATHEMATICAL ITALIC SMALL D
17921795
wl-unicode: "\uF74C"
@@ -5026,6 +5029,7 @@ Integral:
50265029
esc-alias: int
50275030
has-unicode-inverse: false
50285031
is-letter-like: false
5032+
# TODO: This should be a prefix operator
50295033
operator-name: Integral
50305034
unicode-equivalent: "\u222B"
50315035
unicode-equivalent-name: INTEGRAL

mathics_scanner/generate/build_tables.py

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
1-
#!/usr/bin/env python3
1+
#!/usr/bin/env python
22
# This script reads the data from named-characters and converts it to the
33
# format used by the library internally
44

5-
from collections import OrderedDict
6-
75
import click
86

97
import json
@@ -120,6 +118,13 @@ def compile_tables(data: dict) -> dict:
120118
if "operator-name" in v and ("unicode-equivalent" in v or "ascii" in v)
121119
}
122120

121+
# operator-to-ascii or character symbol name
122+
operator_to_ascii = {
123+
v["operator-name"]: v.get("ascii", rf'\[{v["operator-name"]}]')
124+
for k, v in data.items()
125+
if "operator-name" in v and ("unicode-equivalent" in v or "ascii" in v)
126+
}
127+
123128
# Conversion from unicode or ascii to wl dictionary entry.
124129
# We filter the dictionary after it's first created to remove redundant entries
125130
unicode_to_wl_dict = {
@@ -144,20 +149,30 @@ def compile_tables(data: dict) -> dict:
144149
if "wl-unicode" in v
145150
}
146151

147-
# Operators with ASCII sequences list entry
148-
ascii_operators = sorted(
149-
[v["ascii"] for v in data.values() if "operator-name" in v and "ascii" in v]
150-
)
151-
152-
# Mathics core stores the ascii operator value, Use that to get an operator name
153-
# Operators with ASCII sequences list entry
154-
ascii_operator_to_name = OrderedDict(
155-
{
156-
v["ascii"]: rf'\[{v["operator-name"]}]'
157-
for v in data.values()
158-
if "operator-name" in v and "ascii" in v
159-
}.items()
160-
)
152+
operator_names = sorted([k for k, v in data.items() if "operator-name" in v])
153+
154+
ascii_operators = []
155+
ascii_operator_to_character_symbol = {}
156+
ascii_operator_to_symbol = {}
157+
ascii_operator_to_unicode = {}
158+
ascii_operator_to_wl_unicode = {}
159+
160+
for operator_name in operator_names:
161+
# Operators with ASCII sequences list entry
162+
v = data[operator_name]
163+
ascii_name = v.get("ascii", None)
164+
if ascii_name is not None:
165+
ascii_operators.append(v["ascii"])
166+
ascii_operator_to_character_symbol[ascii_name] = rf'\[{v["operator-name"]}]'
167+
ascii_operator_to_symbol[ascii_name] = v["operator-name"]
168+
# Mathics core stores the ASCII operator value. Use that to get the standard Unicode
169+
# symbol and, failing that, use the ASCII sequence.
170+
ascii_operator_to_unicode[ascii_name] = v.get(
171+
"unicode-equivalent", v.get("ascii")
172+
)
173+
ascii_operator_to_wl_unicode[ascii_name] = v.get(
174+
"wl-unicode", v.get("ascii")
175+
)
161176

162177
# unicode-to-operator dictionary entry
163178
unicode_to_operator = {
@@ -187,13 +202,16 @@ def compile_tables(data: dict) -> dict:
187202
return {
188203
"aliased-characters": aliased_characters,
189204
"ascii-operators": ascii_operators,
190-
"ascii-operator-to-name": ascii_operator_to_name,
205+
"ascii-operator-to-symbol": ascii_operator_to_symbol,
206+
"ascii-operator-to-character-symbol": ascii_operator_to_character_symbol,
207+
"ascii-operator-to-unicode": ascii_operator_to_unicode,
208+
"ascii-operator-to-wl-unicode": ascii_operator_to_wl_unicode,
191209
"letterlikes": letterlikes,
192210
"named-characters": named_characters,
211+
"operator-names": operator_names,
193212
"operator-to-precedence": operator_to_precedence,
213+
"operator-to-ascii": operator_to_ascii,
194214
"operator-to-unicode": operator_to_unicode,
195-
# unicode-operators is irregular, but this is what
196-
# mathics-pygments uses
197215
"unicode-operators": unicode_to_operator,
198216
"unicode-to-wl-dict": unicode_to_wl_dict,
199217
"unicode-to-wl-re": unicode_to_wl_re,
@@ -210,13 +228,17 @@ def compile_tables(data: dict) -> dict:
210228
ALL_FIELDS = [
211229
"aliased-characters",
212230
"ascii-operators",
231+
"ascii-operator-to-character-symbol",
232+
"ascii-operator-to-symbol",
233+
"ascii-operator-to-unicode",
234+
"ascii-operator-to-wl-unicode",
213235
"letterlikes",
214236
"named-characters",
237+
"operator-names",
238+
"operator-to-ascii",
215239
"operator-to-precedence",
216240
"operator-to-unicode",
217-
"unicode-equivalent",
218-
"unicode-operators",
219-
"unicode-to-operator",
241+
# "unicode-operators", # not used yet
220242
"unicode-to-wl-dict",
221243
"unicode-to-wl-re",
222244
"wl-to-amslatex",

mathics_scanner/tokeniser.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,13 @@ def _skip_blank(self):
424424
while True:
425425
if self.pos >= len(self.code):
426426
if comment:
427-
self.incomplete()
427+
try:
428+
self.incomplete()
429+
except ValueError:
430+
# Funny symbols like | in comments can cause a ValueError.
431+
# Until we have a better fix -- like noting we are inside a comment and
432+
# should not try to substitute symbols -- ignore.
433+
pass
428434
else:
429435
break
430436
if comment:

mathics_scanner/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
# well as importing into Python. That's why there is no
55
# space around "=" below.
66
# fmt: off
7-
__version__="1.2.5.dev0" # noqa
7+
__version__="1.3.0.dev0" # noqa

setup.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
[email protected] and ask for help.
2626
"""
2727

28+
import atexit
29+
import pkg_resources
2830
import re
31+
import subprocess
2932
import sys
3033
import os.path as osp
3134
import platform
@@ -127,3 +130,15 @@ def subdirs(root, file="*.*", depth=10):
127130
],
128131
# TODO: could also include long_description, download_url,
129132
)
133+
134+
135+
def build_json_table() -> int:
    """Run the table-generation program to create the JSON tables.

    Looks up ``generate/build_tables.py`` under the resource path of the
    ``mathics_scanner`` package and runs it with the Python interpreter
    that is executing this file.

    Returns:
        The return code of the table-generation subprocess.
    """
    root_dir = pkg_resources.resource_filename("mathics_scanner", "")
    build_tables_program = osp.join(root_dir, "generate", "build_tables.py")
    # Only the placeholder goes inside the braces; a literal "f" in front of
    # it would be printed as part of the path.
    print(f"Building JSON tables via {build_tables_program}")
    # Pass an argument list (no shell) and reuse the current interpreter so
    # the generator runs in the same environment as this setup script.
    result = subprocess.run([sys.executable, build_tables_program])
    return result.returncode
142+
143+
144+
atexit.register(build_json_table)

test/test_ascii.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,19 @@
1010

1111

1212
def test_ascii():
13-
ascii_operator_to_name = json_data["ascii-operator-to-name"]
13+
ascii_operator_to_character_symbol = json_data["ascii-operator-to-character-symbol"]
14+
ascii_operator_to_symbol = json_data["ascii-operator-to-symbol"]
1415
ascii_operators = json_data["ascii-operators"]
15-
operator_keys = frozenset(ascii_operator_to_name.keys())
16+
operator_keys = frozenset(ascii_operator_to_symbol.keys())
1617
# operator_to_precedence = json_data["operator-to-precedence"]
1718
for chars in json_data["ascii-operators"]:
1819
assert chars in ascii_operators
1920
assert chars in operator_keys
2021
# assert chars in unicode_to_operator.keys()
21-
name = ascii_operator_to_name.get(chars)
22-
assert name is not None
23-
assert name.startswith(r"\[")
24-
assert name.endswith(r"]")
25-
raw_name = name[len(r"\[") : -len(r"]")]
26-
assert raw_name in yaml_data
22+
char_symbol = ascii_operator_to_character_symbol.get(chars)
23+
assert char_symbol is not None
24+
assert char_symbol.startswith(r"\[")
25+
assert char_symbol.endswith(r"]")
26+
raw_char_symbol = char_symbol[len(r"\[") : -len(r"]")]
27+
assert raw_char_symbol in yaml_data
28+
assert raw_char_symbol in ascii_operator_to_symbol.values()

0 commit comments

Comments
 (0)