Skip to content

Commit a0aad4f

Browse files
committed
Adding operator testing
1 parent 7051a0d commit a0aad4f

File tree

2 files changed

+104
-11
lines changed

2 files changed

+104
-11
lines changed

mathics_scanner/data/named-characters.yml

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,19 @@ AGrave:
7575
unicode-equivalent-name: LATIN SMALL LETTER A WITH GRAVE
7676
wl-unicode: "\xE0"
7777
wl-unicode-name: LATIN SMALL LETTER A WITH GRAVE
78+
Apply:
79+
ascii: "@@"
80+
has-unicode-inverse: false
81+
is-letter-like: false
82+
operator-name: Apply
83+
# @@@ replaces heads at level 1 of expr by f.
84+
# We add "1" at the end so keys are unique, but
85+
# the WL operator is still "Apply".
86+
Apply1:
87+
ascii: "@@@"
88+
has-unicode-inverse: false
89+
is-letter-like: false
90+
operator-name: Apply
7891
AHat:
7992
esc-alias: a^
8093
has-unicode-inverse: false
@@ -107,6 +120,7 @@ Aleph:
107120
unicode-equivalent-name: ALEF SYMBOL
108121
wl-unicode: "\u2135"
109122
wl-unicode-name: ALEF SYMBOL
123+
# Note: the unicode for AliasDelimiter doesn't look like "-"
110124
AliasDelimiter:
111125
has-unicode-inverse: false
112126
is-letter-like: false
@@ -1430,6 +1444,7 @@ Distributed:
14301444
is-letter-like: false
14311445
wl-unicode: "\uF3D2"
14321446
Divide:
1447+
ascii: "/"
14331448
esc-alias: div
14341449
has-unicode-inverse: true
14351450
is-letter-like: false
@@ -4065,6 +4080,12 @@ Infinity:
40654080
unicode-equivalent-name: INFINITY
40664081
wl-unicode: "\u221E"
40674082
wl-unicode-name: INFINITY
4083+
# Infix isn't really an operator
4084+
# Infix:
4085+
# ascii: "~"
4086+
# has-unicode-inverse: false
4087+
# is-letter-like: false
4088+
# operator-name: Infix
40684089
Integral:
40694090
esc-alias: int
40704091
has-unicode-inverse: false
@@ -4405,12 +4426,13 @@ LeoSign:
44054426
wl-unicode: "\u264C"
44064427
wl-unicode-name: LEO
44074428
Less:
4429+
ascii: "<"
44084430
has-unicode-inverse: true
44094431
is-letter-like: false
44104432
operator-name: Less
4411-
unicode-equivalent: <
4433+
unicode-equivalent: "<"
44124434
unicode-equivalent-name: LESS-THAN SIGN
4413-
wl-unicode: <
4435+
wl-unicode: "<"
44144436
wl-unicode-name: LESS-THAN SIGN
44154437
LessEqual:
44164438
ascii: "<="
@@ -5313,6 +5335,11 @@ Pluto:
53135335
unicode-equivalent-name: PLUTO
53145336
wl-unicode: "\u2647"
53155337
wl-unicode-name: PLUTO
5338+
Postfix:
5339+
ascii: "//"
5340+
has-unicode-inverse: false
5341+
is-letter-like: false
5342+
operator-name: Postfix
53165343
Power:
53175344
ascii: "^"
53185345
has-unicode-inverse: true
@@ -5350,6 +5377,12 @@ PrecedesTilde:
53505377
unicode-equivalent-name: PRECEDES OR EQUIVALENT TO
53515378
wl-unicode: "\u227E"
53525379
wl-unicode-name: PRECEDES OR EQUIVALENT TO
5380+
# Prefix isn't really an operator
5381+
# Prefix:
5382+
# ascii: "@"
5383+
# has-unicode-inverse: false
5384+
# is-letter-like: false
5385+
# operator-name: Prefix
53535386
Prime:
53545387
esc-alias: ''''
53555388
has-unicode-inverse: false
@@ -5556,10 +5589,11 @@ RawSemicolon:
55565589
unicode-equivalent-name: SEMICOLON
55575590
wl-unicode: ;
55585591
wl-unicode-name: SEMICOLON
5592+
# RawSlash is not an operator. Divide, which
5593+
# looks the same, however, is.
55595594
RawSlash:
55605595
has-unicode-inverse: true
55615596
is-letter-like: false
5562-
operator-name: Divide
55635597
unicode-equivalent: /
55645598
unicode-equivalent-name: SOLIDUS
55655599
wl-unicode: /
@@ -5898,15 +5932,17 @@ RoundSpaceIndicator:
58985932
is-letter-like: false
58995933
wl-unicode: "\uF3B2"
59005934
Rule:
5901-
esc-alias: ->
5935+
ascii: "->"
5936+
esc-alias: "->"
59025937
has-unicode-inverse: false
59035938
is-letter-like: false
59045939
operator-name: Rule
59055940
unicode-equivalent: "\u2192"
59065941
unicode-equivalent-name: RIGHTWARDS ARROW
59075942
wl-unicode: "\uF522"
59085943
RuleDelayed:
5909-
esc-alias: :>
5944+
ascii: ":>"
5945+
esc-alias: ":>"
59105946
has-unicode-inverse: true
59115947
operator-name: RuleDelayed
59125948
is-letter-like: false
@@ -6722,6 +6758,11 @@ TabKey:
67226758
has-unicode-inverse: false
67236759
is-letter-like: false
67246760
wl-unicode: "\uF7BE"
6761+
TagSet:
6762+
ascii: "/:"
6763+
has-unicode-inverse: false
6764+
is-letter-like: false
6765+
operator-name: TagSet
67256766
Tau:
67266767
esc-alias: t
67276768
has-unicode-inverse: false
@@ -6930,11 +6971,6 @@ UnSameQ:
69306971
has-unicode-inverse: false
69316972
is-letter-like: false
69326973
operator-name: UnSameQ
6933-
UnSet:
6934-
ascii: "=."
6935-
has-unicode-inverse: false
6936-
is-letter-like: false
6937-
operator-name: UnSet
69386974
Union:
69396975
esc-alias: un
69406976
has-unicode-inverse: false
@@ -6950,6 +6986,11 @@ UnionPlus:
69506986
unicode-equivalent-name: MULTISET UNION
69516987
wl-unicode: "\u228E"
69526988
wl-unicode-name: MULTISET UNION
6989+
Unset:
6990+
ascii: "=."
6991+
has-unicode-inverse: false
6992+
is-letter-like: false
6993+
operator-name: Unset
69536994
UpArrow:
69546995
has-unicode-inverse: false
69556996
is-letter-like: false
@@ -7031,6 +7072,16 @@ Upsilon:
70317072
unicode-equivalent-name: GREEK SMALL LETTER UPSILON
70327073
wl-unicode: "\u03C5"
70337074
wl-unicode-name: GREEK SMALL LETTER UPSILON
7075+
UpSet:
7076+
ascii: "^="
7077+
has-unicode-inverse: false
7078+
is-letter-like: false
7079+
operator-name: UpSet
7080+
UpSetDelayed:
7081+
ascii: "^:="
7082+
has-unicode-inverse: false
7083+
is-letter-like: false
7084+
operator-name: UpSetDelayed
70347085

70357086
# The rendering of Uranus at https://reference.wolfram.com/language/ref/character/Uranus.html
70367087
# looks more like U+26E2 (Astronomical Symbol for Uranus) than the Standard Unicode equivalent

test/test_general_yaml_sanity.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
yaml_data = load_mathics_character_yaml()
77

8+
89
def check_attr_is_invertible(attr: str):
910
for v in yaml_data.values():
1011
if attr in v:
@@ -39,6 +40,43 @@ def test_yaml_field_names():
3940
assert diff == set(), f"Item {k} has unknown fields {diff}"
4041

4142

43+
def test_operators():
    """Check that operator entries in the YAML character table are consistent.

    For every entry in ``yaml_data`` this verifies that:

    * an "ascii" value longer than one character comes with an
      "operator-name";
    * an entry without an "ascii" field has a "wl-unicode" field;
    * an entry with an "operator-name" is not letter-like, has an "ascii"
      field, and does not reuse the "ascii" value of an earlier operator;
    * operator names are unique, except for those in ``dup_operators``.
    """
    ascii_seen = set()
    operator_name_seen = set()
    # Operator names allowed to appear under more than one YAML key.
    dup_operators = {"Apply"}

    for k, v in yaml_data.items():
        if "ascii" in v:
            if len(v["ascii"]) > 1:
                assert (
                    "operator-name" in v
                ), "In %s: ASCII with more than one characters must be an operator" % (
                    k
                )
        else:
            assert "wl-unicode" in v, (
                "In %s: there must be either an ascii name or have a wl-unicode"
                % k
            )

        # The remaining checks apply to operators only.
        if "operator-name" not in v:
            continue

        assert not v["is-letter-like"], "Operator %s should not be letter-like" % k

        assert "ascii" in v, 'Operator %s should have an "ascii" field' % k
        # Named ascii_form so the builtin ascii() is not shadowed.
        ascii_form = v["ascii"]
        assert ascii_form not in ascii_seen, (
            'Operator %s has "ascii" value %r which has already been seen'
            % (k, ascii_form)
        )
        ascii_seen.add(ascii_form)

        operator_name = v["operator-name"]
        if operator_name in dup_operators:
            # Known duplicates are exempt from the uniqueness check below.
            continue
        assert (
            operator_name not in operator_name_seen
        ), "Operator name %s has operator %s already been seen" % (operator_name, k)
        operator_name_seen.add(operator_name)
78+
79+
4280
def test_wl_unicode_name():
4381
for k, v in yaml_data.items():
4482
if "wl-unicode" not in v:
@@ -77,7 +115,9 @@ def test_unicode_name():
77115
try:
78116
expected_name = " + ".join(unicodedata.name(c) for c in uni)
79117
except ValueError:
80-
import pdb; pdb.set_trace()
118+
import pdb
119+
120+
pdb.set_trace()
81121
raise ValueError(
82122
f"{k}'s unicode-equivalent doesn't have a unicode name (it's not valid unicode)"
83123
)
@@ -97,6 +137,7 @@ def test_unicode_name():
97137
"unicode-equivalent-name" not in v
98138
), f"{k} has unicode-equivalent-name set to {v['unicode-equivalent-name']} but it doesn't have a unicode equivalent"
99139

140+
100141
def test_wl_unicode():
101142
for k, v in yaml_data.items():
102143
if "operator-name" in v:
@@ -106,6 +147,7 @@ def test_wl_unicode():
106147
continue
107148
assert "wl-unicode" in v, f"{k} has no wl-unicode attribute"
108149

150+
109151
def test_general_yaml_sanity():
110152
# Check if required attributes are in place
111153
check_has_attr("is-letter-like")

0 commit comments

Comments
 (0)