Skip to content

Commit 063e487

Browse files
committed
More operators
Have to adjust expectations since there can be more than one ASCII sequence for an operator, e.g. Function. ASCII operators don't need to have Unicode equivalents.
1 parent 5a94353 commit 063e487

File tree

2 files changed

+124
-26
lines changed

2 files changed

+124
-26
lines changed

mathics_scanner/data/named-characters.yml

Lines changed: 116 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@
2222
# or unicode symbol is not in that, don't use it here.
2323
#
2424
# wl-unicode: The unicode code point used by Mathics internally to represent
25-
# the named character.
25+
# the named character. If it is the same as unicode-equivalent
26+
# it can be omitted
2627
#
27-
# wl-unicode-name: The name of the character corresponding to `wl-unicode`, if it exists.
28+
# wl-unicode-name: The name of the character corresponding to `wl-unicode`, if it exists. If it is the same as unicode-equivalent-name it can be omitted.
2829
# It will be mentioned in Wolfram Language docs if it exists.
2930

3031
AAcute:
@@ -51,6 +52,11 @@ ACup:
5152
unicode-equivalent-name: LATIN SMALL LETTER A WITH BREVE
5253
wl-unicode: "\u0103"
5354
wl-unicode-name: LATIN SMALL LETTER A WITH BREVE
55+
AddTo:
56+
ascii: "+="
57+
has-unicode-inverse: false
58+
is-letter-like: false
59+
operator-name: AddTo
5460
ADoubleDot:
5561
esc-alias: a"
5662
has-unicode-inverse: false
@@ -75,6 +81,14 @@ AGrave:
7581
unicode-equivalent-name: LATIN SMALL LETTER A WITH GRAVE
7682
wl-unicode: "\xE0"
7783
wl-unicode-name: LATIN SMALL LETTER A WITH GRAVE
84+
AHat:
85+
esc-alias: a^
86+
has-unicode-inverse: false
87+
is-letter-like: false
88+
unicode-equivalent: "\xE2"
89+
unicode-equivalent-name: LATIN SMALL LETTER A WITH CIRCUMFLEX
90+
wl-unicode: "\xE2"
91+
wl-unicode-name: LATIN SMALL LETTER A WITH CIRCUMFLEX
7892
Apply:
7993
ascii: "@@"
8094
has-unicode-inverse: false
@@ -88,14 +102,6 @@ Apply1:
88102
has-unicode-inverse: false
89103
is-letter-like: false
90104
operator-name: Apply
91-
AHat:
92-
esc-alias: a^
93-
has-unicode-inverse: false
94-
is-letter-like: false
95-
unicode-equivalent: "\xE2"
96-
unicode-equivalent-name: LATIN SMALL LETTER A WITH CIRCUMFLEX
97-
wl-unicode: "\xE2"
98-
wl-unicode-name: LATIN SMALL LETTER A WITH CIRCUMFLEX
99105
ARing:
100106
esc-alias: ao
101107
has-unicode-inverse: false
@@ -299,6 +305,21 @@ BlackRook:
299305
unicode-equivalent-name: BLACK CHESS ROOK
300306
wl-unicode: "\u265C"
301307
wl-unicode-name: BLACK CHESS ROOK
308+
Blank:
309+
ascii: "_"
310+
has-unicode-inverse: false
311+
is-letter-like: false
312+
operator-name: Blank
313+
BlankNullSequence:
314+
ascii: "___"
315+
has-unicode-inverse: false
316+
is-letter-like: false
317+
operator-name: BlankNullSequence
318+
BlankSequence:
319+
ascii: "__"
320+
has-unicode-inverse: false
321+
is-letter-like: false
322+
operator-name: BlankSequence
302323
Breve:
303324
esc-alias: bv
304325
has-unicode-inverse: false
@@ -1320,6 +1341,12 @@ Decrement:
13201341
has-unicode-inverse: false
13211342
is-letter-like: false
13221343
operator-name: Decrement
1344+
# See also RawQuestion
1345+
Definition:
1346+
ascii: "?"
1347+
has-unicode-inverse: false
1348+
is-letter-like: false
1349+
operator-name: Definition
13231350
Degree:
13241351
esc-alias: deg
13251352
has-unicode-inverse: false
@@ -1349,6 +1376,11 @@ Delta:
13491376
unicode-equivalent-name: GREEK SMALL LETTER DELTA
13501377
wl-unicode: "\u03B4"
13511378
wl-unicode-name: GREEK SMALL LETTER DELTA
1379+
Derivative:
1380+
ascii: "'"
1381+
has-unicode-inverse: false
1382+
is-letter-like: false
1383+
operator-name: Derivative
13521384
DescendingEllipsis:
13531385
has-unicode-inverse: false
13541386
is-letter-like: false
@@ -1463,6 +1495,11 @@ Divide:
14631495
unicode-equivalent-name: DIVISION SIGN
14641496
wl-unicode: "\xF7"
14651497
wl-unicode-name: DIVISION SIGN
1498+
DivideBy:
1499+
ascii: "/="
1500+
has-unicode-inverse: false
1501+
is-letter-like: false
1502+
operator-name: DivideBy
14661503
Divides:
14671504
esc-alias: divides
14681505
has-unicode-inverse: false
@@ -3422,13 +3459,24 @@ FreakedSmiley:
34223459
has-unicode-inverse: false
34233460
is-letter-like: true
34243461
wl-unicode: "\uF721"
3462+
# See also RawAmpersand
3463+
# Function:
3464+
# ascii: "&"
3465+
# has-unicode-inverse: false
3466+
# is-letter-like: false
3467+
# operator-name: Function
3468+
# wl-unicode: "&"
3469+
# Alternative form of Function
3470+
# See also RightTeeArrow
34253471
Function:
3426-
esc-alias: '|->'
3427-
has-unicode-inverse: true
3472+
ascii: "|->"
3473+
has-unicode-inverse: false
34283474
is-letter-like: false
3475+
operator-name: Function
34293476
unicode-equivalent: "\u21A6"
34303477
unicode-equivalent-name: RIGHTWARDS ARROW FROM BAR
34313478
wl-unicode: "\uF4A1"
3479+
wl-unicode-name: RIGHTWARDS ARROW FROM BAR
34323480
Gamma:
34333481
esc-alias: g
34343482
has-unicode-inverse: false
@@ -3444,6 +3492,11 @@ GeminiSign:
34443492
unicode-equivalent-name: GEMINI
34453493
wl-unicode: "\u264A"
34463494
wl-unicode-name: GEMINI
3495+
Get:
3496+
ascii: "<<"
3497+
has-unicode-inverse: false
3498+
is-letter-like: false
3499+
operator-name: Get
34473500
Gimel:
34483501
esc-alias: gi
34493502
has-unicode-inverse: false
@@ -4101,6 +4154,11 @@ Infinity:
41014154
# has-unicode-inverse: false
41024155
# is-letter-like: false
41034156
# operator-name: Infix
4157+
Information:
4158+
ascii: "??"
4159+
has-unicode-inverse: false
4160+
is-letter-like: false
4161+
operator-name: Information
41044162
Integral:
41054163
esc-alias: int
41064164
has-unicode-inverse: false
@@ -5447,6 +5505,16 @@ Psi:
54475505
unicode-equivalent-name: GREEK SMALL LETTER PSI
54485506
wl-unicode: "\u03C8"
54495507
wl-unicode-name: GREEK SMALL LETTER PSI
5508+
Put:
5509+
ascii: ">>"
5510+
has-unicode-inverse: false
5511+
is-letter-like: false
5512+
operator-name: Put
5513+
PutAppend:
5514+
ascii: ">>>"
5515+
has-unicode-inverse: false
5516+
is-letter-like: false
5517+
operator-name: PutAppend
54505518
QuarterNote:
54515519
has-unicode-inverse: false
54525520
is-letter-like: false
@@ -5462,10 +5530,11 @@ RHacek:
54625530
unicode-equivalent-name: LATIN SMALL LETTER R WITH CARON
54635531
wl-unicode: "\u0159"
54645532
wl-unicode-name: LATIN SMALL LETTER R WITH CARON
5533+
# See also Function
54655534
RawAmpersand:
54665535
has-unicode-inverse: false
54675536
is-letter-like: false
5468-
unicode-equivalent: '&'
5537+
unicode-equivalent: "&"
54695538
unicode-equivalent-name: AMPERSAND
54705539
wl-unicode: '&'
54715540
wl-unicode-name: AMPERSAND
@@ -5570,16 +5639,16 @@ RawPercent:
55705639
RawQuestion:
55715640
has-unicode-inverse: false
55725641
is-letter-like: false
5573-
unicode-equivalent: '?'
5642+
unicode-equivalent: "?"
55745643
unicode-equivalent-name: QUESTION MARK
5575-
wl-unicode: '?'
5644+
wl-unicode: "?"
55765645
wl-unicode-name: QUESTION MARK
55775646
RawQuote:
55785647
has-unicode-inverse: false
55795648
is-letter-like: false
5580-
unicode-equivalent: ''''
5649+
unicode-equivalent: "'"
55815650
unicode-equivalent-name: APOSTROPHE
5582-
wl-unicode: ''''
5651+
wl-unicode: "'"
55835652
wl-unicode-name: APOSTROPHE
55845653
RawReturn:
55855654
has-unicode-inverse: true
@@ -5671,6 +5740,16 @@ RegisteredTrademark:
56715740
unicode-equivalent-name: REGISTERED SIGN
56725741
wl-unicode: "\xAE"
56735742
wl-unicode-name: REGISTERED SIGN
5743+
Repeated:
5744+
ascii: ".."
5745+
has-unicode-inverse: false
5746+
is-letter-like: false
5747+
operator-name: Repeated
5748+
RepeatedNull:
5749+
ascii: "..."
5750+
has-unicode-inverse: false
5751+
is-letter-like: false
5752+
operator-name: RepeatedNull
56745753
ReplaceAll:
56755754
ascii: "/."
56765755
has-unicode-inverse: false
@@ -6701,6 +6780,16 @@ Stigma:
67016780
unicode-equivalent-name: GREEK SMALL LETTER STIGMA
67026781
wl-unicode: "\u03DB"
67036782
wl-unicode-name: GREEK SMALL LETTER STIGMA
6783+
StringExpression:
6784+
ascii: "~~"
6785+
has-unicode-inverse: false
6786+
is-letter-like: false
6787+
operator-name: StringExpression
6788+
StringJoin:
6789+
ascii: "<>"
6790+
has-unicode-inverse: false
6791+
is-letter-like: false
6792+
operator-name: StringJoin
67046793
Subset:
67056794
esc-alias: sub
67066795
has-unicode-inverse: false
@@ -6717,6 +6806,11 @@ SubsetEqual:
67176806
unicode-equivalent-name: SUBSET OF OR EQUAL TO
67186807
wl-unicode: "\u2286"
67196808
wl-unicode-name: SUBSET OF OR EQUAL TO
6809+
SubtractFrom:
6810+
ascii: "-="
6811+
has-unicode-inverse: false
6812+
is-letter-like: false
6813+
operator-name: SubtractFrom
67206814
Succeeds:
67216815
has-unicode-inverse: false
67226816
is-letter-like: false
@@ -6906,6 +7000,11 @@ Times:
69067000
unicode-equivalent-name: MULTIPLICATION SIGN
69077001
wl-unicode: "\xD7"
69087002
wl-unicode-name: MULTIPLICATION SIGN
7003+
TimesBy:
7004+
ascii: "*="
7005+
has-unicode-inverse: false
7006+
is-letter-like: false
7007+
operator-name: TimesBy
69097008
Trademark:
69107009
esc-alias: tm
69117010
has-unicode-inverse: false

test/test_general_yaml_sanity.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ def test_operators():
5555
pass
5656
else:
5757
assert "wl-unicode" in v, (
58-
"In %s: there must be either an ascii name or have a wl-unicode"
59-
% k
58+
"In %s: there must be either an ascii name or have a wl-unicode" % k
6059
)
6160
if "operator-name" not in v:
6261
continue
@@ -86,9 +85,6 @@ def test_wl_unicode_name():
8685
try:
8786
expected_name = unicodedata.name(wl)
8887
except ValueError:
89-
assert (
90-
"wl-unicode-name" not in v
91-
), f"{k} has wl-unicode-name set to {v['wl-unicode-name']} but {wl} has no unicode name"
9288
continue
9389

9490
real_name = v.get("wl-unicode-name")
@@ -141,11 +137,14 @@ def test_unicode_name():
141137
def test_wl_unicode():
142138
for k, v in yaml_data.items():
143139
if "operator-name" in v:
144-
if "ascii" in v and len(v["ascii"]) > 1:
145-
# Multi-character operators like "**" don't need to
146-
# have a wl-unicode equivalent.
140+
if "ascii" in v:
141+
# Operators like "**" or "?" don't need to
142+
# have a wl-unicode equivalent and might not have a
143+
# unique equivalent
147144
continue
148-
assert "wl-unicode" in v, f"{k} has no wl-unicode attribute"
145+
assert (
146+
"wl-unicode" in v or "unicode-equivalent" in v
147+
), f"{k} has neither wl-unicode nor unicode-equivalent attribute"
149148

150149

151150
def test_general_yaml_sanity():

0 commit comments

Comments
 (0)