Skip to content

Commit 5a25116

Browse files
committed
Add ApplyTo and regularize unicode-equivalent
Also adjust Tilde
1 parent 90161cc commit 5a25116

File tree

2 files changed

+69
-79
lines changed

2 files changed

+69
-79
lines changed

mathics_scanner/data/named-characters.yml

Lines changed: 63 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,13 @@ ACup:
7171
unicode-equivalent-name: LATIN SMALL LETTER A WITH BREVE
7272
wl-unicode: "\u0103"
7373
wl-unicode-name: LATIN SMALL LETTER A WITH BREVE
74+
7475
AddTo:
7576
ascii: "+="
7677
has-unicode-inverse: false
7778
is-letter-like: false
7879
operator-name: AddTo
80+
7981
ADoubleDot:
8082
esc-alias: a"
8183
has-unicode-inverse: false
@@ -117,10 +119,6 @@ Alternative:
117119
is-letter-like: false
118120
operator-name: Alternative
119121
precedence: 160
120-
unicode-equivalent: "|"
121-
unicode-equivalent-name: VERTICAL LINE
122-
wl-unicode: "|"
123-
wl-unicode-name: VERTICAL LINE
124122

125123
Apply:
126124
ascii: "@@"
@@ -140,6 +138,13 @@ Apply3Ats:
140138
operator-name: Apply
141139
precedence: 620
142140

141+
ApplyTo:
142+
ascii: "//="
143+
has-unicode-inverse: false
144+
is-letter-like: false
145+
operator-name: ApplyTo
146+
precedence: 620
147+
143148
ARing:
144149
esc-alias: ao
145150
has-unicode-inverse: false
@@ -1478,8 +1483,6 @@ Definition:
14781483
is-letter-like: false
14791484
operator-name: Definition
14801485
precedence: 670
1481-
unicode-equivalent: "?"
1482-
unicode-equivalent-name: QUESTION MARK
14831486

14841487
# \[Degree] is letter-like, not an operator, which is
14851488
# mutually exclusive.
@@ -1682,10 +1685,6 @@ Dot:
16821685
is-letter-like: false
16831686
operator-name: Dot
16841687
precedence: 490
1685-
unicode-equivalent: "."
1686-
unicode-equivalent-name: FULL STOP
1687-
wl-unicode: "."
1688-
wl-unicode-name: FULL STOP
16891688

16901689
DotEqual:
16911690
esc-alias: .=
@@ -4441,11 +4440,14 @@ Infinity:
44414440
wl-unicode-name: INFINITY
44424441

44434442
# Infix isn't really an operator
4444-
# Infix:
4445-
# ascii: "~"
4446-
# has-unicode-inverse: false
4447-
# is-letter-like: false
4448-
# operator-name: Infix
4443+
# See also Tilde
4444+
Infix:
4445+
ascii: "~"
4446+
amslatex: "\\textasciitilde"
4447+
has-unicode-inverse: false
4448+
is-letter-like: false
4449+
# operator-name: Infix
4450+
44494451
Information:
44504452
ascii: "??"
44514453
has-unicode-inverse: false
@@ -4833,16 +4835,14 @@ LeoSign:
48334835
unicode-equivalent-name: LEO
48344836
wl-unicode: "\u264C"
48354837
wl-unicode-name: LEO
4838+
48364839
Less:
48374840
ascii: "<"
48384841
has-unicode-inverse: true
48394842
is-letter-like: false
48404843
operator-name: Less
4841-
unicode-equivalent: "<"
4842-
unicode-equivalent-name: LESS-THAN SIGN
4843-
wl-unicode: "<"
4844-
wl-unicode-name: LESS-THAN SIGN
48454844
LessEqual:
4845+
48464846
amslatex: "$\\le$"
48474847
ascii: "<="
48484848
esc-alias: "<="
@@ -4908,6 +4908,7 @@ LetterSpace:
49084908
has-unicode-inverse: false
49094909
is-letter-like: true
49104910
wl-unicode: "\uF754"
4911+
49114912
LibraSign:
49124913
has-unicode-inverse: false
49134914
is-letter-like: false
@@ -4919,6 +4920,7 @@ LightBulb:
49194920
has-unicode-inverse: false
49204921
is-letter-like: true
49214922
wl-unicode: "\uF723"
4923+
49224924
LongDash:
49234925
esc-alias: --
49244926
has-unicode-inverse: false
@@ -4928,13 +4930,15 @@ LongDash:
49284930
unicode-equivalent-name: EM DASH
49294931
wl-unicode: "\u2014"
49304932
wl-unicode-name: EM DASH
4933+
49314934
LongEqual:
49324935
esc-alias: l=
49334936
has-unicode-inverse: true
49344937
is-letter-like: false
49354938
unicode-equivalent: '=='
49364939
unicode-equivalent-name: EQUALS SIGN + EQUALS SIGN
49374940
wl-unicode: "\uF7D9"
4941+
49384942
LongLeftArrow:
49394943
esc-alias: <--
49404944
has-unicode-inverse: false
@@ -5047,16 +5051,14 @@ Micro:
50475051
unicode-equivalent-name: MICRO SIGN
50485052
wl-unicode: "\xB5"
50495053
wl-unicode-name: MICRO SIGN
5054+
50505055
Minus:
50515056
ascii: "-"
50525057
has-unicode-inverse: false
50535058
is-letter-like: false
50545059
operator-name: Minus
50555060
precedence: 480
5056-
unicode-equivalent: "-"
5057-
unicode-equivalent-name: HYPHEN-MINUS
5058-
wl-unicode: "-"
5059-
wl-unicode-name: HYPHEN-MINUS
5061+
50605062
MinusPlus:
50615063
amslatex: "\\mp"
50625064
esc-alias: -+
@@ -5607,6 +5609,7 @@ Nu:
56075609
has-unicode-inverse: false
56085610
is-letter-like: true
56095611
wl-unicode: "\uF3A0"
5612+
56105613
NumberSign:
56115614
esc-alias: '#'
56125615
has-unicode-inverse: false
@@ -5615,6 +5618,7 @@ NumberSign:
56155618
# unicode-equivalent: '#'
56165619
# unicode-equivalent-name: NUMBER SIGN
56175620
wl-unicode: "\uF724"
5621+
56185622
OAcute:
56195623
amslatex: "\\'{o}"
56205624
esc-alias: o'
@@ -5780,15 +5784,14 @@ PartialD:
57805784
unicode-equivalent-name: PARTIAL DIFFERENTIAL
57815785
wl-unicode: "\u2202"
57825786
wl-unicode-name: PARTIAL DIFFERENTIAL
5787+
57835788
# See also Definition and RawQuestion
57845789
PatternTest:
57855790
ascii: "?"
57865791
has-unicode-inverse: false
57875792
is-letter-like: false
57885793
operator-name: PatternTest
57895794
precedence: 660
5790-
unicode-equivalent: "?"
5791-
unicode-equivalent-name: QUESTION MARK
57925795

57935796
PermutationProduct:
57945797
has-unicode-inverse: true
@@ -5993,19 +5996,15 @@ RHacek:
59935996
wl-unicode-name: LATIN SMALL LETTER R WITH CARON
59945997
# See also Function
59955998
RawAmpersand:
5999+
ascii: '&'
59966000
has-unicode-inverse: false
59976001
is-letter-like: false
5998-
unicode-equivalent: "&"
5999-
unicode-equivalent-name: AMPERSAND
6000-
wl-unicode: '&'
6001-
wl-unicode-name: AMPERSAND
6002+
60026003
RawAt:
6004+
ascii: '@'
60036005
has-unicode-inverse: false
60046006
is-letter-like: false
6005-
unicode-equivalent: '@'
6006-
unicode-equivalent-name: COMMERCIAL AT
6007-
wl-unicode: '@'
6008-
wl-unicode-name: COMMERCIAL AT
6007+
60096008
RawBackquote:
60106009
has-unicode-inverse: false
60116010
is-letter-like: false
@@ -6020,34 +6019,29 @@ RawBackslash:
60206019
unicode-equivalent-name: REVERSE SOLIDUS
60216020
wl-unicode: \
60226021
wl-unicode-name: REVERSE SOLIDUS
6022+
60236023
RawColon:
6024+
ascii: ':'
60246025
has-unicode-inverse: false
60256026
is-letter-like: false
6026-
unicode-equivalent: ':'
6027-
unicode-equivalent-name: COLON
6028-
wl-unicode: ':'
6029-
wl-unicode-name: COLON
6027+
60306028
RawComma:
6029+
ascii: ','
60316030
has-unicode-inverse: false
60326031
is-letter-like: false
6033-
unicode-equivalent: ','
6034-
unicode-equivalent-name: COMMA
6035-
wl-unicode: ','
6036-
wl-unicode-name: COMMA
6032+
60376033
RawDollar:
6034+
ascii: '$'
60386035
has-unicode-inverse: false
60396036
is-letter-like: true
6040-
unicode-equivalent: $
6041-
unicode-equivalent-name: DOLLAR SIGN
6042-
wl-unicode: $
6043-
wl-unicode-name: DOLLAR SIGN
6037+
# Since we have an is-letter-like, we add wl-unicode for the checker to work.
6038+
wl-unicode: "$"
6039+
60446040
RawDoubleQuote:
6041+
ascii: '"'
60456042
has-unicode-inverse: false
60466043
is-letter-like: false
6047-
unicode-equivalent: '"'
6048-
unicode-equivalent-name: QUOTATION MARK
6049-
wl-unicode: '"'
6050-
wl-unicode-name: QUOTATION MARK
6044+
60516045
RawEscape:
60526046
has-unicode-inverse: false
60536047
is-letter-like: false
@@ -6063,51 +6057,38 @@ RawGreater:
60636057
unicode-equivalent-name: GREATER-THAN SIGN
60646058
wl-unicode: "\u003e"
60656059
wl-unicode-name: GREATER-THAN SIGN
6060+
60666061
RawLeftBrace:
6062+
ascii: '{'
60676063
has-unicode-inverse: false
60686064
is-letter-like: false
6069-
unicode-equivalent: '{'
6070-
unicode-equivalent-name: LEFT CURLY BRACKET
6071-
wl-unicode: '{'
6072-
wl-unicode-name: LEFT CURLY BRACKET
6065+
60736066
RawLeftBracket:
6067+
ascii: '['
60746068
has-unicode-inverse: false
60756069
is-letter-like: false
6076-
unicode-equivalent: '['
6077-
unicode-equivalent-name: LEFT SQUARE BRACKET
6078-
wl-unicode: '['
6079-
wl-unicode-name: LEFT SQUARE BRACKET
6070+
60806071
RawLeftParenthesis:
6072+
ascii: '('
60816073
has-unicode-inverse: false
60826074
is-letter-like: false
6083-
unicode-equivalent: (
6084-
unicode-equivalent-name: LEFT PARENTHESIS
6085-
wl-unicode: (
6086-
wl-unicode-name: LEFT PARENTHESIS
6075+
60876076
RawNumberSign:
6077+
ascii: "#"
60886078
has-unicode-inverse: false
60896079
is-letter-like: false
6090-
unicode-equivalent: '#'
6091-
unicode-equivalent-name: NUMBER SIGN
6092-
wl-unicode: '#'
6093-
wl-unicode-name: NUMBER SIGN
6080+
60946081
RawPercent:
6082+
ascii: "%"
60956083
has-unicode-inverse: false
60966084
is-letter-like: false
6097-
unicode-equivalent: '%'
6098-
unicode-equivalent-name: PERCENT SIGN
6099-
wl-unicode: '%'
6100-
wl-unicode-name: PERCENT SIGN
61016085

61026086
# See also Definition and PatternTest
61036087
RawQuestion:
61046088
ascii: "?"
61056089
has-unicode-inverse: false
61066090
is-letter-like: false
6107-
unicode-equivalent: "?"
6108-
unicode-equivalent-name: QUESTION MARK
6109-
wl-unicode: "?"
6110-
wl-unicode-name: QUESTION MARK
6091+
61116092
RawQuote:
61126093
has-unicode-inverse: false
61136094
is-letter-like: false
@@ -7510,17 +7491,20 @@ Thorn:
75107491
unicode-equivalent-name: LATIN SMALL LETTER THORN
75117492
wl-unicode: "\xFE"
75127493
wl-unicode-name: LATIN SMALL LETTER THORN
7494+
7495+
# See also operator Infix.
7496+
# Tilde has no built-in meaning per the documentation,
7497+
# so it is not the same as Infix.
7498+
# Yes, it is confusing.
75137499
Tilde:
7514-
amslatex: "\textasciitilde"
7515-
esc-alias: '~'
75167500
ascii: "~"
7501+
esc-alias: '~'
75177502
has-unicode-inverse: false
75187503
is-letter-like: false
75197504
operator-name: Tilde
7520-
unicode-equivalent: "\u223C"
7521-
unicode-equivalent-name: TILDE OPERATOR
7522-
wl-unicode: "\u223C"
7523-
wl-unicode-name: TILDE OPERATOR
7505+
# Since we have an esc-alias, we add wl-unicode for the checker to work.
7506+
wl-unicode: "~"
7507+
75247508
TildeEqual:
75257509
amslatex: "$\\simeq$"
75267510
esc-alias: ~=

mathics_scanner/generate/build_tables.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@ def compile_tables(data: dict) -> dict:
8484
# characters that have a unicode inverse are included in
8585
# `unicode_to_wl_dict`
8686

87+
for k, v in data.items():
88+
if "esc-alias" in v:
89+
assert "wl-unicode" in v, f"esc-alias {k} needs wl-unicode"
90+
if v.get("is-letter-like"):
91+
assert "wl-unicode" in v, f"is-letter-like {k} needs wl-unicode"
92+
8793
# ESC sequence aliases dictionary entry
8894
aliased_characters = {
8995
v["esc-alias"]: v["wl-unicode"] for v in data.values() if "esc-alias" in v

0 commit comments

Comments
 (0)