1- #!/usr/bin/env python3
1+ #!/usr/bin/env python
22# This script reads the data from named-characters and converts it to the
33# format used by the library internally
44
5- from collections import OrderedDict
6-
75import click
86
97import json
@@ -120,6 +118,13 @@ def compile_tables(data: dict) -> dict:
120118 if "operator-name" in v and ("unicode-equivalent" in v or "ascii" in v )
121119 }
122120
121+ # operator-to-ascii or character symbol name
122+ operator_to_ascii = {
123+ v ["operator-name" ]: v .get ("ascii" , rf'\[{ v ["operator-name" ]} ]' )
124+ for k , v in data .items ()
125+ if "operator-name" in v and ("unicode-equivalent" in v or "ascii" in v )
126+ }
127+
123128 # Conversion from unicode or ascii to wl dictionary entry.
124129 # We filter the dictionary after it's first created to remove redundant entries
125130 unicode_to_wl_dict = {
@@ -144,20 +149,30 @@ def compile_tables(data: dict) -> dict:
144149 if "wl-unicode" in v
145150 }
146151
147- # Operators with ASCII sequences list entry
148- ascii_operators = sorted (
149- [v ["ascii" ] for v in data .values () if "operator-name" in v and "ascii" in v ]
150- )
151-
152- # Mathics core stores the ascii operator value, Use that to get an operator name
153- # Operators with ASCII sequences list entry
154- ascii_operator_to_name = OrderedDict (
155- {
156- v ["ascii" ]: rf'\[{ v ["operator-name" ]} ]'
157- for v in data .values ()
158- if "operator-name" in v and "ascii" in v
159- }.items ()
160- )
152+ operator_names = sorted ([k for k , v in data .items () if "operator-name" in v ])
153+
154+ ascii_operators = []
155+ ascii_operator_to_character_symbol = {}
156+ ascii_operator_to_symbol = {}
157+ ascii_operator_to_unicode = {}
158+ ascii_operator_to_wl_unicode = {}
159+
160+ for operator_name in operator_names :
161+ # Operators with ASCII sequences list entry
162+ v = data [operator_name ]
163+ ascii_name = v .get ("ascii" , None )
164+ if ascii_name is not None :
165+ ascii_operators .append (v ["ascii" ])
166+ ascii_operator_to_character_symbol [ascii_name ] = rf'\[{ v ["operator-name" ]} ]'
167+ ascii_operator_to_symbol [ascii_name ] = v ["operator-name" ]
168+ # Mathics core stores the ASCII operator value. Use that to get the standard Unicode
169+ # symbol and, failing that, use the ASCII sequence.
170+ ascii_operator_to_unicode [ascii_name ] = v .get (
171+ "unicode-equivalent" , v .get ("ascii" )
172+ )
173+ ascii_operator_to_wl_unicode [ascii_name ] = v .get (
174+ "wl-unicode" , v .get ("ascii" )
175+ )
161176
162177 # unicode-to-operator dictionary entry
163178 unicode_to_operator = {
@@ -187,13 +202,16 @@ def compile_tables(data: dict) -> dict:
187202 return {
188203 "aliased-characters" : aliased_characters ,
189204 "ascii-operators" : ascii_operators ,
190- "ascii-operator-to-name" : ascii_operator_to_name ,
205+ "ascii-operator-to-symbol" : ascii_operator_to_symbol ,
206+ "ascii-operator-to-character-symbol" : ascii_operator_to_character_symbol ,
207+ "ascii-operator-to-unicode" : ascii_operator_to_unicode ,
208+ "ascii-operator-to-wl-unicode" : ascii_operator_to_wl_unicode ,
191209 "letterlikes" : letterlikes ,
192210 "named-characters" : named_characters ,
211+ "operator-names" : operator_names ,
193212 "operator-to-precedence" : operator_to_precedence ,
213+ "operator-to-ascii" : operator_to_ascii ,
194214 "operator-to-unicode" : operator_to_unicode ,
195- # unicode-operators is irregular, but this is what
196- # mathics-pygments uses
197215 "unicode-operators" : unicode_to_operator ,
198216 "unicode-to-wl-dict" : unicode_to_wl_dict ,
199217 "unicode-to-wl-re" : unicode_to_wl_re ,
@@ -210,13 +228,17 @@ def compile_tables(data: dict) -> dict:
210228ALL_FIELDS = [
211229 "aliased-characters" ,
212230 "ascii-operators" ,
231+ "ascii-operator-to-character-symbol" ,
232+ "ascii-operator-to-symbol" ,
233+ "ascii-operator-to-unicode" ,
234+ "ascii-operator-to-wl-unicode" ,
213235 "letterlikes" ,
214236 "named-characters" ,
237+ "operator-names" ,
238+ "operator-to-ascii" ,
215239 "operator-to-precedence" ,
216240 "operator-to-unicode" ,
217- "unicode-equivalent" ,
218- "unicode-operators" ,
219- "unicode-to-operator" ,
241+ # "unicode-operators", # not used yet
220242 "unicode-to-wl-dict" ,
221243 "unicode-to-wl-re" ,
222244 "wl-to-amslatex" ,
0 commit comments