Skip to content

Commit 3ed4bf7

Browse files
committed
Expand script names
1 parent 51bc013 commit 3ed4bf7

File tree

3 files changed

+235
-0
lines changed

3 files changed

+235
-0
lines changed

bin/make_script_map.sh

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Download PropertyValueAliases.txt from Unicode.org and generate
4+
# src/scriptMap.ts, which maps script codes to their long names.
5+
#
6+
7+
# Strict mode: abort on unset variables, on any failing command, and on a
# failure anywhere inside a pipeline.
set -o nounset
8+
set -o errexit
9+
set -o pipefail
10+
11+
# Absolute path of the directory containing this script; BASE_DIR is its parent.
SCRIPT_HOME="$( cd "$( dirname "$0" )" && pwd )"
12+
BASE_DIR=$(realpath "${SCRIPT_HOME}/..")
13+
14+
# SRC_PATH is the TypeScript file this script (re)generates.
SRC_DIR="${BASE_DIR}/src"
15+
SRC_PATH="${SRC_DIR}/scriptMap.ts"
16+
17+
echo "INFO: starting map gen at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
18+
19+
# TMP_DIR holds the downloaded input file; it is created on first use and
# reused on later runs.
TMP_DIR="${BASE_DIR}/tmp"
20+
if [ ! -d "${TMP_DIR}" ]; then
21+
echo "INFO: creating temp dir ${TMP_DIR}"
22+
mkdir -p "${TMP_DIR}"
23+
else
24+
echo "INFO: using existing temp dir ${TMP_DIR}"
25+
fi
26+
27+
# Fetch PropertyValueAliases.txt unless a cached copy already exists in TMP_DIR.
if [ -f "${TMP_DIR}/PropertyValueAliases.txt" ]; then
28+
echo "INFO: using existing PropertyValueAliases.txt"
29+
else
30+
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page. Downloading to a .part file and renaming it only on success
# keeps a failed or interrupted transfer from being picked up as the cached
# copy on the next run.
curl \
31+
--fail \
--location \
32+
--output "${TMP_DIR}/PropertyValueAliases.txt.part" \
33+
--show-error \
34+
--silent \
35+
https://www.unicode.org/Public/latest/ucd/PropertyValueAliases.txt
mv "${TMP_DIR}/PropertyValueAliases.txt.part" "${TMP_DIR}/PropertyValueAliases.txt"
36+
fi
37+
38+
# Work inside TMP_DIR so the input file can be referenced by its bare name.
cd "${TMP_DIR}"
39+
40+
echo "INFO: generating script map to ${SRC_PATH}"
41+
42+
# Start the output file from scratch (">" truncates) with a generated-file
# banner and the opening of the exported object literal.
echo "/* This file is generated by bin/make_script_map.sh */" > "${SRC_PATH}"
43+
echo "export const scriptMap: { [key: string]: string } = {" >> "${SRC_PATH}"
44+
45+
46+
# Append one `"<key>": "<value>",` line per matching record:
#   grep - keep only lines that begin with "sc ; "
#   cut  - keep the 2nd and 3rd semicolon-separated fields
#   awk  - split on "; ", delete all spaces/tabs from the first field and
#          trailing spaces/tabs from the second, then print them as a
#          quoted key/value pair
grep "^sc ; " PropertyValueAliases.txt | \
47+
cut -d';' -f2,3 | \
48+
awk -F'; ' '{ gsub(/[ \t]+/, "", $1); gsub(/[ \t]+$/, "", $2); printf("\"%s\": \"%s\",\n", $1, $2); }' \
49+
>> "${SRC_PATH}"
50+
51+
# Close the object literal.
echo "};" >> "${SRC_PATH}"
52+
53+
echo "INFO: completed map gen at $(date -u +"%Y-%m-%dT%H:%M:%SZ")"

src/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import "./styles.css";
22
import "../node_modules/tabulator-tables/dist/css/tabulator_bootstrap5.min.css";
3+
import { scriptMap } from "./scriptMap.ts";
34

45
import {
56
CellComponent,
@@ -26,6 +27,7 @@ type SearchEntry = {
2627
age: string;
2728
block: string;
2829
category: string;
30+
script: string;
2931
tags?: string[];
3032
notes?: string[];
3133
};
@@ -461,6 +463,7 @@ async function main() {
461463

462464
for (const row of data) {
463465
row.example = codeToString(row.code);
466+
row.script = scriptMap[row.script] || row.script;
464467
}
465468

466469
initExampleMap(data);

src/scriptMap.ts

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
/* This file is generated by bin/make_script_map.sh */
2+
export const scriptMap: { [key: string]: string } = {
3+
"Adlm": "Adlam",
4+
"Aghb": "Caucasian_Albanian",
5+
"Ahom": "Ahom",
6+
"Arab": "Arabic",
7+
"Armi": "Imperial_Aramaic",
8+
"Armn": "Armenian",
9+
"Avst": "Avestan",
10+
"Bali": "Balinese",
11+
"Bamu": "Bamum",
12+
"Bass": "Bassa_Vah",
13+
"Batk": "Batak",
14+
"Beng": "Bengali",
15+
"Berf": "Beria_Erfe",
16+
"Bhks": "Bhaiksuki",
17+
"Bopo": "Bopomofo",
18+
"Brah": "Brahmi",
19+
"Brai": "Braille",
20+
"Bugi": "Buginese",
21+
"Buhd": "Buhid",
22+
"Cakm": "Chakma",
23+
"Cans": "Canadian_Aboriginal",
24+
"Cari": "Carian",
25+
"Cham": "Cham",
26+
"Cher": "Cherokee",
27+
"Chrs": "Chorasmian",
28+
"Copt": "Coptic",
29+
"Cpmn": "Cypro_Minoan",
30+
"Cprt": "Cypriot",
31+
"Cyrl": "Cyrillic",
32+
"Deva": "Devanagari",
33+
"Diak": "Dives_Akuru",
34+
"Dogr": "Dogra",
35+
"Dsrt": "Deseret",
36+
"Dupl": "Duployan",
37+
"Egyp": "Egyptian_Hieroglyphs",
38+
"Elba": "Elbasan",
39+
"Elym": "Elymaic",
40+
"Ethi": "Ethiopic",
41+
"Gara": "Garay",
42+
"Geor": "Georgian",
43+
"Glag": "Glagolitic",
44+
"Gong": "Gunjala_Gondi",
45+
"Gonm": "Masaram_Gondi",
46+
"Goth": "Gothic",
47+
"Gran": "Grantha",
48+
"Grek": "Greek",
49+
"Gujr": "Gujarati",
50+
"Gukh": "Gurung_Khema",
51+
"Guru": "Gurmukhi",
52+
"Hang": "Hangul",
53+
"Hani": "Han",
54+
"Hano": "Hanunoo",
55+
"Hatr": "Hatran",
56+
"Hebr": "Hebrew",
57+
"Hira": "Hiragana",
58+
"Hluw": "Anatolian_Hieroglyphs",
59+
"Hmng": "Pahawh_Hmong",
60+
"Hmnp": "Nyiakeng_Puachue_Hmong",
61+
"Hrkt": "Katakana_Or_Hiragana",
62+
"Hung": "Old_Hungarian",
63+
"Ital": "Old_Italic",
64+
"Java": "Javanese",
65+
"Kali": "Kayah_Li",
66+
"Kana": "Katakana",
67+
"Kawi": "Kawi",
68+
"Khar": "Kharoshthi",
69+
"Khmr": "Khmer",
70+
"Khoj": "Khojki",
71+
"Kits": "Khitan_Small_Script",
72+
"Knda": "Kannada",
73+
"Krai": "Kirat_Rai",
74+
"Kthi": "Kaithi",
75+
"Lana": "Tai_Tham",
76+
"Laoo": "Lao",
77+
"Latn": "Latin",
78+
"Lepc": "Lepcha",
79+
"Limb": "Limbu",
80+
"Lina": "Linear_A",
81+
"Linb": "Linear_B",
82+
"Lisu": "Lisu",
83+
"Lyci": "Lycian",
84+
"Lydi": "Lydian",
85+
"Mahj": "Mahajani",
86+
"Maka": "Makasar",
87+
"Mand": "Mandaic",
88+
"Mani": "Manichaean",
89+
"Marc": "Marchen",
90+
"Medf": "Medefaidrin",
91+
"Mend": "Mende_Kikakui",
92+
"Merc": "Meroitic_Cursive",
93+
"Mero": "Meroitic_Hieroglyphs",
94+
"Mlym": "Malayalam",
95+
"Modi": "Modi",
96+
"Mong": "Mongolian",
97+
"Mroo": "Mro",
98+
"Mtei": "Meetei_Mayek",
99+
"Mult": "Multani",
100+
"Mymr": "Myanmar",
101+
"Nagm": "Nag_Mundari",
102+
"Nand": "Nandinagari",
103+
"Narb": "Old_North_Arabian",
104+
"Nbat": "Nabataean",
105+
"Newa": "Newa",
106+
"Nkoo": "Nko",
107+
"Nshu": "Nushu",
108+
"Ogam": "Ogham",
109+
"Olck": "Ol_Chiki",
110+
"Onao": "Ol_Onal",
111+
"Orkh": "Old_Turkic",
112+
"Orya": "Oriya",
113+
"Osge": "Osage",
114+
"Osma": "Osmanya",
115+
"Ougr": "Old_Uyghur",
116+
"Palm": "Palmyrene",
117+
"Pauc": "Pau_Cin_Hau",
118+
"Perm": "Old_Permic",
119+
"Phag": "Phags_Pa",
120+
"Phli": "Inscriptional_Pahlavi",
121+
"Phlp": "Psalter_Pahlavi",
122+
"Phnx": "Phoenician",
123+
"Plrd": "Miao",
124+
"Prti": "Inscriptional_Parthian",
125+
"Rjng": "Rejang",
126+
"Rohg": "Hanifi_Rohingya",
127+
"Runr": "Runic",
128+
"Samr": "Samaritan",
129+
"Sarb": "Old_South_Arabian",
130+
"Saur": "Saurashtra",
131+
"Sgnw": "SignWriting",
132+
"Shaw": "Shavian",
133+
"Shrd": "Sharada",
134+
"Sidd": "Siddham",
135+
"Sidt": "Sidetic",
136+
"Sind": "Khudawadi",
137+
"Sinh": "Sinhala",
138+
"Sogd": "Sogdian",
139+
"Sogo": "Old_Sogdian",
140+
"Sora": "Sora_Sompeng",
141+
"Soyo": "Soyombo",
142+
"Sund": "Sundanese",
143+
"Sunu": "Sunuwar",
144+
"Sylo": "Syloti_Nagri",
145+
"Syrc": "Syriac",
146+
"Tagb": "Tagbanwa",
147+
"Takr": "Takri",
148+
"Tale": "Tai_Le",
149+
"Talu": "New_Tai_Lue",
150+
"Taml": "Tamil",
151+
"Tang": "Tangut",
152+
"Tavt": "Tai_Viet",
153+
"Tayo": "Tai_Yo",
154+
"Telu": "Telugu",
155+
"Tfng": "Tifinagh",
156+
"Tglg": "Tagalog",
157+
"Thaa": "Thaana",
158+
"Thai": "Thai",
159+
"Tibt": "Tibetan",
160+
"Tirh": "Tirhuta",
161+
"Tnsa": "Tangsa",
162+
"Todr": "Todhri",
163+
"Tols": "Tolong_Siki",
164+
"Toto": "Toto",
165+
"Tutg": "Tulu_Tigalari",
166+
"Ugar": "Ugaritic",
167+
"Vaii": "Vai",
168+
"Vith": "Vithkuqi",
169+
"Wara": "Warang_Citi",
170+
"Wcho": "Wancho",
171+
"Xpeo": "Old_Persian",
172+
"Xsux": "Cuneiform",
173+
"Yezi": "Yezidi",
174+
"Yiii": "Yi",
175+
"Zanb": "Zanabazar_Square",
176+
"Zinh": "Inherited",
177+
"Zyyy": "Common",
178+
"Zzzz": "Unknown",
179+
};

0 commit comments

Comments
 (0)