Skip to content

Commit 035f6bf

Browse files
authored
chore: add script to auto update unicode-alias (#730)
1 parent 8c23a79 commit 035f6bf

File tree

4 files changed

+184
-0
lines changed

4 files changed

+184
-0
lines changed

.github/workflows/cron.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Weekly check that the generated Unicode alias tables are still up to date.
name: cron
on:
  schedule:
    # Every Sunday at 00:00 UTC. Quoted because `*` is a special character in YAML.
    - cron: "0 0 * * 0"

# The job only reads the repository; it never pushes, so a read-only token is enough.
permissions:
  contents: read

jobs:
  check-unicode-alias-update:
    name: check-unicode-alias-update
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # The update script calls the global `fetch`, so the runner's Node.js must provide it.
      - name: Install Node.js
        uses: actions/setup-node@v4
      - name: Install Packages
        run: npm install
      # Regenerate the alias tables from the Unicode data files.
      - name: Update
        run: npm run update:unicode-alias
      # Normalise formatting so the diff below only shows real data changes.
      - name: Format
        run: npm run eslint-fix
      # Fails the job (non-zero exit) when the regenerated file differs from HEAD.
      - name: Check changes
        run: |
          git add --all && \
          git diff-index --cached HEAD --stat --exit-code

lib/utils/unicode-alias.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ export const UNICODE_CATEGORY_ALIAS = new AliasMap({
5050
otherToLong: {},
5151
})
5252

53+
/* PROPERTY_ALIASES_START */
5354
// https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt
5455
export const UNICODE_BINARY_PROPERTY_ALIAS = new AliasMap({
5556
shortToLong: {
@@ -120,7 +121,9 @@ export const UNICODE_BINARY_PROPERTY_ALIAS = new AliasMap({
120121
space: "White_Space",
121122
},
122123
})
124+
/* PROPERTY_ALIASES_END */
123125

126+
/* PROPERTY_VALUE_ALIASES_START */
124127
// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
125128
export const UNICODE_GENERAL_CATEGORY_ALIAS = new AliasMap({
126129
shortToLong: {
@@ -337,3 +340,4 @@ export const UNICODE_SCRIPT_ALIAS = new AliasMap({
337340
Qaai: "Inherited",
338341
},
339342
})
343+
/* PROPERTY_VALUE_ALIASES_END */

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"test:update": "npm run test:base -- --update",
2727
"update": "ts-node --transpile-only ./tools/update.ts && npm run eslint-fix && npm run update:eslint-docs",
2828
"update:eslint-docs": "npm run build && eslint-doc-generator",
29+
"update:unicode-alias": "ts-node ./tools/update-unicode-alias.ts",
2930
"new": "ts-node ./tools/new-rule.ts",
3031
"docs:watch": "vitepress dev docs",
3132
"docs:build": "vitepress build docs",

tools/update-unicode-alias.ts

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import fs from "fs"
2+
import path from "path"
3+
4+
/** One alias record: the short name, the long name, and any further aliases. */
type Alias = {
    short: string
    long: string
    other: string[]
}
/** An alias record from PropertyValueAliases.txt, tagged with the property it belongs to. */
type UnicodePropertyValueAlias = {
    propertyAlias: string
} & Alias
/** An alias record from PropertyAliases.txt, tagged with the section heading it appeared under. */
type UnicodePropertyAlias = {
    category: string
} & Alias

/** The generated source file that this script rewrites in place. */
const filePath = path.resolve(__dirname, "../lib/utils/unicode-alias.ts")
const logger = console

// Report failures explicitly and exit non-zero instead of discarding the
// promise and relying on the runtime's unhandled-rejection policy.
main().catch((error: unknown) => {
    logger.error(error)
    process.exitCode = 1
})
20+
21+
/**
 * Regenerates the alias tables inside the file at `filePath`.
 *
 * Downloads both Unicode alias lists, renders the three `AliasMap`
 * declarations, and substitutes them between the
 * `PROPERTY_ALIASES_START/END` and `PROPERTY_VALUE_ALIASES_START/END`
 * marker comments. The file is written only after every section has been
 * replaced successfully.
 *
 * @throws Error when a marker pair cannot be found in the target file.
 */
async function main(): Promise<void> {
    const propertyAliases: UnicodePropertyAlias[] = []
    for await (const item of getUnicodePropertyAliases()) {
        propertyAliases.push(item)
    }
    const propertyValueAliases: UnicodePropertyValueAlias[] = []
    for await (const item of getUnicodePropertyValueAliases()) {
        propertyValueAliases.push(item)
    }

    const binaryProperties = generateAliasMap(
        "UNICODE_BINARY_PROPERTY_ALIAS",
        propertyAliases.filter((u) => u.category === "Binary Properties"),
    )
    const generalCategories = generateAliasMap(
        "UNICODE_GENERAL_CATEGORY_ALIAS",
        propertyValueAliases.filter((u) => u.propertyAlias === "gc"),
    )
    const scripts = generateAliasMap(
        "UNICODE_SCRIPT_ALIAS",
        propertyValueAliases.filter((u) => u.propertyAlias === "sc"),
    )

    let content = fs.readFileSync(filePath, "utf-8")
    content = replaceSection(
        content,
        "PROPERTY_ALIASES",
        [
            "// https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt",
            binaryProperties,
        ].join("\n"),
    )
    content = replaceSection(
        content,
        "PROPERTY_VALUE_ALIASES",
        [
            "// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt",
            generalCategories,
            "",
            "// https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt",
            scripts,
        ].join("\n"),
    )

    // Update file.
    fs.writeFileSync(filePath, content)

    /**
     * Replaces everything between the `marker`_START and `marker`_END
     * comments (markers included) with freshly written markers around `body`.
     */
    function replaceSection(
        text: string,
        marker: string,
        body: string,
    ): string {
        // Lazy quantifier: stop at the first END marker after START.
        const pattern = new RegExp(
            String.raw`/\*\s*${marker}_START\s*\*/[\s\S]*?/\*\s*${marker}_END\s*\*/`,
            "u",
        )
        if (!pattern.test(text)) {
            // Without this check a missing marker would leave the file
            // untouched and the script would still report success.
            throw new Error(
                `Could not find the ${marker}_START/${marker}_END markers in ${filePath}`,
            )
        }
        // A replacer function keeps `$` sequences in the generated text literal.
        return text.replace(
            pattern,
            () => `/* ${marker}_START */\n${body}\n/* ${marker}_END */`,
        )
    }
}
62+
63+
/**
 * Renders the source text of an `export const <name> = new AliasMap({...})`
 * declaration for the given alias records.
 *
 * Each record contributes `short -> long` to the `shortToLong` section and
 * `alias -> long` for every entry of `other` to the `otherToLong` section.
 * Entries whose key equals the long name are omitted. Later records win when
 * two records share a key.
 *
 * @param name Identifier of the exported constant.
 * @param aliases Alias records to render, in output order.
 * @returns The declaration as source text (not yet formatted).
 */
function generateAliasMap(name: string, aliases: Alias[]): string {
    const shortToLong = new Map<string, string>()
    const otherToLong = new Map<string, string>()
    for (const { short, long, other } of aliases) {
        shortToLong.set(short, long)
        for (const alias of other) {
            otherToLong.set(alias, long)
        }
    }

    return [
        `export const ${name} = new AliasMap({`,
        "shortToLong: {",
        mapToProperties(shortToLong),
        "},",
        "otherToLong: {",
        mapToProperties(otherToLong),
        "},",
        "})",
    ].join("\n")

    /** Renders one `key: "value",` line per entry, skipping identity mappings. */
    function mapToProperties(map: ReadonlyMap<string, string>): string {
        return [...map]
            .filter(([alias, long]) => alias !== long)
            .map(
                ([alias, long]) =>
                    // JSON.stringify escapes quotes/backslashes in the value.
                    ` ${toPropertyKey(alias)}: ${JSON.stringify(long)},`,
            )
            .join("\n")
    }

    /**
     * Returns `key` unchanged when it is a plain identifier, otherwise a
     * quoted string literal so the generated object literal stays valid.
     */
    function toPropertyKey(key: string): string {
        return /^[A-Za-z_$][\w$]*$/u.test(key) ? key : JSON.stringify(key)
    }
}
97+
98+
/**
 * Downloads PropertyAliases.txt and yields one record per data line.
 *
 * A comment line that sits between two `# ====` rule lines is treated as a
 * section heading; every following record carries it as `category` until the
 * next heading. Trailing `#` comments are stripped and fields are split on
 * `;` and trimmed.
 *
 * @throws Error when the download fails or a data line lacks the short or
 * long name, so that bad data is never written into the generated file.
 */
async function* getUnicodePropertyAliases(): AsyncIterable<UnicodePropertyAlias> {
    const DB_URL =
        "https://unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt"
    logger.log("Fetching data... (%s)", DB_URL)

    const response = await fetch(DB_URL)
    if (!response.ok) {
        // An error page must not be parsed as alias data.
        throw new Error(
            `Failed to fetch ${DB_URL}: ${response.status} ${response.statusText}`,
        )
    }
    // Accept both LF and CRLF line endings.
    const dbLines = (await response.text()).split(/\r?\n/u)
    const headingRule = /^#\s*=+$/u
    let category = ""
    for (const [index, line] of dbLines.entries()) {
        if (line.startsWith("#")) {
            if (
                headingRule.test(dbLines[index - 1] ?? "") &&
                headingRule.test(dbLines[index + 1] ?? "")
            ) {
                category = line.slice(1).trim()
            }
            continue
        }
        const [data = ""] = line.split("#") // strip comments
        if (!data.trim()) {
            // Blank or whitespace-only line: carries no record.
            continue
        }
        const [short, long, ...other] = data
            .split(";") // split by semicolon
            .map((x) => x.trim()) // trim
        if (!short || !long) {
            throw new Error(
                `Malformed line ${index + 1} in ${DB_URL}: ${JSON.stringify(line)}`,
            )
        }

        yield {
            category,
            short,
            long,
            other,
        }
    }
}
133+
134+
/**
 * Downloads PropertyValueAliases.txt and yields one record per data line.
 *
 * Lines starting with `#` are skipped, trailing `#` comments are stripped,
 * and the remaining text is split on `;` into the property alias, the short
 * value name, the long value name, and any further aliases.
 *
 * @throws Error when the download fails or a data line lacks one of the
 * three required fields, so that bad data is never written into the
 * generated file.
 */
async function* getUnicodePropertyValueAliases(): AsyncIterable<UnicodePropertyValueAlias> {
    const DB_URL =
        "https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt"
    logger.log("Fetching data... (%s)", DB_URL)

    const response = await fetch(DB_URL)
    if (!response.ok) {
        // An error page must not be parsed as alias data.
        throw new Error(
            `Failed to fetch ${DB_URL}: ${response.status} ${response.statusText}`,
        )
    }
    const dbContent = await response.text()
    // Accept both LF and CRLF line endings.
    for (const line of dbContent.split(/\r?\n/u)) {
        if (line.startsWith("#")) {
            continue
        }
        const [data = ""] = line.split("#") // strip comments
        if (!data.trim()) {
            // Blank or whitespace-only line: carries no record.
            continue
        }
        const [propertyAlias, short, long, ...other] = data
            .split(";") // split by semicolon
            .map((x) => x.trim()) // trim
        if (!propertyAlias || !short || !long) {
            throw new Error(
                `Malformed line in ${DB_URL}: ${JSON.stringify(line)}`,
            )
        }

        yield {
            propertyAlias,
            short,
            long,
            other,
        }
    }
}

0 commit comments

Comments
 (0)