Skip to content

Commit 923b557

Browse files
committed
Add 字頭原貌
字頭原貌 is taken from the "字頭-原貌" field of the poem table (only entries marked with "校", excluding those marked with "部件換位" or "調整碼位", since the latter two cases only denote equivalent characters), plus our own patches.
1 parent 0a19cce commit 923b557

File tree

4 files changed

+25374
-25358
lines changed

4 files changed

+25374
-25358
lines changed

build.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class Patch:
8383
校正釋義: str
8484
原釋義參照: str
8585
校正釋義參照: str
86-
當刪說明: str
86+
字頭說明: str
8787
備注: str
8888

8989

@@ -106,8 +106,9 @@ class 廣韻Row:
106106
韻目原貌: str
107107
音韻地位: str
108108
反切: str
109+
字頭原貌: str
109110
字頭: str
110-
字頭當刪: str
111+
字頭說明: str
111112
釋義: str
112113
釋義參照: str
113114

@@ -174,7 +175,7 @@ def main():
174175
釋義參照 = ''
175176

176177
# 修正
177-
字頭當刪 = ''
178+
字頭說明 = ''
178179
if (patch := patches.get(字序_key)) is not None:
179180
assert patch.原字頭 == 字頭, (
180181
f'patching 小韻 #{原書小韻號}/{小韻字號} 字 "{patch.原字頭}", but the actual 字 is "{字頭}"'
@@ -192,12 +193,10 @@ def main():
192193
f'cannot use "~" in 校正字頭 when 字頭 contains correction: "{字頭}"'
193194
)
194195
字頭 = patch.校正字頭.replace('~', 字頭)
195-
if 字頭.endswith('/-]'):
196-
字頭當刪 = patch.當刪說明 or '當刪'
197-
else:
198-
assert not patch.當刪說明, (
199-
f'patching 當刪說明 on 小韻 #{原書小韻號}/{小韻字號} 字 "{patch.原字頭}", but 校正字頭 is not marked for removal'
200-
)
196+
197+
# 字頭說明 is an added field, thus it does not have an original value
198+
字頭說明 = patch.字頭說明
199+
201200
if patch.校正釋義 or patch.原釋義:
202201
assert patch.原釋義 == 釋義, (
203202
f'patching 釋義 on 小韻 #{原書小韻號}/{小韻字號} 字 "{patch.原字頭}", but the actual 釋義 is "{釋義}"'
@@ -212,15 +211,17 @@ def main():
212211
elif 字序_data[字序_key].sbgy_字.endswith('/-]'):
213212
assert not 字頭.startswith('[')
214213
字頭 = f'[{字頭}/-]'
215-
字頭當刪 = '當刪'
216214

217215
字_check = 字序_data[字序_key].
218216
assert 字頭 == 字_check, (
219-
f'字頭 mismatch between 字序表 and patched data: "{字_check}" != "{字頭}" (小韻 {原書小韻號}/{小韻字號})'
217+
f'字頭 mismatch between 字序表 and (patched) 廣韻 data: "{字_check}" != "{字頭}" (小韻 {原書小韻號}/{小韻字號})'
220218
)
221219
if 字頭.startswith('['):
222-
校前, 校後 = 字頭[1:-1].split('/')
223-
字頭 = 校後 if 校後 != '-' else 校前
220+
字頭原貌, 字頭 = 字頭[1:-1].split('/')
221+
字頭 = '' if 字頭 == '-' else 字頭
222+
字頭原貌 = '' if 字頭原貌 == '-' else 字頭原貌
223+
else:
224+
字頭原貌 = ''
224225

225226
# 小韻號
226227
if 原書小韻號 in has_細分:
@@ -254,7 +255,16 @@ def main():
254255
釋義 = 釋義.replace(poem_反切 + '切', 反切原貌 + '切')
255256

256257
廣韻_data[字序_key] = 廣韻Row(
257-
小韻號, 小韻字號, 韻目原貌, 音韻地位, 反切, 字頭, 字頭當刪, 釋義, 釋義參照
258+
小韻號,
259+
小韻字號,
260+
韻目原貌,
261+
音韻地位,
262+
反切,
263+
字頭原貌,
264+
字頭,
265+
字頭說明,
266+
釋義,
267+
釋義參照,
258268
)
259269

260270
for 小韻號, cov in 小韻細分_coverage.items():

check.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
. ( <.> | ⦉.⦊ | \(.\) | ⦅.⦆ )* # 原貌及校正
1616
){2}"""
1717
)
18+
PATTERN_IDC = re.compile(r'[\u2ff0-\u2fff\u303e\u31ef]')
1819

1920

2021
def contains_ascii(s: str):
@@ -28,7 +29,7 @@ def contains_ascii(s: str):
2829
with open('韻書/廣韻.csv') as f:
2930
assert (
3031
next(f).rstrip('\n')
31-
== '小韻號,小韻字號,韻目原貌,音韻地位,反切,字頭,字頭當刪,釋義,釋義參照'
32+
== '小韻號,小韻字號,韻目原貌,音韻地位,反切,字頭原貌,字頭,字頭說明,釋義,釋義參照'
3233
)
3334
for line in f:
3435
(
@@ -37,8 +38,9 @@ def contains_ascii(s: str):
3738
韻目原貌,
3839
音韻地位描述,
3940
反切,
41+
字頭原貌,
4042
字頭,
41-
字頭當刪,
43+
字頭說明,
4244
釋義,
4345
釋義參照,
4446
) = line.rstrip('\n').split(',')
@@ -48,9 +50,13 @@ def contains_ascii(s: str):
4850
f'invalid 小韻字號: {小韻字號}'
4951
)
5052
assert len(韻目原貌) == 1, f'invalid 韻目原𩩕: {韻目原貌}'
51-
assert len(字頭) == 1 or re.match(r'[\u2ff0-\u2fff\u303e\u31ef]', 字頭), (
52-
f'invalid 字頭: {字頭}'
53-
)
53+
assert 字頭原貌 != 字頭, f'字頭原貌 same as 字頭: {字頭}'
54+
for field, in (('字頭原貌', 字頭原貌), ('字頭', 字頭)):
55+
if not :
56+
continue
57+
assert != '-' and (len() == 1 or PATTERN_IDC.match()), (
58+
f'invalid {field}: {}'
59+
)
5460

5561
assert PATTERN_描述.fullmatch(音韻地位描述) is not None, (
5662
f'invalid 音韻地位: {音韻地位描述}'
@@ -59,7 +65,7 @@ def contains_ascii(s: str):
5965
if 反切:
6066
assert PATTERN_反切.fullmatch(反切) is not None, f'invalid 反切: {反切}'
6167

62-
assert 釋義 + 釋義參照, '釋義 and 釋義參照 should not be both empty'
68+
assert 釋義 or 釋義參照, '釋義 and 釋義參照 should not be both empty'
6369
assert not contains_ascii(釋義), (
6470
'釋義 should not contain any ASCII characters'
6571
)

src/patches.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
原書小韻號,小韻字號,原字頭,校正字頭,原釋義,校正釋義,原釋義參照,校正釋義參照,當刪說明,備注
1+
原書小韻號,小韻字號,原字頭,校正字頭,原釋義,校正釋義,原釋義參照,校正釋義參照,字頭說明,備注
22
11,2,,,,上同,,,,左下為「𢆉」形,poem表因未入U而缺
33
130,4,,,襹毛羽衣皃,𧞬襹毛羽衣皃,,,,poem表承「宋本廣韻データ」用 PUA 字元 U+EE42,當為 U+277AC「𧞬」
44
141,1,𤿎,[~/𢻹],,,,,,《形聲考》校

0 commit comments

Comments
 (0)