Skip to content

Commit 5810b60

Browse files
authored
实现 txt 词典注释语法和排序规则 (#1016)
1 parent 5caf024 commit 5810b60

20 files changed

+523
-40
lines changed

data/dictionary/DictionaryTest.cpp

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -135,30 +135,38 @@ TEST_F(DictionaryRunfilesTest, TWPhrasesReverseMapping) {
135135
return map;
136136
};
137137

138-
LexiconPtr twPhrases = loadLexicon(twPhrasesFile);
139-
LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile);
140-
ASSERT_NE(twPhrases, nullptr);
141-
ASSERT_NE(twPhrasesRev, nullptr);
142-
143-
auto twMap = buildMap(twPhrases);
144-
auto twRevMap = buildMap(twPhrasesRev);
145-
146-
for (const auto& entry : twMap) {
147-
const std::string& key = entry.first;
148-
for (const auto& value : entry.second) {
149-
auto it = twRevMap.find(value);
150-
EXPECT_TRUE(it != twRevMap.end() && it->second.count(key) > 0)
151-
<< "Missing reverse mapping: " << key << " -> " << value;
138+
try {
139+
LexiconPtr twPhrases = loadLexicon(twPhrasesFile);
140+
LexiconPtr twPhrasesRev = loadLexicon(twPhrasesRevFile);
141+
ASSERT_NE(twPhrases, nullptr);
142+
ASSERT_NE(twPhrasesRev, nullptr);
143+
144+
auto twMap = buildMap(twPhrases);
145+
auto twRevMap = buildMap(twPhrasesRev);
146+
147+
for (const auto& entry : twMap) {
148+
const std::string& key = entry.first;
149+
for (const auto& value : entry.second) {
150+
auto it = twRevMap.find(value);
151+
EXPECT_TRUE(it != twRevMap.end() && it->second.count(key) > 0)
152+
<< "Missing reverse mapping: " << key << " -> " << value;
153+
}
152154
}
153-
}
154155

155-
for (const auto& entry : twRevMap) {
156-
const std::string& key = entry.first;
157-
for (const auto& value : entry.second) {
158-
auto it = twMap.find(value);
159-
EXPECT_TRUE(it != twMap.end() && it->second.count(key) > 0)
160-
<< "Missing reverse mapping: " << key << " -> " << value;
156+
for (const auto& entry : twRevMap) {
157+
const std::string& key = entry.first;
158+
for (const auto& value : entry.second) {
159+
auto it = twMap.find(value);
160+
EXPECT_TRUE(it != twMap.end() && it->second.count(key) > 0)
161+
<< "Missing reverse mapping: " << key << " -> " << value;
162+
}
161163
}
164+
} catch (const Exception& ex) {
165+
FAIL() << "Exception: " << ex.what();
166+
} catch (const std::exception& ex) {
167+
FAIL() << "std::exception: " << ex.what();
168+
} catch (...) {
169+
FAIL() << "Unknown exception thrown during reverse mapping check.";
162170
}
163171
}
164172

data/dictionary/HKVariants.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: HKVariants.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: s2hk.json, t2hk.json
7+
18
僞 偽
29
兌 兑
310
叄 叁

data/dictionary/HKVariantsRevPhrases.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: HKVariantsRevPhrases.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: hk2s.json, hk2t.json
7+
18
一口吃個 一口喫個
29
一口吃成 一口喫成
310
一家三口 一家三口

data/dictionary/JPShinjitaiCharacters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: JPShinjitaiCharacters.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: jp2t.json
7+
18
両 兩 輛
29
弁 辨 辯 瓣 辦 弁
310
御 御 禦

data/dictionary/JPShinjitaiPhrases.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: JPShinjitaiPhrases.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: jp2t.json
7+
18
一獲千金 一攫千金
29
丁寧 叮嚀
310
丁重 鄭重

data/dictionary/JPVariants.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: JPVariants.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: t2jp.json
7+
18
乘 乗
29
亂 乱
310
亙 亘

data/dictionary/STCharacters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: STCharacters.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: s2hk.json, s2t.json, s2tw.json, s2twp.json
7+
18
㐷 傌
29
㐹 㑶 㐹
310
㐽 偑

data/dictionary/STPhrases.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: STPhrases.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: s2hk.json, s2t.json, s2tw.json, s2twp.json
7+
18
㓦划 㓦劃
29
一丝不挂 一絲不掛
310
一了心愿 一了心願

data/dictionary/TSCharacters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: TSCharacters.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: hk2s.json, t2s.json, tw2s.json, tw2sp.json
7+
18
㑮 𫝈
29
㑯 㑔
310
㑳 㑇

data/dictionary/TSPhrases.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# Open Chinese Convert (OpenCC) Dictionary
2+
# File: TSPhrases.txt
3+
# Format: key value(s) (values separated by spaces)
4+
# License: Apache-2.0 (see LICENSE)
5+
# Source: https://github.com/ByVoid/OpenCC
6+
# Used in configs: hk2s.json, t2s.json, tw2s.json, tw2sp.json
7+
18
一目瞭然 一目了然
29
上鍊 上链
310
不瞭解 不了解

0 commit comments

Comments
 (0)