Skip to content

Commit 727c974

Browse files
authored
feat(all): remove noqa (#175)
1 parent 85581f4 commit 727c974

File tree

3 files changed

+35
-31
lines changed

3 files changed

+35
-31
lines changed

itn/chinese/inverse_normalizer.py

Lines changed: 25 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -46,39 +46,37 @@ def __init__(self,
4646
self.build_fst('zh_itn', cache_dir, overwrite_cache)
4747

4848
def build_tagger(self):
49-
tagger = (
50-
add_weight(Date().tagger, 1.02)
51-
| add_weight(Whitelist().tagger, 1.01)
52-
| add_weight(Fraction().tagger, 1.05)
53-
| add_weight(
54-
Measure(enable_0_to_9=self.enable_0_to_9).tagger, 1.05) # noqa
55-
| add_weight(Money(enable_0_to_9=self.enable_0_to_9).tagger,
56-
1.04) # noqa
57-
| add_weight(Time().tagger, 1.05)
58-
| add_weight(
59-
Cardinal(self.convert_number, self.enable_0_to_9,
60-
self.enable_million).tagger, 1.06) # noqa
61-
| add_weight(Math().tagger, 1.10)
62-
| add_weight(LicensePlate().tagger, 1.0)
63-
| add_weight(Char().tagger, 100)).optimize()
49+
tagger = (add_weight(Date().tagger, 1.02)
50+
| add_weight(Whitelist().tagger, 1.01)
51+
| add_weight(Fraction().tagger, 1.05)
52+
| add_weight(
53+
Measure(enable_0_to_9=self.enable_0_to_9).tagger, 1.05)
54+
| add_weight(
55+
Money(enable_0_to_9=self.enable_0_to_9).tagger, 1.04)
56+
| add_weight(Time().tagger, 1.05)
57+
| add_weight(
58+
Cardinal(self.convert_number, self.enable_0_to_9,
59+
self.enable_million).tagger, 1.06)
60+
| add_weight(Math().tagger, 1.10)
61+
| add_weight(LicensePlate().tagger, 1.0)
62+
| add_weight(Char().tagger, 100)).optimize()
6463

6564
tagger = tagger.star
6665
# remove the last space
6766
self.tagger = tagger @ self.build_rule(delete(' '), '', '[EOS]')
6867

6968
def build_verbalizer(self):
70-
verbalizer = (
71-
Cardinal(self.convert_number, self.enable_0_to_9,
72-
self.enable_million).verbalizer # noqa
73-
| Char().verbalizer
74-
| Date().verbalizer
75-
| Fraction().verbalizer
76-
| Math().verbalizer
77-
| Measure(enable_0_to_9=self.enable_0_to_9).verbalizer
78-
| Money(enable_0_to_9=self.enable_0_to_9).verbalizer
79-
| Time().verbalizer
80-
| LicensePlate().verbalizer
81-
| Whitelist().verbalizer).optimize()
69+
verbalizer = (Cardinal(self.convert_number, self.enable_0_to_9,
70+
self.enable_million).verbalizer
71+
| Char().verbalizer
72+
| Date().verbalizer
73+
| Fraction().verbalizer
74+
| Math().verbalizer
75+
| Measure(enable_0_to_9=self.enable_0_to_9).verbalizer
76+
| Money(enable_0_to_9=self.enable_0_to_9).verbalizer
77+
| Time().verbalizer
78+
| LicensePlate().verbalizer
79+
| Whitelist().verbalizer).optimize()
8280
postprocessor = PostProcessor(remove_interjections=True).processor
8381

8482
self.verbalizer = (verbalizer @ postprocessor).star

itn/chinese/rules/cardinal.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,9 @@ def build_tagger(self):
3939
sign = string_file('itn/chinese/data/number/sign.tsv') # + -
4040
dot = string_file('itn/chinese/data/number/dot.tsv') # .
4141

42+
# 0. 基础数字
4243
addzero = insert('0')
4344
digits = zero | digit # 0 ~ 9
44-
4545
# 十一 => 11, 十二 => 12
4646
teen = cross('十', '1') + (digit | add_weight(addzero, 0.1))
4747
# 一十一 => 11, 二十一 => 21, 三十 => 30
@@ -81,6 +81,8 @@ def build_tagger(self):
8181
| add_weight(addzero**4, 1.0)))
8282
ten_thousand |= (thousand | hundred) + accep("万") + delete(
8383
"零").ques + (thousand | hundred | tens | teen | digits).ques
84+
85+
# 1. 利用基础数字所构建的包含0~9的完整数字
8486
# 个/十/百/千/万
8587
number = digits | teen | tens | hundred | thousand | ten_thousand
8688
# 兆/亿
@@ -106,6 +108,7 @@ def build_tagger(self):
106108
self.special_2number = special_2number.optimize()
107109
self.special_3number = special_3number.optimize()
108110

111+
# 2. 利用基础数字所构建的不包含0~9的完整数字
109112
# 十/百/千/万
110113
number_exclude_0_to_9 = teen | tens | hundred | thousand | ten_thousand
111114
# 兆/亿
@@ -124,8 +127,9 @@ def build_tagger(self):
124127
number_exclude_0_to_9 |= add_weight(special_3number, -100.0)
125128

126129
self.number_exclude_0_to_9 = (sign.ques +
127-
number_exclude_0_to_9).optimize() # noqa
130+
number_exclude_0_to_9).optimize()
128131

132+
# 3. 特殊格式的数字
129133
# cardinal string like 127.0.0.1, used in ID, IP, etc.
130134
cardinal = digits.plus + (dot + digits.plus).plus
131135
# float number like 1.11
@@ -134,6 +138,8 @@ def build_tagger(self):
134138
# 340621199806051223, used in ID card
135139
cardinal |= (digits**3 | digits**4 | digits**5 | digits**11
136140
| digits**18)
141+
142+
# 4. 特殊格式的数字 + 包含或不包含0~9的完整数字
137143
# cardinal string like 23
138144
if self.enable_standalone_number:
139145
if self.enable_0_to_9:

itn/main.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,11 +42,11 @@ def main():
4242
parser.add_argument('--enable_standalone_number',
4343
type=str,
4444
default='True',
45-
help='enable standalone number')
45+
help='一百 = 100 if True else 一百')
4646
parser.add_argument('--enable_0_to_9',
4747
type=str,
4848
default='False',
49-
help='enable convert number 0 to 9')
49+
help='零和九 = 0和9 if True else 零和九')
5050
parser.add_argument('--enable_million',
5151
type=str,
5252
default='False',

0 commit comments

Comments
 (0)