2020
2121class Cardinal (Processor ):
2222
23- def __init__ (self , enable_standalone_number = True , enable_0_to_9 = True ,
23+ def __init__ (self ,
24+ enable_standalone_number = True ,
25+ enable_0_to_9 = True ,
2426 enable_million = False ):
2527 super ().__init__ ('cardinal' )
2628 self .number = None
@@ -32,10 +34,10 @@ def __init__(self, enable_standalone_number=True, enable_0_to_9=True,
3234 self .build_verbalizer ()
3335
3436 def build_tagger (self ):
35- zero = string_file ('itn/chinese/data/number/zero.tsv' ) # 0
37+ zero = string_file ('itn/chinese/data/number/zero.tsv' ) # 0
3638 digit = string_file ('itn/chinese/data/number/digit.tsv' ) # 1 ~ 9
37- sign = string_file ('itn/chinese/data/number/sign.tsv' ) # + -
38- dot = string_file ('itn/chinese/data/number/dot.tsv' ) # .
39+ sign = string_file ('itn/chinese/data/number/sign.tsv' ) # + -
40+ dot = string_file ('itn/chinese/data/number/dot.tsv' ) # .
3941
4042 addzero = insert ('0' )
4143 digits = zero | digit # 0 ~ 9
@@ -52,33 +54,33 @@ def build_tagger(self):
5254 | add_weight (addzero ** 2 , 1.0 )))
5355 # 一千一百一十一 => 1111, 一千零一十一 => 1011, 一千零一 => 1001
5456 # 一千一 => 1100, 一千 => 1000
55- thousand = ((hundred | teen | tens | digits ) + delete ('千' ) + (
56- hundred
57- | add_weight (zero + (tens | teen ), 0.1 )
58- | add_weight (addzero + zero + digit , 0.5 )
59- | add_weight (digit + addzero ** 2 , 0.8 )
60- | add_weight (addzero ** 3 , 1.0 )))
57+ thousand = ((hundred | teen | tens | digits ) + delete ('千' ) +
58+ ( hundred
59+ | add_weight (zero + (tens | teen ), 0.1 )
60+ | add_weight (addzero + zero + digit , 0.5 )
61+ | add_weight (digit + addzero ** 2 , 0.8 )
62+ | add_weight (addzero ** 3 , 1.0 )))
6163 # 10001111, 1001111, 101111, 11111, 10111, 10011, 10001, 10000
6264 if self .enable_million :
63- ten_thousand = (( thousand | hundred | teen | tens | digits )
64- + delete ('万' )
65- + (thousand
66- | add_weight (zero + hundred , 0.1 )
67- | add_weight (addzero + zero + (tens | teen ), 0.5 )
68- | add_weight (addzero + addzero + zero + digit , 0.5 )
69- | add_weight (digit + addzero ** 3 , 0.8 )
70- | add_weight (addzero ** 4 , 1.0 )))
65+ ten_thousand = (
66+ ( thousand | hundred | teen | tens | digits ) + delete ('万' ) +
67+ (thousand
68+ | add_weight (zero + hundred , 0.1 )
69+ | add_weight (addzero + zero + (tens | teen ), 0.5 )
70+ | add_weight (addzero + addzero + zero + digit , 0.5 )
71+ | add_weight (digit + addzero ** 3 , 0.8 )
72+ | add_weight (addzero ** 4 , 1.0 )))
7173 else :
72- ten_thousand = (( teen | tens | digits )
73- + delete ('万' )
74- + (thousand
75- | add_weight (zero + hundred , 0.1 )
76- | add_weight (addzero + zero + (tens | teen ), 0.5 )
77- | add_weight (addzero + addzero + zero + digit , 0.5 )
78- | add_weight (digit + addzero ** 3 , 0.8 )
79- | add_weight (addzero ** 4 , 1.0 )))
80- ten_thousand |= (thousand | hundred ) + accep ("万" ) + delete ("零" ). ques + (
81- thousand | hundred | tens | teen | digits ).ques
74+ ten_thousand = (
75+ ( teen | tens | digits ) + delete ('万' ) +
76+ (thousand
77+ | add_weight (zero + hundred , 0.1 )
78+ | add_weight (addzero + zero + (tens | teen ), 0.5 )
79+ | add_weight (addzero + addzero + zero + digit , 0.5 )
80+ | add_weight (digit + addzero ** 3 , 0.8 )
81+ | add_weight (addzero ** 4 , 1.0 )))
82+ ten_thousand |= (thousand | hundred ) + accep ("万" ) + delete (
83+ "零" ). ques + ( thousand | hundred | tens | teen | digits ).ques
8284 # 个/十/百/千/万
8385 number = digits | teen | tens | hundred | thousand | ten_thousand
8486 # 兆/亿
@@ -107,31 +109,31 @@ def build_tagger(self):
107109 # 十/百/千/万
108110 number_exclude_0_to_9 = teen | tens | hundred | thousand | ten_thousand
109111 # 兆/亿
110- number_exclude_0_to_9 = (
111- (( number_exclude_0_to_9 | digits ) + accep ('兆' ) + delete ('零' ).ques ).ques +
112- ((number_exclude_0_to_9 | digits ) + accep ( '亿' ) + delete ( '零' ). ques ). ques +
113- number_exclude_0_to_9
114- )
112+ number_exclude_0_to_9 = ((( number_exclude_0_to_9 | digits ) +
113+ accep ('兆' ) + delete ('零' ).ques ).ques +
114+ ((number_exclude_0_to_9 | digits ) +
115+ accep ( '亿' ) + delete ( '零' ). ques ). ques +
116+ number_exclude_0_to_9 )
115117 # 负的xxx 1.11, 1.01
116- number_exclude_0_to_9 |= (
117- (number_exclude_0_to_9 | digits ) +
118- (dot + digits .plus ).plus
119- )
118+ number_exclude_0_to_9 |= ((number_exclude_0_to_9 | digits ) +
119+ (dot + digits .plus ).plus )
120120 # 五六万,三五千,六七百,三四十
121121 # 十七八美元 => $17~18, 四十五六岁 => 45-6岁,
122122 # 三百七八公里 => 370-80km, 三百七八十千克 => 370-80kg
123123 number_exclude_0_to_9 |= special_2number
124124 number_exclude_0_to_9 |= add_weight (special_3number , - 100.0 )
125125
126- self .number_exclude_0_to_9 = (sign .ques + number_exclude_0_to_9 ).optimize () # noqa
126+ self .number_exclude_0_to_9 = (sign .ques +
127+ number_exclude_0_to_9 ).optimize () # noqa
127128
128129 # cardinal string like 127.0.0.1, used in ID, IP, etc.
129130 cardinal = digits .plus + (dot + digits .plus ).plus
130131 # float number like 1.11
131132 cardinal |= (number + dot + digits .plus )
132133 # cardinal string like 110 or 12306 or 13125617878, used in phone,
133134 # 340621199806051223, used in ID card
134- cardinal |= (digits ** 3 | digits ** 4 | digits ** 5 | digits ** 11 | digits ** 18 )
135+ cardinal |= (digits ** 3 | digits ** 4 | digits ** 5 | digits ** 11
136+ | digits ** 18 )
135137 # cardinal string like 23
136138 if self .enable_standalone_number :
137139 if self .enable_0_to_9 :
0 commit comments