@@ -86,7 +86,7 @@ def build_tagger(self):
8686 ten_thousand |= (thousand | hundred ) + accep ("万" ) + delete (
8787 "零" ).ques + (thousand | hundred | tens | teen | digits ).ques
8888
89- # 1. 利用基础数字所构建的包含0~9的完整数字
89+ # 1. 利用基础数字所构建的包含0~9的标准数字
9090 # 个/十/百/千/万
9191 number = digits | teen | tens | hundred | thousand | ten_thousand
9292 # 兆/亿
@@ -100,13 +100,13 @@ def build_tagger(self):
100100 _special_dash = cross ('十' , '1' ) + special_dash
101101 _special_dash |= digit + delete ('十' ) + special_dash
102102 _special_dash |= digit + delete ('百' ) + special_dash
103- number |= add_weight ( _special_dash , - 100.0 )
103+ number |= _special_dash
104104
105105 self .number = number .optimize ()
106106 self .special_tilde = special_tilde .optimize ()
107107 self .special_dash = _special_dash .optimize ()
108108
109- # 2. 利用基础数字所构建的不包含0~9的完整数字
109+ # 2. 利用基础数字所构建的不包含0~9的标准数字
110110 # 十/百/千/万
111111 number_exclude_0_to_9 = teen | tens | hundred | thousand | ten_thousand
112112 # 兆/亿
@@ -137,13 +137,15 @@ def build_tagger(self):
137137 cardinal |= (digits ** 3 | digits ** 4 | digits ** 5 | digits ** 11
138138 | digits ** 18 )
139139
140- # 4. 特殊格式的数字 + 包含或不包含0~9的完整数字
140+ # 4. 特殊格式的数字 + 标准数字
141141 # cardinal string like 23
142142 if self .enable_standalone_number :
143143 if self .enable_0_to_9 :
144- cardinal |= number
144+ # 特殊格式数字为第一优先级, 标准数字为第二优先级, 如 "一二三四"
145+ # 优先转译为 "1234" 而非 "1~2 3~4"
146+ cardinal |= add_weight (number , 0.1 )
145147 else :
146- cardinal |= number_exclude_0_to_9
148+ cardinal |= add_weight ( number_exclude_0_to_9 , 0.1 )
147149 tagger = insert ('value: "' ) + cardinal + (insert (" " ) + cardinal ).star \
148150 + insert ('"' )
149151 self .tagger = self .add_tokens (tagger )
0 commit comments