@@ -13,13 +13,38 @@ impl Scorer for StandardScorer {
1313 // program中的字符串与user_input都已经是预处理过了,不再需要预处理了
1414 let mut ret: f64 = -10000.0 ;
1515 for names in & program. search_keywords {
16- if names. chars ( ) . count ( ) < user_input. chars ( ) . count ( ) {
16+ let input_len = user_input. chars ( ) . count ( ) ;
17+ let target_len = names. chars ( ) . count ( ) ;
18+
19+ // 条件性容错:短关键字(<=2字符)严格匹配,长关键字允许多打1字符
20+ let tolerance = if target_len <= 2 { 0 } else { 1 } ;
21+ if target_len + tolerance < input_len {
1722 continue ;
1823 }
24+
1925 let mut score: f64 = shortest_edit_dis ( names, user_input) ;
20- score *= adjust_score_log2 (
21- ( user_input. chars ( ) . count ( ) as f64 ) / ( names. chars ( ) . count ( ) as f64 ) ,
22- ) ;
26+
27+ // 计算长度比率
28+ let input_len_f = input_len as f64 ;
29+ let target_len_f = target_len as f64 ;
30+
31+ // 1. 限制比率加成:如果输入比目标长,比率锁定为 1.0,避免"越长分越高"的逻辑谬误
32+ let ratio = if input_len > target_len {
33+ 1.0
34+ } else {
35+ input_len_f / target_len_f
36+ } ;
37+ score *= adjust_score_log2 ( ratio) ;
38+
39+ // 2. 动态溢出惩罚:根据溢出比例动态调整惩罚
40+ // 溢出越多惩罚越重,对长词更宽容,对短词更严格
41+ if input_len > target_len {
42+ let overflow_ratio = ( input_len_f - target_len_f) / target_len_f;
43+ // 惩罚因子:1.0 - 溢出比例 * 0.3,下限为 0.7(即最多扣减 30%)
44+ let penalty = ( 1.0 - overflow_ratio * 0.3 ) . max ( 0.7 ) ;
45+ score *= penalty;
46+ }
47+
2348 score += subset_dis ( names, user_input) ;
2449 score += kmp ( names, user_input) ;
2550 ret = f64:: max ( ret, score) ;
@@ -87,35 +112,34 @@ pub fn shortest_edit_dis(compare_name: &str, input_name: &str) -> f64 {
87112
88113 let mut prev = vec ! [ 0i32 ; n + 1 ] ;
89114 let mut current = vec ! [ 0i32 ; n + 1 ] ;
90- let mut min_operations = i32:: MAX ;
115+ // 初始化为最大可能距离(即完全插入),确保包含dp[0][n]的情况
116+ let mut min_operations = n as i32 ;
91117
92118 // 初始化prev数组(对应i=0)
93119 for ( j, value) in prev. iter_mut ( ) . enumerate ( ) {
94120 * value = j as i32 ;
95121 }
96122
97123 for i in 1 ..=m {
98- current[ 0 ] = 0 ; // dp[i][0] = 0
124+ current[ 0 ] = 0 ; // dp[i][0] = 0,允许从compare的任意位置开始匹配
99125 for j in 1 ..=n {
100- if compare_chars[ i - 1 ] == input_chars[ j - 1 ] {
101- current [ j ] = prev [ j - 1 ] ;
126+ let cost = if compare_chars[ i - 1 ] == input_chars[ j - 1 ] {
127+ 0
102128 } else {
103- current[ j] = std:: cmp:: min ( prev[ j - 1 ] + 1 , prev[ j] + 1 ) ;
104- }
129+ 1
130+ } ;
131+ current[ j] = ( prev[ j - 1 ] + cost) // 替换/匹配
132+ . min ( prev[ j] + 1 ) // 删除 (compare中有,input中无)
133+ . min ( current[ j - 1 ] + 1 ) ; // 插入 (compare中无,input中有)
105134 }
106- // 记录dp[i][n]
107- if i >= n && current[ n] < min_operations {
135+ // 记录dp[i][n],即input完全匹配到compare[..i]的某个后缀的代价
136+ if current[ n] < min_operations {
108137 min_operations = current[ n] ;
109138 }
110139 // 交换prev和current
111140 std:: mem:: swap ( & mut prev, & mut current) ;
112141 }
113142
114- // 确保min_operations包含dp[m][n]
115- if m >= n && prev[ n] < min_operations {
116- min_operations = prev[ n] ;
117- }
118-
119143 // 计算最终得分
120144 let value = 1.0 - ( min_operations as f64 / n as f64 ) ;
121145 adjust_score_log2 ( n as f64 ) * ( 3.0 * value - 2.0 ) . exp ( )
0 commit comments