Skip to content

Commit 32b7f52

Browse files
committed
perf(scorer): 优化了标准匹配算法,支持用户多打一个字符时仍能匹配。比如用'steam\'来搜索'steam'
1 parent 361b61a commit 32b7f52

File tree

1 file changed

+41
-17
lines changed

1 file changed

+41
-17
lines changed

src-tauri/src/modules/program_manager/search_model/standard_search_model.rs

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,38 @@ impl Scorer for StandardScorer {
1313
// program中的字符串与user_input都已经是预处理过了,不再需要预处理了
1414
let mut ret: f64 = -10000.0;
1515
for names in &program.search_keywords {
16-
if names.chars().count() < user_input.chars().count() {
16+
let input_len = user_input.chars().count();
17+
let target_len = names.chars().count();
18+
19+
// 条件性容错:短关键字(<=2字符)严格匹配,长关键字允许多打1字符
20+
let tolerance = if target_len <= 2 { 0 } else { 1 };
21+
if target_len + tolerance < input_len {
1722
continue;
1823
}
24+
1925
let mut score: f64 = shortest_edit_dis(names, user_input);
20-
score *= adjust_score_log2(
21-
(user_input.chars().count() as f64) / (names.chars().count() as f64),
22-
);
26+
27+
// 计算长度比率
28+
let input_len_f = input_len as f64;
29+
let target_len_f = target_len as f64;
30+
31+
// 1. 限制比率加成:如果输入比目标长,比率锁定为 1.0,避免"越长分越高"的逻辑谬误
32+
let ratio = if input_len > target_len {
33+
1.0
34+
} else {
35+
input_len_f / target_len_f
36+
};
37+
score *= adjust_score_log2(ratio);
38+
39+
// 2. 动态溢出惩罚:根据溢出比例动态调整惩罚
40+
// 溢出越多惩罚越重,对长词更宽容,对短词更严格
41+
if input_len > target_len {
42+
let overflow_ratio = (input_len_f - target_len_f) / target_len_f;
43+
// 惩罚因子:1.0 - 溢出比例 * 0.3,最低 0.7
44+
let penalty = (1.0 - overflow_ratio * 0.3).max(0.7);
45+
score *= penalty;
46+
}
47+
2348
score += subset_dis(names, user_input);
2449
score += kmp(names, user_input);
2550
ret = f64::max(ret, score);
@@ -87,35 +112,34 @@ pub fn shortest_edit_dis(compare_name: &str, input_name: &str) -> f64 {
87112

88113
let mut prev = vec![0i32; n + 1];
89114
let mut current = vec![0i32; n + 1];
90-
let mut min_operations = i32::MAX;
115+
// 初始化为最大可能距离(即完全插入),确保包含dp[0][n]的情况
116+
let mut min_operations = n as i32;
91117

92118
// 初始化prev数组(对应i=0)
93119
for (j, value) in prev.iter_mut().enumerate() {
94120
*value = j as i32;
95121
}
96122

97123
for i in 1..=m {
98-
current[0] = 0; // dp[i][0] = 0
124+
current[0] = 0; // dp[i][0] = 0,允许从compare的任意位置开始匹配
99125
for j in 1..=n {
100-
if compare_chars[i - 1] == input_chars[j - 1] {
101-
current[j] = prev[j - 1];
126+
let cost = if compare_chars[i - 1] == input_chars[j - 1] {
127+
0
102128
} else {
103-
current[j] = std::cmp::min(prev[j - 1] + 1, prev[j] + 1);
104-
}
129+
1
130+
};
131+
current[j] = (prev[j - 1] + cost) // 替换/匹配
132+
.min(prev[j] + 1) // 删除 (compare中有,input中无)
133+
.min(current[j - 1] + 1); // 插入 (compare中无,input中有)
105134
}
106-
// 记录dp[i][n]
107-
if i >= n && current[n] < min_operations {
135+
// 记录dp[i][n],即input完全匹配到compare[..i]的某个后缀的代价
136+
if current[n] < min_operations {
108137
min_operations = current[n];
109138
}
110139
// 交换prev和current
111140
std::mem::swap(&mut prev, &mut current);
112141
}
113142

114-
// 确保min_operations包含dp[m][n]
115-
if m >= n && prev[n] < min_operations {
116-
min_operations = prev[n];
117-
}
118-
119143
// 计算最终得分
120144
let value = 1.0 - (min_operations as f64 / n as f64);
121145
adjust_score_log2(n as f64) * (3.0 * value - 2.0).exp()

0 commit comments

Comments
 (0)