Skip to content

Commit bae77e4

Browse files
committed
feat:添加模糊匹配(beta)
1 parent e83e3c9 commit bae77e4

File tree

4 files changed

+544
-34
lines changed

4 files changed

+544
-34
lines changed

src/cvbManager.ts

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import * as vscode from "vscode";
66
import { generateFilenameFromRequest, callDeepSeekApi } from "./deepseekApi";
77

88
import { getLanguageFromPath } from "./languageMapping";
9-
import {getOutputChannel, getCurrentOperationController} from './extension'
9+
import {getOutputChannel, getCurrentOperationController} from './extension';
10+
11+
import * as FuzzyMatch from './fuzzyMatch';
1012

1113
// ================== CVB 核心类 ==================
1214
export class Cvb {
@@ -780,7 +782,33 @@ function applyExactReplace(
780782
return strContent.replace(regPattern, strReplacement);
781783
}
782784

783-
function applyGlobalReplace(
785+
/**
 * Re-indents replacement text so that it lines up with the spot it is spliced into.
 *
 * The reference indentation is the leading whitespace of the line of
 * `originalContent` that contains `matchStart`.
 *
 * - The first line of `newContent` has its own leading whitespace removed; it is
 *   inserted at `matchStart`, so whatever precedes the match on that line stays
 *   in front of it.
 * - Whitespace-only lines are emitted without indentation so the result never
 *   contains lines consisting solely of trailing whitespace. An interior blank
 *   line keeps a trailing `\r` so CRLF input keeps its line endings.
 * - Every other line is prefixed with the reference indentation and keeps its
 *   own leading whitespace as relative indentation.
 *
 * @param originalContent Text in which the match was found.
 * @param matchStart Offset in `originalContent` where the match begins.
 * @param newContent Replacement text to re-indent.
 * @returns The re-indented replacement text.
 */
function adjustIndentation(originalContent: string, matchStart: number, newContent: string): string {
  // Text on the match's own line, up to (not including) the match.
  const textBeforeMatch = originalContent.substring(0, matchStart);
  const linePrefix = textBeforeMatch.slice(textBeforeMatch.lastIndexOf("\n") + 1);
  const indent = /^\s*/.exec(linePrefix)?.[0] ?? "";

  const lines = newContent.split("\n");
  const lastIndex = lines.length - 1;

  return lines
    .map((line, index) => {
      if (index === 0) {
        return line.trimStart();
      }
      if (line.trim().length === 0) {
        // Blank line: no indentation. The final line is emitted fully empty.
        return index !== lastIndex && line.endsWith("\r") ? "\r" : "";
      }
      return indent + line;
    })
    .join("\n");
}
810+
811+
export function applyGlobalReplace(
784812
strContent: string,
785813
op: GlobalReplaceOperation
786814
): string {
@@ -795,15 +823,20 @@ function applyGlobalReplace(
795823
"gs"
796824
);
797825

798-
regPattern.lastIndex = 0;
799-
if (!regPattern.test(strContent)) {
826+
if (regPattern.test(strContent)) {
827+
regPattern.lastIndex = 0;
828+
return strContent.replace(regPattern, (match, offset) => {
829+
return adjustIndentation(strContent, offset, op.m_strNewContent);
830+
});
831+
}
832+
833+
try {
834+
return FuzzyMatch.applyFuzzyGlobalReplace(strContent, op.m_strOldContent, op.m_strNewContent);
835+
} catch {
800836
const errorMsg = `GLOBAL-REPLACE 失败:FILE:"${op.m_strFilePath}" 中未找到OLD_CONTENT: "${op.m_strOldContent}" 可能是和原文有细微差异,或者文件路径和别的文件搞错了`;
801837
console.log(errorMsg + `\n表达式: ${regPattern}`);
802838
throw new Error(errorMsg);
803839
}
804-
regPattern.lastIndex = 0;
805-
806-
return strContent.replace(regPattern, op.m_strNewContent);
807840
}
808841

809842
// 根据前锚点、内容、后锚点构建正则表达式(dotall 模式)
@@ -950,35 +983,58 @@ function escapeRegExp(str: string): string {
950983
return str.replace(/[.*+?^${}()|[\]\\&]/g, (match) => "\\" + match);
951984
}
952985

986+
/**
 * Strips leading and trailing blank lines from a multi-line string.
 *
 * A line is blank when it is empty or contains only whitespace. Lines between
 * the first and last non-blank line are returned untouched, including their
 * own leading/trailing whitespace and any interior blank lines.
 *
 * @param anchor Text to trim, with lines separated by `\n`.
 * @returns The text without surrounding blank lines, or `""` when every line is blank.
 */
export function normalizeInput(anchor: string): string {
  const lines = anchor.split("\n");
  const isBlank = (line: string): boolean => line.trim().length === 0;

  // Scan inwards from both ends instead of repeatedly shifting the array,
  // which would cost O(n) per removed leading line.
  let first = 0;
  while (first < lines.length && isBlank(lines[first])) {
    first++;
  }

  let end = lines.length;
  while (end > first && isBlank(lines[end - 1])) {
    end--;
  }

  return lines.slice(first, end).join("\n");
}
1006+
1007+
// 处理空白字符的规范化函数
9531008
function normalizeLineWhitespace(anchor: string): string {
954-
// 按行拆分后对每行做空白归一化处理
955-
let aszNormalized_Arr: string[] = anchor
956-
.split("\n")
957-
.map((szLine_Str: string, unIndex_Uint: number, aszArr_Arr: string[]) => {
958-
szLine_Str = szLine_Str.trim();
959-
if (szLine_Str.length > 0) {
960-
// 将行内连续空白替换为 \s*
961-
szLine_Str = szLine_Str.replace(/\s+/g, "\\s*");
962-
// 在每行前后各增加一个 \s*
963-
szLine_Str = `\\s*${szLine_Str}\\s*`;
1009+
if (anchor === "") {
1010+
return "\\s*";
1011+
}
1012+
1013+
let lines: string[] = anchor.split("\n");
1014+
1015+
// 处理每一行的空白字符
1016+
let normalizedLines: string[] = lines.map((line: string, index: number, arr: string[]) => {
1017+
const isFirstLine = index === 0;
1018+
const isLastLine = index === arr.length - 1;
1019+
line = line.trim();
1020+
1021+
if (line.length > 0) {
1022+
// 将行内连续空白替换为 \s*
1023+
line = line.replace(/\s+/g, "\\s*");
1024+
1025+
// 根据行位置添加前后 \s*
1026+
if (isFirstLine) {
1027+
return `${line}\\s*`;
1028+
} else if (isLastLine) {
1029+
return `\\s*${line}`;
9641030
} else {
965-
// 空行处理:直接使用 \s*
966-
szLine_Str = "\\s*";
1031+
return `\\s*${line}\\s*`;
9671032
}
968-
return szLine_Str;
969-
});
970-
971-
// 去除整体结果中最开头和最末尾多余的 \s*
972-
if (aszNormalized_Arr.length > 0) {
973-
// 第1行:移除行首的 \s*
974-
aszNormalized_Arr[0] = aszNormalized_Arr[0].replace(/^\\s\*/, "");
975-
// 最后一行:移除行尾的 \s*
976-
aszNormalized_Arr[aszNormalized_Arr.length - 1] = aszNormalized_Arr[
977-
aszNormalized_Arr.length - 1
978-
].replace(/\\s\*$/, "");
979-
}
980-
981-
return aszNormalized_Arr.join("\n");
1033+
}
1034+
return "\\s*"; // 空行处理
1035+
});
1036+
1037+
return normalizedLines.join("\n");
9821038
}
9831039

9841040
function filePathNormalize(strRawPath: string): string {

src/fuzzyMatch.ts

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// ================ Type definitions ================

/**
 * A matched range expressed as offsets into the original (un-normalized) text.
 * `end` is exclusive: the replacement step keeps everything from `end` onwards.
 */
interface MatchPosition {
  start: number;
  end: number;
}

/**
 * Whitespace-normalized text together with a lookup table back to the source:
 * `mapping[i]` is the position in the original text recorded for `content[i]`.
 */
interface NormalizedContent {
  content: string;
  mapping: number[];
}

// ================ Core implementation ================

/** Largest edit distance at which a window still counts as a fuzzy match. */
const MAX_EDIT_DISTANCE = 5;

/** Number of pieces the pattern is cut into when looking for candidates. */
const SEGMENT_COUNT = MAX_EDIT_DISTANCE + 1;
15+
16+
/**
 * Replaces every fuzzy occurrence of `strOldContent` inside `strContent`
 * with `strNewContent`.
 *
 * Both texts are whitespace-normalized, candidate start positions are located
 * from exact hits of pattern segments, each candidate is verified by edit
 * distance, and the surviving ranges are mapped back onto the original text
 * and replaced.
 *
 * @param strContent Text to search and modify.
 * @param strOldContent Text to look for (approximately).
 * @param strNewContent Text inserted in place of each match.
 * @returns `strContent` with all accepted matches replaced.
 * @throws Error when no window lies within `MAX_EDIT_DISTANCE` of the pattern.
 */
export function applyFuzzyGlobalReplace(
  strContent: string,
  strOldContent: string,
  strNewContent: string
): string {
  const normalized = normalizeContent(strContent);
  const needle = normalizePattern(strOldContent);

  // Cheap filter first (segment hits), then the expensive edit-distance check.
  const candidateStarts = findCandidatePositions(normalized.content, needle);
  const accepted = verifyMatches(normalized.content, needle, candidateStarts, normalized.mapping);

  if (accepted.length > 0) {
    return applyReplacements(strContent, accepted, strNewContent);
  }

  throw new Error(`GLOBAL-REPLACE失败:未找到允许${MAX_EDIT_DISTANCE}个字符差异的匹配`);
}
38+
39+
// ================ 算法核心模块 ================
40+
/**
 * Collapses whitespace in `original` and records where each output character
 * came from.
 *
 * - Leading whitespace is dropped.
 * - Every other run of whitespace becomes a single `' '`, mapped to the index
 *   of the first whitespace character of the run.
 * - All other characters are copied unchanged.
 *
 * The text is walked by UTF-16 code unit, not by code point, so that
 * `mapping.length === content.length` always holds and every entry is a valid
 * index into `original`, even when the text contains surrogate pairs such as
 * emoji.
 *
 * @param original Text to normalize.
 * @returns The normalized text and, for each of its code units, the index in
 *   `original` it was produced from.
 */
function normalizeContent(original: string): NormalizedContent {
  const whitespace = /\s/;
  const pieces: string[] = [];
  const mapping: number[] = [];
  // Starts as true so that leading whitespace produces no output.
  let inWhitespaceRun = true;

  for (let index = 0; index < original.length; index++) {
    const unit = original.charAt(index);

    if (whitespace.test(unit)) {
      if (!inWhitespaceRun) {
        pieces.push(" ");
        mapping.push(index);
        inWhitespaceRun = true;
      }
    } else {
      pieces.push(unit);
      mapping.push(index);
      inWhitespaceRun = false;
    }
  }

  return { content: pieces.join(""), mapping };
}
64+
65+
/**
 * Puts a search pattern into whitespace-normalized form: surrounding
 * whitespace is removed and every inner run of whitespace becomes one space.
 *
 * @param pattern Raw pattern text.
 * @returns The pattern's non-whitespace tokens joined by single spaces.
 */
function normalizePattern(pattern: string): string {
  const tokens = pattern.split(/\s+/).filter((token) => token.length > 0);
  return tokens.join(" ");
}
68+
69+
/**
 * Finds start offsets in `content` at which a window of `pattern.length`
 * characters might lie within the allowed edit distance of `pattern`.
 *
 * The pattern is cut into `SEGMENT_COUNT` consecutive pieces, one more than
 * the number of allowed edits. If a window is within that many edits of the
 * pattern, at least one piece must occur in it unmodified, displaced from its
 * position in the pattern by no more than the number of edits. Each exact
 * occurrence of a piece therefore yields only the few window starts that are
 * consistent with the piece's offset inside the pattern, instead of a span of
 * several pattern lengths around the hit.
 *
 * Patterns shorter than `SEGMENT_COUNT` produce fewer pieces, so this is only
 * a heuristic filter for them.
 *
 * @param content Normalized text to search.
 * @param pattern Normalized pattern.
 * @returns Distinct candidate window starts in ascending order; every returned
 *   start leaves room for a full-length window.
 */
function findCandidatePositions(content: string, pattern: string): number[] {
  const lastWindowStart = content.length - pattern.length;
  if (pattern.length === 0 || lastWindowStart < 0) {
    return [];
  }

  // SEGMENT_COUNT is the allowed edit count plus one, so this is the largest
  // displacement an untouched segment can have inside a matching window.
  const slack = SEGMENT_COUNT - 1;
  const candidates = new Set<number>();

  // Segments are consecutive, so a running sum gives each one's pattern offset.
  let segmentOffset = 0;
  for (const segment of splitPattern(pattern, SEGMENT_COUNT)) {
    if (segment.length === 0) {
      continue; // an empty needle would make indexOf loop forever
    }

    let pos = content.indexOf(segment);
    while (pos !== -1) {
      const alignedStart = pos - segmentOffset;
      const from = Math.max(0, alignedStart - slack);
      const to = Math.min(lastWindowStart, alignedStart + slack);
      for (let start = from; start <= to; start++) {
        candidates.add(start);
      }
      pos = content.indexOf(segment, pos + 1);
    }

    segmentOffset += segment.length;
  }

  return Array.from(candidates).sort((a, b) => a - b);
}
91+
92+
/**
 * Checks candidate window starts against the pattern and returns the accepted
 * matches as ranges in the original text.
 *
 * A window is the `pattern.length` characters of `content` beginning at a
 * candidate start. It is accepted when its edit distance to `pattern` is within
 * the tolerance: `MAX_EDIT_DISTANCE`, capped at half the pattern length so that
 * a short pattern cannot "match" arbitrary text (a pattern no longer than the
 * tolerance is within tolerance of every window).
 *
 * When accepted windows overlap, the one with the smallest distance wins, with
 * ties going to the earlier start. Picking the earliest start instead would
 * favour a window shifted a few characters before the real occurrence.
 *
 * @param content Normalized text.
 * @param pattern Normalized pattern.
 * @param candidates Window starts (indices into `content`) to verify.
 * @param mapping For each index of `content`, the corresponding position in
 *   the original text.
 * @returns Non-overlapping matches with `end` exclusive, in original-text offsets.
 */
function verifyMatches(
  content: string,
  pattern: string,
  candidates: number[],
  mapping: number[]
): MatchPosition[] {
  const patternLen = pattern.length;
  if (patternLen === 0) {
    return [];
  }

  const tolerance = Math.min(MAX_EDIT_DISTANCE, Math.floor(patternLen / 2));

  // Windows within tolerance, in normalized coordinates.
  const hits: { start: number; distance: number }[] = [];
  for (const start of candidates) {
    const end = start + patternLen;
    if (start < 0 || end > content.length) {
      continue;
    }
    const distance = calculateEditDistance(content.substring(start, end), pattern, tolerance);
    if (distance <= tolerance) {
      hits.push({ start, distance });
    }
  }

  // Best alignment first; a hit is kept only if it is clear of every better one.
  hits.sort((a, b) => a.distance - b.distance || a.start - b.start);
  const chosen: { start: number; distance: number }[] = [];
  for (const hit of hits) {
    if (chosen.every((other) => Math.abs(other.start - hit.start) >= patternLen)) {
      chosen.push(hit);
    }
  }

  // Exclusive end for a window that reaches the end of the normalized text:
  // one past the original position of the last normalized character.
  const lastMapped = mapping[mapping.length - 1];
  const endOfMapped = lastMapped === undefined ? 0 : lastMapped + 1;

  const validMatches: MatchPosition[] = [];
  for (const { start } of chosen) {
    const mappedStart = mapping[start];
    if (mappedStart === undefined) {
      continue; // mapping does not cover this position
    }
    validMatches.push({
      start: mappedStart,
      end: mapping[start + patternLen] ?? endOfMapped,
    });
  }

  return processOverlaps(validMatches);
}
120+
121+
// ================ 工具函数 ================
122+
/**
 * Cuts `pattern` into at most `count` consecutive, non-empty pieces whose
 * lengths differ by at most one; the longer pieces come first.
 *
 * When the pattern has fewer characters than `count`, the empty pieces are
 * dropped, so fewer than `count` pieces are returned.
 *
 * @param pattern Text to split.
 * @param count Requested number of pieces.
 * @returns The pieces in order; concatenated they reproduce `pattern`.
 */
function splitPattern(pattern: string, count: number): string[] {
  const baseLength = Math.floor(pattern.length / count);
  // This many leading pieces get one extra character.
  const longerPieces = pattern.length % count;

  const segments: string[] = [];
  let pos = 0;
  for (let i = 0; i < count; i++) {
    const length = i < longerPieces ? baseLength + 1 : baseLength;
    segments.push(pattern.slice(pos, pos + length));
    pos += length;
  }

  return segments.filter((segment) => segment.length > 0);
}
136+
137+
/**
 * Levenshtein distance between `a` and `b` with an early exit.
 *
 * Returns `Infinity` as soon as the distance provably exceeds `maxDistance`:
 * either the lengths already differ by more than that, or every cell of a
 * finished row is above it. Otherwise the exact distance is returned.
 *
 * Only two rows of the table are kept and swapped after each row.
 *
 * @param a First string.
 * @param b Second string.
 * @param maxDistance Threshold used for the early exit.
 * @returns The edit distance, or `Infinity` when it was cut off.
 */
function calculateEditDistance(a: string, b: string, maxDistance: number): number {
  const rows = a.length;
  const cols = b.length;

  if (Math.abs(rows - cols) > maxDistance) {
    return Infinity;
  }

  // Row 0: turning the empty prefix of `a` into b[0..j) takes j insertions.
  let above: number[] = Array.from({ length: cols + 1 }, (_, j) => j);
  let row: number[] = new Array<number>(cols + 1);

  for (let i = 1; i <= rows; i++) {
    row[0] = i;
    let smallest = i;
    const current = a[i - 1];

    for (let j = 1; j <= cols; j++) {
      const substitution = above[j - 1] + (current === b[j - 1] ? 0 : 1);
      const deletion = above[j] + 1;
      const insertion = row[j - 1] + 1;
      const cell = Math.min(deletion, insertion, substitution);

      row[j] = cell;
      if (cell < smallest) {
        smallest = cell;
      }
    }

    if (smallest > maxDistance) {
      return Infinity;
    }

    // The finished row becomes "above"; the old one is reused as scratch.
    const scratch = above;
    above = row;
    row = scratch;
  }

  return above[cols];
}
168+
169+
/**
 * Removes overlapping matches, keeping the earliest-starting ones.
 *
 * Matches are visited in ascending `start` order. A match is kept when it
 * starts at or after the end of the last match that was *kept*; comparing
 * against the previous array element instead would wrongly discard a match
 * that only overlaps an already-discarded one.
 *
 * The input array is not modified.
 *
 * @param matches Matches in any order; `end` is exclusive.
 * @returns A new array of non-overlapping matches sorted by `start`.
 */
function processOverlaps(matches: MatchPosition[]): MatchPosition[] {
  const ordered = [...matches].sort((a, b) => a.start - b.start);

  const kept: MatchPosition[] = [];
  let lastKeptEnd = -Infinity;
  for (const match of ordered) {
    if (match.start >= lastKeptEnd) {
      kept.push(match);
      lastKeptEnd = match.end;
    }
  }

  return kept;
}
176+
177+
/**
 * Substitutes `replacement` for each matched range of `original`.
 *
 * The ranges are processed from the last array element to the first, so that
 * splicing one range does not shift the offsets of the ranges still to be
 * processed (given matches ordered by ascending start).
 *
 * @param original Text to modify.
 * @param matches Ranges to replace; `end` is exclusive.
 * @param replacement Text inserted for every range.
 * @returns The text with all ranges replaced.
 */
function applyReplacements(
  original: string,
  matches: MatchPosition[],
  replacement: string
): string {
  return matches.reduceRight(
    (text, { start, end }) => text.slice(0, start) + replacement + text.slice(end),
    original
  );
}

0 commit comments

Comments
 (0)