Skip to content

Commit 6400905

Browse files
committed
test:模糊匹配通过部分单元测试
1 parent d241b1d commit 6400905

File tree

2 files changed

+192
-68
lines changed

2 files changed

+192
-68
lines changed

src/fuzzyMatch.ts

Lines changed: 73 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -189,106 +189,126 @@ export function normalizePattern(pattern: string): string {
189189
return content;
190190
}
191191

/**
 * Collects the offsets in `content` at which `pattern` could plausibly start.
 *
 * The pattern is cut into segments by `splitPatternWithStart(pattern, SEGMENT_COUNT)`.
 * Every exact occurrence of a segment in `content` implies a start offset for the
 * whole pattern (occurrence position minus the segment's offset inside the pattern).
 * All offsets within `MAX_EDIT_DISTANCE` of that implied start are recorded, clamped
 * to the range `[0, content.length - pattern.length]`.
 *
 * @param content Text to search in.
 * @param pattern Text to look for.
 * @returns Unique candidate start offsets in ascending order.
 */
export function findCandidatePositions(content: string, pattern: string): number[] {
    const found = new Set<number>();
    // Last offset at which the whole pattern still fits inside the content.
    const lastFittingStart = content.length - pattern.length;

    for (const { segment, start: offsetInPattern } of splitPatternWithStart(pattern, SEGMENT_COUNT)) {
        let hit = content.indexOf(segment);
        while (hit !== -1) {
            // Where the full pattern would begin if this segment is in its expected place.
            const impliedStart = hit - offsetInPattern;
            const lowest = Math.max(0, impliedStart - MAX_EDIT_DISTANCE);
            const highest = Math.min(lastFittingStart, impliedStart + MAX_EDIT_DISTANCE);
            for (let candidate = lowest; candidate <= highest; candidate++) {
                found.add(candidate);
            }
            hit = content.indexOf(segment, hit + 1);
        }
    }

    return Array.from(found).sort((a, b) => a - b);
}
214208

/**
 * Translates an exclusive end index in normalized text to an offset in the original text.
 *
 * When the index lies one past the last mapped character, the result is the original
 * offset of the last mapped character plus one, so that the final character is included
 * in the range instead of being cut off.
 */
function mapExclusiveEnd(mapping: number[], normalizedEnd: number): number {
    if (mapping.length === 0) {
        return 0;
    }
    return normalizedEnd < mapping.length
        ? mapping[normalizedEnd]
        : mapping[mapping.length - 1] + 1;
}

/**
 * Picks the candidate offset whose text is closest to `pattern` and reports the
 * matched range in original-text coordinates.
 *
 * Step 1 scores every candidate with `calculateEditDistance` against a window of
 * `pattern.length + MAX_EDIT_DISTANCE` characters and keeps the first candidate with
 * the smallest distance that does not exceed `MAX_EDIT_DISTANCE`.
 *
 * Step 2 walks that window greedily, advancing through the pattern whenever the
 * characters agree, to find where the match really starts and ends. The walk is
 * confined to the scored window; if the pattern cannot be consumed inside it, the
 * range estimated in step 1 (`start + pattern.length + distance`) is used instead of
 * letting the match run on towards the end of the content.
 *
 * @param content Normalized text being searched.
 * @param pattern Normalized pattern.
 * @param candidates Start offsets in `content` to evaluate.
 * @param mapping Offsets used to translate `content` indices to the original text.
 *   NOTE(review): assumed to hold, for each index of `content`, the offset of that
 *   character in the original text - confirm against the normalizer.
 * @returns An array with the single best match, or an empty array when no candidate
 *   is within `MAX_EDIT_DISTANCE`.
 */
export function verifyMatches(
    content: string,
    pattern: string,
    candidates: number[],
    mapping: number[]
): MatchPosition[] {
    const patternLen = pattern.length;
    let minDistance = Infinity;
    let bestCandidate = -1;

    // Step 1: find the candidate with the smallest acceptable edit distance.
    for (const start of candidates) {
        if (start < 0 || start + patternLen > content.length) {
            continue;
        }
        const windowText = content.substring(start, start + patternLen + MAX_EDIT_DISTANCE);
        const distance = calculateEditDistance(windowText, pattern, MAX_EDIT_DISTANCE);
        if (distance <= MAX_EDIT_DISTANCE && distance < minDistance) {
            minDistance = distance;
            bestCandidate = start;
        }
    }

    if (bestCandidate === -1) {
        return [];
    }

    // Step 2: greedy alignment inside the window that was scored above.
    const scanLimit = Math.min(content.length, bestCandidate + patternLen + MAX_EDIT_DISTANCE);
    let contentIdx = bestCandidate;
    let patternIdx = 0;
    let firstAligned = -1;
    while (contentIdx < scanLimit && patternIdx < patternLen) {
        if (content.charAt(contentIdx) === pattern.charAt(patternIdx)) {
            if (firstAligned === -1) {
                firstAligned = contentIdx;
            }
            patternIdx++;
        }
        contentIdx++;
    }

    // contentIdx is one past the last character examined, i.e. an exclusive end.
    const fullyAligned = firstAligned !== -1 && patternIdx === patternLen;
    const normalizedStart = fullyAligned ? firstAligned : bestCandidate;
    const normalizedEnd = fullyAligned
        ? contentIdx
        : Math.min(content.length, bestCandidate + patternLen + minDistance);

    return [{
        start: mapping[normalizedStart],
        end: mapExclusiveEnd(mapping, normalizedEnd)
    }];
}
// ================ Utility functions ================

/**
 * Splits `pattern` into up to `count` consecutive, non-empty segments and records
 * where each segment starts inside the pattern.
 *
 * Segments are kept at least 3 characters long where possible: when the pattern is
 * too short for `count` segments of that size, fewer segments are produced (never
 * fewer than one). Any remainder is spread one character at a time over the leading
 * segments. A `count` that is not a positive integer is floored and raised to 1.
 *
 * @param pattern Text to split.
 * @param count Desired number of segments.
 * @returns The segments in order, each with its start offset in `pattern`; empty
 *   when `pattern` is empty.
 */
function splitPatternWithStart(pattern: string, count: number): { segment: string, start: number }[] {
    const minSegmentLength = 3;

    // Work on a local copy so the parameter is not reassigned, and make sure the
    // divisor below is a positive integer.
    let segmentCount = Math.max(1, Math.floor(count) || 1);
    if (pattern.length < minSegmentLength * segmentCount) {
        segmentCount = Math.max(1, Math.floor(pattern.length / minSegmentLength));
    }

    const baseLength = Math.floor(pattern.length / segmentCount);
    const longerSegments = pattern.length % segmentCount;

    const segments: { segment: string, start: number }[] = [];
    let pos = 0;
    for (let i = 0; i < segmentCount; i++) {
        // The first `longerSegments` segments absorb one extra character each.
        const length = baseLength + (i < longerSegments ? 1 : 0);
        if (length > 0) {
            segments.push({ segment: pattern.slice(pos, pos + length), start: pos });
        }
        pos += length;
    }
    return segments;
}
266286

267287
function calculateEditDistance(a: string, b: string, maxDistance: number): number {
268-
if (Math.abs(a.length - b.length) > maxDistance) {
269-
return Infinity;
270-
}
288+
if (Math.abs(a.length - b.length) > maxDistance) {
289+
return Infinity;
290+
}
271291

272-
// 使用滚动数组优化
273-
let prevRow = Array(b.length + 1).fill(0).map((_, i) => i);
274-
let currentRow = new Array(b.length + 1);
292+
// 使用滚动数组优化
293+
let prevRow = Array(b.length + 1).fill(0).map((_, i) => i);
294+
let currentRow = new Array(b.length + 1);
275295

276-
for (let i = 1; i <= a.length; i++) {
296+
for (let i = 1; i <= a.length; i++) {
277297
currentRow[0] = i;
278298
let minInRow = i;
279299

280300
for (let j = 1; j <= b.length; j++) {
281-
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
282-
currentRow[j] = Math.min(
283-
prevRow[j] + 1,
284-
currentRow[j - 1] + 1,
285-
prevRow[j - 1] + cost
286-
);
287-
minInRow = Math.min(minInRow, currentRow[j]);
301+
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
302+
currentRow[j] = Math.min(
303+
prevRow[j] + 1,
304+
currentRow[j - 1] + 1,
305+
prevRow[j - 1] + cost
306+
);
307+
minInRow = Math.min(minInRow, currentRow[j]);
288308
}
289309

290310
if (minInRow > maxDistance) {
291-
return Infinity;
311+
return Infinity;
292312
}
293313
[prevRow, currentRow] = [currentRow, prevRow];
294314
}
@@ -300,20 +320,14 @@ function processOverlaps(matches: MatchPosition[]): MatchPosition[] {
300320
return matches
301321
.sort((a, b) => a.start - b.start)
302322
.filter((match, index, arr) => {
303-
return index === 0 || match.start >= arr[index - 1].end;
323+
return index === 0 || match.start >= arr[index - 1].end;
304324
});
305325
}
306326

/**
 * Replaces each matched range of `content` with `newContent`.
 *
 * Ranges are applied from the highest start offset to the lowest so that a
 * replacement never shifts the offsets of ranges still to be applied. The `matches`
 * array is copied before ordering, so the caller's array is left untouched.
 *
 * @param content Original text.
 * @param matches Ranges to replace; `start` is inclusive and `end` is exclusive, as
 *   used by `String.prototype.slice`. Ranges are expected not to overlap.
 * @param newContent Text inserted in place of every range.
 * @returns The text with all ranges replaced.
 */
export function applyReplacements(content: string, matches: MatchPosition[], newContent: string): string {
    // sort() and reverse() both reorder in place, so operate on a copy.
    const ordered = matches.slice().sort((a, b) => a.start - b.start);

    let result = content;
    for (let i = ordered.length - 1; i >= 0; i--) {
        const { start, end } = ordered[i];
        result = result.slice(0, start) + newContent + result.slice(end);
    }
    return result;
}

src/test/fuzzyMatch.test.ts

Lines changed: 119 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import * as assert from 'assert';
22
import * as vscode from 'vscode';
33
import { applyGlobalReplace, normalizeInput } from '../cvbManager';
4-
import { normalizeContent, removeComments, normalizeWhitespace, normalizePattern, removeSymbolSpaces} from '../fuzzyMatch';
4+
import * as fuzzyMatch from '../fuzzyMatch';
55

66
// 定义 GlobalReplaceOperation 接口
77

@@ -276,7 +276,7 @@ suite('Normalization Full Coverage Test Suite', () =>
276276
} // 结束函数
277277
`;
278278

279-
const stResult = removeComments(strInput);
279+
const stResult = fuzzyMatch.removeComments(strInput);
280280
const strContent: string = stResult.content;
281281
const arrMapping: number[] = stResult.mapping;
282282

@@ -297,7 +297,7 @@ suite('Normalization Full Coverage Test Suite', () =>
297297
const strInput: string = `a + b
298298
( x - y )
299299
{ c * d }`;
300-
const stResult = removeSymbolSpaces(strInput);
300+
const stResult = fuzzyMatch.removeSymbolSpaces(strInput);
301301
const strContent: string = stResult.content;
302302
const arrMapping: number[] = stResult.mapping;
303303

@@ -316,7 +316,7 @@ suite('Normalization Full Coverage Test Suite', () =>
316316
const strInput: string = `abc def
317317
ghi\t\tjkl
318318
mno pqr`;
319-
const stResult = normalizeWhitespace(strInput);
319+
const stResult = fuzzyMatch.normalizeWhitespace(strInput);
320320
const strContent: string = stResult.content;
321321
const arrMapping: number[] = stResult.mapping;
322322

@@ -334,7 +334,7 @@ mno pqr`;
334334
// 还有注释
335335
`;
336336

337-
const stResult = removeComments(strInput);
337+
const stResult = fuzzyMatch.removeComments(strInput);
338338
const strContent: string = stResult.content;
339339
const arrMapping: number[] = stResult.mapping;
340340

@@ -351,7 +351,7 @@ mno pqr`;
351351
352352
353353
def`;
354-
const stResult = normalizeWhitespace(strInput);
354+
const stResult = fuzzyMatch.normalizeWhitespace(strInput);
355355
const strContent: string = stResult.content;
356356
const arrMapping: number[] = stResult.mapping;
357357

@@ -365,7 +365,7 @@ def`;
365365
test('removeSymbolSpaces - 复杂符号空格情况', () =>
366366
{
367367
const strInput: string = `a + ( b * c ) / [ d - e ]`;
368-
const stResult = removeSymbolSpaces(strInput);
368+
const stResult = fuzzyMatch.removeSymbolSpaces(strInput);
369369
const strContent: string = stResult.content;
370370
const arrMapping: number[] = stResult.mapping;
371371

@@ -379,7 +379,7 @@ def`;
379379
test('normalizeWhitespace - 只有空格和换行符', () =>
380380
{
381381
const strInput: string = " \n \n ";
382-
const stResult = normalizeWhitespace(strInput);
382+
const stResult = fuzzyMatch.normalizeWhitespace(strInput);
383383
const strContent: string = stResult.content;
384384
const arrMapping: number[] = stResult.mapping;
385385

@@ -397,7 +397,7 @@ def`;
397397
let b = a * 2; // 还有注释
398398
return b;
399399
}`;
400-
const stResult = normalizeContent(strInput);
400+
const stResult = fuzzyMatch.normalizeContent(strInput);
401401
const strContent: string = stResult.content;
402402
const arrMapping: number[] = stResult.mapping;
403403

@@ -412,3 +412,113 @@ def`;
412412
assert.strictEqual(arrMapping.length, strExpectedContent.length, "normalizeContent mapping 长度错误");
413413
});
414414
});
415+
// Test suite: checks each stage of the fuzzy global replace pipeline, then the whole pipeline.
suite('Fuzzy Global Replace Test Suite', () => {
    vscode.window.showInformationMessage('Start all fuzzy global replace tests.');

    // Statement in the fixture that the inexact pattern is expected to resolve to.
    const targetStatement = 'console.log(warning);';

    // Builds the three-function fixture; only the body of logWarning varies.
    const buildFixture = (warningBody: string): string => [
        'function logMessage(message) {',
        'console.log(message);',
        '}',
        '',
        'function logError(error) {',
        'console.log(error);',
        '}',
        '',
        'function logWarning(warning) {',
        warningBody,
        '}'
    ].join('\n');

    const originalContent = buildFixture(targetStatement);
    // Deliberately inexact: "warn" instead of "warning".
    const oldContent = 'console.log(warn);';
    const newContent = 'console.warn(warning);';
    const expectedContent = buildFixture(newContent);

    test('normalizeContent should correctly normalize content and provide accurate mapping', () => {
        const normalized = fuzzyMatch.normalizeContent(originalContent);
        const normContent = normalized.content;
        const mapping = normalized.mapping;

        const originalStart = originalContent.indexOf(targetStatement);
        const originalEnd = originalStart + targetStatement.length;
        const normalizedStart = normContent.indexOf(targetStatement);

        assert.ok(normContent.includes(targetStatement), 'Normalized content should contain the target string');
        assert.strictEqual(
            mapping[normalizedStart],
            originalStart,
            'Mapping should point to original start position'
        );
        // Falls back to the last mapping entry when the end index has no entry.
        const mappedEnd = mapping[normalizedStart + targetStatement.length] || mapping[mapping.length - 1];
        assert.strictEqual(mappedEnd, originalEnd, 'Mapping should point to original end position');
    });

    test('normalizePattern should correctly normalize the old content', () => {
        const normPattern = fuzzyMatch.normalizePattern(oldContent);
        assert.strictEqual(normPattern.trim(), 'console.log(warn);', 'Pattern should be normalized correctly');
    });

    test('findCandidatePositions should find potential match positions', () => {
        const normContent = fuzzyMatch.normalizeContent(originalContent).content;
        const normPattern = fuzzyMatch.normalizePattern(oldContent);
        const candidates = fuzzyMatch.findCandidatePositions(normContent, normPattern);

        assert.ok(candidates.length > 0, 'Should find at least one candidate position');

        const targetPos = normContent.indexOf(targetStatement);
        const maxGap = normPattern.length * 2;
        const hasNearbyCandidate = candidates.some(pos => Math.abs(pos - targetPos) < maxGap);
        assert.ok(hasNearbyCandidate, 'Should include a position near the target string');
    });

    test('verifyMatches should select the best match with correct positions', () => {
        const normalized = fuzzyMatch.normalizeContent(originalContent);
        const normPattern = fuzzyMatch.normalizePattern(oldContent);
        const candidates = fuzzyMatch.findCandidatePositions(normalized.content, normPattern);
        const matches = fuzzyMatch.verifyMatches(normalized.content, normPattern, candidates, normalized.mapping);

        assert.strictEqual(matches.length, 1, 'Should find exactly one best match');

        const best = matches[0];
        assert.strictEqual(
            originalContent.slice(best.start, best.end).trim(),
            'console.log(warning);',
            'Best match should correspond to the closest substring'
        );
    });

    test('applyReplacements should replace content correctly without extra characters', () => {
        const start = originalContent.indexOf(targetStatement);
        const matches = [{ start: start, end: start + targetStatement.length }];
        const result = fuzzyMatch.applyReplacements(originalContent, matches, newContent);

        assert.strictEqual(result.trim(), expectedContent, 'Replacement should match expected output');
        assert.ok(!result.includes('g);'), 'Result should not contain extra characters like "g);"');
    });

    test('applyFuzzyGlobalReplace should perform the full replacement correctly', () => {
        const result = fuzzyMatch.applyFuzzyGlobalReplace(originalContent, oldContent, newContent);

        assert.strictEqual(result.trim(), expectedContent, 'Full fuzzy replace should produce the expected output');
        assert.ok(!result.includes('g);'), 'Result should not contain extra characters like "g);"');
    });
});

0 commit comments

Comments
 (0)