Skip to content

Commit e15af59

Browse files
authored
edits: fix file corruption issue with replace_string tool (#1134)
The similarity matching in the replace_string tool was incorrectly using line numbers rather than string offsets (since inception!), which caused file corruption issues. I initially thought it was only in the multi edit tool, but it happens in all replace_string variants. Closes microsoft/vscode#265842
1 parent d55998a commit e15af59

File tree

5 files changed

+1788
-20
lines changed

5 files changed

+1788
-20
lines changed

src/extension/tools/node/editFileToolUtils.tsx

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { IConfigurationService } from '../../../platform/configuration/common/co
1111
import { ICustomInstructionsService } from '../../../platform/customInstructions/common/customInstructionsService';
1212
import { OffsetLineColumnConverter } from '../../../platform/editing/common/offsetLineColumnConverter';
1313
import { TextDocumentSnapshot } from '../../../platform/editing/common/textDocumentSnapshot';
14+
import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
1415
import { IAlternativeNotebookContentService } from '../../../platform/notebook/common/alternativeContent';
1516
import { INotebookService } from '../../../platform/notebook/common/notebookService';
1617
import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService';
@@ -23,7 +24,6 @@ import { URI } from '../../../util/vs/base/common/uri';
2324
import { Position as EditorPosition } from '../../../util/vs/editor/common/core/position';
2425
import { ServicesAccessor } from '../../../util/vs/platform/instantiation/common/instantiation';
2526
import { EndOfLine, MarkdownString, Position, Range, TextEdit } from '../../../vscodeTypes';
26-
import { IFileSystemService } from '../../../platform/filesystem/common/fileSystemService';
2727

2828
// Simplified Hunk type for the patch
2929
interface Hunk {
@@ -349,42 +349,47 @@ function trySimilarityMatch(text: string, oldStr: string, newStr: string, eol: s
349349
return { text, editPosition: [], type: 'none' };
350350
}
351351

352-
let bestMatch = { index: -1, similarity: 0, length: 0 };
352+
let bestMatch = { startLine: -1, startOffset: 0, oldLength: 0, similarity: 0 };
353+
let startOffset = 0;
353354

354355
// Sliding window approach to find the best matching section
355356
for (let i = 0; i <= lines.length - oldLines.length; i++) {
356357
let totalSimilarity = 0;
358+
let oldLength = 0;
357359

358360
// Calculate similarity for each line in the window
359361
for (let j = 0; j < oldLines.length; j++) {
360362
const similarity = calculateSimilarity(oldLines[j], lines[i + j]);
361363
totalSimilarity += similarity;
364+
oldLength += lines[i + j].length;
362365
}
363366

364367
const avgSimilarity = totalSimilarity / oldLines.length;
365368
if (avgSimilarity > threshold && avgSimilarity > bestMatch.similarity) {
366-
bestMatch = { index: i, similarity: avgSimilarity, length: oldLines.length };
369+
bestMatch = { startLine: i, startOffset, similarity: avgSimilarity, oldLength: oldLength + (oldLines.length - 1) * eol.length };
367370
}
368-
}
369371

370-
if (bestMatch.index !== -1) {
371-
// Found a match with similarity above the threshold
372-
const startIndex = bestMatch.index;
373-
374-
// Replace the matched section
375-
const newLines = [...lines];
376-
newLines.splice(startIndex, bestMatch.length, ...newStr.split(eol));
372+
startOffset += lines[i].length + eol.length;
373+
}
377374

378-
return {
379-
text: newLines.join(eol),
380-
type: 'similarity',
381-
editPosition: [[startIndex, startIndex + bestMatch.length]],
382-
similarity: bestMatch.similarity,
383-
suggestion: `Used similarity matching (${(bestMatch.similarity * 100).toFixed(1)}% similar). Verify the replacement.`
384-
};
375+
if (bestMatch.startLine === -1) {
376+
return { text, editPosition: [], type: 'none' };
385377
}
386378

387-
return { text, editPosition: [], type: 'none' };
379+
// Replace the matched section
380+
const newLines = [
381+
...lines.slice(0, bestMatch.startLine),
382+
...newStr.split(eol),
383+
...lines.slice(bestMatch.startLine + oldLines.length)
384+
];
385+
386+
return {
387+
text: newLines.join(eol),
388+
type: 'similarity',
389+
editPosition: [[bestMatch.startOffset, bestMatch.startOffset + bestMatch.oldLength]],
390+
similarity: bestMatch.similarity,
391+
suggestion: `Used similarity matching (${(bestMatch.similarity * 100).toFixed(1)}% similar). Verify the replacement.`
392+
};
388393
}
389394

390395
// Function to generate a simple patch

0 commit comments

Comments
 (0)