Skip to content

Commit 0d5e2a9

Browse files
committed
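Improve fuzzy search efficiency by pre-computing the split search lines and the normalized search text once, computing similarity through a local helper, and exiting early on a perfect match at the reference line. Extract the existing proximity boost (for matches within two lines of the preferred start) into a named PROXIMITY_BOOST constant. Clean up comments and formatting for better readability.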
1 parent 09d2826 commit 0d5e2a9

File tree

1 file changed

+56
-49
lines changed

1 file changed

+56
-49
lines changed

src/core/diff/strategies/multi-search-replace.ts

Lines changed: 56 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { ToolUse, DiffStrategy, DiffResult } from "../../../shared/tools"
66
import { normalizeString } from "../../../utils/text-normalization"
77

88
const BUFFER_LINES = 40 // Number of extra context lines to show before and after matches
9+
const PROXIMITY_BOOST = 0.02 // small bonus for matches extremely close to preferred line
910

1011
function getSimilarity(original: string, search: string): number {
1112
// Empty searches are no longer supported
@@ -51,81 +52,87 @@ function fuzzySearch(
5152
preferredStartIndex?: number,
5253
prioritizePreferredStart: boolean = false
5354
) {
55+
// Pre-compute values outside the tight loop for efficiency
56+
const searchLinesArr = searchChunk.split(/\r?\n/)
57+
const searchLen = searchLinesArr.length
58+
59+
// Guard against impossible searches: the window [startIndex, endIndex) holds fewer lines than the search chunk
60+
if (searchLen === 0 || endIndex - startIndex < searchLen) {
61+
return { bestScore: 0, bestMatchIndex: -1, bestMatchContent: "" }
62+
}
63+
64+
// Cache the normalized search text once; computeSimilarity reuses it for every candidate chunk
65+
const normalizedSearchChunk = normalizeString(searchChunk)
66+
const computeSimilarity = (originalChunk: string): number => {
67+
const normalizedOriginal = normalizeString(originalChunk)
68+
if (normalizedOriginal === normalizedSearchChunk) return 1
69+
const dist = distance(normalizedOriginal, normalizedSearchChunk)
70+
const maxLength = Math.max(normalizedOriginal.length, normalizedSearchChunk.length)
71+
return 1 - dist / maxLength
72+
}
73+
5474
let bestScore = 0
5575
let bestMatchIndex = -1
5676
let bestMatchContent = ""
57-
// Track the smallest distance from preferred start index for similarly scored matches
58-
let bestMatchDistance = Number.MAX_SAFE_INTEGER
59-
const searchLen = searchChunk.split(/\r?\n/).length
77+
let bestMatchDistance = Number.MAX_SAFE_INTEGER // distance from reference line for tie-breaking
6078

61-
// Calculate the starting point for search
79+
// Calculate the starting point for search (middle-out or preferred)
6280
const midPoint = Math.floor((startIndex + endIndex) / 2)
63-
64-
// Determine the reference point for calculating distance
65-
// This is the point we want matches to be close to
66-
const referencePoint = (prioritizePreferredStart && preferredStartIndex !== undefined)
67-
? preferredStartIndex
68-
: midPoint
69-
70-
// Determine the search starting position
81+
const referencePoint = prioritizePreferredStart && preferredStartIndex !== undefined ? preferredStartIndex : midPoint
7182
let searchStartPosition: number
7283
if (prioritizePreferredStart && preferredStartIndex !== undefined) {
73-
// Constrain preferredStartIndex to be within valid bounds
74-
searchStartPosition = Math.min(
75-
Math.max(preferredStartIndex, startIndex),
76-
endIndex - searchLen
77-
)
84+
searchStartPosition = Math.min(Math.max(preferredStartIndex, startIndex), endIndex - searchLen)
7885
} else {
79-
// Use middle-out approach (original behavior)
8086
searchStartPosition = midPoint
8187
}
82-
88+
8389
let leftIndex = searchStartPosition
8490
let rightIndex = searchStartPosition + 1
8591

8692
while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
8793
if (leftIndex >= startIndex) {
8894
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
89-
const similarity = getSimilarity(originalChunk, searchChunk)
90-
95+
const similarity = computeSimilarity(originalChunk)
9196
const distanceFromReference = Math.abs(leftIndex - referencePoint)
92-
93-
// Small boost for matches very close to preferred position when prioritizing by location
94-
const proximityBoost = (prioritizePreferredStart && distanceFromReference <= 2) ? 0.02 : 0
97+
const proximityBoost = prioritizePreferredStart && distanceFromReference <= 2 ? PROXIMITY_BOOST : 0
9598
const effectiveScore = similarity + proximityBoost
96-
97-
// Update best match if:
98-
// 1. This match has a better effective score, or
99-
// 2. This match has the same effective score but is closer to the reference point
100-
if (effectiveScore > bestScore ||
101-
(effectiveScore === bestScore && distanceFromReference < bestMatchDistance)) {
99+
100+
if (
101+
effectiveScore > bestScore ||
102+
(effectiveScore === bestScore && distanceFromReference < bestMatchDistance)
103+
) {
102104
bestScore = effectiveScore
103105
bestMatchIndex = leftIndex
104106
bestMatchContent = originalChunk
105107
bestMatchDistance = distanceFromReference
108+
109+
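// Early-exit: effective score of exactly 1 at the reference line. NOTE(review): with prioritizePreferredStart a perfect match there scores 1 + PROXIMITY_BOOST, so this check looks reachable only in middle-out mode – confirm intent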
110+
if (bestScore === 1 && bestMatchDistance === 0) {
111+
return { bestScore, bestMatchIndex, bestMatchContent }
112+
}
106113
}
107114
leftIndex--
108115
}
109116

110117
if (rightIndex <= endIndex - searchLen) {
111118
const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n")
112-
const similarity = getSimilarity(originalChunk, searchChunk)
113-
119+
const similarity = computeSimilarity(originalChunk)
114120
const distanceFromReference = Math.abs(rightIndex - referencePoint)
115-
116-
// Small boost for matches very close to preferred position when prioritizing by location
117-
const proximityBoost = (prioritizePreferredStart && distanceFromReference <= 2) ? 0.02 : 0
121+
const proximityBoost = prioritizePreferredStart && distanceFromReference <= 2 ? PROXIMITY_BOOST : 0
118122
const effectiveScore = similarity + proximityBoost
119-
120-
// Update best match if:
121-
// 1. This match has a better effective score, or
122-
// 2. This match has the same effective score but is closer to the reference point
123-
if (effectiveScore > bestScore ||
124-
(effectiveScore === bestScore && distanceFromReference < bestMatchDistance)) {
123+
124+
if (
125+
effectiveScore > bestScore ||
126+
(effectiveScore === bestScore && distanceFromReference < bestMatchDistance)
127+
) {
125128
bestScore = effectiveScore
126129
bestMatchIndex = rightIndex
127130
bestMatchContent = originalChunk
128131
bestMatchDistance = distanceFromReference
132+
133+
if (bestScore === 1 && bestMatchDistance === 0) {
134+
return { bestScore, bestMatchIndex, bestMatchContent }
135+
}
129136
}
130137
rightIndex++
131138
}
@@ -380,28 +387,28 @@ Only use a single line of '=======' between search and replacement content, beca
380387
  Ensures the first marker starts at the beginning of the file or right after a newline.
381388
382389
2. (?<!\\)<<<<<<< SEARCH\s*\n
383-
  Matches the line <<<<<<< SEARCH (ignoring any trailing spaces) – the negative lookbehind makes sure it isnt escaped.
390+
  Matches the line "<<<<<<< SEARCH" (ignoring any trailing spaces) – the negative lookbehind makes sure it isn't escaped.
384391
385392
3. ((?:\:start_line:\s*(\d+)\s*\n))?
386-
  Optionally matches a :start_line: line. The outer capturing group is group1 and the inner (\d+) is group2.
393+
  Optionally matches a ":start_line:" line. The outer capturing group is group 1 and the inner (\d+) is group 2.
387394
388395
4. ((?:\:end_line:\s*(\d+)\s*\n))?
389-
  Optionally matches a :end_line: line. Group3 is the whole match and group4 is the digits.
396+
  Optionally matches a ":end_line:" line. Group 3 is the whole match and group 4 is the digits.
390397
391398
5. ((?<!\\)-------\s*\n)?
392-
  Optionally matches the ------- marker line (group5).
399+
  Optionally matches the "-------" marker line (group 5).
393400
394401
6. ([\s\S]*?)(?:\n)?
395-
  Non‐greedy match for the search content (group6) up to the next marker.
402+
  Non‐greedy match for the "search content" (group 6) up to the next marker.
396403
397404
7. (?:(?<=\n)(?<!\\)=======\s*\n)
398-
  Matches the ======= marker on its own line.
405+
  Matches the "=======" marker on its own line.
399406
400407
8. ([\s\S]*?)(?:\n)?
401-
  Non‐greedy match for the replace content (group7).
408+
  Non‐greedy match for the "replace content" (group 7).
402409
403410
9. (?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)
404-
  Matches the final >>>>>>> REPLACE marker on its own line (and requires a following newline or the end of file).
411+
  Matches the final ">>>>>>> REPLACE" marker on its own line (and requires a following newline or the end of file).
405412
*/
406413

407414
let matches = [

0 commit comments

Comments
 (0)