Skip to content
104 changes: 57 additions & 47 deletions src/core/diff/strategies/multi-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,16 @@ function getSimilarity(original: string, search: string): number {
return 1 - dist / maxLength
}

/**
 * Computes the similarity threshold required for a chunk of the given length.
 *
 * The base threshold is scaled by a length-dependent factor: the factor is 1
 * for text of up to 100 characters and drops by 0.1 for every tenfold increase
 * in length beyond that, bottoming out at MIN_THRESHOLD_FACTOR. Longer chunks
 * are therefore allowed proportionally more differences.
 *
 * The factor is capped at 1 so that short text never produces a threshold
 * above `baseThreshold`. Without the cap, lengths below 100 yield a factor
 * slightly greater than 1, and a base threshold of 1.0 becomes unreachable
 * even for an exact match (similarity is at most 1).
 *
 * @param baseThreshold - The configured similarity threshold.
 * @param textLength - Length, in characters, of the text being compared.
 * @returns The length-adjusted threshold, never lower than MIN_THRESHOLD.
 */
function getWeightedThreshold(baseThreshold: number, textLength: number): number {
	const MIN_THRESHOLD_FACTOR = 0.8
	const MAX_THRESHOLD_FACTOR = 1
	// Lengths below this are treated as this length when computing the factor.
	const LENGTH_THRESHOLD = 50
	// Absolute floor for the returned threshold. Note that it applies even when
	// baseThreshold itself is lower than this value.
	const MIN_THRESHOLD = 0.8

	const effectiveLength = Math.max(textLength, LENGTH_THRESHOLD)
	// log10(100) === 2, so the factor starts decreasing once the length passes 100.
	const rawFactor = 1 - (Math.log10(effectiveLength) - 2) * 0.1
	const lengthFactor = Math.min(MAX_THRESHOLD_FACTOR, Math.max(MIN_THRESHOLD_FACTOR, rawFactor))

	return Math.max(baseThreshold * lengthFactor, MIN_THRESHOLD)
}

/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
Expand All @@ -37,6 +47,8 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
let bestScore = 0
let bestMatchIndex = -1
let bestMatchContent = ""
// Normalize searchChunk by removing any trailing newline
searchChunk = searchChunk.replace(/\r?\n$/, "")
const searchLen = searchChunk.split(/\r?\n/).length

// Middle-out from the midpoint
Expand Down Expand Up @@ -96,24 +108,30 @@ Only a single operation is allowed per tool use.
The SEARCH section must exactly match existing content including whitespace and indentation.
If you're not confident in the exact content to search for, use the read_file tool first to get the exact content.
When applying the diffs, be extra careful to remember to change any closing brackets or other syntax that may be affected by the diff farther down in the file.
ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks
ALWAYS make as many changes in a single 'apply_diff' request as possible using multiple SEARCH/REPLACE blocks.

Parameters:
- path: (required) The path of the file to modify (relative to the current workspace directory ${args.cwd})
- diff: (required) The search/replace block defining the changes.

Diff format:
Diff format (EXACTLY follow this format with correct markers and line breaks):
\`\`\`
<<<<<<< SEARCH
:start_line: (required) The line number of original content where the search block starts.
-------
<<<<<<< SEARCH
:start_line: N (Replace N with the actual line number where the search should begin)
------- (This divider line is required after the start_line)
[exact content to find including whitespace]
=======
[new content to replace with]
>>>>>>> REPLACE

>>>>>>> REPLACE (This exact closing marker is required)
\`\`\`

IMPORTANT FORMATTING RULES:
1. Each marker MUST be on its own line
2. The ":start_line:" is REQUIRED and must contain a valid line number
3. The "-------" divider after start_line is REQUIRED
4. Use EXACTLY one line with 7 equal signs (=======) to separate search from replace content
5. EVERY search/replace block MUST start with "<<<<<<< SEARCH" and end with ">>>>>>> REPLACE"
6. When including multiple blocks, each block follows the same format in sequence

Example:

Expand All @@ -126,7 +144,7 @@ Original file:
5 | return total
\`\`\`

Search/Replace content:
Search/Replace content (note the exact format):
\`\`\`
<<<<<<< SEARCH
:start_line:1
Expand All @@ -144,7 +162,7 @@ def calculate_total(items):

\`\`\`

Search/Replace content with multi edits:
Multiple edits example (note each block follows the exact same format):
\`\`\`
<<<<<<< SEARCH
:start_line:1
Expand All @@ -167,6 +185,13 @@ def calculate_sum(items):
>>>>>>> REPLACE
\`\`\`

COMMON ERRORS TO AVOID:
- Do NOT forget the ":start_line:" indicator and line number in the SEARCH section
- Do NOT put a ":start_line:" indicator in the REPLACE section
- Do NOT use multiple "=======" separator lines, ONLY ONE between search and replace
- Do NOT put content on the same line as any of the markers
- Do NOT alter the format of the markers (exact number of < or = characters matters)
- Do NOT omit any of the required markers or dividers

Usage:
<apply_diff>
Expand Down Expand Up @@ -310,49 +335,29 @@ Only use a single line of '=======' between search and replacement content, beca
}
}

/*
Regex parts:

1. (?:^|\n)
  Ensures the first marker starts at the beginning of the file or right after a newline.

2. (?<!\\)<<<<<<< SEARCH\s*\n
  Matches the line “<<<<<<< SEARCH” (ignoring any trailing spaces) – the negative lookbehind makes sure it isn’t escaped.

3. ((?:\:start_line:\s*(\d+)\s*\n))?
  Optionally matches a “:start_line:” line. The outer capturing group is group 1 and the inner (\d+) is group 2.

4. ((?:\:end_line:\s*(\d+)\s*\n))?
  Optionally matches a “:end_line:” line. Group 3 is the whole match and group 4 is the digits.

5. ((?<!\\)-------\s*\n)?
  Optionally matches the “-------” marker line (group 5).

6. ([\s\S]*?)(?:\n)?
  Non‐greedy match for the “search content” (group 6) up to the next marker.

7. (?:(?<=\n)(?<!\\)=======\s*\n)
  Matches the “=======” marker on its own line.

8. ([\s\S]*?)(?:\n)?
  Non‐greedy match for the “replace content” (group 7).
const regexPatterns = [
/(?:^|\n)/, // 1. the first marker is preceded by start-of-file or one or more newlines, but does not include them in the match.
/(?<!\\)<<<<<<< SEARCH\s*\n/, // 2. Matches the line “<<<<<<< SEARCH” (ignoring any trailing spaces) – the negative lookbehind makes sure it isn’t escaped.
/((?:\:start_line:\s*(\d+)\s*\n))?/, // 3. Optionally matches a “:start_line:” line. The outer capturing group is group 1 and the inner (\d+) is group 2.
/((?:\:end_line:\s*(\d+)\s*\n))?/, // 4. Optionally matches a “:end_line:” line. Group 3 is the whole match and group 4 is the digits.
/((?<!\\)-------\s*\n)?/, // 5. Optionally matches the “-------” marker line (group 5).
/([\s\S]*?)(?:\n)?/, // 6. Non‐greedy match for the “search content” (group 6) up to the next marker.
/(?:(?<!\\)=======\s*\n)/, // 7. Matches the “=======” marker on its own line.
/([\s\S]*?)(?:\n)?/, // 8. Non‐greedy match for the “replace content” (group 7).
/(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/, // 9. Matches the final “>>>>>>> REPLACE” marker on its own line (and requires a following newline or the end of file).
]

9. (?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)
  Matches the final “>>>>>>> REPLACE” marker on its own line (and requires a following newline or the end of file).
*/
const diffRegex = new RegExp(regexPatterns.map((pattern) => pattern.source).join(""), "g")

let matches = [
...diffContent.matchAll(
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
),
]
let matches = [...diffContent.matchAll(diffRegex)]

if (matches.length === 0) {
return {
success: false,
error: `Invalid diff format - missing required sections\n\nDebug Info:\n- Expected Format: <<<<<<< SEARCH\\n:start_line: start line\\n-------\\n[search content]\\n=======\\n[replace content]\\n>>>>>>> REPLACE\n- Tip: Make sure to include start_line/SEARCH/=======/REPLACE sections with correct markers on new lines`,
}
}

// Detect line ending from original content
const lineEnding = originalContent.includes("\r\n") ? "\r\n" : "\n"
let resultLines = originalContent.split(/\r?\n/)
Expand Down Expand Up @@ -432,8 +437,10 @@ Only use a single line of '=======' between search and replacement content, beca

// Try exact match first
const originalChunk = resultLines.slice(exactStartIndex, exactEndIndex + 1).join("\n")
const textLength = originalChunk.length
const weightedThreshold = getWeightedThreshold(this.fuzzyThreshold, textLength)
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity >= this.fuzzyThreshold) {
if (similarity >= weightedThreshold) {
matchIndex = exactStartIndex
bestMatchScore = similarity
bestMatchContent = originalChunk
Expand All @@ -450,7 +457,7 @@ Only use a single line of '=======' between search and replacement content, beca
bestScore,
bestMatchIndex,
bestMatchContent: midContent,
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex) // resultLines is an array of lines without line terminators; searchChunk is the search content and still contains its line terminators
matchIndex = bestMatchIndex
bestMatchScore = bestScore
bestMatchContent = midContent
Expand All @@ -471,7 +478,10 @@ Only use a single line of '=======' between search and replacement content, beca
bestMatchIndex,
bestMatchContent: aggContent,
} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
if (
bestMatchIndex !== -1 &&
bestScore >= getWeightedThreshold(this.fuzzyThreshold, aggressiveSearchChunk.length)
) {
matchIndex = bestMatchIndex
bestMatchScore = bestScore
bestMatchContent = aggContent
Expand Down Expand Up @@ -503,7 +513,7 @@ Only use a single line of '=======' between search and replacement content, beca

diffResults.push({
success: false,
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine ? `starting at line ${startLine}` : "start to end"}\n- Tried both standard and aggressive line number stripping\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(getWeightedThreshold(this.fuzzyThreshold, searchChunk.length) * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(getWeightedThreshold(this.fuzzyThreshold, searchChunk.length) * 100)}%\n- Search Range: ${startLine ? `starting at line ${startLine}` : "start to end"}\n- Tried both standard and aggressive line number stripping\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
})
continue
}
Expand Down
Loading