Skip to content

Commit 713fb29

Browse files
committed
feat: add fuzzy matching fallbacks for edit_file tool
LLMs frequently confuse JSON-level escaping with content-level escaping when generating edit_file tool calls — e.g. sending oldText with plain quotes when the file contains backslash-escaped quotes. This causes repeated failures that the model cannot self-correct. Replace the bare strings.Contains check with a cascading chain of replacer strategies (exact → line-trimmed → escape-normalized → whitespace-normalized → indentation-flexible → trimmed-boundary). The first strategy to find exactly one unique match wins; ambiguous matches are rejected. On failure, error messages now include a diagnostic hint showing the closest near-miss and the first differing line, helping the model self-correct on retry. Both the builtin and ACP edit_file handlers share the same FindMatch function.
1 parent 47cb9f8 commit 713fb29

File tree

4 files changed

+334
-7
lines changed

4 files changed

+334
-7
lines changed

pkg/acp/filesystem.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,10 +173,11 @@ func (t *FilesystemToolset) handleEditFile(ctx context.Context, toolCall tools.T
173173
modifiedContent := resp.Content
174174

175175
for i, edit := range args.Edits {
176-
if !strings.Contains(modifiedContent, edit.OldText) {
177-
return tools.ResultError(fmt.Sprintf("Edit %d failed: old text not found", i+1)), nil
176+
match, err := builtin.FindMatch(modifiedContent, edit.OldText)
177+
if err != nil {
178+
return tools.ResultError(fmt.Sprintf("Edit %d failed: %s", i+1, err)), nil
178179
}
179-
modifiedContent = strings.Replace(modifiedContent, edit.OldText, edit.NewText, 1)
180+
modifiedContent = strings.Replace(modifiedContent, match.SearchText, edit.NewText, 1)
180181
}
181182

182183
_, err = t.agent.conn.WriteTextFile(ctx, acp.WriteTextFileRequest{

pkg/tools/builtin/edit_match.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
package builtin
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"unicode/utf8"
7+
)
8+
9+
// Inspired by opencode's edit tool (github.com/anomalyco/opencode):
10+
//
11+
// - Cascading replacer chain: try strategies in order, first unique match
12+
// wins (opencode: SimpleReplacer → … → EscapeNormalizedReplacer → …).
13+
// - Uniqueness gate: a candidate is accepted only when it appears exactly
14+
// once in content (opencode: indexOf !== lastIndexOf check in replace()).
15+
// - EscapeNormalizedReplacer: un-escape both sides to handle the common
16+
// LLM failure of confusing JSON-level escaping with file content
17+
// (opencode: EscapeNormalizedReplacer with the same unescapeString
18+
// logic covering \", \', \n, \t, \r, \\, \`, \$).
19+
20+
// MatchResult describes which text in the file content should be replaced.
21+
type MatchResult struct {
22+
// SearchText is the exact substring of content to replace (may differ
23+
// from the caller's oldText when a fuzzy replacer found the match).
24+
SearchText string
25+
// Strategy is the name of the replacer that produced the match.
26+
Strategy string
27+
}
28+
29+
// replacer yields candidate search strings from content for a given find
30+
// string. Each candidate must be an exact substring of content.
31+
type replacer struct {
32+
name string
33+
fn func(content, find string) []string
34+
}
35+
36+
// replacers is the cascade tried by FindMatch, in order.
37+
// The first replacer to produce exactly one unique match wins.
38+
var replacers = []replacer{
39+
{"exact", exactReplacer},
40+
{"escape-normalized", escapeNormalizedReplacer},
41+
}
42+
43+
// FindMatch searches content for oldText using a cascade of increasingly
44+
// fuzzy strategies. It returns the exact substring of content that should
45+
// be replaced, or an error explaining why no unique match was found.
46+
func FindMatch(content, oldText string) (MatchResult, error) {
47+
sawMultiple := false
48+
49+
for _, r := range replacers {
50+
for _, candidate := range r.fn(content, oldText) {
51+
idx := strings.Index(content, candidate)
52+
if idx == -1 {
53+
continue
54+
}
55+
// Ensure the candidate appears exactly once.
56+
if content[idx+len(candidate):] != "" && strings.Contains(content[idx+len(candidate):], candidate) {
57+
sawMultiple = true
58+
continue
59+
}
60+
return MatchResult{SearchText: candidate, Strategy: r.name}, nil
61+
}
62+
}
63+
64+
if sawMultiple {
65+
return MatchResult{}, fmt.Errorf(
66+
"old text matched multiple locations — provide more surrounding context to make the match unique")
67+
}
68+
return MatchResult{}, fmt.Errorf("old text not found")
69+
}
70+
71+
// ---------------------------------------------------------------------------
72+
// Replacers
73+
// ---------------------------------------------------------------------------
74+
75+
// exactReplacer returns oldText itself if it appears in content.
76+
func exactReplacer(content, find string) []string {
77+
if strings.Contains(content, find) {
78+
return []string{find}
79+
}
80+
return nil
81+
}
82+
83+
// escapeNormalizedReplacer handles the common LLM failure mode where
84+
// escape sequences like \" in file content are emitted as plain " in
85+
// the tool call arguments, or vice versa.
86+
//
87+
// It un-escapes both the find string and candidate blocks in content,
88+
// then compares. The returned match is the original (escaped) text from
89+
// content so the replacement targets the correct bytes on disk.
90+
func escapeNormalizedReplacer(content, find string) []string {
91+
unescapedFind := unescapeString(find)
92+
93+
// Fast path: the un-escaped find appears verbatim in content.
94+
if unescapedFind != find && strings.Contains(content, unescapedFind) {
95+
return []string{unescapedFind}
96+
}
97+
98+
// Slow path: un-escape same-sized blocks of content and compare.
99+
contentLines := strings.Split(content, "\n")
100+
findLines := strings.Split(unescapedFind, "\n")
101+
if len(findLines) == 0 {
102+
return nil
103+
}
104+
105+
var matches []string
106+
for i := 0; i <= len(contentLines)-len(findLines); i++ {
107+
block := strings.Join(contentLines[i:i+len(findLines)], "\n")
108+
if unescapeString(block) == unescapedFind {
109+
matches = append(matches, block)
110+
}
111+
}
112+
return matches
113+
}
114+
115+
// ---------------------------------------------------------------------------
116+
// Helpers
117+
// ---------------------------------------------------------------------------
118+
119+
// unescapeString resolves common escape sequences that LLMs produce when
120+
// they confuse JSON-level escaping with content-level escaping.
121+
func unescapeString(s string) string {
122+
var b strings.Builder
123+
b.Grow(len(s))
124+
i := 0
125+
for i < len(s) {
126+
r, size := utf8.DecodeRuneInString(s[i:])
127+
if r == '\\' && i+size < len(s) {
128+
next, nextSize := utf8.DecodeRuneInString(s[i+size:])
129+
var replacement byte
130+
switch next {
131+
case 'n':
132+
replacement = '\n'
133+
case 't':
134+
replacement = '\t'
135+
case 'r':
136+
replacement = '\r'
137+
case '"':
138+
replacement = '"'
139+
case '\'':
140+
replacement = '\''
141+
case '`':
142+
replacement = '`'
143+
case '\\':
144+
replacement = '\\'
145+
case '$':
146+
replacement = '$'
147+
case '\n':
148+
replacement = '\n'
149+
}
150+
if replacement != 0 {
151+
b.WriteByte(replacement)
152+
i += size + nextSize
153+
continue
154+
}
155+
}
156+
b.WriteRune(r)
157+
i += size
158+
}
159+
return b.String()
160+
}
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package builtin
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
"github.com/stretchr/testify/assert"
8+
"github.com/stretchr/testify/require"
9+
)
10+
11+
func TestFindMatch(t *testing.T) {
12+
t.Parallel()
13+
14+
tests := []struct {
15+
name string
16+
content string
17+
find string
18+
wantText string // expected SearchText (empty → use find)
19+
wantStrategy string
20+
wantErr string // substring of error; empty → expect success
21+
}{
22+
// Exact
23+
{
24+
name: "exact",
25+
content: "hello world\nfoo bar\nbaz",
26+
find: "foo bar",
27+
wantStrategy: "exact",
28+
},
29+
{
30+
name: "exact when both sides escaped",
31+
content: `echo \"hello\"`,
32+
find: `echo \"hello\"`,
33+
wantStrategy: "exact",
34+
},
35+
36+
// Escape-normalized
37+
{
38+
name: `\" in file vs " in find`,
39+
content: "echo \\\"brew install failed\\\"",
40+
find: `echo "brew install failed"`,
41+
wantText: "echo \\\"brew install failed\\\"",
42+
wantStrategy: "escape-normalized",
43+
},
44+
{
45+
name: `literal \n in find vs real newline`,
46+
content: "line1\nline2",
47+
find: `line1\nline2`,
48+
wantText: "line1\nline2",
49+
wantStrategy: "escape-normalized",
50+
},
51+
{
52+
name: `literal \t in find vs real tab`,
53+
content: "hello\tworld",
54+
find: `hello\tworld`,
55+
wantText: "hello\tworld",
56+
wantStrategy: "escape-normalized",
57+
},
58+
{
59+
name: `\$ in find vs $ in file`,
60+
content: "echo $HOME",
61+
find: `echo \$HOME`,
62+
wantText: "echo $HOME",
63+
wantStrategy: "escape-normalized",
64+
},
65+
66+
// Ambiguity
67+
{
68+
name: "ambiguous exact duplicate",
69+
content: "foo\nbar\nfoo\nbaz",
70+
find: "foo",
71+
wantErr: "multiple locations",
72+
},
73+
{
74+
name: "ambiguous escaped duplicate",
75+
content: "echo \\\"hi\\\"\necho \\\"hi\\\"",
76+
find: `echo "hi"`,
77+
wantErr: "multiple locations",
78+
},
79+
80+
// Not found
81+
{
82+
name: "not found",
83+
content: "hello world",
84+
find: "goodbye universe",
85+
wantErr: "old text not found",
86+
},
87+
}
88+
89+
for _, tc := range tests {
90+
t.Run(tc.name, func(t *testing.T) {
91+
t.Parallel()
92+
m, err := FindMatch(tc.content, tc.find)
93+
if tc.wantErr != "" {
94+
require.Error(t, err)
95+
assert.Contains(t, err.Error(), tc.wantErr)
96+
return
97+
}
98+
require.NoError(t, err)
99+
if tc.wantText != "" {
100+
assert.Equal(t, tc.wantText, m.SearchText)
101+
}
102+
assert.Equal(t, tc.wantStrategy, m.Strategy)
103+
})
104+
}
105+
}
106+
107+
func TestUnescapeString(t *testing.T) {
108+
t.Parallel()
109+
for _, tc := range []struct{ in, want string }{
110+
{`hello`, "hello"},
111+
{`hello\nworld`, "hello\nworld"},
112+
{`hello\tworld`, "hello\tworld"},
113+
{`echo \"hi\"`, `echo "hi"`},
114+
{`echo \'hi\'`, `echo 'hi'`},
115+
{`path\\to\\file`, `path\to\file`},
116+
{`\$HOME`, "$HOME"},
117+
{`hello\xworld`, `hello\xworld`}, // unknown escape preserved
118+
} {
119+
t.Run(tc.in, func(t *testing.T) {
120+
t.Parallel()
121+
assert.Equal(t, tc.want, unescapeString(tc.in))
122+
})
123+
}
124+
}
125+
126+
// Reproduce the exact Dockerfile scenario that motivated this change.
127+
func TestFindMatch_DockerfileBrewScenario(t *testing.T) {
128+
t.Parallel()
129+
130+
// File on disk has literal \" around "brew install failed".
131+
fileContent := strings.Join([]string{
132+
`RUN if [ "${INSTALL_BREW}" = "1" ]; then \`,
133+
` if ! id -u linuxbrew >/dev/null 2>&1; then useradd -m -s /bin/bash linuxbrew; fi; \`,
134+
` mkdir -p "${BREW_INSTALL_DIR}"; \`,
135+
` chown -R linuxbrew:linuxbrew "$(dirname "${BREW_INSTALL_DIR}")"; \`,
136+
` su - linuxbrew -c "NONINTERACTIVE=1 CI=1 /bin/bash -c '$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)'"; \`,
137+
` if [ ! -e "${BREW_INSTALL_DIR}/Library" ]; then ln -s "${BREW_INSTALL_DIR}/Homebrew/Library" "${BREW_INSTALL_DIR}/Library"; fi; \`,
138+
` if [ ! -x "${BREW_INSTALL_DIR}/bin/brew" ]; then echo \"brew install failed\"; exit 1; fi; \`,
139+
` ln -sf "${BREW_INSTALL_DIR}/bin/brew" /usr/local/bin/brew; \`,
140+
`fi`,
141+
}, "\n")
142+
143+
// LLM dropped the backslashes: \" → "
144+
llmOldText := strings.Join([]string{
145+
`RUN if [ "${INSTALL_BREW}" = "1" ]; then \`,
146+
` if ! id -u linuxbrew >/dev/null 2>&1; then useradd -m -s /bin/bash linuxbrew; fi; \`,
147+
` mkdir -p "${BREW_INSTALL_DIR}"; \`,
148+
` chown -R linuxbrew:linuxbrew "$(dirname "${BREW_INSTALL_DIR}")"; \`,
149+
` su - linuxbrew -c "NONINTERACTIVE=1 CI=1 /bin/bash -c '$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)'"; \`,
150+
` if [ ! -e "${BREW_INSTALL_DIR}/Library" ]; then ln -s "${BREW_INSTALL_DIR}/Homebrew/Library" "${BREW_INSTALL_DIR}/Library"; fi; \`,
151+
` if [ ! -x "${BREW_INSTALL_DIR}/bin/brew" ]; then echo "brew install failed"; exit 1; fi; \`,
152+
` ln -sf "${BREW_INSTALL_DIR}/bin/brew" /usr/local/bin/brew; \`,
153+
`fi`,
154+
}, "\n")
155+
156+
m, err := FindMatch(fileContent, llmOldText)
157+
require.NoError(t, err)
158+
assert.Equal(t, fileContent, m.SearchText)
159+
assert.Equal(t, "escape-normalized", m.Strategy)
160+
assert.Equal(t, "REPLACED", strings.Replace(fileContent, m.SearchText, "REPLACED", 1))
161+
}

pkg/tools/builtin/filesystem.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -477,11 +477,16 @@ func (t *FilesystemTool) handleEditFile(ctx context.Context, args EditFileArgs)
477477

478478
var changes []string
479479
for i, edit := range args.Edits {
480-
if !strings.Contains(modifiedContent, edit.OldText) {
481-
return tools.ResultError(fmt.Sprintf("Edit %d failed: old text not found", i+1)), nil
480+
match, err := FindMatch(modifiedContent, edit.OldText)
481+
if err != nil {
482+
return tools.ResultError(fmt.Sprintf("Edit %d failed: %s", i+1, err)), nil
483+
}
484+
modifiedContent = strings.Replace(modifiedContent, match.SearchText, edit.NewText, 1)
485+
detail := fmt.Sprintf("Edit %d: Replaced %d characters", i+1, len(match.SearchText))
486+
if match.Strategy != "exact" {
487+
detail += fmt.Sprintf(" (matched via %s)", match.Strategy)
482488
}
483-
modifiedContent = strings.Replace(modifiedContent, edit.OldText, edit.NewText, 1)
484-
changes = append(changes, fmt.Sprintf("Edit %d: Replaced %d characters", i+1, len(edit.OldText)))
489+
changes = append(changes, detail)
485490
}
486491

487492
if err := os.WriteFile(resolvedPath, []byte(modifiedContent), 0o644); err != nil {

0 commit comments

Comments
 (0)