Skip to content

Commit a590523

Browse files
authored
fix: parse git apply paths correctly (#8824)
Fixes apply.rs path parsing so - quoted diff headers are tokenized and extracted correctly, - /dev/null headers are ignored before prefix stripping to avoid bogus dev/null paths, and - git apply output paths are unescaped from C-style quoting. **Why** This prevents potentially missed staging and misclassified paths when applying or reverting patches, which could lead to incorrect behavior for repos with spaces or escaped characters in filenames. **Impact** I checked and this is only used in the cloud tasks support and `codex apply <task_id>` flow.
1 parent 8372d61 commit a590523

File tree

1 file changed

+150
-18
lines changed

1 file changed

+150
-18
lines changed

codex-rs/utils/git/src/apply.rs

Lines changed: 150 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -192,28 +192,130 @@ fn render_command_for_log(cwd: &Path, git_cfg: &[String], args: &[String]) -> St
192192

193193
/// Collect every path referenced by the diff headers inside `diff --git` sections.
194194
pub fn extract_paths_from_patch(diff_text: &str) -> Vec<String> {
195-
static RE: Lazy<Regex> = Lazy::new(|| {
196-
Regex::new(r"(?m)^diff --git a/(.*?) b/(.*)$")
197-
.unwrap_or_else(|e| panic!("invalid regex: {e}"))
198-
});
199195
let mut set = std::collections::BTreeSet::new();
200-
for caps in RE.captures_iter(diff_text) {
201-
if let Some(a) = caps.get(1).map(|m| m.as_str())
202-
&& a != "/dev/null"
203-
&& !a.trim().is_empty()
204-
{
205-
set.insert(a.to_string());
196+
for raw_line in diff_text.lines() {
197+
let line = raw_line.trim();
198+
let Some(rest) = line.strip_prefix("diff --git ") else {
199+
continue;
200+
};
201+
let Some((a, b)) = parse_diff_git_paths(rest) else {
202+
continue;
203+
};
204+
if let Some(a) = normalize_diff_path(&a, "a/") {
205+
set.insert(a);
206206
}
207-
if let Some(b) = caps.get(2).map(|m| m.as_str())
208-
&& b != "/dev/null"
209-
&& !b.trim().is_empty()
210-
{
211-
set.insert(b.to_string());
207+
if let Some(b) = normalize_diff_path(&b, "b/") {
208+
set.insert(b);
212209
}
213210
}
214211
set.into_iter().collect()
215212
}
216213

214+
fn parse_diff_git_paths(line: &str) -> Option<(String, String)> {
215+
let mut chars = line.chars().peekable();
216+
let first = read_diff_git_token(&mut chars)?;
217+
let second = read_diff_git_token(&mut chars)?;
218+
Some((first, second))
219+
}
220+
221+
fn read_diff_git_token(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
222+
while matches!(chars.peek(), Some(c) if c.is_whitespace()) {
223+
chars.next();
224+
}
225+
let quote = match chars.peek().copied() {
226+
Some('"') | Some('\'') => chars.next(),
227+
_ => None,
228+
};
229+
let mut out = String::new();
230+
while let Some(c) = chars.next() {
231+
if let Some(q) = quote {
232+
if c == q {
233+
break;
234+
}
235+
if c == '\\' {
236+
out.push('\\');
237+
if let Some(next) = chars.next() {
238+
out.push(next);
239+
}
240+
continue;
241+
}
242+
} else if c.is_whitespace() {
243+
break;
244+
}
245+
out.push(c);
246+
}
247+
if out.is_empty() && quote.is_none() {
248+
None
249+
} else {
250+
Some(match quote {
251+
Some(_) => unescape_c_string(&out),
252+
None => out,
253+
})
254+
}
255+
}
256+
257+
fn normalize_diff_path(raw: &str, prefix: &str) -> Option<String> {
258+
let trimmed = raw.trim();
259+
if trimmed.is_empty() {
260+
return None;
261+
}
262+
if trimmed == "/dev/null" || trimmed == format!("{prefix}dev/null") {
263+
return None;
264+
}
265+
let trimmed = trimmed.strip_prefix(prefix).unwrap_or(trimmed);
266+
if trimmed.is_empty() {
267+
return None;
268+
}
269+
Some(trimmed.to_string())
270+
}
271+
272+
fn unescape_c_string(input: &str) -> String {
273+
let mut out = String::with_capacity(input.len());
274+
let mut chars = input.chars().peekable();
275+
while let Some(c) = chars.next() {
276+
if c != '\\' {
277+
out.push(c);
278+
continue;
279+
}
280+
let Some(next) = chars.next() else {
281+
out.push('\\');
282+
break;
283+
};
284+
match next {
285+
'n' => out.push('\n'),
286+
'r' => out.push('\r'),
287+
't' => out.push('\t'),
288+
'b' => out.push('\u{0008}'),
289+
'f' => out.push('\u{000C}'),
290+
'a' => out.push('\u{0007}'),
291+
'v' => out.push('\u{000B}'),
292+
'\\' => out.push('\\'),
293+
'"' => out.push('"'),
294+
'\'' => out.push('\''),
295+
'0'..='7' => {
296+
let mut value = next.to_digit(8).unwrap_or(0);
297+
for _ in 0..2 {
298+
match chars.peek() {
299+
Some('0'..='7') => {
300+
if let Some(digit) = chars.next() {
301+
value = value * 8 + digit.to_digit(8).unwrap_or(0);
302+
} else {
303+
break;
304+
}
305+
}
306+
_ => break,
307+
}
308+
}
309+
if let Some(ch) = std::char::from_u32(value) {
310+
out.push(ch);
311+
}
312+
}
313+
other => out.push(other),
314+
}
315+
}
316+
out
317+
}
318+
217319
/// Stage only the files that actually exist on disk for the given diff.
218320
pub fn stage_paths(git_root: &Path, diff: &str) -> io::Result<()> {
219321
let paths = extract_paths_from_patch(diff);
@@ -266,12 +368,12 @@ pub fn parse_git_apply_output(
266368
let first = trimmed.chars().next().unwrap_or('\0');
267369
let last = trimmed.chars().last().unwrap_or('\0');
268370
let unquoted = if (first == '"' || first == '\'') && last == first && trimmed.len() >= 2 {
269-
&trimmed[1..trimmed.len() - 1]
371+
unescape_c_string(&trimmed[1..trimmed.len() - 1])
270372
} else {
271-
trimmed
373+
trimmed.to_string()
272374
};
273375
if !unquoted.is_empty() {
274-
set.insert(unquoted.to_string());
376+
set.insert(unquoted);
275377
}
276378
}
277379

@@ -531,6 +633,36 @@ mod tests {
531633
.replace("\r\n", "\n")
532634
}
533635

636+
#[test]
637+
fn extract_paths_handles_quoted_headers() {
638+
let diff = "diff --git \"a/hello world.txt\" \"b/hello world.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello world.txt\n@@ -0,0 +1 @@\n+hi\n";
639+
let paths = extract_paths_from_patch(diff);
640+
assert_eq!(paths, vec!["hello world.txt".to_string()]);
641+
}
642+
643+
#[test]
644+
fn extract_paths_ignores_dev_null_header() {
645+
let diff = "diff --git a/dev/null b/ok.txt\nnew file mode 100644\n--- /dev/null\n+++ b/ok.txt\n@@ -0,0 +1 @@\n+hi\n";
646+
let paths = extract_paths_from_patch(diff);
647+
assert_eq!(paths, vec!["ok.txt".to_string()]);
648+
}
649+
650+
#[test]
651+
fn extract_paths_unescapes_c_style_in_quoted_headers() {
652+
let diff = "diff --git \"a/hello\\tworld.txt\" \"b/hello\\tworld.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello\tworld.txt\n@@ -0,0 +1 @@\n+hi\n";
653+
let paths = extract_paths_from_patch(diff);
654+
assert_eq!(paths, vec!["hello\tworld.txt".to_string()]);
655+
}
656+
657+
#[test]
658+
fn parse_output_unescapes_quoted_paths() {
659+
let stderr = "error: patch failed: \"hello\\tworld.txt\":1\n";
660+
let (applied, skipped, conflicted) = parse_git_apply_output("", stderr);
661+
assert_eq!(applied, Vec::<String>::new());
662+
assert_eq!(conflicted, Vec::<String>::new());
663+
assert_eq!(skipped, vec!["hello\tworld.txt".to_string()]);
664+
}
665+
534666
#[test]
535667
fn apply_add_success() {
536668
let _g = env_lock().lock().unwrap();

0 commit comments

Comments
 (0)