Skip to content

Commit 2403e2d

Browse files
authored
Treat zsh -lc like bash -lc (openai#5411)
Without proper `zsh -lc` parsing, we lose some things like proper command parsing, turn diff tracking, safe command checks, and other things we expect from raw or `bash -lc` commands.
1 parent 84da181 commit 2403e2d

File tree

4 files changed

+60
-24
lines changed

4 files changed

+60
-24
lines changed

codex-rs/core/src/bash.rs

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ use tree_sitter_bash::LANGUAGE as BASH;
55

66
/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
77
/// success or None if parsing failed.
8-
pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
8+
pub fn try_parse_shell(shell_lc_arg: &str) -> Option<Tree> {
99
let lang = BASH.into();
1010
let mut parser = Parser::new();
1111
#[expect(clippy::expect_used)]
1212
parser.set_language(&lang).expect("load bash grammar");
1313
let old_tree: Option<&Tree> = None;
14-
parser.parse(bash_lc_arg, old_tree)
14+
parser.parse(shell_lc_arg, old_tree)
1515
}
1616

1717
/// Parse a script which may contain multiple simple commands joined only by
@@ -88,18 +88,19 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
8888
Some(commands)
8989
}
9090

91-
/// Returns the sequence of plain commands within a `bash -lc "..."` invocation
92-
/// when the script only contains word-only commands joined by safe operators.
93-
pub fn parse_bash_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
94-
let [bash, flag, script] = command else {
91+
/// Returns the sequence of plain commands within a `bash -lc "..."` or
92+
/// `zsh -lc "..."` invocation when the script only contains word-only commands
93+
/// joined by safe operators.
94+
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
95+
let [shell, flag, script] = command else {
9596
return None;
9697
};
9798

98-
if bash != "bash" || flag != "-lc" {
99+
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
99100
return None;
100101
}
101102

102-
let tree = try_parse_bash(script)?;
103+
let tree = try_parse_shell(script)?;
103104
try_parse_word_only_commands_sequence(&tree, script)
104105
}
105106

@@ -154,7 +155,7 @@ mod tests {
154155
use super::*;
155156

156157
fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
157-
let tree = try_parse_bash(src)?;
158+
let tree = try_parse_shell(src)?;
158159
try_parse_word_only_commands_sequence(&tree, src)
159160
}
160161

@@ -234,4 +235,11 @@ mod tests {
234235
fn rejects_trailing_operator_parse_error() {
235236
assert!(parse_seq("ls &&").is_none());
236237
}
238+
239+
#[test]
240+
fn parse_zsh_lc_plain_commands() {
241+
let command = vec!["zsh".to_string(), "-lc".to_string(), "ls".to_string()];
242+
let parsed = parse_shell_lc_plain_commands(&command).unwrap();
243+
assert_eq!(parsed, vec![vec!["ls".to_string()]]);
244+
}
237245
}

codex-rs/core/src/command_safety/is_dangerous_command.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
use crate::bash::parse_bash_lc_plain_commands;
1+
use crate::bash::parse_shell_lc_plain_commands;
22

33
pub fn command_might_be_dangerous(command: &[String]) -> bool {
44
if is_dangerous_to_call_with_exec(command) {
55
return true;
66
}
77

88
// Support `bash -lc "<script>"` where the any part of the script might contain a dangerous command.
9-
if let Some(all_commands) = parse_bash_lc_plain_commands(command)
9+
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
1010
&& all_commands
1111
.iter()
1212
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
@@ -57,6 +57,15 @@ mod tests {
5757
])));
5858
}
5959

60+
#[test]
61+
fn zsh_git_reset_is_dangerous() {
62+
assert!(command_might_be_dangerous(&vec_str(&[
63+
"zsh",
64+
"-lc",
65+
"git reset --hard"
66+
])));
67+
}
68+
6069
#[test]
6170
fn git_status_is_not_dangerous() {
6271
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));

codex-rs/core/src/command_safety/is_safe_command.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::bash::parse_bash_lc_plain_commands;
1+
use crate::bash::parse_shell_lc_plain_commands;
22

33
pub fn is_known_safe_command(command: &[String]) -> bool {
44
let command: Vec<String> = command
@@ -29,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
2929
// introduce side effects ( "&&", "||", ";", and "|" ). If every
3030
// individual command in the script is itself a known‑safe command, then
3131
// the composite expression is considered safe.
32-
if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
32+
if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
3333
&& !all_commands.is_empty()
3434
&& all_commands
3535
.iter()
@@ -201,6 +201,11 @@ mod tests {
201201
])));
202202
}
203203

204+
#[test]
205+
fn zsh_lc_safe_command_sequence() {
206+
assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
207+
}
208+
204209
#[test]
205210
fn unknown_or_partial() {
206211
assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));

codex-rs/core/src/parse_command.rs

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::bash::try_parse_bash;
1+
use crate::bash::try_parse_shell;
22
use crate::bash::try_parse_word_only_commands_sequence;
33
use codex_protocol::parse_command::ParsedCommand;
44
use shlex::split as shlex_split;
@@ -193,6 +193,19 @@ mod tests {
193193
);
194194
}
195195

196+
#[test]
197+
fn zsh_lc_supports_cat() {
198+
let inner = "cat README.md";
199+
assert_parsed(
200+
&vec_str(&["zsh", "-lc", inner]),
201+
vec![ParsedCommand::Read {
202+
cmd: inner.to_string(),
203+
name: "README.md".to_string(),
204+
path: PathBuf::from("README.md"),
205+
}],
206+
);
207+
}
208+
196209
#[test]
197210
fn cd_then_cat_is_single_read() {
198211
assert_parsed(
@@ -843,7 +856,7 @@ mod tests {
843856
}
844857

845858
pub fn parse_command_impl(command: &[String]) -> Vec<ParsedCommand> {
846-
if let Some(commands) = parse_bash_lc_commands(command) {
859+
if let Some(commands) = parse_shell_lc_commands(command) {
847860
return commands;
848861
}
849862

@@ -981,7 +994,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
981994
}
982995

983996
/// Normalize a command by:
984-
/// - Removing `yes`/`no`/`bash -c`/`bash -lc` prefixes.
997+
/// - Removing `yes`/`no`/`bash -c`/`bash -lc`/`zsh -c`/`zsh -lc` prefixes.
985998
/// - Splitting on `|` and `&&`/`||`/`;
986999
fn normalize_tokens(cmd: &[String]) -> Vec<String> {
9871000
match cmd {
@@ -993,9 +1006,10 @@ fn normalize_tokens(cmd: &[String]) -> Vec<String> {
9931006
// Do not re-shlex already-tokenized input; just drop the prefix.
9941007
rest.to_vec()
9951008
}
996-
[bash, flag, script] if bash == "bash" && (flag == "-c" || flag == "-lc") => {
997-
shlex_split(script)
998-
.unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()])
1009+
[shell, flag, script]
1010+
if (shell == "bash" || shell == "zsh") && (flag == "-c" || flag == "-lc") =>
1011+
{
1012+
shlex_split(script).unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()])
9991013
}
10001014
_ => cmd.to_vec(),
10011015
}
@@ -1151,19 +1165,19 @@ fn parse_find_query_and_path(tail: &[String]) -> (Option<String>, Option<String>
11511165
(query, path)
11521166
}
11531167

1154-
fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
1155-
let [bash, flag, script] = original else {
1168+
fn parse_shell_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
1169+
let [shell, flag, script] = original else {
11561170
return None;
11571171
};
1158-
if bash != "bash" || flag != "-lc" {
1172+
if flag != "-lc" || !(shell == "bash" || shell == "zsh") {
11591173
return None;
11601174
}
1161-
if let Some(tree) = try_parse_bash(script)
1175+
if let Some(tree) = try_parse_shell(script)
11621176
&& let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
11631177
&& !all_commands.is_empty()
11641178
{
11651179
let script_tokens = shlex_split(script)
1166-
.unwrap_or_else(|| vec!["bash".to_string(), flag.clone(), script.clone()]);
1180+
.unwrap_or_else(|| vec![shell.clone(), flag.clone(), script.clone()]);
11671181
// Strip small formatting helpers (e.g., head/tail/awk/wc/etc) so we
11681182
// bias toward the primary command when pipelines are present.
11691183
// First, drop obvious small formatting helpers (e.g., wc/awk/etc).

0 commit comments

Comments
 (0)