Skip to content

Commit 33e1d08

Browse files
feat(windows) start powershell in utf-8 mode (#7902)
## Summary Adds a FeatureFlag to enforce UTF8 encoding in powershell, particularly Windows Powershell v5. This should help address issues like #7290. Notably, this PR does not include the ability to parse `apply_patch` invocations within UTF8 shell commands (calls to the freeform tool should not be impacted). I am leaving this out of scope for now. We should address before this feature becomes Stable, but those cases are not the default behavior at this time so we're okay for experimentation phase. We should continue cleaning up the `apply_patch::invocation` logic and then can handle it more cleanly. ## Testing - [x] Adds additional testing
1 parent b24b788 commit 33e1d08

File tree

6 files changed

+251
-3
lines changed

6 files changed

+251
-3
lines changed

codex-rs/core/src/features.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ pub enum Feature {
9191
Tui2,
9292
/// Enable discovery and injection of skills.
9393
Skills,
94+
/// Enforce UTF8 output in Powershell.
95+
PowershellUtf8,
9496
}
9597

9698
impl Feature {
@@ -386,6 +388,12 @@ pub const FEATURES: &[FeatureSpec] = &[
386388
stage: Stage::Experimental,
387389
default_enabled: true,
388390
},
391+
FeatureSpec {
392+
id: Feature::PowershellUtf8,
393+
key: "powershell_utf8",
394+
stage: Stage::Experimental,
395+
default_enabled: false,
396+
},
389397
FeatureSpec {
390398
id: Feature::Tui2,
391399
key: "tui2",

codex-rs/core/src/powershell.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,30 @@ use crate::shell::detect_shell_type;
88

99
const POWERSHELL_FLAGS: &[&str] = &["-nologo", "-noprofile", "-command", "-c"];
1010

11+
/// Prefixed command for powershell shell calls to force UTF-8 console output.
12+
pub(crate) const UTF8_OUTPUT_PREFIX: &str =
13+
"[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;\n";
14+
15+
pub(crate) fn prefix_powershell_script_with_utf8(command: &[String]) -> Vec<String> {
16+
let Some((_, script)) = extract_powershell_command(command) else {
17+
return command.to_vec();
18+
};
19+
20+
let trimmed = script.trim_start();
21+
let script = if trimmed.starts_with(UTF8_OUTPUT_PREFIX) {
22+
script.to_string()
23+
} else {
24+
format!("{UTF8_OUTPUT_PREFIX}{script}")
25+
};
26+
27+
let mut command: Vec<String> = command[..(command.len() - 1)]
28+
.iter()
29+
.map(std::string::ToString::to_string)
30+
.collect();
31+
command.push(script);
32+
command
33+
}
34+
1135
/// Extract the PowerShell script body from an invocation such as:
1236
///
1337
/// - ["pwsh", "-NoProfile", "-Command", "Get-ChildItem -Recurse | Select-String foo"]
@@ -22,7 +46,10 @@ pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
2246
}
2347

2448
let shell = &command[0];
25-
if detect_shell_type(&PathBuf::from(shell)) != Some(ShellType::PowerShell) {
49+
if !matches!(
50+
detect_shell_type(&PathBuf::from(shell)),
51+
Some(ShellType::PowerShell)
52+
) {
2653
return None;
2754
}
2855

@@ -36,7 +63,7 @@ pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
3663
}
3764
if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
3865
let script = &command[i + 1];
39-
return Some((shell, script.as_str()));
66+
return Some((shell, script));
4067
}
4168
i += 1;
4269
}

codex-rs/core/src/tools/runtimes/shell.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@ Executes shell requests under the orchestrator: asks for approval when needed,
55
builds a CommandSpec, and runs it under the current SandboxAttempt.
66
*/
77
use crate::exec::ExecToolCallOutput;
8+
use crate::features::Feature;
9+
use crate::powershell::prefix_powershell_script_with_utf8;
810
use crate::sandboxing::SandboxPermissions;
911
use crate::sandboxing::execute_env;
12+
use crate::shell::ShellType;
1013
use crate::tools::runtimes::build_command_spec;
1114
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
1215
use crate::tools::sandboxing::Approvable;
@@ -144,6 +147,13 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
144147
let base_command = &req.command;
145148
let session_shell = ctx.session.user_shell();
146149
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
150+
let command = if matches!(session_shell.shell_type, ShellType::PowerShell)
151+
&& ctx.session.features().enabled(Feature::PowershellUtf8)
152+
{
153+
prefix_powershell_script_with_utf8(&command)
154+
} else {
155+
command
156+
};
147157

148158
let spec = build_command_spec(
149159
&command,

codex-rs/core/src/tools/runtimes/unified_exec.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ the session manager to spawn PTYs once an ExecEnv is prepared.
77
use crate::error::CodexErr;
88
use crate::error::SandboxErr;
99
use crate::exec::ExecExpiration;
10+
use crate::features::Feature;
11+
use crate::powershell::prefix_powershell_script_with_utf8;
1012
use crate::sandboxing::SandboxPermissions;
13+
use crate::shell::ShellType;
1114
use crate::tools::runtimes::build_command_spec;
1215
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
1316
use crate::tools::sandboxing::Approvable;
@@ -165,6 +168,13 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
165168
let base_command = &req.command;
166169
let session_shell = ctx.session.user_shell();
167170
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
171+
let command = if matches!(session_shell.shell_type, ShellType::PowerShell)
172+
&& ctx.session.features().enabled(Feature::PowershellUtf8)
173+
{
174+
prefix_powershell_script_with_utf8(&command)
175+
} else {
176+
command
177+
};
168178

169179
let spec = build_command_spec(
170180
&command,

codex-rs/core/tests/suite/apply_patch_cli.rs

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22

33
use anyhow::Result;
44
use core_test_support::responses::ev_apply_patch_call;
5+
use core_test_support::responses::ev_apply_patch_custom_tool_call;
56
use core_test_support::responses::ev_shell_command_call;
67
use core_test_support::test_codex::ApplyPatchModelOutput;
78
use pretty_assertions::assert_eq;
89
use std::fs;
10+
use std::sync::atomic::AtomicI32;
11+
use std::sync::atomic::Ordering;
912

1013
use codex_core::features::Feature;
1114
use codex_core::protocol::AskForApproval;
@@ -29,6 +32,11 @@ use core_test_support::test_codex::test_codex;
2932
use core_test_support::wait_for_event;
3033
use serde_json::json;
3134
use test_case::test_case;
35+
use wiremock::Mock;
36+
use wiremock::Respond;
37+
use wiremock::ResponseTemplate;
38+
use wiremock::matchers::method;
39+
use wiremock::matchers::path_regex;
3240

3341
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
3442
apply_patch_harness_with(|builder| builder).await
@@ -718,6 +726,132 @@ async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() ->
718726
Ok(())
719727
}
720728

729+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
730+
async fn apply_patch_cli_can_use_shell_command_output_as_patch_input() -> Result<()> {
731+
skip_if_no_network!(Ok(()));
732+
733+
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
734+
735+
let source_contents = "line1\nnaïve café\nline3\n";
736+
let source_path = harness.path("source.txt");
737+
fs::write(&source_path, source_contents)?;
738+
739+
let read_call_id = "read-source";
740+
let apply_call_id = "apply-from-read";
741+
742+
fn stdout_from_shell_output(output: &str) -> String {
743+
let normalized = output.replace("\r\n", "\n").replace('\r', "\n");
744+
normalized
745+
.split_once("Output:\n")
746+
.map(|x| x.1)
747+
.unwrap_or("")
748+
.trim_end_matches('\n')
749+
.to_string()
750+
}
751+
752+
fn function_call_output_text(body: &serde_json::Value, call_id: &str) -> String {
753+
body.get("input")
754+
.and_then(serde_json::Value::as_array)
755+
.and_then(|items| {
756+
items.iter().find(|item| {
757+
item.get("type").and_then(serde_json::Value::as_str)
758+
== Some("function_call_output")
759+
&& item.get("call_id").and_then(serde_json::Value::as_str) == Some(call_id)
760+
})
761+
})
762+
.and_then(|item| item.get("output").and_then(serde_json::Value::as_str))
763+
.expect("function_call_output output string")
764+
.to_string()
765+
}
766+
767+
struct DynamicApplyFromRead {
768+
num_calls: AtomicI32,
769+
read_call_id: String,
770+
apply_call_id: String,
771+
}
772+
773+
impl Respond for DynamicApplyFromRead {
774+
fn respond(&self, request: &wiremock::Request) -> ResponseTemplate {
775+
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
776+
match call_num {
777+
0 => {
778+
let command = if cfg!(windows) {
779+
"Get-Content -Encoding utf8 source.txt"
780+
} else {
781+
"cat source.txt"
782+
};
783+
let body = sse(vec![
784+
ev_response_created("resp-1"),
785+
ev_shell_command_call(&self.read_call_id, command),
786+
ev_completed("resp-1"),
787+
]);
788+
ResponseTemplate::new(200)
789+
.insert_header("content-type", "text/event-stream")
790+
.set_body_string(body)
791+
}
792+
1 => {
793+
let body_json: serde_json::Value =
794+
request.body_json().expect("request body should be json");
795+
let read_output = function_call_output_text(&body_json, &self.read_call_id);
796+
eprintln!("read_output: \n{read_output}");
797+
let stdout = stdout_from_shell_output(&read_output);
798+
eprintln!("stdout: \n{stdout}");
799+
let patch_lines = stdout
800+
.lines()
801+
.map(|line| format!("+{line}"))
802+
.collect::<Vec<_>>()
803+
.join("\n");
804+
let patch = format!(
805+
"*** Begin Patch\n*** Add File: target.txt\n{patch_lines}\n*** End Patch"
806+
);
807+
808+
eprintln!("patch: \n{patch}");
809+
810+
let body = sse(vec![
811+
ev_response_created("resp-2"),
812+
ev_apply_patch_custom_tool_call(&self.apply_call_id, &patch),
813+
ev_completed("resp-2"),
814+
]);
815+
ResponseTemplate::new(200)
816+
.insert_header("content-type", "text/event-stream")
817+
.set_body_string(body)
818+
}
819+
2 => {
820+
let body = sse(vec![
821+
ev_assistant_message("msg-1", "ok"),
822+
ev_completed("resp-3"),
823+
]);
824+
ResponseTemplate::new(200)
825+
.insert_header("content-type", "text/event-stream")
826+
.set_body_string(body)
827+
}
828+
_ => panic!("no response for call {call_num}"),
829+
}
830+
}
831+
}
832+
833+
let responder = DynamicApplyFromRead {
834+
num_calls: AtomicI32::new(0),
835+
read_call_id: read_call_id.to_string(),
836+
apply_call_id: apply_call_id.to_string(),
837+
};
838+
Mock::given(method("POST"))
839+
.and(path_regex(".*/responses$"))
840+
.respond_with(responder)
841+
.expect(3)
842+
.mount(harness.server())
843+
.await;
844+
845+
harness
846+
.submit("read source.txt, then apply it to target.txt")
847+
.await?;
848+
849+
let target_contents = fs::read_to_string(harness.path("target.txt"))?;
850+
assert_eq!(target_contents, source_contents);
851+
852+
Ok(())
853+
}
854+
721855
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
722856
async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<()> {
723857
skip_if_no_network!(Ok(()));

codex-rs/core/tests/suite/shell_command.rs

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use anyhow::Result;
2+
use codex_core::features::Feature;
23
use core_test_support::assert_regex_match;
34
use core_test_support::responses::ev_assistant_message;
45
use core_test_support::responses::ev_completed;
@@ -12,6 +13,7 @@ use core_test_support::test_codex::TestCodexBuilder;
1213
use core_test_support::test_codex::TestCodexHarness;
1314
use core_test_support::test_codex::test_codex;
1415
use serde_json::json;
16+
use test_case::test_case;
1517

1618
fn shell_responses_with_timeout(
1719
call_id: &str,
@@ -201,7 +203,6 @@ async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
201203
skip_if_no_network!(Ok(()));
202204

203205
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
204-
205206
let call_id = "shell-command-timeout";
206207
let command = if cfg!(windows) {
207208
"timeout /t 5"
@@ -224,3 +225,61 @@ async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
224225

225226
Ok(())
226227
}
228+
229+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
230+
#[test_case(true ; "with_login")]
231+
#[test_case(false ; "without_login")]
232+
async fn unicode_output(login: bool) -> anyhow::Result<()> {
233+
skip_if_no_network!(Ok(()));
234+
235+
let harness = shell_command_harness_with(|builder| {
236+
builder.with_model("gpt-5.2").with_config(|config| {
237+
config.features.enable(Feature::PowershellUtf8);
238+
})
239+
})
240+
.await?;
241+
242+
let call_id = "unicode_output";
243+
mount_shell_responses(
244+
&harness,
245+
call_id,
246+
"git -c alias.say='!printf \"%s\" \"naïve_café\"' say",
247+
Some(login),
248+
)
249+
.await;
250+
harness.submit("run the command without login").await?;
251+
252+
let output = harness.function_call_stdout(call_id).await;
253+
assert_shell_command_output(&output, "naïve_café")?;
254+
255+
Ok(())
256+
}
257+
258+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
259+
#[test_case(true ; "with_login")]
260+
#[test_case(false ; "without_login")]
261+
async fn unicode_output_with_newlines(login: bool) -> anyhow::Result<()> {
262+
skip_if_no_network!(Ok(()));
263+
264+
let harness = shell_command_harness_with(|builder| {
265+
builder.with_model("gpt-5.2").with_config(|config| {
266+
config.features.enable(Feature::PowershellUtf8);
267+
})
268+
})
269+
.await?;
270+
271+
let call_id = "unicode_output";
272+
mount_shell_responses(
273+
&harness,
274+
call_id,
275+
"echo 'line1\nnaïve café\nline3'",
276+
Some(login),
277+
)
278+
.await;
279+
harness.submit("run the command without login").await?;
280+
281+
let output = harness.function_call_stdout(call_id).await;
282+
assert_shell_command_output(&output, "line1\\nnaïve café\\nline3")?;
283+
284+
Ok(())
285+
}

0 commit comments

Comments
 (0)