Skip to content

Commit 1cc7d68

Browse files
Cleanup osstr conversions (#5724)
* Cleanup `osstr` conversions * Add proper `OsStr` tests on `wasi` * Fixup `Path` tests * Move around `Ext` imports
1 parent 95fd08e commit 1cc7d68

File tree

2 files changed

+58
-35
lines changed

2 files changed

+58
-35
lines changed

src/conversions/std/osstr.rs

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,24 @@
11
use crate::conversion::IntoPyObject;
2+
#[cfg(not(target_os = "wasi"))]
3+
use crate::ffi;
4+
#[cfg(not(target_os = "wasi"))]
25
use crate::ffi_ptr_ext::FfiPtrExt;
36
#[cfg(feature = "experimental-inspect")]
47
use crate::inspect::PyStaticExpr;
58
use crate::instance::Bound;
69
#[cfg(feature = "experimental-inspect")]
710
use crate::type_object::PyTypeInfo;
811
use crate::types::PyString;
9-
use crate::{ffi, Borrowed, FromPyObject, PyAny, PyErr, Python};
12+
#[cfg(any(unix, target_os = "emscripten"))]
13+
use crate::types::{PyBytes, PyBytesMethods};
14+
use crate::{Borrowed, FromPyObject, PyAny, PyErr, Python};
1015
use std::borrow::Cow;
1116
use std::convert::Infallible;
1217
use std::ffi::{OsStr, OsString};
18+
#[cfg(any(unix, target_os = "emscripten"))]
19+
use std::os::unix::ffi::OsStrExt;
20+
#[cfg(windows)]
21+
use std::os::windows::ffi::OsStrExt;
1322

1423
impl<'py> IntoPyObject<'py> for &OsStr {
1524
type Target = PyString;
@@ -21,42 +30,43 @@ impl<'py> IntoPyObject<'py> for &OsStr {
2130

2231
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
2332
// If the string is UTF-8, take the quick and easy shortcut
33+
#[cfg(not(target_os = "wasi"))]
2434
if let Some(valid_utf8_path) = self.to_str() {
2535
return valid_utf8_path.into_pyobject(py);
2636
}
2737

28-
// All targets besides windows support the std::os::unix::ffi::OsStrExt API:
29-
// https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
30-
#[cfg(not(windows))]
38+
#[cfg(target_os = "wasi")]
3139
{
32-
#[cfg(target_os = "wasi")]
33-
let bytes = self.to_str().expect("wasi strings are UTF8").as_bytes();
34-
#[cfg(not(target_os = "wasi"))]
35-
let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self);
40+
self.to_str()
41+
.expect("wasi strings are UTF8")
42+
.into_pyobject(py)
43+
}
3644

45+
#[cfg(any(unix, target_os = "emscripten"))]
46+
{
47+
let bytes = self.as_bytes();
3748
let ptr = bytes.as_ptr().cast();
3849
let len = bytes.len() as ffi::Py_ssize_t;
3950
unsafe {
4051
// DecodeFSDefault automatically chooses an appropriate decoding mechanism to
4152
// parse os strings losslessly (i.e. surrogateescape most of the time)
4253
Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len)
4354
.assume_owned(py)
44-
.cast_into_unchecked::<PyString>())
55+
.cast_into_unchecked())
4556
}
4657
}
4758

4859
#[cfg(windows)]
4960
{
50-
let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect();
51-
61+
let wstr: Vec<u16> = self.encode_wide().collect();
5262
unsafe {
5363
// This will not panic because the data from encode_wide is well-formed Windows
5464
// string data
5565

5666
Ok(
5767
ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t)
5868
.assume_owned(py)
59-
.cast_into_unchecked::<PyString>(),
69+
.cast_into_unchecked(),
6070
)
6171
}
6272
}
@@ -86,10 +96,13 @@ impl FromPyObject<'_, '_> for OsString {
8696
fn extract(ob: Borrowed<'_, '_, PyAny>) -> Result<Self, Self::Error> {
8797
let pystring = ob.cast::<PyString>()?;
8898

89-
#[cfg(not(windows))]
99+
#[cfg(target_os = "wasi")]
90100
{
91-
use crate::types::{PyBytes, PyBytesMethods};
101+
Ok(pystring.to_cow()?.into_owned().into())
102+
}
92103

104+
#[cfg(any(unix, target_os = "emscripten"))]
105+
{
93106
// Decode from Python's lossless bytes string representation back into raw bytes
94107
// SAFETY: PyUnicode_EncodeFSDefault returns a new reference or null on error, known to
95108
// be a `bytes` object, thread is attached to the interpreter
@@ -100,13 +113,7 @@ impl FromPyObject<'_, '_> for OsString {
100113
};
101114

102115
// Create an OsStr view into the raw bytes from Python
103-
//
104-
// For WASI: OS strings are UTF-8 by definition.
105-
#[cfg(target_os = "wasi")]
106-
let os_str: &OsStr = OsStr::new(std::str::from_utf8(fs_encoded_bytes.as_bytes())?);
107-
#[cfg(not(target_os = "wasi"))]
108-
let os_str: &OsStr =
109-
std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes());
116+
let os_str: &OsStr = OsStrExt::from_bytes(fs_encoded_bytes.as_bytes());
110117

111118
Ok(os_str.to_os_string())
112119
}
@@ -217,10 +224,12 @@ impl<'py> IntoPyObject<'py> for &OsString {
217224

218225
#[cfg(test)]
219226
mod tests {
227+
#[cfg(target_os = "wasi")]
228+
use crate::exceptions::PyFileNotFoundError;
220229
use crate::types::{PyAnyMethods, PyString, PyStringMethods};
221230
use crate::{Bound, BoundObject, IntoPyObject, Python};
222231
use std::fmt::Debug;
223-
#[cfg(unix)]
232+
#[cfg(any(unix, target_os = "emscripten"))]
224233
use std::os::unix::ffi::OsStringExt;
225234
#[cfg(windows)]
226235
use std::os::windows::ffi::OsStringExt;
@@ -230,13 +239,10 @@ mod tests {
230239
};
231240

232241
#[test]
233-
#[cfg(not(any(windows, target_os = "wasi")))]
242+
#[cfg(any(unix, target_os = "emscripten"))]
234243
fn test_non_utf8_conversion() {
235244
Python::attach(|py| {
236-
#[cfg(not(target_os = "wasi"))]
237245
use std::os::unix::ffi::OsStrExt;
238-
#[cfg(target_os = "wasi")]
239-
use std::os::wasi::ffi::OsStrExt;
240246

241247
// this is not valid UTF-8
242248
let payload = &[250, 251, 252, 253, 254, 255, 0, 255];
@@ -249,6 +255,26 @@ mod tests {
249255
});
250256
}
251257

258+
#[test]
259+
#[cfg(target_os = "wasi")]
260+
fn test_extract_non_utf8_wasi_should_error() {
261+
Python::attach(|py| {
262+
// Non utf-8 strings are not valid wasi paths
263+
let open_result = py.run(c"open('\\udcff', 'rb')", None, None).unwrap_err();
264+
assert!(
265+
!open_result.is_instance_of::<PyFileNotFoundError>(py),
266+
"Opening invalid utf8 will error with OSError, not FileNotFoundError"
267+
);
268+
269+
// Create a Python string with not valid UTF-8: &[255]
270+
let py_str = py.eval(c"'\\udcff'", None, None).unwrap();
271+
assert!(
272+
py_str.extract::<OsString>().is_err(),
273+
"Extracting invalid UTF-8 as OsString should error"
274+
);
275+
});
276+
}
277+
252278
#[test]
253279
fn test_intopyobject_roundtrip() {
254280
Python::attach(|py| {
@@ -326,11 +352,11 @@ mod tests {
326352
OsString::from_wide(&['A' as u16, 0xD800, 'B' as u16])
327353
};
328354

329-
#[cfg(unix)]
355+
#[cfg(any(unix, target_os = "emscripten"))]
330356
let os_str = { OsString::from_vec(vec![250, 251, 252, 253, 254, 255, 0, 255]) };
331357

332358
// This cannot be borrowed because it is not valid UTF-8
333-
#[cfg(any(windows, unix))]
359+
#[cfg(any(windows, unix, target_os = "emscripten"))]
334360
test_extract::<OsStr>(py, &os_str, false);
335361
});
336362
}

src/conversions/std/path.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,19 +138,16 @@ mod tests {
138138
#[cfg(not(target_os = "wasi"))]
139139
use std::ffi::OsStr;
140140
use std::fmt::Debug;
141-
#[cfg(unix)]
141+
#[cfg(any(unix, target_os = "emscripten"))]
142142
use std::os::unix::ffi::OsStringExt;
143143
#[cfg(windows)]
144144
use std::os::windows::ffi::OsStringExt;
145145

146146
#[test]
147-
#[cfg(not(any(windows, target_os = "wasi")))]
147+
#[cfg(any(unix, target_os = "emscripten"))]
148148
fn test_non_utf8_conversion() {
149149
Python::attach(|py| {
150-
#[cfg(not(target_os = "wasi"))]
151150
use std::os::unix::ffi::OsStrExt;
152-
#[cfg(target_os = "wasi")]
153-
use std::os::wasi::ffi::OsStrExt;
154151

155152
// this is not valid UTF-8
156153
let payload = &[250, 251, 252, 253, 254, 255, 0, 255];
@@ -220,11 +217,11 @@ mod tests {
220217
OsString::from_wide(&['A' as u16, 0xD800, 'B' as u16])
221218
};
222219

223-
#[cfg(unix)]
220+
#[cfg(any(unix, target_os = "emscripten"))]
224221
let os_str = { OsString::from_vec(vec![250, 251, 252, 253, 254, 255, 0, 255]) };
225222

226223
// This cannot be borrowed because it is not valid UTF-8
227-
#[cfg(any(unix, windows))]
224+
#[cfg(any(unix, windows, target_os = "emscripten"))]
228225
test_extract::<OsStr>(py, &os_str, false);
229226
});
230227
}

0 commit comments

Comments
 (0)