11use crate :: conversion:: IntoPyObject ;
2+ #[ cfg( not( target_os = "wasi" ) ) ]
3+ use crate :: ffi;
4+ #[ cfg( not( target_os = "wasi" ) ) ]
25use crate :: ffi_ptr_ext:: FfiPtrExt ;
36#[ cfg( feature = "experimental-inspect" ) ]
47use crate :: inspect:: PyStaticExpr ;
58use crate :: instance:: Bound ;
69#[ cfg( feature = "experimental-inspect" ) ]
710use crate :: type_object:: PyTypeInfo ;
811use crate :: types:: PyString ;
9- use crate :: { ffi, Borrowed , FromPyObject , PyAny , PyErr , Python } ;
12+ #[ cfg( any( unix, target_os = "emscripten" ) ) ]
13+ use crate :: types:: { PyBytes , PyBytesMethods } ;
14+ use crate :: { Borrowed , FromPyObject , PyAny , PyErr , Python } ;
1015use std:: borrow:: Cow ;
1116use std:: convert:: Infallible ;
1217use std:: ffi:: { OsStr , OsString } ;
18+ #[ cfg( any( unix, target_os = "emscripten" ) ) ]
19+ use std:: os:: unix:: ffi:: OsStrExt ;
20+ #[ cfg( windows) ]
21+ use std:: os:: windows:: ffi:: OsStrExt ;
1322
1423impl < ' py > IntoPyObject < ' py > for & OsStr {
1524 type Target = PyString ;
@@ -21,42 +30,43 @@ impl<'py> IntoPyObject<'py> for &OsStr {
2130
2231 fn into_pyobject ( self , py : Python < ' py > ) -> Result < Self :: Output , Self :: Error > {
2332 // If the string is UTF-8, take the quick and easy shortcut
33+ #[ cfg( not( target_os = "wasi" ) ) ]
2434 if let Some ( valid_utf8_path) = self . to_str ( ) {
2535 return valid_utf8_path. into_pyobject ( py) ;
2636 }
2737
28- // All targets besides windows support the std::os::unix::ffi::OsStrExt API:
29- // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
30- #[ cfg( not( windows) ) ]
38+ #[ cfg( target_os = "wasi" ) ]
3139 {
32- # [ cfg ( target_os = "wasi" ) ]
33- let bytes = self . to_str ( ) . expect ( "wasi strings are UTF8" ) . as_bytes ( ) ;
34- # [ cfg ( not ( target_os = "wasi" ) ) ]
35- let bytes = std :: os :: unix :: ffi :: OsStrExt :: as_bytes ( self ) ;
40+ self . to_str ( )
41+ . expect ( "wasi strings are UTF8" )
42+ . into_pyobject ( py )
43+ }
3644
45+ #[ cfg( any( unix, target_os = "emscripten" ) ) ]
46+ {
47+ let bytes = self . as_bytes ( ) ;
3748 let ptr = bytes. as_ptr ( ) . cast ( ) ;
3849 let len = bytes. len ( ) as ffi:: Py_ssize_t ;
3950 unsafe {
4051 // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
4152 // parse os strings losslessly (i.e. surrogateescape most of the time)
4253 Ok ( ffi:: PyUnicode_DecodeFSDefaultAndSize ( ptr, len)
4354 . assume_owned ( py)
44- . cast_into_unchecked :: < PyString > ( ) )
55+ . cast_into_unchecked ( ) )
4556 }
4657 }
4758
4859 #[ cfg( windows) ]
4960 {
50- let wstr: Vec < u16 > = std:: os:: windows:: ffi:: OsStrExt :: encode_wide ( self ) . collect ( ) ;
51-
61+ let wstr: Vec < u16 > = self . encode_wide ( ) . collect ( ) ;
5262 unsafe {
5363 // This will not panic because the data from encode_wide is well-formed Windows
5464 // string data
5565
5666 Ok (
5767 ffi:: PyUnicode_FromWideChar ( wstr. as_ptr ( ) , wstr. len ( ) as ffi:: Py_ssize_t )
5868 . assume_owned ( py)
59- . cast_into_unchecked :: < PyString > ( ) ,
69+ . cast_into_unchecked ( ) ,
6070 )
6171 }
6272 }
@@ -86,10 +96,13 @@ impl FromPyObject<'_, '_> for OsString {
8696 fn extract ( ob : Borrowed < ' _ , ' _ , PyAny > ) -> Result < Self , Self :: Error > {
8797 let pystring = ob. cast :: < PyString > ( ) ?;
8898
89- #[ cfg( not ( windows ) ) ]
99+ #[ cfg( target_os = "wasi" ) ]
90100 {
91- use crate :: types:: { PyBytes , PyBytesMethods } ;
101+ Ok ( pystring. to_cow ( ) ?. into_owned ( ) . into ( ) )
102+ }
92103
104+ #[ cfg( any( unix, target_os = "emscripten" ) ) ]
105+ {
93106 // Decode from Python's lossless bytes string representation back into raw bytes
94107 // SAFETY: PyUnicode_EncodeFSDefault returns a new reference or null on error, known to
95108 // be a `bytes` object, thread is attached to the interpreter
@@ -100,13 +113,7 @@ impl FromPyObject<'_, '_> for OsString {
100113 } ;
101114
102115 // Create an OsStr view into the raw bytes from Python
103- //
104- // For WASI: OS strings are UTF-8 by definition.
105- #[ cfg( target_os = "wasi" ) ]
106- let os_str: & OsStr = OsStr :: new ( std:: str:: from_utf8 ( fs_encoded_bytes. as_bytes ( ) ) ?) ;
107- #[ cfg( not( target_os = "wasi" ) ) ]
108- let os_str: & OsStr =
109- std:: os:: unix:: ffi:: OsStrExt :: from_bytes ( fs_encoded_bytes. as_bytes ( ) ) ;
116+ let os_str: & OsStr = OsStrExt :: from_bytes ( fs_encoded_bytes. as_bytes ( ) ) ;
110117
111118 Ok ( os_str. to_os_string ( ) )
112119 }
@@ -217,10 +224,12 @@ impl<'py> IntoPyObject<'py> for &OsString {
217224
218225#[ cfg( test) ]
219226mod tests {
227+ #[ cfg( target_os = "wasi" ) ]
228+ use crate :: exceptions:: PyFileNotFoundError ;
220229 use crate :: types:: { PyAnyMethods , PyString , PyStringMethods } ;
221230 use crate :: { Bound , BoundObject , IntoPyObject , Python } ;
222231 use std:: fmt:: Debug ;
223- #[ cfg( unix) ]
232+ #[ cfg( any ( unix, target_os = "emscripten" ) ) ]
224233 use std:: os:: unix:: ffi:: OsStringExt ;
225234 #[ cfg( windows) ]
226235 use std:: os:: windows:: ffi:: OsStringExt ;
@@ -230,13 +239,10 @@ mod tests {
230239 } ;
231240
232241 #[ test]
233- #[ cfg( not ( any( windows , target_os = "wasi" ) ) ) ]
242+ #[ cfg( any( unix , target_os = "emscripten" ) ) ]
234243 fn test_non_utf8_conversion ( ) {
235244 Python :: attach ( |py| {
236- #[ cfg( not( target_os = "wasi" ) ) ]
237245 use std:: os:: unix:: ffi:: OsStrExt ;
238- #[ cfg( target_os = "wasi" ) ]
239- use std:: os:: wasi:: ffi:: OsStrExt ;
240246
241247 // this is not valid UTF-8
242248 let payload = & [ 250 , 251 , 252 , 253 , 254 , 255 , 0 , 255 ] ;
@@ -249,6 +255,26 @@ mod tests {
249255 } ) ;
250256 }
251257
258+ #[ test]
259+ #[ cfg( target_os = "wasi" ) ]
260+ fn test_extract_non_utf8_wasi_should_error ( ) {
261+ Python :: attach ( |py| {
262+ // Non utf-8 strings are not valid wasi paths
263+ let open_result = py. run ( c"open('\\ udcff', 'rb')" , None , None ) . unwrap_err ( ) ;
264+ assert ! (
265+ !open_result. is_instance_of:: <PyFileNotFoundError >( py) ,
266+ "Opening invalid utf8 will error with OSError, not FileNotFoundError"
267+ ) ;
268+
269+ // Create a Python string with not valid UTF-8: &[255]
270+ let py_str = py. eval ( c"'\\ udcff'" , None , None ) . unwrap ( ) ;
271+ assert ! (
272+ py_str. extract:: <OsString >( ) . is_err( ) ,
273+ "Extracting invalid UTF-8 as OsString should error"
274+ ) ;
275+ } ) ;
276+ }
277+
252278 #[ test]
253279 fn test_intopyobject_roundtrip ( ) {
254280 Python :: attach ( |py| {
@@ -326,11 +352,11 @@ mod tests {
326352 OsString :: from_wide ( & [ 'A' as u16 , 0xD800 , 'B' as u16 ] )
327353 } ;
328354
329- #[ cfg( unix) ]
355+ #[ cfg( any ( unix, target_os = "emscripten" ) ) ]
330356 let os_str = { OsString :: from_vec ( vec ! [ 250 , 251 , 252 , 253 , 254 , 255 , 0 , 255 ] ) } ;
331357
332358 // This cannot be borrowed because it is not valid UTF-8
333- #[ cfg( any( windows, unix) ) ]
359+ #[ cfg( any( windows, unix, target_os = "emscripten" ) ) ]
334360 test_extract :: < OsStr > ( py, & os_str, false ) ;
335361 } ) ;
336362 }
0 commit comments