Skip to content

Commit 9cb5ab7

Browse files
committed
mbcs_codec
1 parent 590da47 commit 9cb5ab7

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed

crates/vm/src/stdlib/codecs.rs

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,190 @@ mod _codecs {
225225
}};
226226
}
227227

228+
/// Positional arguments accepted by the Windows implementation of `mbcs_encode`.
#[cfg(windows)]
#[derive(FromArgs)]
struct MbcsEncodeArgs {
    /// Text to be encoded.
    #[pyarg(positional)]
    s: PyStrRef,
    /// Optional error-handler name; `mbcs_encode` falls back to "strict" when it is absent.
    #[pyarg(positional, optional)]
    errors: Option<PyStrRef>,
}
236+
237+
// Windows implementation of `mbcs_encode`.
//
// Converts `args.s` to UTF-16 and encodes it with the system ANSI code page
// (`CP_ACP`) through `WideCharToMultiByte`.
//
// Returns `(encoded_bytes, number_of_characters_in_s)`.
//
// Errors:
// * OverflowError      - the UTF-16 form has more code units than fit in an `i32`,
//                        the length type taken by the Win32 API.
// * OSError            - `WideCharToMultiByte` reported a failure.
// * UnicodeEncodeError - `errors` is "strict" (the default) and the API had to
//                        substitute the code page's default character.
//
// Any `errors` value other than "strict" is encoded without the default-character
// check, i.e. unencodable characters are replaced by the API.
//
// NOTE(review): Microsoft documents that code page 65001 (UTF-8) rejects
// `WC_NO_BEST_FIT_CHARS` and a non-null `lpUsedDefaultChar`; confirm the behaviour
// on systems whose ANSI code page is UTF-8.
#[cfg(windows)]
#[pyfunction]
fn mbcs_encode(args: MbcsEncodeArgs, vm: &VirtualMachine) -> PyResult<(Vec<u8>, usize)> {
    use windows_sys::Win32::Globalization::{
        CP_ACP, WC_NO_BEST_FIT_CHARS, WideCharToMultiByte,
    };

    let errors = args.errors.as_ref().map(|s| s.as_str()).unwrap_or("strict");
    let strict = errors == "strict";
    let s = args.s.as_str();
    let char_len = args.s.char_len();

    if s.is_empty() {
        return Ok((Vec::new(), char_len));
    }

    // The Win32 API consumes UTF-16.
    let wide: Vec<u16> = s.encode_utf16().collect();

    // The API takes the length as an `i32`. A wrapping `as` cast could turn a huge
    // length into -1, which the API interprets as "NUL-terminated input" and would
    // make it read past the end of `wide`.
    let wide_len = i32::try_from(wide.len())
        .map_err(|_| vm.new_overflow_error("string too long for mbcs_encode".to_owned()))?;

    // Must be called immediately after a failed API call so the thread's last-error
    // value still belongs to that call.
    let os_failure = || {
        let err = std::io::Error::last_os_error();
        vm.new_os_error(format!("mbcs_encode failed: {}", err))
    };

    // First pass: ask for the required output size in bytes.
    // SAFETY: `wide` is a live allocation holding exactly `wide_len` UTF-16 units.
    // A null output pointer together with a size of 0 requests only the byte count.
    let size = unsafe {
        WideCharToMultiByte(
            CP_ACP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            std::ptr::null_mut(),
            0,
            std::ptr::null(),
            std::ptr::null_mut(),
        )
    };
    let capacity = match usize::try_from(size) {
        Ok(n) if n > 0 => n,
        _ => return Err(os_failure()),
    };

    let mut buffer = vec![0u8; capacity];
    let mut used_default_char: i32 = 0;
    // Only strict mode needs to know whether a substitution happened.
    let used_default_ptr: *mut i32 = if strict {
        &mut used_default_char
    } else {
        std::ptr::null_mut()
    };

    // Second pass: perform the conversion into `buffer`.
    // SAFETY: the input is as above; `buffer` owns `size` writable bytes;
    // `used_default_ptr` is either null or points at a live local `i32`.
    let written = unsafe {
        WideCharToMultiByte(
            CP_ACP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            buffer.as_mut_ptr().cast(),
            size,
            std::ptr::null(),
            used_default_ptr,
        )
    };
    let written = match usize::try_from(written) {
        Ok(n) if n > 0 => n,
        _ => return Err(os_failure()),
    };

    if strict && used_default_char != 0 {
        return Err(vm.new_unicode_encode_error(
            "'mbcs' codec can't encode characters: invalid character".to_string(),
        ));
    }

    buffer.truncate(written);
    Ok((buffer, char_len))
}
309+
310+
#[cfg(not(windows))]
228311
#[pyfunction]
229312
fn mbcs_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
230313
delegate_pycodecs!(mbcs_encode, args, vm)
231314
}
315+
316+
/// Positional arguments accepted by the Windows implementation of `mbcs_decode`.
#[cfg(windows)]
#[derive(FromArgs)]
struct MbcsDecodeArgs {
    /// Bytes-like object holding the encoded input.
    #[pyarg(positional)]
    data: ArgBytesLike,
    /// Optional error-handler name; `mbcs_decode` falls back to "strict" when it is absent.
    #[pyarg(positional, optional)]
    errors: Option<PyStrRef>,
    /// Accepted as a third positional argument (default `false`) but never read by
    /// `mbcs_decode`, hence the `dead_code` allowance.
    #[pyarg(positional, default = false)]
    #[allow(dead_code)]
    r#final: bool,
}
327+
328+
// Windows implementation of `mbcs_decode`.
//
// Decodes `args.data` from the system ANSI code page (`CP_ACP`) to UTF-16 through
// `MultiByteToWideChar`, then converts the result to a Rust `String`.
//
// Returns `(decoded_text, number_of_input_bytes)`; the whole input is always
// reported as consumed and `args.final` is not consulted.
//
// Error handling:
// * "strict" (the default) passes `MB_ERR_INVALID_CHARS`, so undecodable input
//   raises UnicodeDecodeError instead of being silently replaced.
// * any other `errors` value passes no flags and lets the API substitute its
//   replacement for invalid sequences.
//
// Errors:
// * OverflowError      - the input has more bytes than fit in an `i32`, the length
//                        type taken by the Win32 API.
// * UnicodeDecodeError - strict mode hit an invalid sequence, or the UTF-16 output
//                        was not valid.
// * OSError            - any other failure reported by `MultiByteToWideChar`.
#[cfg(windows)]
#[pyfunction]
fn mbcs_decode(args: MbcsDecodeArgs, vm: &VirtualMachine) -> PyResult<(String, usize)> {
    use windows_sys::Win32::Globalization::{
        CP_ACP, MB_ERR_INVALID_CHARS, MultiByteToWideChar,
    };

    // Win32 error code reported when `MB_ERR_INVALID_CHARS` rejects the input.
    const ERROR_NO_UNICODE_TRANSLATION: i32 = 1113;

    let errors = args.errors.as_ref().map(|s| s.as_str()).unwrap_or("strict");
    let strict = errors == "strict";
    let data = args.data.borrow_buf();
    let len = data.len();

    if data.is_empty() {
        return Ok((String::new(), 0));
    }

    // The API takes the length as an `i32`; a wrapping `as` cast could yield a
    // negative value, which the API treats as "NUL-terminated input".
    let byte_len = i32::try_from(len)
        .map_err(|_| vm.new_overflow_error("bytes too long for mbcs_decode".to_owned()))?;

    let flags = if strict { MB_ERR_INVALID_CHARS } else { 0 };

    // Must be called immediately after a failed API call so the thread's last-error
    // value still belongs to that call.
    let failure = || {
        let err = std::io::Error::last_os_error();
        if strict && err.raw_os_error() == Some(ERROR_NO_UNICODE_TRANSLATION) {
            vm.new_unicode_decode_error(format!("'mbcs' codec can't decode bytes: {}", err))
        } else {
            vm.new_os_error(format!("mbcs_decode failed: {}", err))
        }
    };

    // First pass: ask for the required output size in UTF-16 code units.
    // SAFETY: `data` is a live borrow of exactly `byte_len` bytes. A null output
    // pointer together with a size of 0 requests only the unit count.
    let size = unsafe {
        MultiByteToWideChar(
            CP_ACP,
            flags,
            data.as_ptr().cast(),
            byte_len,
            std::ptr::null_mut(),
            0,
        )
    };
    let capacity = match usize::try_from(size) {
        Ok(n) if n > 0 => n,
        _ => return Err(failure()),
    };

    let mut buffer = vec![0u16; capacity];

    // Second pass: perform the conversion into `buffer`.
    // SAFETY: the input is as above; `buffer` owns `size` writable UTF-16 units.
    let written = unsafe {
        MultiByteToWideChar(
            CP_ACP,
            flags,
            data.as_ptr().cast(),
            byte_len,
            buffer.as_mut_ptr(),
            size,
        )
    };
    let written = match usize::try_from(written) {
        Ok(n) if n > 0 => n,
        _ => return Err(failure()),
    };
    buffer.truncate(written);

    // Convert UTF-16 to a UTF-8 `String`.
    let s = String::from_utf16(&buffer)
        .map_err(|e| vm.new_unicode_decode_error(format!("mbcs_decode failed: {}", e)))?;

    Ok((s, len))
}
410+
411+
#[cfg(not(windows))]
232412
#[pyfunction]
233413
fn mbcs_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
234414
delegate_pycodecs!(mbcs_decode, args, vm)

0 commit comments

Comments
 (0)