Skip to content

Commit a204467

Browse files
committed
oem codec
1 parent 9cb5ab7 commit a204467

File tree

1 file changed

+190
-0
lines changed

1 file changed

+190
-0
lines changed

crates/vm/src/stdlib/codecs.rs

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,196 @@ mod _codecs {
413413
fn mbcs_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
414414
delegate_pycodecs!(mbcs_decode, args, vm)
415415
}
416+
417+
// Positional arguments accepted by the Windows implementation of `oem_encode`:
//   * `s`      - the string to encode (required).
//   * `errors` - optional error-handler name; `oem_encode` treats a missing
//                value as "strict".
#[cfg(windows)]
418+
#[derive(FromArgs)]
419+
struct OemEncodeArgs {
420+
#[pyarg(positional)]
421+
s: PyStrRef,
422+
#[pyarg(positional, optional)]
423+
errors: Option<PyStrRef>,
424+
}
425+
426+
// Encode a string with the system OEM code page (`CP_OEMCP`) through
// `WideCharToMultiByte`.
//
// Returns `(encoded_bytes, number_of_characters_consumed)`.
//
// Error handling:
//   * `errors == "strict"` (the default): raises UnicodeEncodeError when the
//     conversion had to substitute the code page's default character.
//   * any other handler name: the substitution performed by
//     `WideCharToMultiByte` is accepted as-is; individual handlers are not
//     distinguished here.
//   * OverflowError if the UTF-16 form does not fit in the `i32` length the
//     Win32 API takes; OSError if the API call itself fails.
#[cfg(windows)]
#[pyfunction]
fn oem_encode(args: OemEncodeArgs, vm: &VirtualMachine) -> PyResult<(Vec<u8>, usize)> {
    use std::os::windows::ffi::OsStrExt;
    use windows_sys::Win32::Globalization::{
        CP_OEMCP, WC_NO_BEST_FIT_CHARS, WideCharToMultiByte,
    };

    let errors = args.errors.as_ref().map(|s| s.as_str()).unwrap_or("strict");
    let strict = errors == "strict";
    let s = args.s.as_str();
    let char_len = args.s.char_len();

    if s.is_empty() {
        return Ok((Vec::new(), char_len));
    }

    // Convert UTF-8 string to UTF-16
    let wide: Vec<u16> = std::ffi::OsStr::new(s).encode_wide().collect();

    // The Win32 API takes an `i32` length; refuse inputs that would be
    // silently truncated by a plain `as` cast.
    let wide_len = i32::try_from(wide.len()).map_err(|_| {
        vm.new_overflow_error("string too long to encode with the oem codec".to_string())
    })?;

    // Get the required buffer size
    // SAFETY: `wide` is a live buffer of exactly `wide_len` UTF-16 units; a null
    // output pointer with size 0 asks the API only for the required byte count.
    let size = unsafe {
        WideCharToMultiByte(
            CP_OEMCP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            std::ptr::null_mut(),
            0,
            std::ptr::null(),
            std::ptr::null_mut(),
        )
    };

    if size <= 0 {
        let err = std::io::Error::last_os_error();
        return Err(vm.new_os_error(format!("oem_encode failed: {}", err)));
    }

    // `size` is strictly positive here, so the cast cannot wrap.
    let mut buffer = vec![0u8; size as usize];
    let mut used_default_char: i32 = 0;
    // Only strict mode needs to know whether a substitution happened.
    let used_default_ptr: *mut i32 = if strict {
        &mut used_default_char
    } else {
        std::ptr::null_mut()
    };

    // SAFETY: `wide` holds `wide_len` units, `buffer` holds `size` writable
    // bytes, and `used_default_ptr` is either null or points at a live `i32`.
    let result = unsafe {
        WideCharToMultiByte(
            CP_OEMCP,
            WC_NO_BEST_FIT_CHARS,
            wide.as_ptr(),
            wide_len,
            buffer.as_mut_ptr().cast(),
            size,
            std::ptr::null(),
            used_default_ptr,
        )
    };

    if result <= 0 {
        let err = std::io::Error::last_os_error();
        return Err(vm.new_os_error(format!("oem_encode failed: {}", err)));
    }

    if strict && used_default_char != 0 {
        return Err(vm.new_unicode_encode_error(
            "'oem' codec can't encode characters: invalid character".to_string(),
        ));
    }

    // `result` is strictly positive here: the number of bytes actually written.
    buffer.truncate(result as usize);
    Ok((buffer, char_len))
}
498+
499+
// Non-Windows build of `oem_encode`: the call is forwarded unchanged through
// the `delegate_pycodecs!` macro (defined outside this view) instead of using
// the Win32 code-page API.
#[cfg(not(windows))]
500+
#[pyfunction]
501+
fn oem_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
502+
delegate_pycodecs!(oem_encode, args, vm)
503+
}
504+
505+
// Positional arguments accepted by the Windows implementation of `oem_decode`:
//   * `data`   - bytes-like object to decode (required).
//   * `errors` - optional error-handler name.
//   * `final`  - defaults to false; it is accepted but never read by
//                `oem_decode`, hence the `dead_code` allowance.
#[cfg(windows)]
506+
#[derive(FromArgs)]
507+
struct OemDecodeArgs {
508+
#[pyarg(positional)]
509+
data: ArgBytesLike,
510+
#[pyarg(positional, optional)]
511+
errors: Option<PyStrRef>,
512+
#[pyarg(positional, default = false)]
513+
#[allow(dead_code)]
514+
r#final: bool,
515+
}
516+
517+
// Decode bytes from the system OEM code page (`CP_OEMCP`) through
// `MultiByteToWideChar`.
//
// Returns `(decoded_string, number_of_bytes_consumed)`; the whole input is
// always reported as consumed. The `final` argument is accepted but unused.
//
// Error handling:
//   * `errors == "strict"` (the default): the conversion runs with
//     `MB_ERR_INVALID_CHARS`, and an invalid byte sequence raises
//     UnicodeDecodeError.
//   * any other handler name: the conversion runs with no flags, so the API's
//     own replacement behaviour applies; individual handlers are not
//     distinguished here.
//   * OverflowError if the input does not fit in the `i32` length the Win32
//     API takes; OSError for any other API failure.
#[cfg(windows)]
#[pyfunction]
fn oem_decode(args: OemDecodeArgs, vm: &VirtualMachine) -> PyResult<(String, usize)> {
    use windows_sys::Win32::Globalization::{
        CP_OEMCP, MB_ERR_INVALID_CHARS, MultiByteToWideChar,
    };

    // Win32 error code reported when the input contains bytes that are not
    // valid in the code page and MB_ERR_INVALID_CHARS was requested.
    const ERROR_NO_UNICODE_TRANSLATION: i32 = 1113;

    let errors = args.errors.as_ref().map(|s| s.as_str()).unwrap_or("strict");
    let strict = errors == "strict";
    // Strict mode asks the API to fail on invalid input; every other mode
    // uses 0 flags for replacement behavior.
    let flags = if strict { MB_ERR_INVALID_CHARS } else { 0 };

    let data = args.data.borrow_buf();
    let len = data.len();

    if data.is_empty() {
        return Ok((String::new(), 0));
    }

    // The Win32 API takes an `i32` length; refuse inputs that would be
    // silently truncated by a plain `as` cast.
    let byte_len = i32::try_from(len).map_err(|_| {
        vm.new_overflow_error("bytes object too long to decode with the oem codec".to_string())
    })?;

    // Build the Python exception for a failed API call. Must be invoked
    // immediately after the failing call so the thread's last-error is intact.
    let conversion_error = || {
        let err = std::io::Error::last_os_error();
        if strict && err.raw_os_error() == Some(ERROR_NO_UNICODE_TRANSLATION) {
            vm.new_unicode_decode_error(
                "'oem' codec can't decode bytes: invalid character".to_string(),
            )
        } else {
            vm.new_os_error(format!("oem_decode failed: {}", err))
        }
    };

    // Get the required buffer size for UTF-16
    // SAFETY: `data` is a live buffer of exactly `byte_len` bytes; a null
    // output pointer with size 0 asks the API only for the required unit count.
    let size = unsafe {
        MultiByteToWideChar(
            CP_OEMCP,
            flags,
            data.as_ptr().cast(),
            byte_len,
            std::ptr::null_mut(),
            0,
        )
    };

    if size <= 0 {
        return Err(conversion_error());
    }

    // `size` is strictly positive here, so the cast cannot wrap.
    let mut buffer = vec![0u16; size as usize];

    // SAFETY: `data` holds `byte_len` readable bytes and `buffer` holds `size`
    // writable UTF-16 units.
    let result = unsafe {
        MultiByteToWideChar(
            CP_OEMCP,
            flags,
            data.as_ptr().cast(),
            byte_len,
            buffer.as_mut_ptr(),
            size,
        )
    };

    if result <= 0 {
        return Err(conversion_error());
    }

    // `result` is strictly positive here: the number of units actually written.
    buffer.truncate(result as usize);

    // Convert UTF-16 to UTF-8 String
    let s = String::from_utf16(&buffer)
        .map_err(|e| vm.new_unicode_decode_error(format!("oem_decode failed: {}", e)))?;

    Ok((s, len))
}
599+
600+
// Non-Windows build of `oem_decode`: the call is forwarded unchanged through
// the `delegate_pycodecs!` macro (defined outside this view) instead of using
// the Win32 code-page API.
#[cfg(not(windows))]
601+
#[pyfunction]
602+
fn oem_decode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
603+
delegate_pycodecs!(oem_decode, args, vm)
604+
}
605+
416606
#[pyfunction]
417607
fn readbuffer_encode(args: FuncArgs, vm: &VirtualMachine) -> PyResult {
418608
delegate_pycodecs!(readbuffer_encode, args, vm)

0 commit comments

Comments
 (0)