|
2 | 2 |
|
3 | 3 | use std::{io, os::raw::c_char, path::PathBuf, ptr};
|
4 | 4 |
|
5 |
| -use crate::{translate::*, ConvertError, Error, GStr, GString, Slice}; |
| 5 | +use crate::{translate::*, ConvertError, Error, GStr, GString, NormalizeMode, Slice}; |
6 | 6 |
|
7 | 7 | // rustdoc-stripper-ignore-next
|
8 | 8 | /// A wrapper for [`ConvertError`](crate::ConvertError) that can hold an offset into the input
|
@@ -318,6 +318,74 @@ pub fn locale_to_utf8(opsysstring: &[u8]) -> Result<(crate::GString, usize), Cvt
|
318 | 318 | }
|
319 | 319 | }
|
320 | 320 |
|
| 321 | +#[doc(alias = "g_utf8_to_ucs4")] |
| 322 | +#[doc(alias = "g_utf8_to_ucs4_fast")] |
| 323 | +#[doc(alias = "utf8_to_ucs4")] |
| 324 | +pub fn utf8_to_utf32(str: impl AsRef<str>) -> Slice<char> { |
| 325 | + unsafe { |
| 326 | + let mut items_written = 0; |
| 327 | + |
| 328 | + let str_as_utf32 = ffi::g_utf8_to_ucs4_fast( |
| 329 | + str.as_ref().as_ptr().cast::<c_char>(), |
| 330 | + str.as_ref().len() as _, |
| 331 | + &mut items_written, |
| 332 | + ); |
| 333 | + |
| 334 | + // NOTE: We assume that u32 and char have the same layout and trust that glib won't give us |
| 335 | + // invalid UTF-32 codepoints |
| 336 | + Slice::from_glib_full_num(str_as_utf32.cast::<char>(), items_written as usize) |
| 337 | + } |
| 338 | +} |
| 339 | + |
| 340 | +#[doc(alias = "g_ucs4_to_utf8")] |
| 341 | +#[doc(alias = "ucs4_to_utf8")] |
| 342 | +pub fn utf32_to_utf8(str: impl AsRef<[char]>) -> GString { |
| 343 | + let mut items_read = 0; |
| 344 | + let mut items_written = 0; |
| 345 | + let mut error = ptr::null_mut(); |
| 346 | + |
| 347 | + unsafe { |
| 348 | + let str_as_utf8 = ffi::g_ucs4_to_utf8( |
| 349 | + str.as_ref().as_ptr().cast::<u32>(), |
| 350 | + str.as_ref().len() as _, |
| 351 | + &mut items_read, |
| 352 | + &mut items_written, |
| 353 | + &mut error, |
| 354 | + ); |
| 355 | + |
| 356 | + assert!( |
| 357 | + error.is_null(), |
| 358 | + "Rust `char` should always be convertible to UTF-8" |
| 359 | + ); |
| 360 | + |
| 361 | + GString::from_glib_full_num(str_as_utf8, items_written as usize) |
| 362 | + } |
| 363 | +} |
| 364 | + |
| 365 | +#[doc(alias = "g_utf8_casefold")] |
| 366 | +#[doc(alias = "utf8_casefold")] |
| 367 | +pub fn casefold(str: impl AsRef<str>) -> GString { |
| 368 | + unsafe { |
| 369 | + let str = ffi::g_utf8_casefold(str.as_ref().as_ptr().cast(), str.as_ref().len() as isize); |
| 370 | + |
| 371 | + from_glib_full(str) |
| 372 | + } |
| 373 | +} |
| 374 | + |
| 375 | +#[doc(alias = "g_utf8_normalize")] |
| 376 | +#[doc(alias = "utf8_normalize")] |
| 377 | +pub fn normalize(str: impl AsRef<str>, mode: NormalizeMode) -> GString { |
| 378 | + unsafe { |
| 379 | + let str = ffi::g_utf8_normalize( |
| 380 | + str.as_ref().as_ptr().cast(), |
| 381 | + str.as_ref().len() as isize, |
| 382 | + mode.into_glib(), |
| 383 | + ); |
| 384 | + |
| 385 | + from_glib_full(str) |
| 386 | + } |
| 387 | +} |
| 388 | + |
321 | 389 | #[cfg(test)]
|
322 | 390 | mod tests {
|
323 | 391 | #[test]
|
@@ -350,4 +418,15 @@ mod tests {
|
350 | 418 | fn filename_charsets() {
|
351 | 419 | let _ = super::filename_charsets();
|
352 | 420 | }
|
| 421 | + |
#[test]
fn utf8_and_utf32() {
    // UTF-32 -> UTF-8, including a code point that needs four bytes in UTF-8.
    let code_points = ['A', 'b', '🤔'];
    let encoded = super::utf32_to_utf8(code_points);
    assert_eq!(encoded, "Ab🤔");

    // UTF-8 -> UTF-32, mixing four-, one- and two-byte sequences.
    let text = "🤔 ț";
    let decoded = super::utf8_to_utf32(text);
    assert_eq!(decoded.as_slice(), &['🤔', ' ', 'ț']);
}
353 | 432 | }
|
0 commit comments