Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/libcore/char/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ pub fn from_u32(i: u32) -> Option<char> {
#[inline]
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
pub unsafe fn from_u32_unchecked(i: u32) -> char {
transmute(i)
if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { transmute(i) }
}

#[stable(feature = "char_convert", since = "1.13.0")]
Expand Down Expand Up @@ -218,7 +218,7 @@ impl TryFrom<u32> for char {
Err(CharTryFromError(()))
} else {
// SAFETY: checked that it's a legal unicode value
Ok(unsafe { from_u32_unchecked(i) })
Ok(unsafe { transmute(i) })
}
}
}
Expand Down
99 changes: 59 additions & 40 deletions src/libcore/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,16 +593,7 @@ impl char {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn len_utf8(self) -> usize {
let code = self as u32;
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
}
len_utf8(self as u32)
}

/// Returns the number of 16-bit code units this `char` would need if
Expand Down Expand Up @@ -670,36 +661,7 @@ impl char {
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
#[inline]
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
let code = self as u32;
let len = self.len_utf8();
match (len, &mut dst[..]) {
(1, [a, ..]) => {
*a = code as u8;
}
(2, [a, b, ..]) => {
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
*b = (code & 0x3F) as u8 | TAG_CONT;
}
(3, [a, b, c, ..]) => {
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*c = (code & 0x3F) as u8 | TAG_CONT;
}
(4, [a, b, c, d, ..]) => {
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*d = (code & 0x3F) as u8 | TAG_CONT;
}
_ => panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
len,
code,
dst.len(),
),
};
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
encode_utf8_raw(self as u32, dst)
}

/// Encodes this character as UTF-16 into the provided `u16` buffer,
Expand Down Expand Up @@ -1673,3 +1635,60 @@ impl char {
}
}
}

#[inline]
fn len_utf8(code: u32) -> usize {
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
}
}

/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Unlike `char::encode_utf8`, this method can be called on codepoints in the surrogate range.
///
/// # Panics
///
/// Panics if the buffer is not large enough.
/// A buffer of length four is large enough to encode any `char`.
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
#[doc(hidden)]
#[inline]
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut str {
let len = len_utf8(code);
match (len, &mut dst[..]) {
(1, [a, ..]) => {
*a = code as u8;
}
(2, [a, b, ..]) => {
*a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
*b = (code & 0x3F) as u8 | TAG_CONT;
}
(3, [a, b, c, ..]) => {
*a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
*b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*c = (code & 0x3F) as u8 | TAG_CONT;
}
(4, [a, b, c, d, ..]) => {
*a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
*b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*d = (code & 0x3F) as u8 | TAG_CONT;
}
_ => panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
len,
code,
dst.len(),
),
};
// SAFETY: We just wrote UTF-8 content in, so converting to str is fine.
unsafe { from_utf8_unchecked_mut(&mut dst[..len]) }
}
4 changes: 4 additions & 0 deletions src/libcore/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
#[stable(feature = "unicode_version", since = "1.45.0")]
pub use crate::unicode::UNICODE_VERSION;

// perma-unstable re-exports
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
pub use self::methods::encode_utf8_raw;

use crate::fmt::{self, Write};
use crate::iter::FusedIterator;

Expand Down