diff --git a/Cargo.lock b/Cargo.lock index 0f6368e4497cb..e16648dbef325 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5430,9 +5430,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.39.0" +version = "0.39.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469" +checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581" dependencies = [ "libc", "objc2-core-foundation", diff --git a/library/core/src/array/drain.rs b/library/core/src/array/drain.rs index b2ff54bdfa21c..329b0e18b982b 100644 --- a/library/core/src/array/drain.rs +++ b/library/core/src/array/drain.rs @@ -6,8 +6,7 @@ impl<'l, 'f, T, U, F: FnMut(T) -> U> Drain<'l, 'f, T, F> { /// This function returns a function that lets you index the given array in const. /// As implemented it can optimize better than iterators, and can be constified. /// It acts like a sort of guard (owns the array) and iterator combined, which can be implemented - /// as it is a struct that implements const fn; - /// in that regard it is somewhat similar to an array::Iter implementing `UncheckedIterator`. + /// as it is a struct that implements const fn. /// The only method you're really allowed to call is `next()`, /// anything else is more or less UB, hence this function being unsafe. /// Moved elements will not be dropped. 
diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs index ab10120fe5548..56f188cfa622a 100644 --- a/library/core/src/array/mod.rs +++ b/library/core/src/array/mod.rs @@ -11,7 +11,7 @@ use crate::convert::Infallible; use crate::error::Error; use crate::hash::{self, Hash}; use crate::intrinsics::transmute_unchecked; -use crate::iter::{TrustedLen, UncheckedIterator, repeat_n}; +use crate::iter::{TrustedLen, repeat_n}; use crate::marker::Destruct; use crate::mem::{self, ManuallyDrop, MaybeUninit}; use crate::ops::{ @@ -52,7 +52,10 @@ pub use iter::IntoIter; #[must_use = "cloning is often expensive and is not expected to have side effects"] #[stable(feature = "array_repeat", since = "1.91.0")] pub fn repeat(val: T) -> [T; N] { - from_trusted_iterator(repeat_n(val, N)) + let mut iter = repeat_n(val, N); + // SAFETY: Unless a panic occurs, from_fn will call the closure N times, + // and repeat_n's next() will return Some for N times. + from_fn(move |_| unsafe { iter.next().unwrap_unchecked() }) } /// Creates an array where each element is produced by calling `f` with @@ -464,7 +467,15 @@ trait SpecArrayClone: Clone { impl SpecArrayClone for T { #[inline] default fn clone(array: &[T; N]) -> [T; N] { - from_trusted_iterator(array.iter().cloned()) + let mut ptr: *const T = array.as_ptr(); + // SAFETY: Unless a panic occurs, from_fn will call the closure N times, + // so our pointer arithmetic will be in bounds for the N-element array. + // This works even for ZSTs, since in that case, add() is a no-op. + from_fn(move |_| unsafe { + let old = ptr; + ptr = ptr.add(1); + (&*old).clone() + }) } } @@ -877,39 +888,6 @@ impl [T; N] { } } -/// Populate an array from the first `N` elements of `iter` -/// -/// # Panics -/// -/// If the iterator doesn't actually have enough items. -/// -/// By depending on `TrustedLen`, however, we can do that check up-front (where -/// it easily optimizes away) so it doesn't impact the loop that fills the array. 
-#[inline] -fn from_trusted_iterator(iter: impl UncheckedIterator) -> [T; N] { - try_from_trusted_iterator(iter.map(NeverShortCircuit)).0 -} - -#[inline] -fn try_from_trusted_iterator( - iter: impl UncheckedIterator, -) -> ChangeOutputType -where - R: Try, - R::Residual: Residual<[T; N]>, -{ - assert!(iter.size_hint().0 >= N); - fn next(mut iter: impl UncheckedIterator) -> impl FnMut(usize) -> T { - move |_| { - // SAFETY: We know that `from_fn` will call this at most N times, - // and we checked to ensure that we have at least that many items. - unsafe { iter.next_unchecked() } - } - } - - try_from_fn(next(iter)) -} - /// Version of [`try_from_fn`] using a passed-in slice in order to avoid /// needing to monomorphize for every array length. /// diff --git a/library/core/src/iter/adapters/cloned.rs b/library/core/src/iter/adapters/cloned.rs index 54d132813e4db..c0b6a4053c825 100644 --- a/library/core/src/iter/adapters/cloned.rs +++ b/library/core/src/iter/adapters/cloned.rs @@ -2,7 +2,7 @@ use core::num::NonZero; use crate::iter::adapters::zip::try_get_unchecked; use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; -use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen}; use crate::ops::Try; /// An iterator that clones the elements of an underlying iterator. @@ -142,19 +142,6 @@ where { } -impl<'a, I, T: 'a> UncheckedIterator for Cloned -where - I: UncheckedIterator, - T: Clone, -{ - unsafe fn next_unchecked(&mut self) -> T { - // SAFETY: `Cloned` is 1:1 with the inner iterator, so if the caller promised - // that there's an element left, the inner iterator has one too. 
- let item = unsafe { self.it.next_unchecked() }; - item.clone() - } -} - #[stable(feature = "default_iters", since = "1.70.0")] impl Default for Cloned { /// Creates a `Cloned` iterator from the default value of `I` diff --git a/library/core/src/iter/adapters/map.rs b/library/core/src/iter/adapters/map.rs index f768f077aa27e..75f70dcd9b58f 100644 --- a/library/core/src/iter/adapters/map.rs +++ b/library/core/src/iter/adapters/map.rs @@ -1,7 +1,7 @@ use crate::fmt; use crate::iter::adapters::zip::try_get_unchecked; use crate::iter::adapters::{SourceIter, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; -use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, InPlaceIterable, TrustedFused, TrustedLen}; use crate::num::NonZero; use crate::ops::Try; @@ -194,19 +194,6 @@ where { } -impl UncheckedIterator for Map -where - I: UncheckedIterator, - F: FnMut(I::Item) -> B, -{ - unsafe fn next_unchecked(&mut self) -> B { - // SAFETY: `Map` is 1:1 with the inner iterator, so if the caller promised - // that there's an element left, the inner iterator has one too. - let item = unsafe { self.iter.next_unchecked() }; - (self.f)(item) - } -} - #[doc(hidden)] #[unstable(feature = "trusted_random_access", issue = "none")] unsafe impl TrustedRandomAccess for Map where I: TrustedRandomAccess {} diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs index c5e199c30821d..4b19c7ffc00f8 100644 --- a/library/core/src/iter/adapters/zip.rs +++ b/library/core/src/iter/adapters/zip.rs @@ -1,8 +1,6 @@ use crate::cmp; use crate::fmt::{self, Debug}; -use crate::iter::{ - FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen, UncheckedIterator, -}; +use crate::iter::{FusedIterator, InPlaceIterable, SourceIter, TrustedFused, TrustedLen}; use crate::num::NonZero; /// An iterator that iterates two other iterators simultaneously. 
@@ -456,13 +454,6 @@ where { } -impl UncheckedIterator for Zip -where - A: UncheckedIterator, - B: UncheckedIterator, -{ -} - // Arbitrarily selects the left side of the zip iteration as extractable "source" // it would require negative trait bounds to be able to try both #[unstable(issue = "none", feature = "inplace_iteration")] diff --git a/library/core/src/iter/mod.rs b/library/core/src/iter/mod.rs index d532f1e568071..9ddafd47807f2 100644 --- a/library/core/src/iter/mod.rs +++ b/library/core/src/iter/mod.rs @@ -458,7 +458,6 @@ pub use self::traits::TrustedFused; pub use self::traits::TrustedLen; #[unstable(feature = "trusted_step", issue = "85731")] pub use self::traits::TrustedStep; -pub(crate) use self::traits::UncheckedIterator; #[stable(feature = "rust1", since = "1.0.0")] pub use self::traits::{ DoubleEndedIterator, ExactSizeIterator, Extend, FromIterator, IntoIterator, Product, Sum, diff --git a/library/core/src/iter/sources/repeat_n.rs b/library/core/src/iter/sources/repeat_n.rs index 4cbaf41852142..0d4ced1b0c8a6 100644 --- a/library/core/src/iter/sources/repeat_n.rs +++ b/library/core/src/iter/sources/repeat_n.rs @@ -1,5 +1,5 @@ use crate::fmt; -use crate::iter::{FusedIterator, TrustedLen, UncheckedIterator}; +use crate::iter::{FusedIterator, TrustedLen}; use crate::num::NonZero; use crate::ops::Try; @@ -211,5 +211,3 @@ impl FusedIterator for RepeatN {} #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for RepeatN {} -#[stable(feature = "iter_repeat_n", since = "1.82.0")] -impl UncheckedIterator for RepeatN {} diff --git a/library/core/src/iter/traits/mod.rs b/library/core/src/iter/traits/mod.rs index b330e9ffe21ac..7639704d5799c 100644 --- a/library/core/src/iter/traits/mod.rs +++ b/library/core/src/iter/traits/mod.rs @@ -4,7 +4,6 @@ mod double_ended; mod exact_size; mod iterator; mod marker; -mod unchecked_iterator; #[unstable(issue = "none", feature = "inplace_iteration")] pub use self::marker::InPlaceIterable; @@ -12,7 
+11,6 @@ pub use self::marker::InPlaceIterable; pub use self::marker::TrustedFused; #[unstable(feature = "trusted_step", issue = "85731")] pub use self::marker::TrustedStep; -pub(crate) use self::unchecked_iterator::UncheckedIterator; #[stable(feature = "rust1", since = "1.0.0")] pub use self::{ accum::{Product, Sum}, diff --git a/library/core/src/iter/traits/unchecked_iterator.rs b/library/core/src/iter/traits/unchecked_iterator.rs deleted file mode 100644 index ae4bfcad4e68f..0000000000000 --- a/library/core/src/iter/traits/unchecked_iterator.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::iter::TrustedLen; - -/// [`TrustedLen`] cannot have methods, so this allows augmenting it. -/// -/// It currently requires `TrustedLen` because it's unclear whether it's -/// reasonably possible to depend on the `size_hint` of anything else. -pub(crate) trait UncheckedIterator: TrustedLen { - /// Gets the next item from a non-empty iterator. - /// - /// Because there's always a value to return, that means it can return - /// the `Item` type directly, without wrapping it in an `Option`. - /// - /// # Safety - /// - /// This can only be called if `size_hint().0 != 0`, guaranteeing that - /// there's at least one item available. - /// - /// Otherwise (aka when `size_hint().1 == Some(0)`), this is UB. - /// - /// # Note to Implementers - /// - /// This has a default implementation using [`Option::unwrap_unchecked`]. - /// That's probably sufficient if your `next` *always* returns `Some`, - /// such as for infinite iterators. In more complicated situations, however, - /// sometimes there can still be `insertvalue`/`assume`/`extractvalue` - /// instructions remaining in the IR from the `Option` handling, at which - /// point you might want to implement this manually instead. 
- #[unstable(feature = "trusted_len_next_unchecked", issue = "37572")] - #[inline] - unsafe fn next_unchecked(&mut self) -> Self::Item { - let opt = self.next(); - // SAFETY: The caller promised that we're not empty, and - // `Self: TrustedLen` so we can actually trust the `size_hint`. - unsafe { opt.unwrap_unchecked() } - } -} diff --git a/library/core/src/num/uint_macros.rs b/library/core/src/num/uint_macros.rs index 8dc668584b658..0e6fa12702c46 100644 --- a/library/core/src/num/uint_macros.rs +++ b/library/core/src/num/uint_macros.rs @@ -710,6 +710,7 @@ macro_rules! uint_impl { /// assert_eq!(n.extract_bits(0b0010_0100), 0b0000_0011); /// assert_eq!(n.extract_bits(0xF0), 0b0000_1011); /// ``` + #[doc(alias = "pext")] #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")] #[must_use = "this returns the result of the operation, \ without modifying the original"] @@ -727,6 +728,7 @@ macro_rules! uint_impl { /// assert_eq!(n.deposit_bits(0b0101_0101), 0b0101_0001); /// assert_eq!(n.deposit_bits(0xF0), 0b1101_0000); /// ``` + #[doc(alias = "pdep")] #[unstable(feature = "uint_gather_scatter_bits", issue = "149069")] #[must_use = "this returns the result of the operation, \ without modifying the original"] diff --git a/library/core/src/slice/iter.rs b/library/core/src/slice/iter.rs index ac096afb38af0..18abdbed5af6f 100644 --- a/library/core/src/slice/iter.rs +++ b/library/core/src/slice/iter.rs @@ -5,9 +5,7 @@ mod macros; use super::{from_raw_parts, from_raw_parts_mut}; use crate::hint::assert_unchecked; -use crate::iter::{ - FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator, -}; +use crate::iter::{FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce}; use crate::marker::PhantomData; use crate::mem::{self, SizedTypeProperties}; use crate::num::NonZero; diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index 236bdf9d89cae..1c8a4d5ba3c2b 100644 --- 
a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -238,7 +238,7 @@ macro_rules! iterator { // SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs. unsafe { self.post_inc_start(n); - Some(self.next_unchecked()) + Some(self.post_inc_start(1).$into_ref()) } } @@ -481,16 +481,6 @@ macro_rules! iterator { #[unstable(feature = "trusted_len", issue = "37572")] unsafe impl TrustedLen for $name<'_, T> {} - impl<'a, T> UncheckedIterator for $name<'a, T> { - #[inline] - unsafe fn next_unchecked(&mut self) -> $elem { - // SAFETY: The caller promised there's at least one more item. - unsafe { - self.post_inc_start(1).$into_ref() - } - } - } - #[stable(feature = "default_iters", since = "1.70.0")] impl Default for $name<'_, T> { /// Creates an empty slice iterator. diff --git a/library/core/src/wtf8.rs b/library/core/src/wtf8.rs index a0978c3dafb48..698e17a6b8e6d 100644 --- a/library/core/src/wtf8.rs +++ b/library/core/src/wtf8.rs @@ -454,25 +454,50 @@ impl Wtf8 { #[track_caller] #[inline] pub fn check_utf8_boundary(&self, index: usize) { + let Err(err) = self.try_check_utf8_boundary(index) else { return }; + match err { + Utf8BoundaryError::NotABoundary => { + panic!("byte index {index} is not a codepoint boundary") + } + Utf8BoundaryError::OutOfBounds => panic!("byte index {index} is out of bounds"), + Utf8BoundaryError::BetweenSurrogates => { + panic!("byte index {index} lies between surrogate codepoints") + } + } + } + + #[track_caller] + #[inline] + pub fn try_check_utf8_boundary(&self, index: usize) -> Result<(), Utf8BoundaryError> { if index == 0 { - return; + return Ok(()); } match self.bytes.get(index) { Some(0xED) => (), // Might be a surrogate - Some(&b) if (b as i8) >= -0x40 => return, - Some(_) => panic!("byte index {index} is not a codepoint boundary"), - None if index == self.len() => return, - None => panic!("byte index {index} is out of bounds"), + Some(&b) if (b as i8) >= -0x40 => return Ok(()), + 
Some(_) => return Err(Utf8BoundaryError::NotABoundary), + None if index == self.len() => return Ok(()), + None => return Err(Utf8BoundaryError::OutOfBounds), } if self.bytes[index + 1] >= 0xA0 { // There's a surrogate after index. Now check before index. if index >= 3 && self.bytes[index - 3] == 0xED && self.bytes[index - 2] >= 0xA0 { - panic!("byte index {index} lies between surrogate codepoints"); + return Err(Utf8BoundaryError::BetweenSurrogates); } } + Ok(()) } } +// This error type is only used temporarily to provide better panic messages. +// It does not implement Error. +#[derive(Debug)] +pub enum Utf8BoundaryError { + NotABoundary, + OutOfBounds, + BetweenSurrogates, +} + /// Copied from core::str::raw::slice_unchecked #[inline] unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index f0cebb05c76a5..be606ae69d6d9 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -1055,6 +1055,61 @@ impl OsStr { OsString { inner: Buf::from_box(boxed) } } + /// Divides one string slice into two at an index. + /// + /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice. + /// + /// The argument, `mid`, should be a byte offset from the start of the string. + /// It must also be on a valid `OsStr` boundary. + /// See [`split_at_checked`][Self::split_at_checked] for the definition of a valid boundary. + /// + /// # Panics + /// + /// Panics if `mid` is not on a valid boundary, or if it is past the end of the last code point of the string slice. + /// For a non-panicking alternative see [`split_at_checked`][Self::split_at_checked]. 
+ #[unstable(feature = "os_str_split_at", issue = "none")] + pub fn split_at(&self, mid: usize) -> (&OsStr, &OsStr) { + self.inner.check_public_boundary(mid); + + // SAFETY: we've checked it's in bounds and a valid boundary + unsafe { self.split_at_unchecked(mid) } + } + + /// Divides one string slice into two at an index. + /// + /// The two slices returned go from the start of the string slice to `mid`, and from `mid` to the end of the string slice. + /// + /// The argument, `mid`, should be a valid byte offset from the start of the string. + /// It must also be on a valid `OsStr` boundary. + /// The method returns `None` if that’s not the case. + /// A valid `OsStr` boundary is one of: + /// - The start of the string + /// - The end of the string + /// - The start of a valid non-empty UTF-8 substring + /// - Immediately follows a valid non-empty UTF-8 substring + #[unstable(feature = "os_str_split_at", issue = "none")] + pub fn split_at_checked(&self, mid: usize) -> Option<(&OsStr, &OsStr)> { + self.inner.try_check_public_boundary(mid)?; + + // SAFETY: we've checked it's in bounds and a valid boundary + unsafe { Some(self.split_at_unchecked(mid)) } + } + + /// Splits an `OsStr` without checking if `mid` is a valid boundary. + /// You should use `split_at` or `split_at_checked` instead. + /// + /// # Safety + /// + /// Any caller must ensure `mid` is within bounds and lies on + /// a valid `OsStr` boundary for the platform. + unsafe fn split_at_unchecked(&self, mid: usize) -> (&OsStr, &OsStr) { + // SAFETY: it's up to the caller to ensure this is safe. + unsafe { + let (first, second) = self.as_encoded_bytes().split_at_unchecked(mid); + (Self::from_encoded_bytes_unchecked(first), Self::from_encoded_bytes_unchecked(second)) + } + } + /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function. 
/// diff --git a/library/std/src/ffi/os_str/tests.rs b/library/std/src/ffi/os_str/tests.rs index 3474f0ab50684..1075c43e0b3bf 100644 --- a/library/std/src/ffi/os_str/tests.rs +++ b/library/std/src/ffi/os_str/tests.rs @@ -289,6 +289,108 @@ fn slice_surrogate_edge() { assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate); } +#[test] +fn os_str_slice_at() { + #[track_caller] + fn slice_at_ok(input: &OsStr, index: usize, expected: (&str, &str)) { + let expected = (OsStr::new(expected.0), OsStr::new(expected.1)); + assert_eq!(input.split_at(index), expected); + assert_eq!(input.split_at_checked(index), Some(expected)); + } + + let os_str = OsStr::new("123გ🦀4"); + slice_at_ok(os_str, 0, ("", "123გ🦀4")); + slice_at_ok(os_str, 1, ("1", "23გ🦀4")); + slice_at_ok(os_str, 2, ("12", "3გ🦀4")); + slice_at_ok(os_str, 3, ("123", "გ🦀4")); + slice_at_ok(os_str, 6, ("123გ", "🦀4")); + slice_at_ok(os_str, 10, ("123გ🦀", "4")); + slice_at_ok(os_str, 11, ("123გ🦀4", "")); + + // Invalid boundaries should fail. + assert!(os_str.split_at_checked(4).is_none()); + assert!(os_str.split_at_checked(5).is_none()); + assert!(os_str.split_at_checked(7).is_none()); + assert!(os_str.split_at_checked(8).is_none()); + assert!(os_str.split_at_checked(9).is_none()); + // Out of bounds + assert!(os_str.split_at_checked(12).is_none()); +} + +#[test] +#[should_panic] +fn os_str_slice_at_out_of_bounds() { + let crab = OsStr::new("🦀"); + let _ = crab.split_at(5); +} + +#[test] +#[should_panic] +fn os_str_slice_at_mid_char() { + let crab = OsStr::new("🦀"); + let _ = crab.split_at(2); +} + +#[cfg(unix)] +#[test] +fn os_str_slice_at_unix() { + use crate::os::unix::ffi::OsStrExt; + + let broken_utf8 = OsStr::from_bytes(&"🦀".as_bytes()[..3]); + let invalid = OsStr::from_bytes(b"\xFF"); + + // Check that broken UTF-8 isn't treated as if it's valid. 
+ let mut os_string = invalid.to_os_string(); + os_string.push(broken_utf8); + assert_eq!(os_string.split_at_checked(1), None); + + // We should be able to split on ascii with invalid UTF-8 between + let os_string = OsStr::from_bytes(b"a\xFFa"); + assert_eq!(os_string.split_at_checked(1), Some(("a".as_ref(), OsStr::from_bytes(b"\xFFa")))); + assert_eq!(os_string.split_at_checked(2), Some((OsStr::from_bytes(b"a\xFF"), "a".as_ref(),))); + + let os_string = OsStr::from_bytes(&"abc🦀".as_bytes()[..6]); + assert_eq!( + os_string.split_at_checked(3), + Some(("abc".as_ref(), OsStr::from_bytes(b"\xF0\x9F\xA6"))) + ); + + let mut os_string = invalid.to_os_string(); + os_string.push("🦀"); + assert_eq!(os_string.split_at_checked(1), Some((invalid, "🦀".as_ref()))); +} + +#[test] +#[cfg(windows)] +fn os_str_slice_at_windows() { + use crate::os::windows::ffi::OsStringExt; + + // slicing between unpaired surrogates should not be possible + // checking is implemented as a loop so we're agnostic towards + // the internal encoding + let os_string = OsString::from_wide(&[0xD800, 0xD800]); + for i in 1..os_string.len() { + assert_eq!(os_string.split_at_checked(i), None); + } + // For completeness, check that splitting at the start and end still works. 
+ assert!(os_string.split_at_checked(0).is_some()); + assert!(os_string.split_at_checked(os_string.len()).is_some()); + + // check that slicing before and after unpaired surrogates work + let surrogate = OsString::from_wide(&[0xD800]); + + let mut os_string = surrogate.clone(); + os_string.push("🦀"); + assert_eq!( + os_string.split_at_checked(surrogate.len()), + Some((surrogate.as_ref(), "🦀".as_ref())) + ); + + let mut os_string = OsString::from("🦀"); + os_string.push(&surrogate); + assert_eq!(os_string.split_at_checked("🦀".len()), Some(("🦀".as_ref(), surrogate.as_ref()))); +} + #[test] fn clone_to_uninit() { let a = OsStr::new("hello.txt"); diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs index 5482663ef0079..a57da01a5d85d 100644 --- a/library/std/src/sys/os_str/bytes.rs +++ b/library/std/src/sys/os_str/bytes.rs @@ -238,16 +238,24 @@ impl Slice { #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { + if self.try_check_public_boundary(index).is_none() { + panic!("byte index {index} is not an OsStr boundary"); + } + } + + #[track_caller] + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { if index == 0 || index == self.inner.len() { - return; + return Some(()); } if index < self.inner.len() && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) { - return; + return Some(()); } - slow_path(&self.inner, index); + return slow_path(&self.inner, index); /// We're betting that typical splits will involve an ASCII character. /// @@ -255,26 +263,26 @@ impl Slice { /// better assembly. #[track_caller] #[inline(never)] - fn slow_path(bytes: &[u8], index: usize) { - let (before, after) = bytes.split_at(index); + fn slow_path(bytes: &[u8], index: usize) -> Option<()> { + let (before, after) = bytes.split_at_checked(index)?; // UTF-8 takes at most 4 bytes per codepoint, so we don't // need to check more than that. 
let after = after.get(..4).unwrap_or(after); match str::from_utf8(after) { - Ok(_) => return, - Err(err) if err.valid_up_to() != 0 => return, + Ok(_) => return Some(()), + Err(err) if err.valid_up_to() != 0 => return Some(()), Err(_) => (), } for len in 2..=4.min(index) { let before = &before[index - len..]; if str::from_utf8(before).is_ok() { - return; + return Some(()); } } - panic!("byte index {index} is not an OsStr boundary"); + None } } diff --git a/library/std/src/sys/os_str/utf8.rs b/library/std/src/sys/os_str/utf8.rs index a324a478325e6..289f58aa480f7 100644 --- a/library/std/src/sys/os_str/utf8.rs +++ b/library/std/src/sys/os_str/utf8.rs @@ -224,10 +224,16 @@ impl Slice { Slice::from_str(unsafe { str::from_utf8_unchecked(s) }) } + #[track_caller] + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { + if self.inner.is_char_boundary(index) { Some(()) } else { None } + } + #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { - if !self.inner.is_char_boundary(index) { + if self.try_check_public_boundary(index).is_none() { panic!("byte index {index} is not an OsStr boundary"); } } diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index 1f130d91cf393..9a32ab3f3ea12 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -240,6 +240,11 @@ impl Slice { unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) } } + #[inline] + pub fn try_check_public_boundary(&self, index: usize) -> Option<()> { + self.inner.try_check_utf8_boundary(index).ok() + } + #[track_caller] #[inline] pub fn check_public_boundary(&self, index: usize) { diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock index e22fde79a8444..7c890e3f2004c 100644 --- a/src/bootstrap/Cargo.lock +++ b/src/bootstrap/Cargo.lock @@ -742,9 +742,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.39.0" +version = "0.39.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9f9fe3d2b7b75cf4f2805e5b9926e8ac47146667b16b86298c4a8bf08cc469" +checksum = "14311e7e9a03114cd4b65eedd54e8fed2945e17f08586ae97ef53bc0669f9581" dependencies = [ "libc", "memchr", diff --git a/src/bootstrap/Cargo.toml b/src/bootstrap/Cargo.toml index 363802093a13a..f4b89e8a28b0f 100644 --- a/src/bootstrap/Cargo.toml +++ b/src/bootstrap/Cargo.toml @@ -57,7 +57,7 @@ walkdir = "2.4" xz2 = "0.1" # Dependencies needed by the build-metrics feature -sysinfo = { version = "0.39.0", default-features = false, optional = true, features = ["system"] } +sysinfo = { version = "0.39.2", default-features = false, optional = true, features = ["system"] } # Dependencies needed by the `tracing` feature chrono = { version = "0.4", default-features = false, optional = true, features = ["now", "std"] } diff --git a/src/tools/opt-dist/Cargo.toml b/src/tools/opt-dist/Cargo.toml index d80e6caac0885..5893bf87b5c34 100644 --- a/src/tools/opt-dist/Cargo.toml +++ b/src/tools/opt-dist/Cargo.toml @@ -10,7 +10,7 @@ log = "0.4" anyhow = "1" humantime = "2" humansize = "2" -sysinfo = { version = "0.39.0", default-features = false, features = ["disk"] } +sysinfo = { version = "0.39.2", default-features = false, features = ["disk"] } fs_extra = "1" camino = "1" tar = "0.4.45"