Skip to content

Commit b00e1ff

Browse files
committed
Impl String::into_chars
Signed-off-by: tison <[email protected]>
1 parent c113247 commit b00e1ff

File tree

1 file changed

+163
-2
lines changed

1 file changed

+163
-2
lines changed

library/alloc/src/string.rs

Lines changed: 163 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ use crate::alloc::Allocator;
6262
use crate::borrow::{Cow, ToOwned};
6363
use crate::boxed::Box;
6464
use crate::collections::TryReserveError;
65-
use crate::str::{self, Chars, Utf8Error, from_utf8_unchecked_mut};
65+
use crate::str::{self, CharIndices, Chars, Utf8Error, from_utf8_unchecked_mut};
6666
#[cfg(not(no_global_oom_handling))]
6767
use crate::str::{FromStr, from_boxed_utf8_unchecked};
68-
use crate::vec::Vec;
68+
use crate::vec::{self, Vec};
6969

7070
/// A UTF-8–encoded, growable string.
7171
///
@@ -1952,6 +1952,61 @@ impl String {
19521952
Drain { start, end, iter: chars_iter, string: self_ptr }
19531953
}
19541954

1955+
/// Converts a `String` into an iterator over the [`char`]s of the string.
1956+
///
1957+
/// As a string consists of valid UTF-8, we can iterate through a string
1958+
/// by [`char`]. This method returns such an iterator.
1959+
///
1960+
/// It's important to remember that [`char`] represents a Unicode Scalar
1961+
/// Value, and might not match your idea of what a 'character' is. Iteration
1962+
/// over grapheme clusters may be what you actually want. That functionality
1963+
/// is not provided by Rust's standard library, check crates.io instead.
1964+
///
1965+
/// # Examples
1966+
///
1967+
/// Basic usage:
1968+
///
1969+
/// ```
1970+
/// #![feature(string_into_chars)]
1971+
///
1972+
/// let word = String::from("goodbye");
1973+
///
1974+
/// let mut chars = word.into_chars();
1975+
///
1976+
/// assert_eq!(Some('g'), chars.next());
1977+
/// assert_eq!(Some('o'), chars.next());
1978+
/// assert_eq!(Some('o'), chars.next());
1979+
/// assert_eq!(Some('d'), chars.next());
1980+
/// assert_eq!(Some('b'), chars.next());
1981+
/// assert_eq!(Some('y'), chars.next());
1982+
/// assert_eq!(Some('e'), chars.next());
1983+
///
1984+
/// assert_eq!(None, chars.next());
1985+
/// ```
1986+
///
1987+
/// Remember, [`char`]s might not match your intuition about characters:
1988+
///
1989+
/// ```
1990+
/// #![feature(string_into_chars)]
1991+
///
1992+
/// let y = String::from("y̆");
1993+
///
1994+
/// let mut chars = y.into_chars();
1995+
///
1996+
/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
1997+
/// assert_eq!(Some('\u{0306}'), chars.next());
1998+
///
1999+
/// assert_eq!(None, chars.next());
2000+
/// ```
2001+
///
2002+
/// [`char`]: prim@char
2003+
#[inline]
2004+
#[must_use = "`self` will be dropped if the result is not used"]
2005+
#[unstable(feature = "string_into_chars", issue = "133125")]
2006+
pub fn into_chars(self) -> IntoChars {
2007+
IntoChars { bytes: self.into_bytes().into_iter() }
2008+
}
2009+
19552010
/// Removes the specified range in the string,
19562011
/// and replaces it with the given string.
19572012
/// The given string doesn't need to be the same length as the range.
@@ -3090,6 +3145,112 @@ impl fmt::Write for String {
30903145
}
30913146
}
30923147

3148+
/// An iterator over the [`char`]s of a string.
3149+
///
3150+
/// This struct is created by the [`into_chars`] method on [`String`].
3151+
/// See its documentation for more.
3152+
///
3153+
/// [`char`]: prim@char
3154+
/// [`into_chars`]: String::into_chars
3155+
#[cfg_attr(not(no_global_oom_handling), derive(Clone))]
3156+
#[must_use = "iterators are lazy and do nothing unless consumed"]
3157+
#[unstable(feature = "string_into_chars", issue = "133125")]
3158+
pub struct IntoChars {
3159+
bytes: vec::IntoIter<u8>,
3160+
}
3161+
3162+
#[unstable(feature = "string_into_chars", issue = "133125")]
3163+
impl fmt::Debug for IntoChars {
3164+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3165+
f.debug_tuple("IntoChars").field(&self.as_str()).finish()
3166+
}
3167+
}
3168+
3169+
impl IntoChars {
3170+
/// Views the underlying data as a subslice of the original data.
3171+
///
3172+
/// # Examples
3173+
///
3174+
/// ```
3175+
/// #![feature(string_into_chars)]
3176+
///
3177+
/// let mut chars = String::from("abc").into_chars();
3178+
///
3179+
/// assert_eq!(chars.as_str(), "abc");
3180+
/// chars.next();
3181+
/// assert_eq!(chars.as_str(), "bc");
3182+
/// chars.next();
3183+
/// chars.next();
3184+
/// assert_eq!(chars.as_str(), "");
3185+
/// ```
3186+
#[unstable(feature = "string_into_chars", issue = "133125")]
3187+
#[must_use]
3188+
#[inline]
3189+
pub fn as_str(&self) -> &str {
3190+
// SAFETY: `bytes` is a valid UTF-8 string.
3191+
unsafe { str::from_utf8_unchecked(self.bytes.as_slice()) }
3192+
}
3193+
3194+
#[inline]
3195+
fn iter(&self) -> CharIndices<'_> {
3196+
self.as_str().char_indices()
3197+
}
3198+
}
3199+
3200+
#[unstable(feature = "string_into_chars", issue = "133125")]
3201+
impl Iterator for IntoChars {
3202+
type Item = char;
3203+
3204+
#[inline]
3205+
fn next(&mut self) -> Option<char> {
3206+
let mut iter = self.iter();
3207+
match iter.next() {
3208+
None => None,
3209+
Some((_, ch)) => {
3210+
let offset = iter.offset();
3211+
// `offset` is a valid index.
3212+
let _ = self.bytes.advance_by(offset);
3213+
Some(ch)
3214+
}
3215+
}
3216+
}
3217+
3218+
#[inline]
3219+
fn count(self) -> usize {
3220+
self.iter().count()
3221+
}
3222+
3223+
#[inline]
3224+
fn size_hint(&self) -> (usize, Option<usize>) {
3225+
self.iter().size_hint()
3226+
}
3227+
3228+
#[inline]
3229+
fn last(mut self) -> Option<char> {
3230+
self.next_back()
3231+
}
3232+
}
3233+
3234+
#[unstable(feature = "string_into_chars", issue = "133125")]
3235+
impl DoubleEndedIterator for IntoChars {
3236+
#[inline]
3237+
fn next_back(&mut self) -> Option<char> {
3238+
let len = self.as_str().len();
3239+
let mut iter = self.iter();
3240+
match iter.next_back() {
3241+
None => None,
3242+
Some((idx, ch)) => {
3243+
// `idx` is a valid index.
3244+
let _ = self.bytes.advance_back_by(len - idx);
3245+
Some(ch)
3246+
}
3247+
}
3248+
}
3249+
}
3250+
3251+
#[unstable(feature = "string_into_chars", issue = "133125")]
3252+
impl FusedIterator for IntoChars {}
3253+
30933254
/// A draining iterator for `String`.
30943255
///
30953256
/// This struct is created by the [`drain`] method on [`String`]. See its

0 commit comments

Comments
 (0)