Skip to content

Commit 00bd4f6

Browse files
committed
Add the string implementation
1 parent d4d683f commit 00bd4f6

File tree

4 files changed

+295
-5
lines changed

4 files changed

+295
-5
lines changed

rustfmt.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
wrap_comments = true
2+
format_code_in_doc_comments = true

src/lib.rs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,32 @@
1-
#[cfg(test)]
2-
mod tests {
3-
#[test]
4-
fn it_works() {
5-
assert_eq!(2 + 2, 4);
1+
#![no_std]
2+
extern crate alloc;
3+
4+
mod string;
5+
6+
use core::ops::{Deref, DerefMut};
7+
8+
pub use string::RetainMoreString;
9+
/// A wrapper type which implements the traits safely.
///
/// The wrapped value is stored in the public field `.0`. It can also be
/// reached through [`Deref`]/[`DerefMut`], and any `T` can be turned into a
/// `SafeImpl<T>` with [`From`].
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct SafeImpl<T>(pub T);

impl<T> Deref for SafeImpl<T> {
    type Target = T;

    /// Borrows the wrapped value.
    fn deref(&self) -> &T {
        let SafeImpl(inner) = self;
        inner
    }
}

impl<T> DerefMut for SafeImpl<T> {
    /// Mutably borrows the wrapped value.
    fn deref_mut(&mut self) -> &mut T {
        let SafeImpl(inner) = self;
        inner
    }
}

impl<T> From<T> for SafeImpl<T> {
    /// Wraps `it` without modifying it.
    fn from(it: T) -> Self {
        SafeImpl(it)
    }
}

src/string.rs

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
use core::{slice, str::from_utf8_unchecked_mut};
2+
3+
use alloc::string::String;
4+
/// Extension methods on [`String`].
///
/// This trait is sealed and cannot be implemented for types outside of
/// `retain_more`.
pub trait RetainMoreString: sealed::AllocMoreSealedString {
    /// A version of [`String::retain`] which allows the predicate mutable
    /// access to the valid parts of the full string.
    ///
    /// The arguments of the predicate are:
    ///  - 0: `&mut str`; The already retained parts of `self`, for which
    ///    predicate returned `true`
    ///  - 1: [`char`]; The current character being considered
    ///  - 2: `&mut str`; The parts of `self` yet to be considered.
    ///
    /// Characters are visited front to back, exactly once each. The current
    /// character is kept if the predicate returns `true` and removed
    /// otherwise.
    ///
    /// # Panic safety
    ///
    /// If the predicate panics, `self` is left empty. It is never left
    /// holding invalid UTF-8.
    ///
    /// # Usage
    ///
    /// ```
    /// # use retain_more::RetainMoreString as _;
    /// let mut my_string = "Super secret code: -100054321;-78912. EOF\
    ///                      Here is some content which shouldn't be seen"
    ///     .to_string();
    /// /// Remove all numbers from the string, including a single leading `'-'` and
    /// /// additionally remove all characters after the first occurrence of `"EOF"`
    /// fn cleanup(before: &mut str, it: char, after: &mut str) -> bool {
    ///     if before.ends_with("EOF") {
    ///         false
    ///     } else {
    ///         match (it, after.chars().next()) {
    ///             ('-', Some(c)) => !c.is_ascii_digit(),
    ///             (c, _) => !c.is_ascii_digit(),
    ///         }
    ///     }
    /// }
    /// my_string.retain_all(cleanup);
    /// assert_eq!(&my_string, "Super secret code: ;. EOF");
    /// ```
    fn retain_all<F: FnMut(&mut str, char, &mut str) -> bool>(&mut self, f: F);

    /// A helper for the common case where only access to the parts of the
    /// [`String`] which haven't been considered are required.
    ///
    /// The predicate receives the current character and the not yet
    /// considered remainder of the string.
    fn retain_after<F: FnMut(char, &mut str) -> bool>(&mut self, mut f: F) {
        self.retain_all(move |_, current, after| f(current, after))
    }

    /// A reimplementation of [`String::retain`] using
    /// [`retain_all`](`RetainMoreString::retain_all`)
    ///
    /// This is used to demonstrate that
    /// [`retain_all`](`RetainMoreString::retain_all`) is a strictly more
    /// powerful abstraction than [`String::retain`] from [`alloc`].
    ///
    /// # Examples
    /// (Adapted from the `alloc` docs for [`String::retain`])
    /// ```
    /// # use retain_more::RetainMoreString as _;
    /// let mut s = String::from("f_o_ob_ar");
    ///
    /// s.retain_default(|c| c != '_');
    ///
    /// assert_eq!(s, "foobar");
    /// ```
    ///
    /// The exact order may be useful for tracking external state, like an
    /// index.
    ///
    /// ```
    /// # use retain_more::RetainMoreString as _;
    /// let mut s = String::from("abcde");
    /// let keep = [false, true, true, false, true];
    /// let mut i = 0;
    /// s.retain_default(|_| (keep[i], i += 1).0);
    /// assert_eq!(s, "bce");
    /// ```
    fn retain_default<F: FnMut(char) -> bool>(&mut self, mut f: F) {
        self.retain_all(move |_, current, _| f(current))
    }
}

impl RetainMoreString for String {
    fn retain_all<F: FnMut(&mut str, char, &mut str) -> bool>(&mut self, mut f: F) {
        let len = self.len();
        // SAFETY: Through this reference we only call `set_len` and
        // `as_mut_ptr`. Whenever control can leave this function (normally or
        // by unwinding out of `f`), the bytes in `0..self.len()` are valid
        // UTF-8, as argued at each `set_len` below.
        let bytes = unsafe { self.as_mut_vec() };
        // This is required for panic safety, see https://github.com/rust-lang/rust/issues/78498
        // SAFETY: 0..0 is empty and hence that region is valid UTF-8.
        // SAFETY: 0 <= capacity, since the capacity is a usize.
        unsafe {
            bytes.set_len(0);
        }
        // The base pointer of the allocation. It must be obtained from the
        // backing `Vec<u8>` rather than via `str::as_mut_ptr` on `self`:
        // `self` now dereferences to the *empty* `str`, and a pointer derived
        // from that zero-length reference may not be used to access the bytes
        // in `0..len`. Nothing below can reallocate, so the pointer stays
        // valid for the whole loop.
        let ptr = bytes.as_mut_ptr();

        let mut del_bytes = 0;
        // The index of the start of the region which has not yet been considered.
        // This is always at a UTF-8 character boundary.
        let mut idx = 0;

        while idx < len {
            // The implementation in `alloc` uses `self.get_unchecked(idx..len)` for
            // the equivalent section. <https://github.com/rust-lang/rust/blob/a6bd5246da78/library/alloc/src/string.rs#L1243>
            // This would be unsafe here because the receiver of that method
            // (`DerefMut::deref_mut(&mut self)`) is the empty `str`, since `len` is set to
            // 0 above. However, `get_unchecked` requires that the index is
            // within the bounds of the receiver, not just the allocation of the
            // receiver. This is not a safety issue within `alloc`, because the
            // implementation of `get_unchecked` within `core` expands to the
            // equivalent code as below. However, we cannot make that assumption
            // here, so have to go the long way around.
            let ch = unsafe {
                // SAFETY: `len` came from `self.len()`. Therefore `idx < len` implies `idx` is
                // within the heap allocation owned by self. Therefore the
                // result is within the same allocation as `ptr`.
                let start = ptr.add(idx);
                // SAFETY: The region is not aliased because the method has a mutable reference
                // to self. Additionally, there is no other access across the
                // loop, and this is the start of the loop body, and no other references exist
                // before this line. We drop the region before any further
                // access later in the loop body.
                let region = slice::from_raw_parts_mut(start, len - idx);

                // `region` is `idx..len` within the original string.
                // idx is on a character boundary, and the rest of this method has not modified
                // this region of bytes (except through the `&mut str` as the third closure
                // parameter, any access through which is required to maintain the UTF-8
                // invariant of that region)
                from_utf8_unchecked_mut(region).chars().next().unwrap()
                // `region` is dropped here, so its access to `idx..len` has
                // ended before `before` and `after` are created below.
            };
            let ch_len = ch.len_utf8();
            let (before, after) = unsafe {
                (
                    // SAFETY: UTF-8 is maintained in the before section by only copying
                    // a full character at a time.
                    from_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, idx - del_bytes)),
                    // SAFETY: idx + ch_len <= len because self was valid UTF-8 by
                    // invariant, hence `idx + ch_len` is within the allocation of
                    // self and lies on a character boundary, hence after is valid.
                    // This does not alias with `before`, because `before` ends at
                    // `idx - del_bytes <= idx`, which is strictly less than
                    // `idx + ch_len`, where `after` starts.
                    from_utf8_unchecked_mut(slice::from_raw_parts_mut(
                        ptr.add(idx + ch_len),
                        len - idx - ch_len,
                    )),
                )
            };
            if !f(before, ch, after) {
                del_bytes += ch_len;
            } else if del_bytes > 0 {
                // Copy `ch` del_bytes bytes back.
                // Use the version in the allocation of self, which is already UTF-8 encoded.

                // SAFETY: We copy a region which is a single UTF-8 character.
                // We can't use copy_nonoverlapping here, because source and
                // destination overlap whenever del_bytes < ch_len.
                unsafe {
                    core::ptr::copy(ptr.add(idx), ptr.add(idx - del_bytes), ch_len);
                }
            }

            // 'Point' idx to the next char
            idx += ch_len;
        }
        // SAFETY: len - del_bytes <= len <= capacity, and `0..len - del_bytes`
        // holds exactly the retained characters, each copied as a whole.
        unsafe {
            bytes.set_len(len - del_bytes);
        }
    }
}

/// Implementation of the sealed pattern for [`RetainMoreString`]
/// See [C-SEALED] from rust-api-guidelines for explanation
///
/// [C-SEALED]: https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed
mod sealed {
    use alloc::string::String;

    /// Private supertrait; only implemented for [`String`] in this module.
    pub trait AllocMoreSealedString {}
    impl AllocMoreSealedString for String {}
}
176+
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::string::ToString;

    /// Predicate which drops every ASCII digit, and also drops a `'-'` that
    /// is immediately followed by an ASCII digit.
    fn redact(current: char, rest: &mut str) -> bool {
        let is_leading_sign =
            current == '-' && rest.chars().next().map_or(false, |c| c.is_ascii_digit());
        !(is_leading_sign || current.is_ascii_digit())
    }

    /// Runs `retain_after` with `f` on a copy of `input` and checks that the
    /// result equals `output`.
    fn after_helper<F: FnMut(char, &mut str) -> bool>(input: &str, output: &str, f: F) {
        let mut actual = input.to_string();
        actual.retain_after(f);

        assert_eq!(actual.as_str(), output);
    }

    #[test]
    fn retain_after() {
        // (input, expected result after redaction)
        let cases = [
            ("this has no numbers", "this has no numbers"),
            ("54321", ""),
            ("-12345", ""),
            ("--12345", "-"),
            ("-12-3-45--", "--"),
        ];
        for &(input, expected) in cases.iter() {
            after_helper(input, expected, redact);
        }
    }
}

tests/string_panic.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
//! The tests which require access to [`std::panic::catch_unwind`], which is
2+
//! unavailable in a `no_std` crate
3+
4+
use retain_more::RetainMoreString as _;
5+
/// Checks that `retain_default` filters like `String::retain` and that a
/// panicking predicate never leaves invalid UTF-8 behind.
#[test]
fn retain_default() {
    // Adapted from https://github.com/rust-lang/rust/blob/2ad5292aea63/library/alloc/tests/string.rs#L364-L396
    let mut s = String::from("α_β_γ");

    s.retain_default(|_| true);
    assert_eq!(s, "α_β_γ");

    s.retain_default(|c| c != '_');
    assert_eq!(s, "αβγ");

    s.retain_default(|c| c != 'β');
    assert_eq!(s, "αγ");

    s.retain_default(|c| c == 'α');
    assert_eq!(s, "α");

    s.retain_default(|_| false);
    assert_eq!(s, "");

    // Drop the first character, keep the (multi-byte) second one, then panic
    // on the third.
    let mut s = String::from("0è0");
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        let mut count = 0;
        s.retain_default(|_| {
            count += 1;
            match count {
                1 => false,
                2 => true,
                _ => panic!(),
            }
        });
    }));
    // Make sure the panic path was really exercised; otherwise the UTF-8
    // check below would pass without testing anything.
    assert!(
        outcome.is_err(),
        "the predicate should have panicked on the third character"
    );
    assert!(std::str::from_utf8(s.as_bytes()).is_ok());
}
40+
/// Independently discovered reproduction of
/// <https://github.com/rust-lang/rust/issues/78498>
///
/// The predicate removes the first (multi-byte) character, keeps the second
/// and panics on the third; afterwards the string must still be valid UTF-8.
#[test]
fn retain_all_safety_78498() {
    use std::panic::{catch_unwind, AssertUnwindSafe};

    let mut index = 0;
    let mut input = String::from("૱uu");
    let unwound = catch_unwind(AssertUnwindSafe(|| {
        input.retain_all(|_, _, _| {
            let keep = match index {
                0 => false,
                2 => panic!("What happens here"),
                _ => true,
            };
            index += 1;
            keep
        })
    }));
    unwound.unwrap_err();
    assert!(std::str::from_utf8(input.as_bytes()).is_ok());
}

0 commit comments

Comments
 (0)