Skip to content

Commit 1b5227d

Browse files
committed
Add exhaustive tests for next_code_point and next_code_point_reverse
There are only 0x110000 possible code points (0 through 0x10FFFF), so we can exhaustively test all of them.
1 parent df8102f commit 1b5227d

File tree

4 files changed

+75
-2
lines changed

4 files changed

+75
-2
lines changed

library/alloctests/tests/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#![feature(round_char_boundary)]
2626
#![feature(slice_partition_dedup)]
2727
#![feature(string_from_utf8_lossy_owned)]
28+
#![feature(str_internals)]
29+
#![feature(char_internals)]
2830
#![feature(string_remove_matches)]
2931
#![feature(const_btree_len)]
3032
#![feature(const_trait_impl)]

library/alloctests/tests/str.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,76 @@ fn test_total_ord() {
11561156
assert_eq!("22".cmp("1234"), Greater);
11571157
}
11581158

1159+
fn test_next_code_point(iter: impl Iterator<Item = u32>) {
1160+
for expected in iter {
1161+
let mut bytes = [0; 4];
1162+
let mut bytes = std::char::encode_utf8_raw(expected, &mut bytes).iter();
1163+
1164+
// SAFETY: `bytes` is UTF8-like
1165+
let got = unsafe { core::str::next_code_point(&mut bytes) };
1166+
assert_eq!(got, Some(expected));
1167+
1168+
// SAFETY: `bytes` is UTF8-like
1169+
let got = unsafe { core::str::next_code_point(&mut bytes) };
1170+
assert_eq!(got, None);
1171+
}
1172+
}
1173+
1174+
fn test_next_code_point_reverse(iter: impl Iterator<Item = u32>) {
1175+
for expected in iter {
1176+
let mut bytes = [0; 4];
1177+
let mut bytes = std::char::encode_utf8_raw(expected, &mut bytes).iter();
1178+
1179+
// SAFETY: `bytes` is UTF8-like
1180+
let got = unsafe { core::str::next_code_point_reverse(&mut bytes) };
1181+
assert_eq!(got, Some(expected));
1182+
1183+
// SAFETY: `bytes` is UTF8-like
1184+
let got = unsafe { core::str::next_code_point_reverse(&mut bytes) };
1185+
assert_eq!(got, None);
1186+
}
1187+
}
1188+
1189+
/// Runs the forward-decoding check over `0..0x80`, the values that encode to a single byte.
#[test]
fn test_next_code_point_1byte() {
    let one_byte_values = 0..0x80;
    test_next_code_point(one_byte_values);
}
1193+
1194+
/// Runs the forward-decoding check over `0x80..0x800`, the values that encode to two bytes.
#[test]
fn test_next_code_point_2byte() {
    let two_byte_values = 0x80..0x800;
    test_next_code_point(two_byte_values);
}
1198+
1199+
/// Runs the forward-decoding check over `0x800..0x10_000`, the values that encode to three
/// bytes.
#[test]
fn test_next_code_point_3byte() {
    let three_byte_values = 0x800..0x10_000;
    test_next_code_point(three_byte_values);
}
1203+
1204+
/// Runs the forward-decoding check over every value from `0x10_000` up to and including
/// `char::MAX`, the values that encode to four bytes.
#[test]
fn test_next_code_point_4byte() {
    test_next_code_point(0x10_000..=u32::from(char::MAX));
}
1208+
1209+
/// Runs the reverse-decoding check over `0..0x80`, the values that encode to a single byte.
#[test]
fn test_next_code_point_reverse_1byte() {
    let one_byte_values = 0..0x80;
    test_next_code_point_reverse(one_byte_values);
}
1213+
1214+
/// Runs the reverse-decoding check over `0x80..0x800`, the values that encode to two bytes.
#[test]
fn test_next_code_point_reverse_2byte() {
    let two_byte_values = 0x80..0x800;
    test_next_code_point_reverse(two_byte_values);
}
1218+
1219+
/// Runs the reverse-decoding check over `0x800..0x10_000`, the values that encode to three
/// bytes.
#[test]
fn test_next_code_point_reverse_3byte() {
    let three_byte_values = 0x800..0x10_000;
    test_next_code_point_reverse(three_byte_values);
}
1223+
1224+
/// Runs the reverse-decoding check over every value from `0x10_000` up to and including
/// `char::MAX`, the values that encode to four bytes.
#[test]
fn test_next_code_point_reverse_4byte() {
    let four_byte_values = 0x10_000..=u32::from(char::MAX);
    test_next_code_point_reverse(four_byte_values);
}
1228+
11591229
#[test]
11601230
fn test_iterator() {
11611231
let s = "ศไทย中华Việt Nam";

library/core/src/str/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pub use lossy::{Utf8Chunk, Utf8Chunks};
5858
#[stable(feature = "rust1", since = "1.0.0")]
5959
pub use traits::FromStr;
6060
#[unstable(feature = "str_internals", issue = "none")]
61-
pub use validations::{next_code_point, utf8_char_width};
61+
pub use validations::{next_code_point, next_code_point_reverse, utf8_char_width};
6262

6363
#[inline(never)]
6464
#[cold]

library/core/src/str/validations.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,9 @@ pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) ->
7474
/// # Safety
7575
///
7676
/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
77+
#[unstable(feature = "str_internals", issue = "none")]
7778
#[inline]
78-
pub(super) unsafe fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
79+
pub unsafe fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
7980
where
8081
I: DoubleEndedIterator<Item = &'a u8>,
8182
{

0 commit comments

Comments
 (0)