@@ -1156,6 +1156,84 @@ fn test_total_ord() {
11561156 assert_eq ! ( "22" . cmp( "1234" ) , Greater ) ;
11571157}
11581158
1159+ // There are only 1,114,112 code points (including surrogates for WTF-8). So we
1160+ // can test `next_code_point` and `next_code_point_reverse` exhaustively on all
1161+ // possible inputs.
1162+
1163+ /// Assert that encoding a codepoint with `encode_utf8_raw` and then decoding it
1164+ /// with `next_code_point` preserves the codepoint.
1165+ fn test_next_code_point ( codepoint : u32 ) {
1166+ let mut bytes = [ 0 ; 4 ] ;
1167+ let mut bytes = std:: char:: encode_utf8_raw ( codepoint, & mut bytes) . iter ( ) ;
1168+
1169+ // SAFETY: `bytes` is UTF8-like
1170+ let got = unsafe { core:: str:: next_code_point ( & mut bytes) } ;
1171+ assert_eq ! ( got, Some ( codepoint) ) ;
1172+
1173+ // SAFETY: `bytes` is UTF8-like
1174+ let got = unsafe { core:: str:: next_code_point ( & mut bytes) } ;
1175+ assert_eq ! ( got, None ) ;
1176+ }
1177+
1178+ /// The same but for `next_code_point_reverse`.
1179+ fn test_next_code_point_reverse ( codepoint : u32 ) {
1180+ let mut bytes = [ 0 ; 4 ] ;
1181+ let mut bytes = std:: char:: encode_utf8_raw ( codepoint, & mut bytes) . iter ( ) ;
1182+
1183+ // SAFETY: `bytes` is UTF8-like
1184+ let got = unsafe { core:: str:: next_code_point_reverse ( & mut bytes) } ;
1185+ assert_eq ! ( got, Some ( codepoint) ) ;
1186+
1187+ // SAFETY: `bytes` is UTF8-like
1188+ let got = unsafe { core:: str:: next_code_point_reverse ( & mut bytes) } ;
1189+ assert_eq ! ( got, None ) ;
1190+ }
1191+
/// Runs the `next_code_point` round-trip check on every value from 0 through
/// `char::MAX` inclusive.
#[test]
#[cfg_attr(miri, ignore)] // Disabled on Miri because it is too slow
fn test_next_code_point_exhaustive() {
    let last = u32::from(char::MAX);
    (0..=last).for_each(test_next_code_point);
}
1199+
/// Runs the `next_code_point_reverse` round-trip check on every value from 0
/// through `char::MAX` inclusive.
#[test]
#[cfg_attr(miri, ignore)] // Disabled on Miri because it is too slow
fn test_next_code_point_reverse_exhaustive() {
    let last = u32::from(char::MAX);
    (0..=last).for_each(test_next_code_point_reverse);
}
1207+
/// First and last code point of each UTF-8 encoded-length class, listed as
/// `(first, last)` pairs in order of increasing length.
///
/// Every entry is written as a hexadecimal literal: a bare `0800` would be
/// parsed as decimal 800 (U+0320), which is a 2-byte code point and not the
/// start of the 3-byte range.
#[rustfmt::skip]
const CODEPOINT_BOUNDARIES: &[u32] = &[
    // 1 byte codepoints (U+0000 ..= U+007F):
    0x0000, 0x007F,

    // 2 byte codepoints (U+0080 ..= U+07FF):
    0x0080, 0x07FF,

    // 3 byte codepoints (U+0800 ..= U+FFFF):
    0x0800, 0xFFFF,

    // 4 byte codepoints (U+01_0000 ..= U+10_FFFF):
    0x01_0000, 0x10_FFFF,
];
1222+
/// Runs the `next_code_point` round-trip check on each entry of
/// `CODEPOINT_BOUNDARIES`.
#[test]
fn test_next_code_point_boundary_conditions() {
    CODEPOINT_BOUNDARIES
        .iter()
        .copied()
        .for_each(test_next_code_point);
}
1229+
/// Runs the `next_code_point_reverse` round-trip check on each entry of
/// `CODEPOINT_BOUNDARIES`.
#[test]
fn test_next_code_point_reverse_boundary_conditions() {
    CODEPOINT_BOUNDARIES
        .iter()
        .copied()
        .for_each(test_next_code_point_reverse);
}
1236+
11591237#[ test]
11601238fn test_iterator ( ) {
11611239 let s = "ศไทย中华Việt Nam" ;
0 commit comments