33use super :: Utf8Error ;
44use crate :: mem;
55
6+ #[ cfg( kani) ]
7+ use crate :: kani;
8+
69/// Returns the initial codepoint accumulator for the first byte.
710/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
811/// for width 3, and 3 bits for width 4.
@@ -132,6 +135,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
132135 let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 } ;
133136 let align = v. as_ptr ( ) . align_offset ( usize_bytes) ;
134137
138+ #[ safety:: loop_invariant( index <= len + ascii_block_size) ]
135139 while index < len {
136140 let old_offset = index;
137141 macro_rules! err {
@@ -211,6 +215,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
211215 // until we find a word containing a non-ascii byte.
212216 if align != usize:: MAX && align. wrapping_sub ( index) % usize_bytes == 0 {
213217 let ptr = v. as_ptr ( ) ;
218+ #[ safety:: loop_invariant( index <= blocks_end + ascii_block_size && align. wrapping_sub( index) % usize_bytes == 0 ) ]
214219 while index < blocks_end {
215220 // SAFETY: since `align - index` and `ascii_block_size` are
216221 // multiples of `usize_bytes`, `block = ptr.add(index)` is
@@ -228,6 +233,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
228233 index += ascii_block_size;
229234 }
230235 // step from the point where the wordwise loop stopped
236+ #[ safety:: loop_invariant( index <= len) ]
231237 while index < len && v[ index] < 128 {
232238 index += 1 ;
233239 }
@@ -271,3 +277,28 @@ pub const fn utf8_char_width(b: u8) -> usize {
271277
272278/// Mask of the value bits of a continuation byte.
273279const CONT_MASK : u8 = 0b0011_1111 ;
280+
281+ #[ cfg( kani) ]
282+ #[ unstable( feature = "kani" , issue = "none" ) ]
283+ pub mod verify {
284+ use super :: * ;
285+
286+ #[ kani:: proof]
287+ #[ kani:: unwind( 8 ) ]
288+ pub fn check_run_utf8_validation ( ) {
289+ if kani:: any ( ) {
290+ // TODO: ARR_SIZE can be `std::usize::MAX` with cbmc argument
291+ // `--arrays-uf-always`
292+ const ARR_SIZE : usize = 1000 ;
293+ let mut x: [ u8 ; ARR_SIZE ] = kani:: any ( ) ;
294+ let mut xs = kani:: slice:: any_slice_of_array_mut ( & mut x) ;
295+ run_utf8_validation ( xs) ;
296+ } else {
297+ let ptr = kani:: any_where :: < usize , _ > ( |val| * val != 0 ) as * const u8 ;
298+ kani:: assume ( ptr. is_aligned ( ) ) ;
299+ unsafe {
300+ run_utf8_validation ( crate :: slice:: from_raw_parts ( ptr, 0 ) ) ;
301+ }
302+ }
303+ }
304+ }
0 commit comments