@@ -350,16 +350,10 @@ fn read_but_last_n_lines(
350350/// Return the index in `input` just after the `n`th line from the end.
351351///
352352/// If `n` exceeds the number of lines in this file, then return 0.
353- ///
354- /// The cursor must be at the start of the seekable input before
355- /// calling this function. This function rewinds the cursor to the
353+ /// This function rewinds the cursor to the
356354/// beginning of the input just before returning unless there is an
357355/// I/O error.
358356///
359- /// If `zeroed` is `false`, interpret the newline character `b'\n'` as
360- /// a line ending. If `zeroed` is `true`, interpret the null character
361- /// `b'\0'` as a line ending instead.
362- ///
363357/// # Errors
364358///
365359/// This function returns an error if there is a problem seeking
@@ -390,20 +384,21 @@ fn find_nth_line_from_end<R>(input: &mut R, n: u64, separator: u8) -> std::io::R
390384where
391385 R : Read + Seek ,
392386{
393- let size = input. seek ( SeekFrom :: End ( 0 ) ) ?;
387+ let file_size = input. seek ( SeekFrom :: End ( 0 ) ) ?;
394388
395389 let mut buffer = [ 0u8 ; BUF_SIZE ] ;
396- let buf_size: usize = ( BUF_SIZE as u64 ) . min ( size) . try_into ( ) . unwrap ( ) ;
397- let buffer = & mut buffer[ ..buf_size] ;
398390
399391 let mut i = 0u64 ;
400392 let mut lines = 0u64 ;
401393
402394 loop {
403395 // the casts here are ok, `buffer.len()` should never be above a few k
404- input. seek ( SeekFrom :: Current (
405- -( ( buffer. len ( ) as i64 ) . min ( ( size - i) as i64 ) ) ,
406- ) ) ?;
396+ let bytes_remaining_to_search = file_size - i;
397+ let bytes_to_read_this_loop = bytes_remaining_to_search. min ( BUF_SIZE . try_into ( ) . unwrap ( ) ) ;
398+ let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
399+ let buffer = & mut buffer[ ..bytes_to_read_this_loop. try_into ( ) . unwrap ( ) ] ;
400+
401+ input. seek ( SeekFrom :: Start ( read_start_offset) ) ?;
407402 input. read_exact ( buffer) ?;
408403 for byte in buffer. iter ( ) . rev ( ) {
409404 if byte == & separator {
@@ -412,11 +407,11 @@ where
412407 // if it were just `n`,
413408 if lines == n + 1 {
414409 input. rewind ( ) ?;
415- return Ok ( size - i) ;
410+ return Ok ( file_size - i) ;
416411 }
417412 i += 1 ;
418413 }
419- if size - i == 0 {
414+ if file_size - i == 0 {
420415 input. rewind ( ) ?;
421416 return Ok ( 0 ) ;
422417 }
@@ -700,12 +695,59 @@ mod tests {
700695
701696 #[ test]
702697 fn test_find_nth_line_from_end ( ) {
703- let mut input = Cursor :: new ( "x\n y\n z\n " ) ;
704- assert_eq ! ( find_nth_line_from_end( & mut input, 0 , b'\n' ) . unwrap( ) , 6 ) ;
705- assert_eq ! ( find_nth_line_from_end( & mut input, 1 , b'\n' ) . unwrap( ) , 4 ) ;
706- assert_eq ! ( find_nth_line_from_end( & mut input, 2 , b'\n' ) . unwrap( ) , 2 ) ;
707- assert_eq ! ( find_nth_line_from_end( & mut input, 3 , b'\n' ) . unwrap( ) , 0 ) ;
708- assert_eq ! ( find_nth_line_from_end( & mut input, 4 , b'\n' ) . unwrap( ) , 0 ) ;
709- assert_eq ! ( find_nth_line_from_end( & mut input, 1000 , b'\n' ) . unwrap( ) , 0 ) ;
698+ // Make sure our input buffer is several multiples of BUF_SIZE in size
699+ // such that we can be reasonably confident we've exercised all logic paths.
700+ // Make the contents of the buffer look like...
701+ // aaaa\n
702+ // aaaa\n
703+ // aaaa\n
704+ // aaaa\n
705+ // aaaa\n
706+ // ...
707+ // This will make it easier to validate the results since each line will have
708+ // 5 bytes in it.
709+
710+ let minimum_buffer_size = BUF_SIZE * 4 ;
711+ let mut input_buffer = vec ! [ ] ;
712+ let mut loop_iteration: u64 = 0 ;
713+ while input_buffer. len ( ) < minimum_buffer_size {
714+ for _n in 0 ..4 {
715+ input_buffer. push ( b'a' ) ;
716+ }
717+ loop_iteration += 1 ;
718+ input_buffer. push ( b'\n' ) ;
719+ }
720+
721+ let lines_in_input_file = loop_iteration;
722+ let input_length = lines_in_input_file * 5 ;
723+ assert_eq ! ( input_length, input_buffer. len( ) . try_into( ) . unwrap( ) ) ;
724+ let mut input = Cursor :: new ( input_buffer) ;
725+ // We now have loop_iteration lines in the buffer Now walk backwards through the buffer
726+ // to confirm everything parses correctly.
727+ // Use a large step size to prevent the test from taking too long, but don't use a power
728+ // of 2 in case we miss some corner case.
729+ let step_size = 511 ;
730+ for n in ( 0 ..lines_in_input_file) . filter ( |v| v % step_size == 0 ) {
731+ // The 5*n comes from 5-bytes per row.
732+ assert_eq ! (
733+ find_nth_line_from_end( & mut input, n, b'\n' ) . unwrap( ) ,
734+ input_length - 5 * n
735+ ) ;
736+ }
737+
738+ // Now confirm that if we query with a value >= lines_in_input_file we get an offset
739+ // of 0
740+ assert_eq ! (
741+ find_nth_line_from_end( & mut input, lines_in_input_file, b'\n' ) . unwrap( ) ,
742+ 0
743+ ) ;
744+ assert_eq ! (
745+ find_nth_line_from_end( & mut input, lines_in_input_file + 1 , b'\n' ) . unwrap( ) ,
746+ 0
747+ ) ;
748+ assert_eq ! (
749+ find_nth_line_from_end( & mut input, lines_in_input_file + 1000 , b'\n' ) . unwrap( ) ,
750+ 0
751+ ) ;
710752 }
711753}
0 commit comments