Skip to content

Commit f94ff78

Browse files
authored
head: fix bug reading back through files (#7248)
* head: fix bug reading back through files Fix issue #7247. Rework logic for reading/seeking backwards through files. Bug was seen when reading back through large files. Added test case to validate fix.
1 parent 93d58b1 commit f94ff78

File tree

2 files changed

+104
-22
lines changed

2 files changed

+104
-22
lines changed

src/uu/head/src/head.rs

Lines changed: 64 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -350,16 +350,10 @@ fn read_but_last_n_lines(
350350
/// Return the index in `input` just after the `n`th line from the end.
351351
///
352352
/// If `n` exceeds the number of lines in this file, then return 0.
353-
///
354-
/// The cursor must be at the start of the seekable input before
355-
/// calling this function. This function rewinds the cursor to the
353+
/// This function rewinds the cursor to the
356354
/// beginning of the input just before returning unless there is an
357355
/// I/O error.
358356
///
359-
/// If `zeroed` is `false`, interpret the newline character `b'\n'` as
360-
/// a line ending. If `zeroed` is `true`, interpret the null character
361-
/// `b'\0'` as a line ending instead.
362-
///
363357
/// # Errors
364358
///
365359
/// This function returns an error if there is a problem seeking
@@ -390,20 +384,21 @@ fn find_nth_line_from_end<R>(input: &mut R, n: u64, separator: u8) -> std::io::R
390384
where
391385
R: Read + Seek,
392386
{
393-
let size = input.seek(SeekFrom::End(0))?;
387+
let file_size = input.seek(SeekFrom::End(0))?;
394388

395389
let mut buffer = [0u8; BUF_SIZE];
396-
let buf_size: usize = (BUF_SIZE as u64).min(size).try_into().unwrap();
397-
let buffer = &mut buffer[..buf_size];
398390

399391
let mut i = 0u64;
400392
let mut lines = 0u64;
401393

402394
loop {
403395
// the casts here are ok, `buffer.len()` should never be above a few k
404-
input.seek(SeekFrom::Current(
405-
-((buffer.len() as i64).min((size - i) as i64)),
406-
))?;
396+
let bytes_remaining_to_search = file_size - i;
397+
let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap());
398+
let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
399+
let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];
400+
401+
input.seek(SeekFrom::Start(read_start_offset))?;
407402
input.read_exact(buffer)?;
408403
for byte in buffer.iter().rev() {
409404
if byte == &separator {
@@ -412,11 +407,11 @@ where
412407
// if it were just `n`,
413408
if lines == n + 1 {
414409
input.rewind()?;
415-
return Ok(size - i);
410+
return Ok(file_size - i);
416411
}
417412
i += 1;
418413
}
419-
if size - i == 0 {
414+
if file_size - i == 0 {
420415
input.rewind()?;
421416
return Ok(0);
422417
}
@@ -700,12 +695,59 @@ mod tests {
700695

701696
#[test]
702697
fn test_find_nth_line_from_end() {
703-
let mut input = Cursor::new("x\ny\nz\n");
704-
assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 6);
705-
assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 4);
706-
assert_eq!(find_nth_line_from_end(&mut input, 2, b'\n').unwrap(), 2);
707-
assert_eq!(find_nth_line_from_end(&mut input, 3, b'\n').unwrap(), 0);
708-
assert_eq!(find_nth_line_from_end(&mut input, 4, b'\n').unwrap(), 0);
709-
assert_eq!(find_nth_line_from_end(&mut input, 1000, b'\n').unwrap(), 0);
698+
// Make sure our input buffer is several multiples of BUF_SIZE in size
699+
// such that we can be reasonably confident we've exercised all logic paths.
700+
// Make the contents of the buffer look like...
701+
// aaaa\n
702+
// aaaa\n
703+
// aaaa\n
704+
// aaaa\n
705+
// aaaa\n
706+
// ...
707+
// This will make it easier to validate the results since each line will have
708+
// 5 bytes in it.
709+
710+
let minimum_buffer_size = BUF_SIZE * 4;
711+
let mut input_buffer = vec![];
712+
let mut loop_iteration: u64 = 0;
713+
while input_buffer.len() < minimum_buffer_size {
714+
for _n in 0..4 {
715+
input_buffer.push(b'a');
716+
}
717+
loop_iteration += 1;
718+
input_buffer.push(b'\n');
719+
}
720+
721+
let lines_in_input_file = loop_iteration;
722+
let input_length = lines_in_input_file * 5;
723+
assert_eq!(input_length, input_buffer.len().try_into().unwrap());
724+
let mut input = Cursor::new(input_buffer);
725+
// We now have loop_iteration lines in the buffer Now walk backwards through the buffer
726+
// to confirm everything parses correctly.
727+
// Use a large step size to prevent the test from taking too long, but don't use a power
728+
// of 2 in case we miss some corner case.
729+
let step_size = 511;
730+
for n in (0..lines_in_input_file).filter(|v| v % step_size == 0) {
731+
// The 5*n comes from 5-bytes per row.
732+
assert_eq!(
733+
find_nth_line_from_end(&mut input, n, b'\n').unwrap(),
734+
input_length - 5 * n
735+
);
736+
}
737+
738+
// Now confirm that if we query with a value >= lines_in_input_file we get an offset
739+
// of 0
740+
assert_eq!(
741+
find_nth_line_from_end(&mut input, lines_in_input_file, b'\n').unwrap(),
742+
0
743+
);
744+
assert_eq!(
745+
find_nth_line_from_end(&mut input, lines_in_input_file + 1, b'\n').unwrap(),
746+
0
747+
);
748+
assert_eq!(
749+
find_nth_line_from_end(&mut input, lines_in_input_file + 1000, b'\n').unwrap(),
750+
0
751+
);
710752
}
711753
}

tests/by-util/test_head.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,46 @@ fn test_presume_input_pipe_5_chars() {
392392
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
393393
}
394394

395+
#[test]
396+
fn test_read_backwards_lines_large_file() {
397+
// Create our fixtures on the fly. We need the input file to be at least double
398+
// the size of BUF_SIZE as specified in head.rs. Go for something a bit bigger
399+
// than that.
400+
let scene = TestScenario::new(util_name!());
401+
let fixtures = &scene.fixtures;
402+
let seq_30000_file_name = "seq_30000";
403+
let seq_1000_file_name = "seq_1000";
404+
scene
405+
.cmd("seq")
406+
.arg("30000")
407+
.set_stdout(fixtures.make_file(seq_30000_file_name))
408+
.succeeds();
409+
scene
410+
.cmd("seq")
411+
.arg("1000")
412+
.set_stdout(fixtures.make_file(seq_1000_file_name))
413+
.succeeds();
414+
415+
// Now run our tests.
416+
scene
417+
.ucmd()
418+
.args(&["-n", "-29000", "seq_30000"])
419+
.succeeds()
420+
.stdout_is_fixture("seq_1000");
421+
422+
scene
423+
.ucmd()
424+
.args(&["-n", "-30000", "seq_30000"])
425+
.run()
426+
.stdout_is_fixture("emptyfile.txt");
427+
428+
scene
429+
.ucmd()
430+
.args(&["-n", "-30001", "seq_30000"])
431+
.run()
432+
.stdout_is_fixture("emptyfile.txt");
433+
}
434+
395435
#[cfg(all(
396436
not(target_os = "windows"),
397437
not(target_os = "macos"),

0 commit comments

Comments
 (0)