uutils · sylvestre · Sep 29, 2025 · Sep 22, 2025 · Sep 22, 2025 · Sep 22, 2025
diff --git a/src/uu/uniq/locales/en-US.ftl b/src/uu/uniq/locales/en-US.ftl
@@ -21,6 +21,7 @@ uniq-help-zero-terminated = end lines with 0 byte, not newline
 # Error messages
 uniq-error-write-line-terminator = Could not write line terminator
 uniq-error-write-error = write error
+uniq-error-read-error = read error
 uniq-error-invalid-argument = Invalid argument for { $opt_name }: { $arg }
 uniq-error-try-help = Try 'uniq --help' for more information.
 uniq-error-group-mutually-exclusive = --group is mutually exclusive with -c/-d/-D/-u

diff --git a/src/uu/uniq/locales/fr-FR.ftl b/src/uu/uniq/locales/fr-FR.ftl
@@ -20,6 +20,7 @@ uniq-help-zero-terminated = terminer les lignes avec un octet 0, pas une nouvell
 # Messages d'erreur
 uniq-error-write-line-terminator = Impossible d'écrire le terminateur de ligne
 uniq-error-write-error = erreur d'écriture
+uniq-error-read-error = erreur de lecture
 uniq-error-invalid-argument = Argument invalide pour { $opt_name } : { $arg }
 uniq-error-try-help = Essayez 'uniq --help' pour plus d'informations.
 uniq-error-group-mutually-exclusive = --group est mutuellement exclusif avec -c/-d/-D/-u

diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs
@@ -42,6 +42,8 @@ enum Delimiters {
     None,
 }
 
+const OUTPUT_BUFFER_CAPACITY: usize = 128 * 1024;
+
 struct Uniq {
     repeats_only: bool,
     uniques_only: bool,
@@ -55,6 +57,14 @@ struct Uniq {
     zero_terminated: bool,
 }
 
+#[derive(Default)]
+struct LineMeta {
+    key_start: usize,
+    key_end: usize,
+    lowercase: Vec<u8>,
+    use_lowercase: bool,
+}
+
 macro_rules! write_line_terminator {
     ($writer:expr, $line_terminator:expr) => {
         $writer
@@ -64,42 +74,53 @@ macro_rules! write_line_terminator {
 }
 
 impl Uniq {
-    pub fn print_uniq(&self, reader: impl BufRead, mut writer: impl Write) -> UResult<()> {
+    pub fn print_uniq(&self, mut reader: impl BufRead, mut writer: impl Write) -> UResult<()> {
         let mut first_line_printed = false;
         let mut group_count = 1;
         let line_terminator = self.get_line_terminator();
-        let mut lines = reader.split(line_terminator);
-        let mut line = match lines.next() {
-            Some(l) => l?,
-            None => return Ok(()),
-        };
-
         let writer = &mut writer;
 
-        // compare current `line` with consecutive lines (`next_line`) of the input
-        // and if needed, print `line` based on the command line options provided
-        for next_line in lines {
-            let next_line = next_line?;
-            if self.cmp_keys(&line, &next_line) {
+        let mut current_buf = Vec::with_capacity(1024);
+        if !Self::read_line(&mut reader, &mut current_buf, line_terminator)? {
+            return Ok(());
+        }
+        let mut current_meta = LineMeta::default();
+        self.build_meta(&current_buf, &mut current_meta);
+
+        let mut next_buf = Vec::with_capacity(1024);
+        let mut next_meta = LineMeta::default();
+
+        loop {
+            if !Self::read_line(&mut reader, &mut next_buf, line_terminator)? {
+                break;
+            }
+
+            self.build_meta(&next_buf, &mut next_meta);
+
+            if self.keys_differ(&current_buf, &current_meta, &next_buf, &next_meta) {
                 if (group_count == 1 && !self.repeats_only)
                     || (group_count > 1 && !self.uniques_only)
                 {
-                    self.print_line(writer, &line, group_count, first_line_printed)?;
+                    self.print_line(writer, &current_buf, group_count, first_line_printed)?;
                     first_line_printed = true;
                 }
-                line = next_line;
+                std::mem::swap(&mut current_buf, &mut next_buf);
+                std::mem::swap(&mut current_meta, &mut next_meta);
                 group_count = 1;
             } else {
                 if self.all_repeated {
-                    self.print_line(writer, &line, group_count, first_line_printed)?;
+                    self.print_line(writer, &current_buf, group_count, first_line_printed)?;
                     first_line_printed = true;
-                    line = next_line;
+                    std::mem::swap(&mut current_buf, &mut next_buf);
+                    std::mem::swap(&mut current_meta, &mut next_meta);
                 }
                 group_count += 1;
             }
+            next_buf.clear();
         }
+
         if (group_count == 1 && !self.repeats_only) || (group_count > 1 && !self.uniques_only) {
-            self.print_line(writer, &line, group_count, first_line_printed)?;
+            self.print_line(writer, &current_buf, group_count, first_line_printed)?;
             first_line_printed = true;
         }
         if (self.delimiters == Delimiters::Append || self.delimiters == Delimiters::Both)
@@ -113,79 +134,134 @@ impl Uniq {
         Ok(())
     }
 
-    fn skip_fields(&self, line: &[u8]) -> Vec<u8> {
+    fn get_line_terminator(&self) -> u8 {
+        if self.zero_terminated { 0 } else { b'\n' }
+    }
+
+    fn keys_differ(
+        &self,
+        first_line: &[u8],
+        first_meta: &LineMeta,
+        second_line: &[u8],
+        second_meta: &LineMeta,
+    ) -> bool {
+        let first_slice = &first_line[first_meta.key_start..first_meta.key_end];
+        let second_slice = &second_line[second_meta.key_start..second_meta.key_end];
+
+        if !self.ignore_case {
+            return first_slice != second_slice;
+        }
+
+        let first_cmp = if first_meta.use_lowercase {
+            first_meta.lowercase.as_slice()
+        } else {
+            first_slice
+        };
+        let second_cmp = if second_meta.use_lowercase {
+            second_meta.lowercase.as_slice()
+        } else {
+            second_slice
+        };
+
+        first_cmp != second_cmp
+    }
+
+    fn key_bounds(&self, line: &[u8]) -> (usize, usize) {
+        let mut start = self.skip_fields_offset(line);
+        if let Some(skip_bytes) = self.slice_start {
+            start = start.saturating_add(skip_bytes).min(line.len());
+        }
+
+        let end = self.key_end_index(line, start);
+        (start, end)
+    }
+
+    fn skip_fields_offset(&self, line: &[u8]) -> usize {
         if let Some(skip_fields) = self.skip_fields {
-            let mut line = line.iter();
-            let mut line_after_skipped_field: Vec<u8>;
+            let mut idx = 0;
             for _ in 0..skip_fields {
-                if line.all(|u| u.is_ascii_whitespace()) {
-                    return Vec::new();
+                while idx < line.len() && line[idx].is_ascii_whitespace() {
+                    idx += 1;
                 }
-                line_after_skipped_field = line
-                    .by_ref()
-                    .skip_while(|u| !u.is_ascii_whitespace())
-                    .copied()
-                    .collect::<Vec<u8>>();
-
-                if line_after_skipped_field.is_empty() {
-                    return Vec::new();
+                if idx >= line.len() {
+                    return line.len();
+                }
+                while idx < line.len() && !line[idx].is_ascii_whitespace() {
+                    idx += 1;
+                }
+                if idx >= line.len() {
+                    return line.len();
                 }
-                line = line_after_skipped_field.iter();
             }
-            line.copied().collect::<Vec<u8>>()
+            idx
         } else {
-            line.to_vec()
+            0
         }
     }
 
-    fn get_line_terminator(&self) -> u8 {
-        if self.zero_terminated { 0 } else { b'\n' }
+    fn key_end_index(&self, line: &[u8], key_start: usize) -> usize {
+        let remainder = &line[key_start..];
+        match self.slice_stop {
+            None => line.len(),
+            Some(limit) => {
+                if remainder.is_empty() {
+                    return key_start;
+                }
+                if let Ok(valid) = std::str::from_utf8(remainder) {
+                    let prefix_len = Self::char_prefix_len(valid, limit);
+                    key_start + prefix_len
+                } else {
+                    key_start + remainder.len().min(limit)
+                }
+            }
+        }
     }
 
-    fn cmp_keys(&self, first: &[u8], second: &[u8]) -> bool {
-        self.cmp_key(first, |first_iter| {
-            self.cmp_key(second, |second_iter| first_iter.ne(second_iter))
-        })
+    fn char_prefix_len(text: &str, limit: usize) -> usize {
+        for (count, (idx, _)) in text.char_indices().enumerate() {
+            if count == limit {
+                return idx;
+            }
+        }
+        text.len()
     }
 
-    fn cmp_key<F>(&self, line: &[u8], mut closure: F) -> bool
-    where
-        F: FnMut(&mut dyn Iterator<Item = char>) -> bool,
-    {
-        let fields_to_check = self.skip_fields(line);
-
-        // Skip self.slice_start bytes (if -s was used).
-        // self.slice_start is how many characters to skip, but historically
-        // uniq's `-s N` means "skip N *bytes*," so do that literally:
-        let skip_bytes = self.slice_start.unwrap_or(0);
-        let fields_to_check = if skip_bytes < fields_to_check.len() {
-            &fields_to_check[skip_bytes..]
-        } else {
-            // If skipping beyond end-of-line, leftover is empty => effectively ""
-            &[]
-        };
-
-        // Convert the leftover bytes to UTF-8 for character-based -w
-        // If invalid UTF-8, just compare them as individual bytes (fallback).
-        let Ok(string_after_skip) = std::str::from_utf8(fields_to_check) else {
-            // Fallback: if invalid UTF-8, treat them as single-byte "chars"
-            return closure(&mut fields_to_check.iter().map(|&b| b as char));
-        };
-
-        let total_chars = string_after_skip.chars().count();
-
-        // `-w N` => Compare no more than N characters
-        let slice_stop = self.slice_stop.unwrap_or(total_chars);
-        let slice_start = slice_stop.min(total_chars);
+    fn build_meta(&self, line: &[u8], meta: &mut LineMeta) {
+        let (key_start, key_end) = self.key_bounds(line);
+        meta.key_start = key_start;
+        meta.key_end = key_end;
+
+        if self.ignore_case && key_start < key_end {
+            let slice = &line[key_start..key_end];
+            if slice.iter().any(|b| b.is_ascii_uppercase()) {
+                meta.lowercase.clear();
+                meta.lowercase.reserve(slice.len());
+                meta.lowercase
+                    .extend(slice.iter().map(|b| b.to_ascii_lowercase()));
+                meta.use_lowercase = true;
+                return;
+            }
+        }
 
-        let mut iter = string_after_skip.chars().take(slice_start);
+        meta.use_lowercase = false;
+    }
 
-        if self.ignore_case {
-            // We can do ASCII-lowercase or full Unicode-lowercase. For minimal changes, do ASCII:
-            closure(&mut iter.map(|c| c.to_ascii_lowercase()))
-        } else {
-            closure(&mut iter)
+    fn read_line(
+        reader: &mut impl BufRead,
+        buffer: &mut Vec<u8>,
+        line_terminator: u8,
+    ) -> UResult<bool> {
+        buffer.clear();
+        let bytes_read = reader
+            .read_until(line_terminator, buffer)
+            .map_err_context(|| translate!("uniq-error-read-error"))?;
+        if bytes_read == 0 {
+            return Ok(false);
         }
+        if buffer.last().is_some_and(|last| *last == line_terminator) {
+            buffer.pop();
+        }
+        Ok(true)
     }
 
     fn should_print_delimiter(&self, group_count: usize, first_line_printed: bool) -> bool {
@@ -214,22 +290,59 @@ impl Uniq {
             write_line_terminator!(writer, line_terminator)?;
         }
 
+        let mut count_buf = [0u8; Self::COUNT_PREFIX_BUF_SIZE];
+
         if self.show_counts {
-            let prefix = format!("{count:7} ");
-            let out = prefix
-                .as_bytes()
-                .iter()
-                .chain(line.iter())
-                .copied()
-                .collect::<Vec<u8>>();
-            writer.write_all(out.as_slice())
-        } else {
-            writer.write_all(line)
+            // Call the associated function (no &self) after the refactor above.
+            let prefix = Self::build_count_prefix(count, &mut count_buf);
+            writer
+                .write_all(prefix)
+                .map_err_context(|| translate!("uniq-error-write-error"))?;
         }
-        .map_err_context(|| translate!("uniq-error-write-error"))?;
+
+        writer
+            .write_all(line)
+            .map_err_context(|| translate!("uniq-error-write-error"))?;
 
         write_line_terminator!(writer, line_terminator)
     }
+
+    const COUNT_PREFIX_WIDTH: usize = 7;
+    const COUNT_PREFIX_BUF_SIZE: usize = 32;
+
+    // This function does not use `self`, so make it an associated function.
+    // Also remove needless explicit lifetimes to satisfy clippy::needless-lifetimes.
+    fn build_count_prefix(count: usize, buf: &mut [u8; Self::COUNT_PREFIX_BUF_SIZE]) -> &[u8] {
+        let mut digits_buf = [0u8; 20];
+        let mut value = count;
+        let mut idx = digits_buf.len();
+
+        if value == 0 {
+            idx -= 1;
+            digits_buf[idx] = b'0';
+        } else {
+            while value > 0 {
+                idx -= 1;
+                digits_buf[idx] = b'0' + (value % 10) as u8;
+                value /= 10;
+            }
+        }
+
+        let digits = &digits_buf[idx..];
+        let width = Self::COUNT_PREFIX_WIDTH;
+
+        if digits.len() <= width {
+            let pad = width - digits.len();
+            buf[..pad].fill(b' ');
+            buf[pad..pad + digits.len()].copy_from_slice(digits);
+            buf[width] = b' ';
+            &buf[..=width]
+        } else {
+            buf[..digits.len()].copy_from_slice(digits);
+            buf[digits.len()] = b' ';
+            &buf[..=digits.len()]
+        }
+    }
 }
 
 fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult<Option<usize>> {
@@ -741,8 +854,11 @@ fn open_output_file(out_file_name: Option<&OsStr>) -> UResult<Box<dyn Write>> {
             let out_file = File::create(path).map_err_context(
                 || translate!("uniq-error-could-not-open", "path" => path.maybe_quote()),
             )?;
-            Box::new(BufWriter::new(out_file))
+            Box::new(BufWriter::with_capacity(OUTPUT_BUFFER_CAPACITY, out_file))
         }
-        _ => Box::new(stdout().lock()),
+        _ => Box::new(BufWriter::with_capacity(
+            OUTPUT_BUFFER_CAPACITY,
+            stdout().lock(),
+        )),
     })
 }