diff --git a/src/uu/unexpand/src/unexpand.rs b/src/uu/unexpand/src/unexpand.rs index 896318484dd..2ca0e8eae87 100644 --- a/src/uu/unexpand/src/unexpand.rs +++ b/src/uu/unexpand/src/unexpand.rs @@ -34,27 +34,92 @@ enum ParseError { impl UError for ParseError {} -fn tabstops_parse(s: &str) -> Result, ParseError> { +fn parse_tab_num(word: &str, allow_zero: bool) -> Result { + match word.parse::() { + Ok(0) if !allow_zero => Err(ParseError::TabSizeCannotBeZero), + Ok(num) => Ok(num), + Err(e) => match e.kind() { + IntErrorKind::PosOverflow => Err(ParseError::TabSizeTooLarge), + _ => Err(ParseError::InvalidCharacter( + word.trim_start_matches(char::is_numeric).to_string(), + )), + }, + } +} + +fn parse_tabstops(s: &str) -> Result { let words = s.split(','); let mut nums = Vec::new(); + let mut increment_size: Option = None; + let mut extend_size: Option = None; for word in words { - match word.parse::() { - Ok(num) => nums.push(num), - Err(e) => { - return match e.kind() { - IntErrorKind::PosOverflow => Err(ParseError::TabSizeTooLarge), - _ => Err(ParseError::InvalidCharacter( - word.trim_start_matches(char::is_numeric).to_string(), - )), - }; + if word.is_empty() { + continue; + } + + // Handle extended syntax: +N (increment) and /N (repeat) + if let Some(word) = word.strip_prefix('+') { + // +N means N positions after the last tab stop (only allowed at end) + if increment_size.is_some() || extend_size.is_some() { + return Err(ParseError::InvalidCharacter("+".to_string())); } + let value = parse_tab_num(word, true)?; + if nums.is_empty() { + // Standalone +N: treat as tab stops at multiples of N + if value == 0 { + return Err(ParseError::TabSizeCannotBeZero); + } + return Ok(TabConfig { + tabstops: vec![value], + increment_size: None, + extend_size: None, + }); + } + increment_size = Some(value); + } else if let Some(word) = word.strip_prefix('/') { + // /N means repeat every N positions after the last tab stop + if increment_size.is_some() || extend_size.is_some() { + return Err(ParseError::InvalidCharacter("/".to_string())); + } + let value = parse_tab_num(word, true)?; + if nums.is_empty() { + // Standalone /N: treat as tab stops at multiples of N + if value == 0 { + return Err(ParseError::TabSizeCannotBeZero); + } + return Ok(TabConfig { + tabstops: vec![value], + increment_size: None, + extend_size: None, + }); + } + extend_size = Some(value); + } else { + // Regular number + if increment_size.is_some() || extend_size.is_some() { + return Err(ParseError::InvalidCharacter(word.to_string())); + } + nums.push(parse_tab_num(word, false)?); } } - if nums.contains(&0) { - return Err(ParseError::TabSizeCannotBeZero); + if nums.is_empty() && increment_size.is_none() && extend_size.is_none() { + return Ok(TabConfig { + tabstops: vec![DEFAULT_TABSTOP], + increment_size: None, + extend_size: None, + }); + } + + // Handle the increment if specified + // Only add an extra tab stop if increment is non-zero + if let Some(inc) = increment_size { + if inc > 0 { + let last = *nums.last().unwrap(); + nums.push(last + inc); + } } if let (false, _) = nums @@ -64,7 +129,11 @@ fn tabstops_parse(s: &str) -> Result, ParseError> { return Err(ParseError::TabSizesMustBeAscending); } - Ok(nums) + Ok(TabConfig { + tabstops: nums, + increment_size, + extend_size, + }) } mod options { @@ -75,18 +144,28 @@ mod options { pub const NO_UTF8: &str = "no-utf8"; } +struct TabConfig { + tabstops: Vec, + increment_size: Option, + extend_size: Option, +} + struct Options { files: Vec, - tabstops: Vec, + tab_config: TabConfig, aflag: bool, uflag: bool, } impl Options { fn new(matches: &clap::ArgMatches) -> Result { - let tabstops = match matches.get_many::(options::TABS) { - None => vec![DEFAULT_TABSTOP], - Some(s) => tabstops_parse(&s.map(|s| s.as_str()).collect::>().join(","))?, + let tab_config = match matches.get_many::(options::TABS) { + None => TabConfig { + tabstops: vec![DEFAULT_TABSTOP], + increment_size: None, + extend_size: None, + }, + Some(s) => parse_tabstops(&s.map(|s| s.as_str()).collect::>().join(","))?, }; let aflag = (matches.get_flag(options::ALL) || matches.contains_id(options::TABS)) @@ -100,7 +179,7 @@ impl Options { Ok(Self { files, - tabstops, + tab_config, aflag, uflag, }) @@ -216,19 +295,58 @@ fn open(path: &OsString) -> UResult>> { } } -fn next_tabstop(tabstops: &[usize], col: usize) -> Option { - if tabstops.len() == 1 { +fn next_tabstop(tab_config: &TabConfig, col: usize) -> Option { + let tabstops = &tab_config.tabstops; + + if tabstops.is_empty() { + return None; + } + + if tabstops.len() == 1 + && tab_config.increment_size.is_none() + && tab_config.extend_size.is_none() + { + // Simple case: single tab stop, repeat at that interval Some(tabstops[0] - col % tabstops[0]) } else { - // find next larger tab - // if there isn't one in the list, tab becomes a single space - tabstops.iter().find(|&&t| t > col).map(|t| t - col) + // Find next larger tab + if let Some(&next_tab) = tabstops.iter().find(|&&t| t > col) { + Some(next_tab - col) + } else { + // We're past the last explicit tab stop + if let Some(&last_tab) = tabstops.last() { + if let Some(extend_size) = tab_config.extend_size { + // /N: tab stops at multiples of N + if extend_size == 0 { + return None; + } + Some(extend_size - (col % extend_size)) + } else if let Some(increment_size) = tab_config.increment_size { + // +N: continue with increment after last tab stop + if increment_size == 0 || col < last_tab { + return None; + } + let distance_from_last = col - last_tab; + let remainder = distance_from_last % increment_size; + Some(if remainder == 0 { + increment_size + } else { + increment_size - remainder + }) + } else { + // No more tabs + None + } + } else { + None + } + } } } fn write_tabs( output: &mut BufWriter, - tabstops: &[usize], + tab_config: &TabConfig, mut scol: usize, col: usize, prevtab: bool, @@ -240,7 +358,7 @@ fn write_tabs( // a tab, unless it's at the start of the line. let ai = init || amode; if (ai && !prevtab && col > scol + 1) || (col > scol && (init || ai && prevtab)) { - while let Some(nts) = next_tabstop(tabstops, scol) { + while let Some(nts) = next_tabstop(tab_config, scol) { if col < scol + nts { break; } @@ -311,7 +429,7 @@ fn unexpand_line( output: &mut BufWriter, options: &Options, lastcol: usize, - ts: &[usize], + tab_config: &TabConfig, ) -> UResult<()> { // Fast path: if we're not converting all spaces (-a flag not set) // and the line doesn't start with spaces, just write it directly @@ -338,7 +456,7 @@ fn unexpand_line( byte += 1; } b'\t' => { - col += next_tabstop(ts, col).unwrap_or(1); + col += next_tabstop(tab_config, col).unwrap_or(1); byte += 1; pctype = CharType::Tab; } @@ -348,7 +466,15 @@ fn unexpand_line( // If we found spaces/tabs, write them as tabs if byte > 0 { - write_tabs(output, ts, 0, col, pctype == CharType::Tab, true, true)?; + write_tabs( + output, + tab_config, + 0, + col, + pctype == CharType::Tab, + true, + true, + )?; } // Write the rest of the line directly (no more tab conversion needed) @@ -362,7 +488,15 @@ fn unexpand_line( while byte < buf.len() { // when we have a finite number of columns, never convert past the last column if lastcol > 0 && col >= lastcol { - write_tabs(output, ts, scol, col, pctype == CharType::Tab, init, true)?; + write_tabs( + output, + tab_config, + scol, + col, + pctype == CharType::Tab, + init, + true, + )?; output.write_all(&buf[byte..])?; scol = col; break; @@ -379,7 +513,7 @@ fn unexpand_line( col += if ctype == CharType::Space { 1 } else { - next_tabstop(ts, col).unwrap_or(1) + next_tabstop(tab_config, col).unwrap_or(1) }; if !tabs_buffered { @@ -391,7 +525,7 @@ fn unexpand_line( // always write_tabs( output, - ts, + tab_config, scol, col, pctype == CharType::Tab, @@ -418,7 +552,15 @@ fn unexpand_line( } // write out anything remaining - write_tabs(output, ts, scol, col, pctype == CharType::Tab, init, true)?; + write_tabs( + output, + tab_config, + scol, + col, + pctype == CharType::Tab, + init, + true, + )?; buf.truncate(0); // clear out the buffer Ok(()) @@ -426,9 +568,16 @@ fn unexpand_line( fn unexpand(options: &Options) -> UResult<()> { let mut output = BufWriter::new(stdout()); - let ts = &options.tabstops[..]; + let tab_config = &options.tab_config; let mut buf = Vec::new(); - let lastcol = if ts.len() > 1 { *ts.last().unwrap() } else { 0 }; + let lastcol = if tab_config.tabstops.len() > 1 + && tab_config.increment_size.is_none() + && tab_config.extend_size.is_none() + { + *tab_config.tabstops.last().unwrap() + } else { + 0 + }; for file in &options.files { let mut fh = match open(file) { @@ -443,7 +592,7 @@ fn unexpand(options: &Options) -> UResult<()> { Ok(s) => s > 0, Err(_) => !buf.is_empty(), } { - unexpand_line(&mut buf, &mut output, options, lastcol, ts)?; + unexpand_line(&mut buf, &mut output, options, lastcol, tab_config)?; } } output.flush()?; @@ -452,7 +601,7 @@ fn unexpand(options: &Options) -> UResult<()> { #[cfg(test)] mod tests { - use crate::is_digit_or_comma; + use crate::{ParseError, is_digit_or_comma, parse_tab_num, parse_tabstops}; #[test] fn test_is_digit_or_comma() { @@ -460,4 +609,103 @@ mod tests { assert!(is_digit_or_comma(',')); assert!(!is_digit_or_comma('a')); } + + #[test] + fn test_parse_tab_num() { + assert_eq!(parse_tab_num("6", false).unwrap(), 6); + assert_eq!(parse_tab_num("12", false).unwrap(), 12); + assert_eq!(parse_tab_num("9", false).unwrap(), 9); + assert_eq!(parse_tab_num("4", false).unwrap(), 4); + } + + #[test] + fn test_parse_tab_num_errors() { + // Zero is not allowed when allow_zero is false + assert!(matches!( + parse_tab_num("0", false), + Err(ParseError::TabSizeCannotBeZero) + )); + + // Zero is allowed when allow_zero is true + assert_eq!(parse_tab_num("0", true).unwrap(), 0); + + // Invalid character + assert!(matches!( + parse_tab_num("6x", false), + Err(ParseError::InvalidCharacter(_)) + )); + + // Invalid character + assert!(matches!( + parse_tab_num("9y", false), + Err(ParseError::InvalidCharacter(_)) + )); + } + + #[test] + fn test_parse_tabstops_extended_syntax() { + // Standalone +N is now allowed (treated as multiples of N) + let config = parse_tabstops("+6").unwrap(); + assert_eq!(config.tabstops, vec![6]); + assert_eq!(config.increment_size, None); + assert_eq!(config.extend_size, None); + + // Standalone /N is now allowed (treated as multiples of N) + let config = parse_tabstops("/9").unwrap(); + assert_eq!(config.tabstops, vec![9]); + assert_eq!(config.increment_size, None); + assert_eq!(config.extend_size, None); + + // +0 and /0 are not allowed as standalone + assert!(matches!( + parse_tabstops("+0"), + Err(ParseError::TabSizeCannotBeZero) + )); + assert!(matches!( + parse_tabstops("/0"), + Err(ParseError::TabSizeCannotBeZero) + )); + + // Valid +N with previous tab stop + let config = parse_tabstops("3,+6").unwrap(); + assert_eq!(config.tabstops, vec![3, 9]); + assert_eq!(config.increment_size, Some(6)); + + // Valid /N with previous tab stop + let config = parse_tabstops("3,/4").unwrap(); + assert_eq!(config.tabstops, vec![3]); + assert_eq!(config.extend_size, Some(4)); + + // +0 with previous tab stop should be allowed + let config = parse_tabstops("3,+0").unwrap(); + assert_eq!(config.tabstops, vec![3]); + assert_eq!(config.increment_size, Some(0)); + + // /0 with previous tab stop should be allowed + let config = parse_tabstops("3,/0").unwrap(); + assert_eq!(config.tabstops, vec![3]); + assert_eq!(config.extend_size, Some(0)); + } + + #[test] + fn test_next_tabstop_with_increment() { + use crate::{next_tabstop, parse_tabstops}; + + // Test with "3,+6" configuration + let config = parse_tabstops("3,+6").unwrap(); + + // Verify the parsed configuration + assert_eq!(config.tabstops, vec![3, 9]); + assert_eq!(config.increment_size, Some(6)); + + // Tab stops should be at 3, 9, 15, 21, ... + assert_eq!(next_tabstop(&config, 0), Some(3)); // 0 → 3 + assert_eq!(next_tabstop(&config, 1), Some(2)); // 1 → 3 + assert_eq!(next_tabstop(&config, 2), Some(1)); // 2 → 3 + assert_eq!(next_tabstop(&config, 3), Some(6)); // 3 → 9 + assert_eq!(next_tabstop(&config, 4), Some(5)); // 4 → 9 + assert_eq!(next_tabstop(&config, 8), Some(1)); // 8 → 9 + assert_eq!(next_tabstop(&config, 9), Some(6)); // 9 → 15 + assert_eq!(next_tabstop(&config, 15), Some(6)); // 15 → 21 + } } diff --git a/tests/by-util/test_unexpand.rs b/tests/by-util/test_unexpand.rs index 0720dabb043..1029d67e602 100644 --- a/tests/by-util/test_unexpand.rs +++ b/tests/by-util/test_unexpand.rs @@ -307,3 +307,25 @@ fn unexpand_multibyte_utf8_gnu_compat() { .succeeds() .stdout_is("1ΔΔΔ5 99999\n"); } + +#[test] +fn test_blanks_ext1() { + // Test case from GNU test suite: blanks-ext1 + // ['blanks-ext1', '-t', '3,+6', {IN=> "\t "}, {OUT=> "\t\t"}], + new_ucmd!() + .args(&["-t", "3,+6"]) + .pipe_in("\t ") + .succeeds() + .stdout_is("\t\t"); +} + +#[test] +fn test_blanks_ext2() { + // Test case from GNU test suite: blanks-ext2 + // ['blanks-ext2', '-t', '3,/9', {IN=> "\t "}, {OUT=> "\t\t"}], + new_ucmd!() + .args(&["-t", "3,/9"]) + .pipe_in("\t ") + .succeeds() + .stdout_is("\t\t"); +}