Skip to content

Commit 06d843f

Browse files
authored
Add legacy +POS/-POS handling in sort to pass GNU sort-field-limit test (#9501)
* sort: add legacy +POS/-POS parsing for GNU compat
  Support GNU's obsolescent +POS1 [-POS2] syntax by translating it to -k before clap parses the arguments, gated by _POSIX2_VERSION. Adds tests for the accept and reject cases so that the GNU sort-field-limit test passes.
* sort: align legacy key tests with GNU field limit
* sort: rename legacy max-field test for clarity
* Simplify legacy key parsing inputs
* Inline legacy key end serialization
* Use starts_with for legacy arg digit check
1 parent 64203e3 commit 06d843f

File tree

2 files changed

+174
-2
lines changed

2 files changed

+174
-2
lines changed

src/uu/sort/src/sort.rs

Lines changed: 147 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
88
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html
99

10-
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit
10+
// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef
1111

1212
mod buffer_hint;
1313
mod check;
@@ -51,6 +51,7 @@ use uucore::line_ending::LineEnding;
5151
use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError};
5252
use uucore::parser::parse_size::{ParseSizeError, Parser};
5353
use uucore::parser::shortcut_value_parser::ShortcutValueParser;
54+
use uucore::posix::{MODERN, TRADITIONAL};
5455
use uucore::show_error;
5556
use uucore::translate;
5657
use uucore::version_cmp::version_cmp;
@@ -1085,6 +1086,146 @@ fn get_rlimit() -> UResult<usize> {
10851086
}
10861087

10871088
const STDIN_FILE: &str = "-";
1089+
1090+
/// Legacy `+POS1 [-POS2]` syntax is permitted unless `_POSIX2_VERSION` is in
1091+
/// the [TRADITIONAL, MODERN) range (matches GNU behaviour).
1092+
fn allows_traditional_usage() -> bool {
1093+
!matches!(uucore::posix::posix_version(), Some(ver) if (TRADITIONAL..MODERN).contains(&ver))
1094+
}
1095+
1096+
/// One half (`POS1` or `POS2`) of a legacy `+POS1 [-POS2]` key specification.
#[derive(Clone, Debug)]
struct LegacyKeyPart {
    /// Number written before the optional `.`.
    field: usize,
    /// Number written after the `.`; `0` when no `.` part was given.
    char_pos: usize,
    /// Text that follows the numeric part (e.g. ordering flags such as `R`).
    opts: String,
}
1102+
1103+
/// Parses `num` as a `usize`, saturating instead of failing on overflow.
///
/// Returns `Some(value)` on success, `Some(usize::MAX)` when the number is too
/// large to fit, and `None` for any other parse failure (empty input, stray
/// characters, ...).
fn parse_usize_or_max(num: &str) -> Option<usize> {
    num.parse::<usize>().map_or_else(
        |err| matches!(err.kind(), IntErrorKind::PosOverflow).then_some(usize::MAX),
        Some,
    )
}
1109+
}
1110+
1111+
fn parse_legacy_part(spec: &str) -> Option<LegacyKeyPart> {
1112+
let idx = spec.chars().take_while(|c| c.is_ascii_digit()).count();
1113+
if idx == 0 {
1114+
return None;
1115+
}
1116+
1117+
let field = parse_usize_or_max(&spec[..idx])?;
1118+
let mut char_pos = 0;
1119+
let mut rest = &spec[idx..];
1120+
1121+
if let Some(stripped) = rest.strip_prefix('.') {
1122+
let char_idx = stripped.chars().take_while(|c| c.is_ascii_digit()).count();
1123+
if char_idx == 0 {
1124+
return None;
1125+
}
1126+
char_pos = parse_usize_or_max(&stripped[..char_idx])?;
1127+
rest = &stripped[char_idx..];
1128+
}
1129+
1130+
Some(LegacyKeyPart {
1131+
field,
1132+
char_pos,
1133+
opts: rest.to_string(),
1134+
})
1135+
}
1136+
1137+
/// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic.
1138+
fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String {
1139+
let start_field = from.field.saturating_add(1);
1140+
let start_char = from.char_pos.saturating_add(1);
1141+
1142+
let mut keydef = format!(
1143+
"{}{}{}",
1144+
start_field,
1145+
if from.char_pos == 0 {
1146+
String::new()
1147+
} else {
1148+
format!(".{start_char}")
1149+
},
1150+
from.opts
1151+
);
1152+
1153+
if let Some(to) = to {
1154+
let end_field = if to.char_pos == 0 {
1155+
// When the end character index is zero, GNU keeps the field number as-is.
1156+
// Clamp to 1 to avoid generating an invalid field 0.
1157+
to.field.max(1)
1158+
} else {
1159+
to.field.saturating_add(1)
1160+
};
1161+
1162+
keydef.push(',');
1163+
keydef.push_str(&end_field.to_string());
1164+
if to.char_pos != 0 {
1165+
keydef.push('.');
1166+
keydef.push_str(&to.char_pos.to_string());
1167+
}
1168+
keydef.push_str(&to.opts);
1169+
}
1170+
1171+
keydef
1172+
}
1173+
1174+
/// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms
1175+
/// before clap sees the arguments.
1176+
fn preprocess_legacy_args<I>(args: I) -> Vec<OsString>
1177+
where
1178+
I: IntoIterator,
1179+
I::Item: Into<OsString>,
1180+
{
1181+
if !allows_traditional_usage() {
1182+
return args.into_iter().map(Into::into).collect();
1183+
}
1184+
1185+
let mut processed = Vec::new();
1186+
let mut iter = args.into_iter().map(Into::into).peekable();
1187+
1188+
while let Some(arg) = iter.next() {
1189+
if arg == "--" {
1190+
processed.push(arg);
1191+
processed.extend(iter);
1192+
break;
1193+
}
1194+
1195+
let as_str = arg.to_string_lossy();
1196+
if let Some(from_spec) = as_str.strip_prefix('+') {
1197+
if let Some(from) = parse_legacy_part(from_spec) {
1198+
let mut to_part = None;
1199+
1200+
let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string());
1201+
1202+
if let Some(next_str) = next_candidate {
1203+
if let Some(stripped) = next_str.strip_prefix('-') {
1204+
if stripped.starts_with(|c: char| c.is_ascii_digit()) {
1205+
let next_arg = iter.next().unwrap();
1206+
if let Some(parsed) = parse_legacy_part(stripped) {
1207+
to_part = Some(parsed);
1208+
} else {
1209+
processed.push(arg);
1210+
processed.push(next_arg);
1211+
continue;
1212+
}
1213+
}
1214+
}
1215+
}
1216+
1217+
let keydef = legacy_key_to_k(&from, to_part.as_ref());
1218+
processed.push(OsString::from(format!("-k{keydef}")));
1219+
continue;
1220+
}
1221+
}
1222+
1223+
processed.push(arg);
1224+
}
1225+
1226+
processed
1227+
}
1228+
10881229
#[cfg(target_os = "linux")]
10891230
const LINUX_BATCH_DIVISOR: usize = 4;
10901231
#[cfg(target_os = "linux")]
@@ -1116,7 +1257,11 @@ fn default_merge_batch_size() -> usize {
11161257
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
11171258
let mut settings = GlobalSettings::default();
11181259

1119-
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?;
1260+
let matches = uucore::clap_localization::handle_clap_result_with_exit_code(
1261+
uu_app(),
1262+
preprocess_legacy_args(args),
1263+
2,
1264+
)?;
11201265

11211266
// Prevent -o/--output to be specified multiple times
11221267
if matches

tests/by-util/test_sort.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,33 @@ fn test_invalid_buffer_size() {
107107
}
108108
}
109109

110+
#[test]
fn test_legacy_plus_minus_accepts_when_modern_posix2() {
    // A lone legacy `+POS1` key must be accepted with `_POSIX2_VERSION=200809`.
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("input.txt", "aa\nbb\n");

    // The character offset is `usize::MAX` to exercise the largest value.
    let start_key = format!("+0.{}R", usize::MAX);

    // The run must succeed and print the lines in their original order.
    ucmd.env("_POSIX2_VERSION", "200809")
        .arg(start_key)
        .arg("input.txt")
        .succeeds()
        .stdout_is("aa\nbb\n");
}
122+
123+
#[test]
fn test_legacy_plus_minus_accepts_with_size_max() {
    // A legacy `+POS1 -POS2` pair must be accepted when the end position
    // carries a `usize::MAX` character offset.
    let (at, mut ucmd) = at_and_ucmd!();
    at.write("input.txt", "aa\nbb\n");

    let end_key = format!("-1.{}R", usize::MAX);

    // The run must succeed and print the lines in their original order.
    ucmd.env("_POSIX2_VERSION", "200809")
        .arg("+1")
        .arg(end_key)
        .arg("input.txt")
        .succeeds()
        .stdout_is("aa\nbb\n");
}
136+
110137
#[test]
111138
fn test_ext_sort_stable() {
112139
new_ucmd!()

0 commit comments

Comments
 (0)