|
7 | 7 | // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html |
8 | 8 | // https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html |
9 | 9 |
|
10 | | -// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit |
| 10 | +// spell-checker:ignore (misc) HFKJFK Mbdfhn getrlimit RLIMIT_NOFILE rlim bigdecimal extendedbigdecimal hexdigit behaviour keydef |
11 | 11 |
|
12 | 12 | mod buffer_hint; |
13 | 13 | mod check; |
@@ -51,6 +51,7 @@ use uucore::line_ending::LineEnding; |
51 | 51 | use uucore::parser::num_parser::{ExtendedParser, ExtendedParserError}; |
52 | 52 | use uucore::parser::parse_size::{ParseSizeError, Parser}; |
53 | 53 | use uucore::parser::shortcut_value_parser::ShortcutValueParser; |
| 54 | +use uucore::posix::{MODERN, TRADITIONAL}; |
54 | 55 | use uucore::show_error; |
55 | 56 | use uucore::translate; |
56 | 57 | use uucore::version_cmp::version_cmp; |
@@ -1085,6 +1086,146 @@ fn get_rlimit() -> UResult<usize> { |
1085 | 1086 | } |
1086 | 1087 |
|
1087 | 1088 | const STDIN_FILE: &str = "-"; |
| 1089 | + |
| 1090 | +/// Legacy `+POS1 [-POS2]` syntax is permitted unless `_POSIX2_VERSION` is in |
| 1091 | +/// the [TRADITIONAL, MODERN) range (matches GNU behaviour). |
| 1092 | +fn allows_traditional_usage() -> bool { |
| 1093 | + !matches!(uucore::posix::posix_version(), Some(ver) if (TRADITIONAL..MODERN).contains(&ver)) |
| 1094 | +} |
| 1095 | + |
/// One half (`POS1` or `POS2`) of a legacy `+POS1 [-POS2]` key specification,
/// as produced by `parse_legacy_part`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct LegacyKeyPart {
    /// Leading number of the spec, exactly as written (not yet shifted to the
    /// numbering used by `-k`).
    field: usize,
    /// Number following the optional `.`; `0` when no `.N` suffix was given.
    char_pos: usize,
    /// Everything after the numeric part, passed through verbatim as key options.
    opts: String,
}
| 1102 | + |
/// Parses `num` as a `usize`, saturating instead of failing on overflow.
///
/// Returns `Some(usize::MAX)` when the value is too large to represent and
/// `None` for any other parse failure (empty input, non-digit characters, ...).
fn parse_usize_or_max(num: &str) -> Option<usize> {
    num.parse::<usize>()
        .or_else(|err| match err.kind() {
            // Too many digits: clamp rather than reject.
            IntErrorKind::PosOverflow => Ok(usize::MAX),
            _ => Err(err),
        })
        .ok()
}
| 1110 | + |
| 1111 | +fn parse_legacy_part(spec: &str) -> Option<LegacyKeyPart> { |
| 1112 | + let idx = spec.chars().take_while(|c| c.is_ascii_digit()).count(); |
| 1113 | + if idx == 0 { |
| 1114 | + return None; |
| 1115 | + } |
| 1116 | + |
| 1117 | + let field = parse_usize_or_max(&spec[..idx])?; |
| 1118 | + let mut char_pos = 0; |
| 1119 | + let mut rest = &spec[idx..]; |
| 1120 | + |
| 1121 | + if let Some(stripped) = rest.strip_prefix('.') { |
| 1122 | + let char_idx = stripped.chars().take_while(|c| c.is_ascii_digit()).count(); |
| 1123 | + if char_idx == 0 { |
| 1124 | + return None; |
| 1125 | + } |
| 1126 | + char_pos = parse_usize_or_max(&stripped[..char_idx])?; |
| 1127 | + rest = &stripped[char_idx..]; |
| 1128 | + } |
| 1129 | + |
| 1130 | + Some(LegacyKeyPart { |
| 1131 | + field, |
| 1132 | + char_pos, |
| 1133 | + opts: rest.to_string(), |
| 1134 | + }) |
| 1135 | +} |
| 1136 | + |
| 1137 | +/// Convert legacy +POS1 [-POS2] into a `-k` key specification using saturating arithmetic. |
| 1138 | +fn legacy_key_to_k(from: &LegacyKeyPart, to: Option<&LegacyKeyPart>) -> String { |
| 1139 | + let start_field = from.field.saturating_add(1); |
| 1140 | + let start_char = from.char_pos.saturating_add(1); |
| 1141 | + |
| 1142 | + let mut keydef = format!( |
| 1143 | + "{}{}{}", |
| 1144 | + start_field, |
| 1145 | + if from.char_pos == 0 { |
| 1146 | + String::new() |
| 1147 | + } else { |
| 1148 | + format!(".{start_char}") |
| 1149 | + }, |
| 1150 | + from.opts |
| 1151 | + ); |
| 1152 | + |
| 1153 | + if let Some(to) = to { |
| 1154 | + let end_field = if to.char_pos == 0 { |
| 1155 | + // When the end character index is zero, GNU keeps the field number as-is. |
| 1156 | + // Clamp to 1 to avoid generating an invalid field 0. |
| 1157 | + to.field.max(1) |
| 1158 | + } else { |
| 1159 | + to.field.saturating_add(1) |
| 1160 | + }; |
| 1161 | + |
| 1162 | + keydef.push(','); |
| 1163 | + keydef.push_str(&end_field.to_string()); |
| 1164 | + if to.char_pos != 0 { |
| 1165 | + keydef.push('.'); |
| 1166 | + keydef.push_str(&to.char_pos.to_string()); |
| 1167 | + } |
| 1168 | + keydef.push_str(&to.opts); |
| 1169 | + } |
| 1170 | + |
| 1171 | + keydef |
| 1172 | +} |
| 1173 | + |
| 1174 | +/// Preprocess argv to handle legacy +POS1 [-POS2] syntax by converting it into -k forms |
| 1175 | +/// before clap sees the arguments. |
| 1176 | +fn preprocess_legacy_args<I>(args: I) -> Vec<OsString> |
| 1177 | +where |
| 1178 | + I: IntoIterator, |
| 1179 | + I::Item: Into<OsString>, |
| 1180 | +{ |
| 1181 | + if !allows_traditional_usage() { |
| 1182 | + return args.into_iter().map(Into::into).collect(); |
| 1183 | + } |
| 1184 | + |
| 1185 | + let mut processed = Vec::new(); |
| 1186 | + let mut iter = args.into_iter().map(Into::into).peekable(); |
| 1187 | + |
| 1188 | + while let Some(arg) = iter.next() { |
| 1189 | + if arg == "--" { |
| 1190 | + processed.push(arg); |
| 1191 | + processed.extend(iter); |
| 1192 | + break; |
| 1193 | + } |
| 1194 | + |
| 1195 | + let as_str = arg.to_string_lossy(); |
| 1196 | + if let Some(from_spec) = as_str.strip_prefix('+') { |
| 1197 | + if let Some(from) = parse_legacy_part(from_spec) { |
| 1198 | + let mut to_part = None; |
| 1199 | + |
| 1200 | + let next_candidate = iter.peek().map(|next| next.to_string_lossy().to_string()); |
| 1201 | + |
| 1202 | + if let Some(next_str) = next_candidate { |
| 1203 | + if let Some(stripped) = next_str.strip_prefix('-') { |
| 1204 | + if stripped.starts_with(|c: char| c.is_ascii_digit()) { |
| 1205 | + let next_arg = iter.next().unwrap(); |
| 1206 | + if let Some(parsed) = parse_legacy_part(stripped) { |
| 1207 | + to_part = Some(parsed); |
| 1208 | + } else { |
| 1209 | + processed.push(arg); |
| 1210 | + processed.push(next_arg); |
| 1211 | + continue; |
| 1212 | + } |
| 1213 | + } |
| 1214 | + } |
| 1215 | + } |
| 1216 | + |
| 1217 | + let keydef = legacy_key_to_k(&from, to_part.as_ref()); |
| 1218 | + processed.push(OsString::from(format!("-k{keydef}"))); |
| 1219 | + continue; |
| 1220 | + } |
| 1221 | + } |
| 1222 | + |
| 1223 | + processed.push(arg); |
| 1224 | + } |
| 1225 | + |
| 1226 | + processed |
| 1227 | +} |
| 1228 | + |
1088 | 1229 | #[cfg(target_os = "linux")] |
1089 | 1230 | const LINUX_BATCH_DIVISOR: usize = 4; |
1090 | 1231 | #[cfg(target_os = "linux")] |
@@ -1116,7 +1257,11 @@ fn default_merge_batch_size() -> usize { |
1116 | 1257 | pub fn uumain(args: impl uucore::Args) -> UResult<()> { |
1117 | 1258 | let mut settings = GlobalSettings::default(); |
1118 | 1259 |
|
1119 | | - let matches = uucore::clap_localization::handle_clap_result_with_exit_code(uu_app(), args, 2)?; |
| 1260 | + let matches = uucore::clap_localization::handle_clap_result_with_exit_code( |
| 1261 | + uu_app(), |
| 1262 | + preprocess_legacy_args(args), |
| 1263 | + 2, |
| 1264 | + )?; |
1120 | 1265 |
|
1121 | 1266 | // Prevent -o/--output to be specified multiple times |
1122 | 1267 | if matches |
|
0 commit comments