Skip to content

Commit 8039955

Browse files
dspinellis authored and
sylvestre committed
Add support for GNU ~step address ranges
1 parent 8a2934e commit 8039955

File tree

9 files changed

+133
-14
lines changed

9 files changed

+133
-14
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ cargo test
8585
* The `--follow-symlinks` flag for in-place editing.
8686
* Address 0 can be used to specify an address range that is already
8787
active on line 1 and can finish with the specified regular expression.
88+
* Address steps can be specified as `start~step` (every step-th line from start)
89+
and as `start,~step` (from start through the next line number that is a multiple of step).
8890

8991
### Supported BSD and GNU extensions
9092
* The second address in a range can be specified as a relative address with +N.

src/sed/command.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ pub enum Address {
9393
Line(usize), // Specific line
9494
RelLine(usize), // Relative line
9595
Last, // Last line
96+
StepMatch(usize), // Lines matching specified step from first
97+
StepEnd(usize), // Range ending at specified step from first
9698
}
9799

98100
#[derive(Debug)]

src/sed/compiler.rs

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -339,27 +339,66 @@ fn compile_address_range(
339339
cmd.addr1 = Some(addr1);
340340
if is_line0 && context.posix {
341341
// 0 starting address is a GNU extension.
342-
return compilation_error(lines, line, "address 0 invalid in POSIX mode");
342+
return compilation_error(lines, line, "address 0 is invalid in POSIX mode");
343343
}
344344
n_addr += 1;
345345
}
346346

347347
line.eat_spaces();
348-
if n_addr == 1 && !line.eol() && line.current() == ',' {
348+
if n_addr == 1 && !line.eol() && matches!(line.current(), ',' | '~') {
349+
let is_step_match = line.current() == '~'; // E.g. 0~2: Pick even-numbered lines
349350
line.advance();
350351
line.eat_spaces();
352+
let is_step_end = if line.current() == '~' {
353+
// E.g. /foo/,~10: Start at foo, include all lines until multiple of 10 is reached.
354+
line.advance();
355+
line.eat_spaces();
356+
true
357+
} else {
358+
false
359+
};
360+
361+
if (is_step_match || is_step_end) && context.posix {
362+
// ~ steps are a GNU extension.
363+
return compilation_error(lines, line, "~step is invalid in POSIX mode");
364+
}
365+
366+
// Look for second address.
351367
if !line.eol()
352368
&& let Ok(addr2) = compile_address(lines, line, context)
353369
{
354-
let addr2_is_re = matches!(addr2, Address::Re(_));
355-
cmd.addr2 = Some(addr2);
356-
if is_line0 && !addr2_is_re {
370+
// Set step_n to the number specified in the (required numeric) address.
371+
let step_n = if is_step_match || is_step_end {
372+
match addr2 {
373+
Address::Line(n) => n,
374+
_ => {
375+
return compilation_error(
376+
lines,
377+
line,
378+
"~step can only be specified on numeric addresses",
379+
);
380+
}
381+
}
382+
} else {
383+
0 // dummy, not used
384+
};
385+
386+
if is_line0 && !matches!(addr2, Address::Re(_)) && !is_step_match {
357387
return compilation_error(
358388
lines,
359389
line,
360-
"address 0 can only be used with a regular expression second address",
390+
"address 0 can only be used with a regular expression or ~step",
361391
);
362392
}
393+
394+
// If needed, transform Address::Line into Address::Step*.
395+
cmd.addr2 = if is_step_match {
396+
Some(Address::StepMatch(step_n))
397+
} else if is_step_end {
398+
Some(Address::StepEnd(step_n))
399+
} else {
400+
Some(addr2)
401+
};
363402
n_addr += 1;
364403
}
365404
}
@@ -390,6 +429,8 @@ fn read_file_path(lines: &ScriptLineProvider, line: &mut ScriptCharProvider) ->
390429
}
391430

392431
/// Compile and return a single range address specification.
432+
// Due to their irregular syntax ~ addresses are returned as Line() and adjusted
433+
// in compile_address_range().
393434
fn compile_address(
394435
lines: &ScriptLineProvider,
395436
line: &mut ScriptCharProvider,
@@ -1625,6 +1666,28 @@ mod tests {
16251666
assert!(matches!(cmd.borrow().addr2, Some(Address::RelLine(3))));
16261667
}
16271668

1669+
#[test]
1670+
fn test_compile_step_match_address() {
1671+
let (lines, mut chars) = make_providers("0~2");
1672+
let mut cmd = Rc::new(RefCell::new(Command::default()));
1673+
let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap();
1674+
1675+
assert_eq!(n_addr, 2);
1676+
assert!(matches!(cmd.borrow().addr1, Some(Address::Line(0))));
1677+
assert!(matches!(cmd.borrow().addr2, Some(Address::StepMatch(2))));
1678+
}
1679+
1680+
#[test]
1681+
fn test_compile_step_end_address() {
1682+
let (lines, mut chars) = make_providers("1,~10");
1683+
let mut cmd = Rc::new(RefCell::new(Command::default()));
1684+
let n_addr = compile_address_range(&lines, &mut chars, &mut cmd, &ctx()).unwrap();
1685+
1686+
assert_eq!(n_addr, 2);
1687+
assert!(matches!(cmd.borrow().addr1, Some(Address::Line(1))));
1688+
assert!(matches!(cmd.borrow().addr2, Some(Address::StepEnd(10))));
1689+
}
1690+
16281691
#[test]
16291692
fn test_compile_last_address() {
16301693
let (lines, mut chars) = make_providers("$");

src/sed/processor.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@ fn applies(
8686
// Range is already latched active.
8787
match addr2 {
8888
Address::RelLine(n) => {
89-
if linenum - start <= *n {
90-
Ok(true)
91-
} else {
89+
if linenum - start > *n {
9290
command.start_line = None;
9391
Ok(false)
92+
} else {
93+
Ok(true)
9494
}
9595
}
9696
Address::Line(n) => {
@@ -102,14 +102,20 @@ fn applies(
102102
Ok(true)
103103
}
104104
}
105+
Address::StepMatch(step) => Ok((linenum - start).is_multiple_of(*step)),
106+
Address::StepEnd(step) => {
107+
// Inclusive end on multiple of step
108+
if linenum.is_multiple_of(*step) {
109+
command.start_line = None;
110+
}
111+
Ok(true)
112+
}
105113
_ => {
106114
if match_address(addr2, reader, pattern, context, &command.location)? {
107115
command.start_line = None;
108116
context.last_address = true;
109-
Ok(true)
110-
} else {
111-
Ok(true)
112117
}
118+
Ok(true)
113119
}
114120
}
115121
} else if let Some(addr1) = &command.addr1 {

tests/by-util/test_sed.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,10 @@ check_output!(
269269
);
270270
check_output!(addr_empty_re_reuse, ["-n", "/_2/,//p", LINES1, LINES2]);
271271
check_output!(addr_simple_negation, ["-e", r"4,12!s/^/^/", LINES1]);
272+
check_output!(addr_range_even, ["-n", "0~2p", LINES1]);
273+
check_output!(addr_range_odd, ["-n", "1~2p", LINES1]);
274+
check_output!(addr_range_step_zero, ["-n", "10~0p", LINES1]);
275+
check_output!(addr_range_end_multiple, ["-n", "/l1_2/,~10p", LINES1]);
272276

273277
////////////////////////////////////////////////////////////
274278
// Substitution: s
@@ -1068,7 +1072,7 @@ fn test_addr0_non_posix() {
10681072
.args(&["--posix", "0,/foo/p"])
10691073
.fails()
10701074
.code_is(1)
1071-
.stderr_is("sed: <script argument 1>:1:2: error: address 0 invalid in POSIX mode\n");
1075+
.stderr_is("sed: <script argument 1>:1:2: error: address 0 is invalid in POSIX mode\n");
10721076
}
10731077

10741078
#[test]
@@ -1086,7 +1090,25 @@ fn test_addr0_second_re_only() {
10861090
.args(&["0,4p"])
10871091
.fails()
10881092
.code_is(1)
1089-
.stderr_is("sed: <script argument 1>:1:4: error: address 0 can only be used with a regular expression second address\n");
1093+
.stderr_is("sed: <script argument 1>:1:4: error: address 0 can only be used with a regular expression or ~step\n");
1094+
}
1095+
1096+
#[test]
1097+
fn test_step_match_non_posix() {
1098+
new_ucmd!()
1099+
.args(&["--posix", "3~2p"])
1100+
.fails()
1101+
.code_is(1)
1102+
.stderr_is("sed: <script argument 1>:1:3: error: ~step is invalid in POSIX mode\n");
1103+
}
1104+
1105+
#[test]
1106+
fn test_step_end_non_posix() {
1107+
new_ucmd!()
1108+
.args(&["--posix", "3,~2p"])
1109+
.fails()
1110+
.code_is(1)
1111+
.stderr_is("sed: <script argument 1>:1:4: error: ~step is invalid in POSIX mode\n");
10901112
}
10911113

10921114
// The following test diverse ways in which regexes are matched.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
l1_2
2+
l1_3
3+
l1_4
4+
l1_5
5+
l1_6
6+
l1_7
7+
l1_8
8+
l1_9
9+
l1_10
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
l1_2
2+
l1_4
3+
l1_6
4+
l1_8
5+
l1_10
6+
l1_12
7+
l1_14
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
l1_1
2+
l1_3
3+
l1_5
4+
l1_7
5+
l1_9
6+
l1_11
7+
l1_13
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
l1_10

0 commit comments

Comments
 (0)