Skip to content

Commit 613fb30

Browse files
committed
Merge branch 'es/chainlint-output'
Teach chainlint.pl to annotate the original test definition instead of the token stream. * es/chainlint-output: chainlint: annotate original test definition rather than token stream chainlint: latch start/end position of each token chainlint: tighten accuracy when consuming input stream chainlint: add explanatory comments
2 parents 58d80df + 73c768d commit 613fb30

22 files changed

+206
-66
lines changed

t/chainlint.pl

Lines changed: 67 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ sub scan_heredoc_tag {
7575
my $self = shift @_;
7676
${$self->{buff}} =~ /\G(-?)/gc;
7777
my $indented = $1;
78-
my $tag = $self->scan_token();
78+
my $token = $self->scan_token();
79+
return "<<$indented" unless $token;
80+
my $tag = $token->[0];
7981
$tag =~ s/['"\\]//g;
8082
push(@{$self->{heretags}}, $indented ? "\t$tag" : "$tag");
8183
return "<<$indented$tag";
@@ -149,7 +151,7 @@ sub scan_dollar {
149151
my $self = shift @_;
150152
my $b = $self->{buff};
151153
return $self->scan_balanced('(', ')') if $$b =~ /\G\((?=\()/gc; # $((...))
152-
return '(' . join(' ', $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
154+
return '(' . join(' ', map {$_->[0]} $self->scan_subst()) . ')' if $$b =~ /\G\(/gc; # $(...)
153155
return $self->scan_balanced('{', '}') if $$b =~ /\G\{/gc; # ${...}
154156
return $1 if $$b =~ /\G(\w+)/gc; # $var
155157
return $1 if $$b =~ /\G([@*#?$!0-9-])/gc; # $*, $1, $$, etc.
@@ -170,16 +172,18 @@ sub scan_token {
170172
my $self = shift @_;
171173
my $b = $self->{buff};
172174
my $token = '';
175+
my $start;
173176
RESTART:
174177
$$b =~ /\G[ \t]+/gc; # skip whitespace (but not newline)
175-
return "\n" if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
178+
$start = pos($$b) || 0;
179+
return ["\n", $start, pos($$b)] if $$b =~ /\G#[^\n]*(?:\n|\z)/gc; # comment
176180
while (1) {
177181
# slurp up non-special characters
178182
$token .= $1 if $$b =~ /\G([^\\;&|<>(){}'"\$\s]+)/gc;
179183
# handle special characters
180184
last unless $$b =~ /\G(.)/sgc;
181185
my $c = $1;
182-
last if $c =~ /^[ \t]$/; # whitespace ends token
186+
pos($$b)--, last if $c =~ /^[ \t]$/; # whitespace ends token
183187
pos($$b)--, last if length($token) && $c =~ /^[;&|<>(){}\n]$/;
184188
$token .= $self->scan_sqstring(), next if $c eq "'";
185189
$token .= $self->scan_dqstring(), next if $c eq '"';
@@ -197,7 +201,7 @@ sub scan_token {
197201
}
198202
die("internal error scanning character '$c'\n");
199203
}
200-
return length($token) ? $token : undef;
204+
return length($token) ? [$token, $start, pos($$b)] : undef;
201205
}
202206

203207
# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
@@ -239,14 +243,14 @@ sub stop_at {
239243
my ($self, $token) = @_;
240244
return 1 unless defined($token);
241245
my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
242-
return defined($stop) && $token =~ $stop;
246+
return defined($stop) && $token->[0] =~ $stop;
243247
}
244248

245249
sub expect {
246250
my ($self, $expect) = @_;
247251
my $token = $self->next_token();
248-
return $token if defined($token) && $token eq $expect;
249-
push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n");
252+
return $token if defined($token) && $token->[0] eq $expect;
253+
push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token->[0] : "<end-of-input>") . "'\n");
250254
$self->untoken($token) if defined($token);
251255
return ();
252256
}
@@ -255,7 +259,7 @@ sub optional_newlines {
255259
my $self = shift @_;
256260
my @tokens;
257261
while (my $token = $self->peek()) {
258-
last unless $token eq "\n";
262+
last unless $token->[0] eq "\n";
259263
push(@tokens, $self->next_token());
260264
}
261265
return @tokens;
@@ -278,7 +282,7 @@ sub parse_case_pattern {
278282
my @tokens;
279283
while (defined(my $token = $self->next_token())) {
280284
push(@tokens, $token);
281-
last if $token eq ')';
285+
last if $token->[0] eq ')';
282286
}
283287
return @tokens;
284288
}
@@ -293,13 +297,13 @@ sub parse_case {
293297
$self->optional_newlines());
294298
while (1) {
295299
my $token = $self->peek();
296-
last unless defined($token) && $token ne 'esac';
300+
last unless defined($token) && $token->[0] ne 'esac';
297301
push(@tokens,
298302
$self->parse_case_pattern(),
299303
$self->optional_newlines(),
300304
$self->parse(qr/^(?:;;|esac)$/)); # item body
301305
$token = $self->peek();
302-
last unless defined($token) && $token ne 'esac';
306+
last unless defined($token) && $token->[0] ne 'esac';
303307
push(@tokens,
304308
$self->expect(';;'),
305309
$self->optional_newlines());
@@ -315,7 +319,7 @@ sub parse_for {
315319
$self->next_token(), # variable
316320
$self->optional_newlines());
317321
my $token = $self->peek();
318-
if (defined($token) && $token eq 'in') {
322+
if (defined($token) && $token->[0] eq 'in') {
319323
push(@tokens,
320324
$self->expect('in'),
321325
$self->optional_newlines());
@@ -339,11 +343,11 @@ sub parse_if {
339343
$self->optional_newlines(),
340344
$self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
341345
my $token = $self->peek();
342-
last unless defined($token) && $token eq 'elif';
346+
last unless defined($token) && $token->[0] eq 'elif';
343347
push(@tokens, $self->expect('elif'));
344348
}
345349
my $token = $self->peek();
346-
if (defined($token) && $token eq 'else') {
350+
if (defined($token) && $token->[0] eq 'else') {
347351
push(@tokens,
348352
$self->expect('else'),
349353
$self->optional_newlines(),
@@ -380,7 +384,7 @@ sub parse_bash_array_assignment {
380384
my @tokens = $self->expect('(');
381385
while (defined(my $token = $self->next_token())) {
382386
push(@tokens, $token);
383-
last if $token eq ')';
387+
last if $token->[0] eq ')';
384388
}
385389
return @tokens;
386390
}
@@ -398,29 +402,31 @@ sub parse_cmd {
398402
my $self = shift @_;
399403
my $cmd = $self->next_token();
400404
return () unless defined($cmd);
401-
return $cmd if $cmd eq "\n";
405+
return $cmd if $cmd->[0] eq "\n";
402406

403407
my $token;
404408
my @tokens = $cmd;
405-
if ($cmd eq '!') {
409+
if ($cmd->[0] eq '!') {
406410
push(@tokens, $self->parse_cmd());
407411
return @tokens;
408-
} elsif (my $f = $compound{$cmd}) {
412+
} elsif (my $f = $compound{$cmd->[0]}) {
409413
push(@tokens, $self->$f());
410-
} elsif (defined($token = $self->peek()) && $token eq '(') {
411-
if ($cmd !~ /\w=$/) {
414+
} elsif (defined($token = $self->peek()) && $token->[0] eq '(') {
415+
if ($cmd->[0] !~ /\w=$/) {
412416
push(@tokens, $self->parse_func());
413417
return @tokens;
414418
}
415-
$tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
419+
my @array = $self->parse_bash_array_assignment();
420+
$tokens[-1]->[0] .= join(' ', map {$_->[0]} @array);
421+
$tokens[-1]->[2] = $array[$#array][2] if @array;
416422
}
417423

418424
while (defined(my $token = $self->next_token())) {
419425
$self->untoken($token), last if $self->stop_at($token);
420426
push(@tokens, $token);
421-
last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
427+
last if $token->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
422428
}
423-
push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n";
429+
push(@tokens, $self->next_token()) if $tokens[-1]->[0] ne "\n" && defined($token = $self->peek()) && $token->[0] eq "\n";
424430
return @tokens;
425431
}
426432

@@ -453,11 +459,18 @@ package TestParser;
453459

454460
use base 'ShellParser';
455461

462+
sub new {
463+
my $class = shift @_;
464+
my $self = $class->SUPER::new(@_);
465+
$self->{problems} = [];
466+
return $self;
467+
}
468+
456469
sub find_non_nl {
457470
my $tokens = shift @_;
458471
my $n = shift @_;
459472
$n = $#$tokens if !defined($n);
460-
$n-- while $n >= 0 && $$tokens[$n] eq "\n";
473+
$n-- while $n >= 0 && $$tokens[$n]->[0] eq "\n";
461474
return $n;
462475
}
463476

@@ -467,7 +480,7 @@ sub ends_with {
467480
for my $needle (reverse(@$needles)) {
468481
return undef if $n < 0;
469482
$n = find_non_nl($tokens, $n), next if $needle eq "\n";
470-
return undef if $$tokens[$n] !~ $needle;
483+
return undef if $$tokens[$n]->[0] !~ $needle;
471484
$n--;
472485
}
473486
return 1;
@@ -486,13 +499,13 @@ sub parse_loop_body {
486499
my $self = shift @_;
487500
my @tokens = $self->SUPER::parse_loop_body(@_);
488501
# did loop signal failure via "|| return" or "|| exit"?
489-
return @tokens if !@tokens || grep(/^(?:return|exit|\$\?)$/, @tokens);
502+
return @tokens if !@tokens || grep {$_->[0] =~ /^(?:return|exit|\$\?)$/} @tokens;
490503
# did loop upstream of a pipe signal failure via "|| echo 'impossible
491504
# text'" as the final command in the loop body?
492505
return @tokens if ends_with(\@tokens, [qr/^\|\|$/, "\n", qr/^echo$/, qr/^.+$/]);
493506
# flag missing "return/exit" handling explicit failure in loop body
494507
my $n = find_non_nl(\@tokens);
495-
splice(@tokens, $n + 1, 0, '?!LOOP?!');
508+
push(@{$self->{problems}}, ['LOOP', $tokens[$n]]);
496509
return @tokens;
497510
}
498511

@@ -505,29 +518,34 @@ sub parse_loop_body {
505518

506519
sub accumulate {
507520
my ($self, $tokens, $cmd) = @_;
521+
my $problems = $self->{problems};
522+
523+
# no previous command to check for missing "&&"
508524
goto DONE unless @$tokens;
509-
goto DONE if @$cmd == 1 && $$cmd[0] eq "\n";
525+
526+
# new command is empty line; can't yet check if previous is missing "&&"
527+
goto DONE if @$cmd == 1 && $$cmd[0]->[0] eq "\n";
510528

511529
# did previous command end with "&&", "|", "|| return" or similar?
512530
goto DONE if match_ending($tokens, \@safe_endings);
513531

514532
# if this command handles "$?" specially, then okay for previous
515533
# command to be missing "&&"
516534
for my $token (@$cmd) {
517-
goto DONE if $token =~ /\$\?/;
535+
goto DONE if $token->[0] =~ /\$\?/;
518536
}
519537

520538
# if this command is "false", "return 1", or "exit 1" (which signal
521539
# failure explicitly), then okay for all preceding commands to be
522540
# missing "&&"
523-
if ($$cmd[0] =~ /^(?:false|return|exit)$/) {
524-
@$tokens = grep(!/^\?!AMP\?!$/, @$tokens);
541+
if ($$cmd[0]->[0] =~ /^(?:false|return|exit)$/) {
542+
@$problems = grep {$_->[0] ne 'AMP'} @$problems;
525543
goto DONE;
526544
}
527545

528546
# flag missing "&&" at end of previous command
529547
my $n = find_non_nl($tokens);
530-
splice(@$tokens, $n + 1, 0, '?!AMP?!') unless $n < 0;
548+
push(@$problems, ['AMP', $tokens->[$n]]) unless $n < 0;
531549

532550
DONE:
533551
$self->SUPER::accumulate($tokens, $cmd);
@@ -553,7 +571,7 @@ sub new {
553571
# composition of multiple strings and non-string character runs; for instance,
554572
# `"test body"` unwraps to `test body`; `word"a b"42'c d'` to `worda b42c d`
555573
sub unwrap {
556-
my $token = @_ ? shift @_ : $_;
574+
my $token = (@_ ? shift @_ : $_)->[0];
557575
# simple case: 'sqstring' or "dqstring"
558576
return $token if $token =~ s/^'([^']*)'$/$1/;
559577
return $token if $token =~ s/^"([^"]*)"$/$1/;
@@ -584,12 +602,21 @@ sub check_test {
584602
$self->{ntests}++;
585603
my $parser = TestParser->new(\$body);
586604
my @tokens = $parser->parse();
587-
return unless $emit_all || grep(/\?![^?]+\?!/, @tokens);
605+
my $problems = $parser->{problems};
606+
return unless $emit_all || @$problems;
588607
my $c = main::fd_colors(1);
589-
my $checked = join(' ', @tokens);
608+
my $start = 0;
609+
my $checked = '';
610+
for (sort {$a->[1]->[2] <=> $b->[1]->[2]} @$problems) {
611+
my ($label, $token) = @$_;
612+
my $pos = $token->[2];
613+
$checked .= substr($body, $start, $pos - $start) . " ?!$label?! ";
614+
$start = $pos;
615+
}
616+
$checked .= substr($body, $start);
590617
$checked =~ s/^\n//;
591-
$checked =~ s/^ //mg;
592-
$checked =~ s/ $//mg;
618+
$checked =~ s/(\s) \?!/$1?!/mg;
619+
$checked =~ s/\?! (\s)/?!$1/mg;
593620
$checked =~ s/(\?![^?]+\?!)/$c->{rev}$c->{red}$1$c->{reset}/mg;
594621
$checked .= "\n" unless $checked =~ /\n$/;
595622
push(@{$self->{output}}, "$c->{blue}# chainlint: $title$c->{reset}\n$checked");
@@ -598,9 +625,9 @@ sub check_test {
598625
sub parse_cmd {
599626
my $self = shift @_;
600627
my @tokens = $self->SUPER::parse_cmd();
601-
return @tokens unless @tokens && $tokens[0] =~ /^test_expect_(?:success|failure)$/;
628+
return @tokens unless @tokens && $tokens[0]->[0] =~ /^test_expect_(?:success|failure)$/;
602629
my $n = $#tokens;
603-
$n-- while $n >= 0 && $tokens[$n] =~ /^(?:[;&\n|]|&&|\|\|)$/;
630+
$n-- while $n >= 0 && $tokens[$n]->[0] =~ /^(?:[;&\n|]|&&|\|\|)$/;
604631
$self->check_test($tokens[1], $tokens[2]) if $n == 2; # title body
605632
$self->check_test($tokens[2], $tokens[3]) if $n > 2; # prereq title body
606633
return @tokens;

t/chainlint/block-comment.expect

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
(
22
{
3+
# show a
34
echo a &&
5+
# show b
46
echo b
57
}
68
)

t/chainlint/case-comment.expect

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
(
22
case "$x" in
3+
# found foo
34
x) foo ;;
5+
# found other
46
*)
7+
# treat it as bar
58
bar
69
;;
710
esac

t/chainlint/close-subshell.expect

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
) | wuzzle &&
1616
(
1717
bop
18-
) | fazz fozz &&
18+
) | fazz \
19+
fozz &&
1920
(
2021
bup
2122
) |

t/chainlint/comment.expect

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
(
2+
# comment 1
23
nothing &&
4+
# comment 2
35
something
6+
# comment 3
7+
# comment 4
48
)

t/chainlint/double-here-doc.expect

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,12 @@
1-
run_sub_test_lib_test_err run-inv-range-start "--run invalid range start" --run="a-5" <<-EOF &&
2-
check_sub_test_lib_test_err run-inv-range-start <<-EOF_OUT 3 <<-EOF_ERR
1+
run_sub_test_lib_test_err run-inv-range-start \
2+
"--run invalid range start" \
3+
--run="a-5" <<-\EOF &&
4+
test_expect_success "passing test #1" "true"
5+
test_done
6+
EOF
7+
check_sub_test_lib_test_err run-inv-range-start \
8+
<<-\EOF_OUT 3<<-EOF_ERR
9+
> FATAL: Unexpected exit with code 1
10+
EOF_OUT
11+
> error: --run: invalid non-numeric in range start: ${SQ}a-5${SQ}
12+
EOF_ERR

t/chainlint/empty-here-doc.expect

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
git ls-tree $tree path > current &&
2-
cat > expected <<EOF &&
2+
cat > expected <<\EOF &&
3+
EOF
34
test_output

t/chainlint/for-loop.expect

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
for i in a b c
33
do
44
echo $i ?!AMP?!
5-
cat <<-EOF ?!LOOP?!
5+
cat <<-\EOF ?!LOOP?!
6+
bar
7+
EOF
68
done ?!AMP?!
79
for i in a b c; do
810
echo $i &&
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
(
2-
cat <<-INPUT)
2+
cat <<-\INPUT)
3+
fizz
4+
INPUT
Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1-
cat > expect <<-EOF &&
1+
cat >expect <<- EOF &&
2+
header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
3+
num_commits: $1
4+
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
5+
EOF
26

3-
cat > expect <<-EOF ?!AMP?!
7+
cat >expect << -EOF ?!AMP?!
8+
this is not indented
9+
-EOF
410

511
cleanup

0 commit comments

Comments
 (0)