Skip to content

Commit 6594554

Browse files
sunshinecogitster
authored andcommitted
chainlint.pl: add POSIX shell parser
Continue fleshing out chainlint.pl by adding a general purpose recursive descent parser for the POSIX shell command language. Although never invoked directly, upcoming parser subclasses will extend its functionality for specific purposes, such as plucking test definitions from input scripts and applying domain-specific knowledge to perform test validation. Signed-off-by: Eric Sunshine <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 7d48047 commit 6594554

File tree

1 file changed

+243
-0
lines changed

1 file changed

+243
-0
lines changed

t/chainlint.pl

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,249 @@ sub scan_token {
198198
return length($token) ? $token : undef;
199199
}
200200

201+
# ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
202+
# is a recursive descent parser very roughly modeled after section 2.10 "Shell
203+
# Grammar" of POSIX chapter 2 "Shell Command Language".
204+
package ShellParser;
205+
206+
sub new {
207+
my ($class, $s) = @_;
208+
my $self = bless {
209+
buff => [],
210+
stop => [],
211+
output => []
212+
} => $class;
213+
$self->{lexer} = Lexer->new($self, $s);
214+
return $self;
215+
}
216+
217+
sub next_token {
218+
my $self = shift @_;
219+
return pop(@{$self->{buff}}) if @{$self->{buff}};
220+
return $self->{lexer}->scan_token();
221+
}
222+
223+
sub untoken {
224+
my $self = shift @_;
225+
push(@{$self->{buff}}, @_);
226+
}
227+
228+
sub peek {
229+
my $self = shift @_;
230+
my $token = $self->next_token();
231+
return undef unless defined($token);
232+
$self->untoken($token);
233+
return $token;
234+
}
235+
236+
sub stop_at {
237+
my ($self, $token) = @_;
238+
return 1 unless defined($token);
239+
my $stop = ${$self->{stop}}[-1] if @{$self->{stop}};
240+
return defined($stop) && $token =~ $stop;
241+
}
242+
243+
sub expect {
244+
my ($self, $expect) = @_;
245+
my $token = $self->next_token();
246+
return $token if defined($token) && $token eq $expect;
247+
push(@{$self->{output}}, "?!ERR?! expected '$expect' but found '" . (defined($token) ? $token : "<end-of-input>") . "'\n");
248+
$self->untoken($token) if defined($token);
249+
return ();
250+
}
251+
252+
sub optional_newlines {
253+
my $self = shift @_;
254+
my @tokens;
255+
while (my $token = $self->peek()) {
256+
last unless $token eq "\n";
257+
push(@tokens, $self->next_token());
258+
}
259+
return @tokens;
260+
}
261+
262+
sub parse_group {
263+
my $self = shift @_;
264+
return ($self->parse(qr/^}$/),
265+
$self->expect('}'));
266+
}
267+
268+
sub parse_subshell {
269+
my $self = shift @_;
270+
return ($self->parse(qr/^\)$/),
271+
$self->expect(')'));
272+
}
273+
274+
sub parse_case_pattern {
275+
my $self = shift @_;
276+
my @tokens;
277+
while (defined(my $token = $self->next_token())) {
278+
push(@tokens, $token);
279+
last if $token eq ')';
280+
}
281+
return @tokens;
282+
}
283+
284+
sub parse_case {
285+
my $self = shift @_;
286+
my @tokens;
287+
push(@tokens,
288+
$self->next_token(), # subject
289+
$self->optional_newlines(),
290+
$self->expect('in'),
291+
$self->optional_newlines());
292+
while (1) {
293+
my $token = $self->peek();
294+
last unless defined($token) && $token ne 'esac';
295+
push(@tokens,
296+
$self->parse_case_pattern(),
297+
$self->optional_newlines(),
298+
$self->parse(qr/^(?:;;|esac)$/)); # item body
299+
$token = $self->peek();
300+
last unless defined($token) && $token ne 'esac';
301+
push(@tokens,
302+
$self->expect(';;'),
303+
$self->optional_newlines());
304+
}
305+
push(@tokens, $self->expect('esac'));
306+
return @tokens;
307+
}
308+
309+
sub parse_for {
310+
my $self = shift @_;
311+
my @tokens;
312+
push(@tokens,
313+
$self->next_token(), # variable
314+
$self->optional_newlines());
315+
my $token = $self->peek();
316+
if (defined($token) && $token eq 'in') {
317+
push(@tokens,
318+
$self->expect('in'),
319+
$self->optional_newlines());
320+
}
321+
push(@tokens,
322+
$self->parse(qr/^do$/), # items
323+
$self->expect('do'),
324+
$self->optional_newlines(),
325+
$self->parse_loop_body(),
326+
$self->expect('done'));
327+
return @tokens;
328+
}
329+
330+
sub parse_if {
331+
my $self = shift @_;
332+
my @tokens;
333+
while (1) {
334+
push(@tokens,
335+
$self->parse(qr/^then$/), # if/elif condition
336+
$self->expect('then'),
337+
$self->optional_newlines(),
338+
$self->parse(qr/^(?:elif|else|fi)$/)); # if/elif body
339+
my $token = $self->peek();
340+
last unless defined($token) && $token eq 'elif';
341+
push(@tokens, $self->expect('elif'));
342+
}
343+
my $token = $self->peek();
344+
if (defined($token) && $token eq 'else') {
345+
push(@tokens,
346+
$self->expect('else'),
347+
$self->optional_newlines(),
348+
$self->parse(qr/^fi$/)); # else body
349+
}
350+
push(@tokens, $self->expect('fi'));
351+
return @tokens;
352+
}
353+
354+
sub parse_loop_body {
355+
my $self = shift @_;
356+
return $self->parse(qr/^done$/);
357+
}
358+
359+
sub parse_loop {
360+
my $self = shift @_;
361+
return ($self->parse(qr/^do$/), # condition
362+
$self->expect('do'),
363+
$self->optional_newlines(),
364+
$self->parse_loop_body(),
365+
$self->expect('done'));
366+
}
367+
368+
sub parse_func {
369+
my $self = shift @_;
370+
return ($self->expect('('),
371+
$self->expect(')'),
372+
$self->optional_newlines(),
373+
$self->parse_cmd()); # body
374+
}
375+
376+
sub parse_bash_array_assignment {
377+
my $self = shift @_;
378+
my @tokens = $self->expect('(');
379+
while (defined(my $token = $self->next_token())) {
380+
push(@tokens, $token);
381+
last if $token eq ')';
382+
}
383+
return @tokens;
384+
}
385+
386+
my %compound = (
387+
'{' => \&parse_group,
388+
'(' => \&parse_subshell,
389+
'case' => \&parse_case,
390+
'for' => \&parse_for,
391+
'if' => \&parse_if,
392+
'until' => \&parse_loop,
393+
'while' => \&parse_loop);
394+
395+
sub parse_cmd {
396+
my $self = shift @_;
397+
my $cmd = $self->next_token();
398+
return () unless defined($cmd);
399+
return $cmd if $cmd eq "\n";
400+
401+
my $token;
402+
my @tokens = $cmd;
403+
if ($cmd eq '!') {
404+
push(@tokens, $self->parse_cmd());
405+
return @tokens;
406+
} elsif (my $f = $compound{$cmd}) {
407+
push(@tokens, $self->$f());
408+
} elsif (defined($token = $self->peek()) && $token eq '(') {
409+
if ($cmd !~ /\w=$/) {
410+
push(@tokens, $self->parse_func());
411+
return @tokens;
412+
}
413+
$tokens[-1] .= join(' ', $self->parse_bash_array_assignment());
414+
}
415+
416+
while (defined(my $token = $self->next_token())) {
417+
$self->untoken($token), last if $self->stop_at($token);
418+
push(@tokens, $token);
419+
last if $token =~ /^(?:[;&\n|]|&&|\|\|)$/;
420+
}
421+
push(@tokens, $self->next_token()) if $tokens[-1] ne "\n" && defined($token = $self->peek()) && $token eq "\n";
422+
return @tokens;
423+
}
424+
425+
sub accumulate {
426+
my ($self, $tokens, $cmd) = @_;
427+
push(@$tokens, @$cmd);
428+
}
429+
430+
sub parse {
431+
my ($self, $stop) = @_;
432+
push(@{$self->{stop}}, $stop);
433+
goto DONE if $self->stop_at($self->peek());
434+
my @tokens;
435+
while (my @cmd = $self->parse_cmd()) {
436+
$self->accumulate(\@tokens, \@cmd);
437+
last if $self->stop_at($self->peek());
438+
}
439+
DONE:
440+
pop(@{$self->{stop}});
441+
return @tokens;
442+
}
443+
201444
package ScriptParser;
202445

203446
sub new {

0 commit comments

Comments
 (0)