@@ -198,6 +198,249 @@ sub scan_token {
198198 return length ($token ) ? $token : undef ;
199199}
200200
201+ # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
202+ # is a recursive descent parser very roughly modeled after section 2.10 "Shell
203+ # Grammar" of POSIX chapter 2 "Shell Command Language".
204+ package ShellParser ;
205+
206+ sub new {
207+ my ($class , $s ) = @_ ;
208+ my $self = bless {
209+ buff => [],
210+ stop => [],
211+ output => []
212+ } => $class ;
213+ $self -> {lexer } = Lexer-> new($self , $s );
214+ return $self ;
215+ }
216+
217+ sub next_token {
218+ my $self = shift @_ ;
219+ return pop (@{$self -> {buff }}) if @{$self -> {buff }};
220+ return $self -> {lexer }-> scan_token();
221+ }
222+
223+ sub untoken {
224+ my $self = shift @_ ;
225+ push (@{$self -> {buff }}, @_ );
226+ }
227+
228+ sub peek {
229+ my $self = shift @_ ;
230+ my $token = $self -> next_token();
231+ return undef unless defined ($token );
232+ $self -> untoken($token );
233+ return $token ;
234+ }
235+
236+ sub stop_at {
237+ my ($self , $token ) = @_ ;
238+ return 1 unless defined ($token );
239+ my $stop = ${$self -> {stop }}[-1] if @{$self -> {stop }};
240+ return defined ($stop ) && $token =~ $stop ;
241+ }
242+
243+ sub expect {
244+ my ($self , $expect ) = @_ ;
245+ my $token = $self -> next_token();
246+ return $token if defined ($token ) && $token eq $expect ;
247+ push (@{$self -> {output }}, " ?!ERR?! expected '$expect ' but found '" . (defined ($token ) ? $token : " <end-of-input>" ) . " '\n " );
248+ $self -> untoken($token ) if defined ($token );
249+ return ();
250+ }
251+
252+ sub optional_newlines {
253+ my $self = shift @_ ;
254+ my @tokens ;
255+ while (my $token = $self -> peek()) {
256+ last unless $token eq " \n " ;
257+ push (@tokens , $self -> next_token());
258+ }
259+ return @tokens ;
260+ }
261+
262+ sub parse_group {
263+ my $self = shift @_ ;
264+ return ($self -> parse(qr / ^}$ / ),
265+ $self -> expect(' }' ));
266+ }
267+
268+ sub parse_subshell {
269+ my $self = shift @_ ;
270+ return ($self -> parse(qr / ^\) $ / ),
271+ $self -> expect(' )' ));
272+ }
273+
274+ sub parse_case_pattern {
275+ my $self = shift @_ ;
276+ my @tokens ;
277+ while (defined (my $token = $self -> next_token())) {
278+ push (@tokens , $token );
279+ last if $token eq ' )' ;
280+ }
281+ return @tokens ;
282+ }
283+
284+ sub parse_case {
285+ my $self = shift @_ ;
286+ my @tokens ;
287+ push (@tokens ,
288+ $self -> next_token(), # subject
289+ $self -> optional_newlines(),
290+ $self -> expect(' in' ),
291+ $self -> optional_newlines());
292+ while (1) {
293+ my $token = $self -> peek();
294+ last unless defined ($token ) && $token ne ' esac' ;
295+ push (@tokens ,
296+ $self -> parse_case_pattern(),
297+ $self -> optional_newlines(),
298+ $self -> parse(qr / ^(?:;;|esac)$ / )); # item body
299+ $token = $self -> peek();
300+ last unless defined ($token ) && $token ne ' esac' ;
301+ push (@tokens ,
302+ $self -> expect(' ;;' ),
303+ $self -> optional_newlines());
304+ }
305+ push (@tokens , $self -> expect(' esac' ));
306+ return @tokens ;
307+ }
308+
309+ sub parse_for {
310+ my $self = shift @_ ;
311+ my @tokens ;
312+ push (@tokens ,
313+ $self -> next_token(), # variable
314+ $self -> optional_newlines());
315+ my $token = $self -> peek();
316+ if (defined ($token ) && $token eq ' in' ) {
317+ push (@tokens ,
318+ $self -> expect(' in' ),
319+ $self -> optional_newlines());
320+ }
321+ push (@tokens ,
322+ $self -> parse(qr / ^do$ / ), # items
323+ $self -> expect(' do' ),
324+ $self -> optional_newlines(),
325+ $self -> parse_loop_body(),
326+ $self -> expect(' done' ));
327+ return @tokens ;
328+ }
329+
330+ sub parse_if {
331+ my $self = shift @_ ;
332+ my @tokens ;
333+ while (1) {
334+ push (@tokens ,
335+ $self -> parse(qr / ^then$ / ), # if/elif condition
336+ $self -> expect(' then' ),
337+ $self -> optional_newlines(),
338+ $self -> parse(qr / ^(?:elif|else|fi)$ / )); # if/elif body
339+ my $token = $self -> peek();
340+ last unless defined ($token ) && $token eq ' elif' ;
341+ push (@tokens , $self -> expect(' elif' ));
342+ }
343+ my $token = $self -> peek();
344+ if (defined ($token ) && $token eq ' else' ) {
345+ push (@tokens ,
346+ $self -> expect(' else' ),
347+ $self -> optional_newlines(),
348+ $self -> parse(qr / ^fi$ / )); # else body
349+ }
350+ push (@tokens , $self -> expect(' fi' ));
351+ return @tokens ;
352+ }
353+
354+ sub parse_loop_body {
355+ my $self = shift @_ ;
356+ return $self -> parse(qr / ^done$ / );
357+ }
358+
359+ sub parse_loop {
360+ my $self = shift @_ ;
361+ return ($self -> parse(qr / ^do$ / ), # condition
362+ $self -> expect(' do' ),
363+ $self -> optional_newlines(),
364+ $self -> parse_loop_body(),
365+ $self -> expect(' done' ));
366+ }
367+
368+ sub parse_func {
369+ my $self = shift @_ ;
370+ return ($self -> expect(' (' ),
371+ $self -> expect(' )' ),
372+ $self -> optional_newlines(),
373+ $self -> parse_cmd()); # body
374+ }
375+
376+ sub parse_bash_array_assignment {
377+ my $self = shift @_ ;
378+ my @tokens = $self -> expect(' (' );
379+ while (defined (my $token = $self -> next_token())) {
380+ push (@tokens , $token );
381+ last if $token eq ' )' ;
382+ }
383+ return @tokens ;
384+ }
385+
386+ my %compound = (
387+ ' {' => \&parse_group,
388+ ' (' => \&parse_subshell,
389+ ' case' => \&parse_case,
390+ ' for' => \&parse_for,
391+ ' if' => \&parse_if,
392+ ' until' => \&parse_loop,
393+ ' while' => \&parse_loop);
394+
395+ sub parse_cmd {
396+ my $self = shift @_ ;
397+ my $cmd = $self -> next_token();
398+ return () unless defined ($cmd );
399+ return $cmd if $cmd eq " \n " ;
400+
401+ my $token ;
402+ my @tokens = $cmd ;
403+ if ($cmd eq ' !' ) {
404+ push (@tokens , $self -> parse_cmd());
405+ return @tokens ;
406+ } elsif (my $f = $compound {$cmd }) {
407+ push (@tokens , $self -> $f ());
408+ } elsif (defined ($token = $self -> peek()) && $token eq ' (' ) {
409+ if ($cmd !~ / \w =$ / ) {
410+ push (@tokens , $self -> parse_func());
411+ return @tokens ;
412+ }
413+ $tokens [-1] .= join (' ' , $self -> parse_bash_array_assignment());
414+ }
415+
416+ while (defined (my $token = $self -> next_token())) {
417+ $self -> untoken($token ), last if $self -> stop_at($token );
418+ push (@tokens , $token );
419+ last if $token =~ / ^(?:[;&\n |]|&&|\|\| )$ / ;
420+ }
421+ push (@tokens , $self -> next_token()) if $tokens [-1] ne " \n " && defined ($token = $self -> peek()) && $token eq " \n " ;
422+ return @tokens ;
423+ }
424+
425+ sub accumulate {
426+ my ($self , $tokens , $cmd ) = @_ ;
427+ push (@$tokens , @$cmd );
428+ }
429+
430+ sub parse {
431+ my ($self , $stop ) = @_ ;
432+ push (@{$self -> {stop }}, $stop );
433+ goto DONE if $self -> stop_at($self -> peek());
434+ my @tokens ;
435+ while (my @cmd = $self -> parse_cmd()) {
436+ $self -> accumulate(\@tokens , \@cmd );
437+ last if $self -> stop_at($self -> peek());
438+ }
439+ DONE:
440+ pop (@{$self -> {stop }});
441+ return @tokens ;
442+ }
443+
201444package ScriptParser ;
202445
203446sub new {
0 commit comments