@@ -198,6 +198,249 @@ sub scan_token {
198
198
return length ($token ) ? $token : undef ;
199
199
}
200
200
201
+ # ShellParser parses POSIX shell scripts (with minor extensions for Bash). It
202
+ # is a recursive descent parser very roughly modeled after section 2.10 "Shell
203
+ # Grammar" of POSIX chapter 2 "Shell Command Language".
204
+ package ShellParser ;
205
+
206
+ sub new {
207
+ my ($class , $s ) = @_ ;
208
+ my $self = bless {
209
+ buff => [],
210
+ stop => [],
211
+ output => []
212
+ } => $class ;
213
+ $self -> {lexer } = Lexer-> new($self , $s );
214
+ return $self ;
215
+ }
216
+
217
+ sub next_token {
218
+ my $self = shift @_ ;
219
+ return pop (@{$self -> {buff }}) if @{$self -> {buff }};
220
+ return $self -> {lexer }-> scan_token();
221
+ }
222
+
223
+ sub untoken {
224
+ my $self = shift @_ ;
225
+ push (@{$self -> {buff }}, @_ );
226
+ }
227
+
228
+ sub peek {
229
+ my $self = shift @_ ;
230
+ my $token = $self -> next_token();
231
+ return undef unless defined ($token );
232
+ $self -> untoken($token );
233
+ return $token ;
234
+ }
235
+
236
+ sub stop_at {
237
+ my ($self , $token ) = @_ ;
238
+ return 1 unless defined ($token );
239
+ my $stop = ${$self -> {stop }}[-1] if @{$self -> {stop }};
240
+ return defined ($stop ) && $token =~ $stop ;
241
+ }
242
+
243
+ sub expect {
244
+ my ($self , $expect ) = @_ ;
245
+ my $token = $self -> next_token();
246
+ return $token if defined ($token ) && $token eq $expect ;
247
+ push (@{$self -> {output }}, " ?!ERR?! expected '$expect ' but found '" . (defined ($token ) ? $token : " <end-of-input>" ) . " '\n " );
248
+ $self -> untoken($token ) if defined ($token );
249
+ return ();
250
+ }
251
+
252
+ sub optional_newlines {
253
+ my $self = shift @_ ;
254
+ my @tokens ;
255
+ while (my $token = $self -> peek()) {
256
+ last unless $token eq " \n " ;
257
+ push (@tokens , $self -> next_token());
258
+ }
259
+ return @tokens ;
260
+ }
261
+
262
+ sub parse_group {
263
+ my $self = shift @_ ;
264
+ return ($self -> parse(qr / ^}$ / ),
265
+ $self -> expect(' }' ));
266
+ }
267
+
268
+ sub parse_subshell {
269
+ my $self = shift @_ ;
270
+ return ($self -> parse(qr / ^\) $ / ),
271
+ $self -> expect(' )' ));
272
+ }
273
+
274
+ sub parse_case_pattern {
275
+ my $self = shift @_ ;
276
+ my @tokens ;
277
+ while (defined (my $token = $self -> next_token())) {
278
+ push (@tokens , $token );
279
+ last if $token eq ' )' ;
280
+ }
281
+ return @tokens ;
282
+ }
283
+
284
+ sub parse_case {
285
+ my $self = shift @_ ;
286
+ my @tokens ;
287
+ push (@tokens ,
288
+ $self -> next_token(), # subject
289
+ $self -> optional_newlines(),
290
+ $self -> expect(' in' ),
291
+ $self -> optional_newlines());
292
+ while (1) {
293
+ my $token = $self -> peek();
294
+ last unless defined ($token ) && $token ne ' esac' ;
295
+ push (@tokens ,
296
+ $self -> parse_case_pattern(),
297
+ $self -> optional_newlines(),
298
+ $self -> parse(qr / ^(?:;;|esac)$ / )); # item body
299
+ $token = $self -> peek();
300
+ last unless defined ($token ) && $token ne ' esac' ;
301
+ push (@tokens ,
302
+ $self -> expect(' ;;' ),
303
+ $self -> optional_newlines());
304
+ }
305
+ push (@tokens , $self -> expect(' esac' ));
306
+ return @tokens ;
307
+ }
308
+
309
+ sub parse_for {
310
+ my $self = shift @_ ;
311
+ my @tokens ;
312
+ push (@tokens ,
313
+ $self -> next_token(), # variable
314
+ $self -> optional_newlines());
315
+ my $token = $self -> peek();
316
+ if (defined ($token ) && $token eq ' in' ) {
317
+ push (@tokens ,
318
+ $self -> expect(' in' ),
319
+ $self -> optional_newlines());
320
+ }
321
+ push (@tokens ,
322
+ $self -> parse(qr / ^do$ / ), # items
323
+ $self -> expect(' do' ),
324
+ $self -> optional_newlines(),
325
+ $self -> parse_loop_body(),
326
+ $self -> expect(' done' ));
327
+ return @tokens ;
328
+ }
329
+
330
+ sub parse_if {
331
+ my $self = shift @_ ;
332
+ my @tokens ;
333
+ while (1) {
334
+ push (@tokens ,
335
+ $self -> parse(qr / ^then$ / ), # if/elif condition
336
+ $self -> expect(' then' ),
337
+ $self -> optional_newlines(),
338
+ $self -> parse(qr / ^(?:elif|else|fi)$ / )); # if/elif body
339
+ my $token = $self -> peek();
340
+ last unless defined ($token ) && $token eq ' elif' ;
341
+ push (@tokens , $self -> expect(' elif' ));
342
+ }
343
+ my $token = $self -> peek();
344
+ if (defined ($token ) && $token eq ' else' ) {
345
+ push (@tokens ,
346
+ $self -> expect(' else' ),
347
+ $self -> optional_newlines(),
348
+ $self -> parse(qr / ^fi$ / )); # else body
349
+ }
350
+ push (@tokens , $self -> expect(' fi' ));
351
+ return @tokens ;
352
+ }
353
+
354
+ sub parse_loop_body {
355
+ my $self = shift @_ ;
356
+ return $self -> parse(qr / ^done$ / );
357
+ }
358
+
359
+ sub parse_loop {
360
+ my $self = shift @_ ;
361
+ return ($self -> parse(qr / ^do$ / ), # condition
362
+ $self -> expect(' do' ),
363
+ $self -> optional_newlines(),
364
+ $self -> parse_loop_body(),
365
+ $self -> expect(' done' ));
366
+ }
367
+
368
+ sub parse_func {
369
+ my $self = shift @_ ;
370
+ return ($self -> expect(' (' ),
371
+ $self -> expect(' )' ),
372
+ $self -> optional_newlines(),
373
+ $self -> parse_cmd()); # body
374
+ }
375
+
376
+ sub parse_bash_array_assignment {
377
+ my $self = shift @_ ;
378
+ my @tokens = $self -> expect(' (' );
379
+ while (defined (my $token = $self -> next_token())) {
380
+ push (@tokens , $token );
381
+ last if $token eq ' )' ;
382
+ }
383
+ return @tokens ;
384
+ }
385
+
386
+ my %compound = (
387
+ ' {' => \&parse_group,
388
+ ' (' => \&parse_subshell,
389
+ ' case' => \&parse_case,
390
+ ' for' => \&parse_for,
391
+ ' if' => \&parse_if,
392
+ ' until' => \&parse_loop,
393
+ ' while' => \&parse_loop);
394
+
395
+ sub parse_cmd {
396
+ my $self = shift @_ ;
397
+ my $cmd = $self -> next_token();
398
+ return () unless defined ($cmd );
399
+ return $cmd if $cmd eq " \n " ;
400
+
401
+ my $token ;
402
+ my @tokens = $cmd ;
403
+ if ($cmd eq ' !' ) {
404
+ push (@tokens , $self -> parse_cmd());
405
+ return @tokens ;
406
+ } elsif (my $f = $compound {$cmd }) {
407
+ push (@tokens , $self -> $f ());
408
+ } elsif (defined ($token = $self -> peek()) && $token eq ' (' ) {
409
+ if ($cmd !~ / \w =$ / ) {
410
+ push (@tokens , $self -> parse_func());
411
+ return @tokens ;
412
+ }
413
+ $tokens [-1] .= join (' ' , $self -> parse_bash_array_assignment());
414
+ }
415
+
416
+ while (defined (my $token = $self -> next_token())) {
417
+ $self -> untoken($token ), last if $self -> stop_at($token );
418
+ push (@tokens , $token );
419
+ last if $token =~ / ^(?:[;&\n |]|&&|\|\| )$ / ;
420
+ }
421
+ push (@tokens , $self -> next_token()) if $tokens [-1] ne " \n " && defined ($token = $self -> peek()) && $token eq " \n " ;
422
+ return @tokens ;
423
+ }
424
+
425
+ sub accumulate {
426
+ my ($self , $tokens , $cmd ) = @_ ;
427
+ push (@$tokens , @$cmd );
428
+ }
429
+
430
+ sub parse {
431
+ my ($self , $stop ) = @_ ;
432
+ push (@{$self -> {stop }}, $stop );
433
+ goto DONE if $self -> stop_at($self -> peek());
434
+ my @tokens ;
435
+ while (my @cmd = $self -> parse_cmd()) {
436
+ $self -> accumulate(\@tokens , \@cmd );
437
+ last if $self -> stop_at($self -> peek());
438
+ }
439
+ DONE:
440
+ pop (@{$self -> {stop }});
441
+ return @tokens ;
442
+ }
443
+
201
444
package ScriptParser ;
202
445
203
446
sub new {
0 commit comments