forked from antlr/antlr3
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGrammar.java
More file actions
3222 lines (2908 loc) · 102 KB
/
Grammar.java
File metadata and controls
3222 lines (2908 loc) · 102 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;
import org.antlr.Tool;
import org.antlr.analysis.DFA;
import org.antlr.analysis.DFAState;
import org.antlr.analysis.LL1Analyzer;
import org.antlr.analysis.LL1DFA;
import org.antlr.analysis.Label;
import org.antlr.analysis.LookaheadSet;
import org.antlr.analysis.NFA;
import org.antlr.analysis.NFAConversionThread;
import org.antlr.analysis.NFAState;
import org.antlr.analysis.NFAToDFAConverter;
import org.antlr.analysis.SemanticContext;
import org.antlr.analysis.Transition;
import org.antlr.codegen.CodeGenerator;
import org.antlr.codegen.Target;
import org.antlr.grammar.v3.ANTLRLexer;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.ANTLRTreePrinter;
import org.antlr.grammar.v3.ActionAnalysis;
import org.antlr.grammar.v3.DefineGrammarItemsWalker;
import org.antlr.grammar.v3.TreeToNFAConverter;
import org.antlr.misc.Barrier;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.MultiMap;
import org.antlr.misc.OrderedHashSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.ANTLRReaderStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupString;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
/** Represents a grammar in memory. */
public class Grammar {
public static final String SYNPRED_RULE_PREFIX = "synpred";
public static final String GRAMMAR_FILE_EXTENSION = ".g";
/** used for generating lexer temp files */
public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";
public static final int INITIAL_DECISION_LIST_SIZE = 300;
public static final int INVALID_RULE_INDEX = -1;
// the various kinds of labels. t=type, id=ID, types+=type ids+=ID
public static final int RULE_LABEL = 1;
public static final int TOKEN_LABEL = 2;
public static final int RULE_LIST_LABEL = 3;
public static final int TOKEN_LIST_LABEL = 4;
public static final int CHAR_LABEL = 5; // used in lexer for x='a'
public static final int WILDCARD_TREE_LABEL = 6; // Used in tree grammar x=.
public static final int WILDCARD_TREE_LIST_LABEL = 7; // Used in tree grammar x+=.
public static String[] LabelTypeToString =
{"<invalid>", "rule", "token", "rule-list", "token-list", "wildcard-tree", "wildcard-tree-list"};
public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
public static final String FRAGMENT_RULE_MODIFIER = "fragment";
public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";
/** When converting ANTLR char and string literals, here is the
* value set of escape chars.
*/
public static int ANTLRLiteralEscapedCharValue[] = new int[255];
/** Given a char, we need to be able to show as an ANTLR literal.
*/
public static String ANTLRLiteralCharValueEscape[] = new String[255];
static {
ANTLRLiteralEscapedCharValue['n'] = '\n';
ANTLRLiteralEscapedCharValue['r'] = '\r';
ANTLRLiteralEscapedCharValue['t'] = '\t';
ANTLRLiteralEscapedCharValue['b'] = '\b';
ANTLRLiteralEscapedCharValue['f'] = '\f';
ANTLRLiteralEscapedCharValue['\\'] = '\\';
ANTLRLiteralEscapedCharValue['\''] = '\'';
ANTLRLiteralEscapedCharValue['"'] = '"';
ANTLRLiteralCharValueEscape['\n'] = "\\n";
ANTLRLiteralCharValueEscape['\r'] = "\\r";
ANTLRLiteralCharValueEscape['\t'] = "\\t";
ANTLRLiteralCharValueEscape['\b'] = "\\b";
ANTLRLiteralCharValueEscape['\f'] = "\\f";
ANTLRLiteralCharValueEscape['\\'] = "\\\\";
ANTLRLiteralCharValueEscape['\''] = "\\'";
}
public static final int LEXER = 1;
public static final int PARSER = 2;
public static final int TREE_PARSER = 3;
public static final int COMBINED = 4;
public static final String[] grammarTypeToString = new String[] {
"<invalid>",
"lexer",
"parser",
"tree",
"combined"
};
public static final String[] grammarTypeToFileNameSuffix = new String[] {
"<invalid>",
"Lexer",
"Parser",
"", // no suffix for tree grammars
"Parser" // if combined grammar, gen Parser and Lexer will be done later
};
/** Set of valid imports. E.g., can only import a tree parser into
* another tree parser. Maps delegate to set of delegator grammar types.
* validDelegations.get(LEXER) gives list of the kinds of delegators
* that can import lexers.
*/
public static MultiMap<Integer,Integer> validDelegations =
new MultiMap<Integer,Integer>() {
{
map(LEXER, LEXER);
map(LEXER, PARSER);
map(LEXER, COMBINED);
map(PARSER, PARSER);
map(PARSER, COMBINED);
map(TREE_PARSER, TREE_PARSER);
// TODO: allow COMBINED
// map(COMBINED, COMBINED);
}
};
/** This is the buffer of *all* tokens found in the grammar file
* including whitespace tokens etc... I use this to extract
* lexer rules from combined grammars.
*/
public CommonTokenStream tokenBuffer;
public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";
public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";
public static class Decision {
public Grammar grammar;
public int decision;
public NFAState startState;
public GrammarAST blockAST;
public DFA dfa;
}
public class LabelElementPair {
public Token label;
public GrammarAST elementRef;
public String referencedRuleName;
/** Has an action referenced the label? Set by ActionAnalysis.g
* Currently only set for rule labels.
*/
public boolean actionReferencesLabel;
public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL}
public LabelElementPair(Token label, GrammarAST elementRef) {
this.label = label;
this.elementRef = elementRef;
this.referencedRuleName = elementRef.getText();
}
public Rule getReferencedRule() {
return getRule(referencedRuleName);
}
@Override
public String toString() {
return elementRef.toString();
}
}
/** What name did the user provide for this grammar? */
public String name;
/** What type of grammar is this: lexer, parser, tree walker */
public int type;
/** A list of options specified at the grammar level such as language=Java.
* The value can be an AST for complicated values such as character sets.
* There may be code generator specific options in here. I do no
* interpretation of the key/value pairs...they are simply available for
* who wants them.
*/
protected Map<String, Object> options;
public static final Set<String> legalLexerOptions =
new HashSet<String>() {
{
add("language"); add("tokenVocab");
add("TokenLabelType");
add("superClass");
add("filter");
add("k");
add("backtrack");
add("memoize");
}
};
public static final Set<String> legalParserOptions =
new HashSet<String>() {
{
add("language"); add("tokenVocab");
add("output"); add("rewrite"); add("ASTLabelType");
add("TokenLabelType");
add("superClass");
add("k");
add("backtrack");
add("memoize");
}
};
public static final Set<String> legalTreeParserOptions =
new HashSet<String>() {
{
add("language"); add("tokenVocab");
add("output"); add("rewrite"); add("ASTLabelType");
add("TokenLabelType");
add("superClass");
add("k");
add("backtrack");
add("memoize");
add("filter");
}
};
public static final Set<String> doNotCopyOptionsToLexer =
new HashSet<String>() {
{
add("output"); add("ASTLabelType"); add("superClass");
add("k"); add("backtrack"); add("memoize"); add("rewrite");
}
};
public static final Map<String, String> defaultOptions =
new HashMap<String, String>() {
{
put("language","Java");
}
};
public static final Set<String> legalBlockOptions =
new HashSet<String>() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}};
/** What are the default options for a subrule? */
public static final Map<String, String> defaultBlockOptions =
new HashMap<String, String>() {{put("greedy","true");}};
public static final Map<String, String> defaultLexerBlockOptions =
new HashMap<String, String>() {{put("greedy","true");}};
// Token options are here to avoid contaminating Token object in runtime
/** Legal options for terminal refs like ID<node=MyVarNode> */
public static final Set<String> legalTokenOptions =
new HashSet<String>() {
{
add(defaultTokenOption);
add("type");
add("text");
add("assoc");
}
};
public static final String defaultTokenOption = "node";
/** Is there a global fixed lookahead set for this grammar?
* If 0, nothing specified. -1 implies we have not looked at
* the options table yet to set k.
*/
protected int global_k = -1;
/** Map a scope to a map of name:action pairs.
* Map<String, Map<String,GrammarAST>>
* The code generator will use this to fill holes in the output files.
* I track the AST node for the action in case I need the line number
* for errors.
*/
private Map<String, Map<String, Object>> actions =
new HashMap<String, Map<String, Object>>();
/** The NFA that represents the grammar with edges labelled with tokens
* or epsilon. It is more suitable to analysis than an AST representation.
*/
public NFA nfa;
protected NFAFactory factory;
/** If this grammar is part of a larger composite grammar via delegate
* statement, then this points at the composite. The composite holds
* a global list of rules, token types, decision numbers, etc...
*/
public CompositeGrammar composite;
/** A pointer back into grammar tree. Needed so we can add delegates. */
public CompositeGrammarTree compositeTreeNode;
/** If this is a delegate of another grammar, this is the label used
* as an instance var by that grammar to point at this grammar. null
* if no label was specified in the delegate statement.
*/
public String label;
/** TODO: hook this to the charVocabulary option */
protected IntSet charVocabulary = null;
/** For ANTLRWorks, we want to be able to map a line:col to a specific
* decision DFA so it can display DFA.
*/
Map<String, DFA> lineColumnToLookaheadDFAMap = new HashMap<String, DFA>();
public Tool tool;
/** The unique set of all rule references in any rule; set of tree node
* objects so two refs to same rule can exist but at different line/position.
*/
protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>();
protected Set<GrammarAST> scopedRuleRefs = new HashSet<GrammarAST>();
/** The unique set of all token ID references in any rule */
protected Set<Token> tokenIDRefs = new HashSet<Token>();
/** Be able to assign a number to every decision in grammar;
* decisions in 1..n
*/
protected int decisionCount = 0;
/** A list of all rules that are in any left-recursive cycle. There
* could be multiple cycles, but this is a flat list of all problematic
* rules. This is stuff we couldn't refactor to precedence rule.
*/
protected Set<Rule> leftRecursiveRules;
/** An external tool requests that DFA analysis abort prematurely. Stops
* at DFA granularity, which are limited to a DFA size and time computation
* as failsafe.
*/
protected boolean externalAnalysisAbort;
public int numNonLLStar = 0; // hack to track for -report
/** When we read in a grammar, we track the list of syntactic predicates
* and build faux rules for them later. See my blog entry Dec 2, 2005:
* http://www.antlr.org/blog/antlr3/lookahead.tml
* This maps the name (we make up) for a pred to the AST grammar fragment.
*/
protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap;
/** Each left-recursive precedence rule must define precedence array
* for binary operators like:
*
* static int[] e_prec = new int[tokenNames.length];
* static {
* e_prec[75] = 1;
* }
* Track and we push into parser later; this is computed
* early when we look for prec rules.
*/
public List<String> precRuleInitCodeBlocks = new ArrayList<String>();
/** At least one rule has memoize=true */
public boolean atLeastOneRuleMemoizes;
/** At least one backtrack=true in rule or decision or grammar. */
public boolean atLeastOneBacktrackOption;
/** Was this created from a COMBINED grammar? */
public boolean implicitLexer;
/** Map a rule to it's Rule object */
protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>();
/** If this rule is a delegate, some rules might be overridden; don't
* want to gen code for them.
*/
public Set<String> overriddenRules = new HashSet<String>();
/** The list of all rules referenced in this grammar, not defined here,
* and defined in a delegate grammar. Not all of these will be generated
* in the recognizer for this file; only those that are affected by rule
* definitions in this grammar. I am not sure the Java target will need
* this but I'm leaving in case other targets need it.
* see NameSpaceChecker.lookForReferencesToUndefinedSymbols()
*/
protected Set<Rule> delegatedRuleReferences = new HashSet<Rule>();
/** The ANTLRParser tracks lexer rules when reading combined grammars
* so we can build the Tokens rule.
*/
public List<String> lexerRuleNamesInCombined = new ArrayList<String>();
/** Track the scopes defined outside of rules and the scopes associated
* with all rules (even if empty).
*/
protected Map<String, AttributeScope> scopes = new HashMap<String, AttributeScope>();
/** An AST that records entire input grammar with all rules. A simple
* grammar with one rule, "grammar t; a : A | B ;", looks like:
* ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) )
*/
protected GrammarAST grammarTree = null;
/** Each subrule/rule is a decision point and we must track them so we
* can go back later and build DFA predictors for them. This includes
* all the rules, subrules, optional blocks, ()+, ()* etc...
*/
protected Vector<Decision> indexToDecision =
new Vector<Decision>(INITIAL_DECISION_LIST_SIZE);
/** If non-null, this is the code generator we will use to generate
* recognizers in the target language.
*/
protected CodeGenerator generator;
public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);
public LL1Analyzer ll1Analyzer = new LL1Analyzer(this);
/** For merged lexer/parsers, we must construct a separate lexer spec.
* This is the template for lexer; put the literals first then the
* regular rules. We don't need to specify a token vocab import as
* I make the new grammar import from the old all in memory; don't want
* to force it to read from the disk. Lexer grammar will have same
* name as original grammar but will be in different filename. Foo.g
* with combined grammar will have FooParser.java generated and
* Foo__.g with again Foo inside. It will however generate FooLexer.java
* as it's a lexer grammar. A bit odd, but autogenerated. Can tweak
* later if we want.
*/
protected String lexerGrammarTemplate =
"grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" +
"lexer grammar <name>;\n" +
"<if(options)>" +
"options {\n" +
" <options:{it | <it.name>=<it.value>;<\\n>}>\n" +
"}<\\n>\n" +
"<endif>\n" +
"<if(imports)>import <imports; separator=\", \">;<endif>\n" +
"<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" +
"<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" +
"<rules>\n" +
">>\n";
protected ST lexerGrammarST;
/** What file name holds this grammar? */
protected String fileName;
/** How long in ms did it take to build DFAs for this grammar?
* If this grammar is a combined grammar, it only records time for
* the parser grammar component. This only records the time to
* do the LL(*) work; NFA→DFA conversion.
*/
public long DFACreationWallClockTimeInMS;
public int numberOfSemanticPredicates = 0;
public int numberOfManualLookaheadOptions = 0;
public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>();
public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates =
new HashSet<Integer>();
/** Track decisions with syn preds specified for reporting.
* This is the a set of BLOCK type AST nodes.
*/
public Set<GrammarAST> blocksWithSynPreds = new HashSet<GrammarAST>();
/** Track decisions that actually use the syn preds in the DFA.
* Computed during NFA to DFA conversion.
*/
public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>();
/** Track names of preds so we can avoid generating preds that aren't used
* Computed during NFA to DFA conversion. Just walk accept states
* and look for synpreds because that is the only state target whose
* incident edges can have synpreds. Same is try for
* decisionsWhoseDFAsUsesSynPreds.
*/
public Set<String> synPredNamesUsedInDFA = new HashSet<String>();
/** Track decisions with syn preds specified for reporting.
* This is the a set of BLOCK type AST nodes.
*/
public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>();
/** Track decisions that actually use the syn preds in the DFA. */
public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet<DFA>();
protected boolean allDecisionDFACreated = false;
/** We need a way to detect when a lexer grammar is autogenerated from
* another grammar or we are just sending in a string representing a
* grammar. We don't want to generate a .tokens file, for example,
* in such cases.
*/
protected boolean builtFromString = false;
/** Factored out the sanity checking code; delegate to it. */
GrammarSanity sanity = new GrammarSanity(this);
/** Useful for asking questions about target during analysis */
Target target;
/** Create a grammar from file name. */
public Grammar(Tool tool, String fileName, CompositeGrammar composite) {
this.composite = composite;
setTool(tool);
setFileName(fileName);
// ensure we have the composite set to something
if ( composite.delegateGrammarTreeRoot==null ) {
composite.setDelegationRoot(this);
}
STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
target = CodeGenerator.loadLanguageTarget((String) getOption("language"));
}
/** Useful for when you are sure that you are not part of a composite
* already. Used in Interp/RandomPhrase and testing.
*/
public Grammar() { this((Tool)null); }
public Grammar(Tool tool) {
setTool(tool);
builtFromString = true;
composite = new CompositeGrammar(this);
STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
target = CodeGenerator.loadLanguageTarget((String)getOption("language"));
}
/** Used for testing; only useful on noncomposite grammars.*/
public Grammar(String grammarString)
throws RecognitionException
{
this(null, grammarString);
}
/** Used for testing and Interp/RandomPhrase. Only useful on
* noncomposite grammars.
*/
public Grammar(Tool tool, String grammarString)
throws RecognitionException
{
this(tool);
setFileName("<string>");
StringReader r = new StringReader(grammarString);
parseAndBuildAST(r);
composite.assignTokenTypes();
//composite.translateLeftRecursiveRules();
addRulesForSyntacticPredicates();
composite.defineGrammarSymbols();
//composite.createNFAs();
checkNameSpaceAndActions();
}
public void setFileName(String fileName) {
this.fileName = fileName;
}
public String getFileName() {
return fileName;
}
public void setName(String name) {
if ( name==null ) {
return;
}
// don't error check autogenerated files (those with '__' in them)
String saneFile = fileName.replace('\\', '/');
int lastSlash = saneFile.lastIndexOf('/');
String onlyFileName = saneFile.substring(lastSlash+1, fileName.length());
if ( !builtFromString ) {
int lastDot = onlyFileName.lastIndexOf('.');
String onlyFileNameNoSuffix;
if ( lastDot < 0 ) {
ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
}
else {
onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot);
}
if ( !name.equals(onlyFileNameNoSuffix) ) {
ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
name,
fileName);
}
}
this.name = name;
}
public void setGrammarContent(String grammarString) throws RecognitionException {
StringReader r = new StringReader(grammarString);
parseAndBuildAST(r);
composite.assignTokenTypes();
composite.defineGrammarSymbols();
}
public void parseAndBuildAST()
throws IOException
{
FileReader fr;
BufferedReader br = null;
try {
fr = new FileReader(fileName);
br = new BufferedReader(fr);
parseAndBuildAST(br);
br.close();
br = null;
}
finally {
if ( br!=null ) {
br.close();
}
}
}
public void parseAndBuildAST(Reader r) {
// BUILD AST FROM GRAMMAR
ANTLRLexer lexer;
try {
lexer = new ANTLRLexer(new ANTLRReaderStream(r));
} catch (IOException e) {
ErrorManager.internalError("unexpected stream error from parsing "+fileName, e);
return;
}
lexer.setFileName(this.getFileName());
tokenBuffer = new CommonTokenStream(lexer);
ANTLRParser parser = ANTLRParser.createParser(tokenBuffer);
parser.setFileName(this.getFileName());
ANTLRParser.grammar__return result = null;
try {
result = parser.grammar_(this);
}
catch (RecognitionException re) {
ErrorManager.internalError("unexpected parser recognition error from "+fileName, re);
}
dealWithTreeFilterMode(); // tree grammar and filter=true?
if ( lexer.hasASTOperator && !buildAST() ) {
Object value = getOption("output");
if ( value == null ) {
ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION,
this, null);
setOption("output", "AST", null);
}
else {
ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION,
this, null, value);
}
}
setGrammarTree(result.getTree());
//if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree());
grammarTree.setUnknownTokenBoundaries();
setFileName(lexer.getFileName()); // the lexer #src might change name
if ( grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
}
}
protected void dealWithTreeFilterMode() {
Object filterMode = (String)getOption("filter");
if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) {
// check for conflicting options
// filter => backtrack=true
// filter&&output=AST => rewrite=true
// filter&&output!=AST => error
// any deviation from valid option set is an error
Object backtrack = (String)getOption("backtrack");
Object output = getOption("output");
Object rewrite = getOption("rewrite");
if ( backtrack!=null && !backtrack.toString().equals("true") ) {
ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
"backtrack", backtrack);
}
if ( output!=null && !output.toString().equals("AST") ) {
ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
"output", output);
setOption("output", "", null);
}
if ( rewrite!=null && !rewrite.toString().equals("true") ) {
ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
"rewrite", rewrite);
}
// set options properly
setOption("backtrack", "true", null);
if ( output!=null && output.toString().equals("AST") ) {
setOption("rewrite", "true", null);
}
// @synpredgate set to state.backtracking==1 by code gen when filter=true
// superClass set in template target::treeParser
}
}
public void translateLeftRecursiveRule(GrammarAST ruleAST) {
//System.out.println(ruleAST.toStringTree());
CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST);
LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName);
boolean isLeftRec = false;
try {
//System.out.println("TESTING "+ruleAST.enclosingRuleName);
isLeftRec = leftRecursiveRuleWalker.rec_rule(this);
}
catch (RecognitionException re) {
ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
}
if ( !isLeftRec ) return;
List<String> rules = new ArrayList<String>();
rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() ) ;
rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() );
rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() );
for (String r : rules) {
GrammarAST t = parseArtificialRule(r);
addRule(grammarTree, t);
//System.out.println(t.toStringTree());
}
//precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() );
}
public void defineGrammarSymbols() {
if ( Tool.internalOption_PrintGrammarTree ) {
System.out.println(grammarTree.toStringList());
}
// DEFINE RULES
//System.out.println("### define "+name+" rules");
DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree()));
try {
defineItemsWalker.grammar_(this);
}
catch (RecognitionException re) {
ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
re);
}
}
/** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */
public void checkNameSpaceAndActions() {
examineAllExecutableActions();
checkAllRulesForUselessLabels();
nameSpaceChecker.checkConflicts();
}
/** Many imports are illegal such as lexer into a tree grammar */
public boolean validImport(Grammar delegate) {
List<Integer> validDelegators = validDelegations.get(delegate.type);
return validDelegators!=null && validDelegators.contains(this.type);
}
/** If the grammar is a combined grammar, return the text of the implicit
* lexer grammar.
*/
public String getLexerGrammar() {
if ( lexerGrammarST.getAttribute("literals")==null &&
lexerGrammarST.getAttribute("rules")==null )
{
// if no rules, return nothing
return null;
}
lexerGrammarST.add("name", name);
// if there are any actions set for lexer, pass them in
if ( getActions().get("lexer")!=null ) {
lexerGrammarST.add("actionNames",
getActions().get("lexer").keySet());
lexerGrammarST.add("actions",
getActions().get("lexer").values());
}
// make sure generated grammar has the same options
if ( options!=null ) {
for (String optionName : options.keySet()) {
if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
Object value = options.get(optionName);
lexerGrammarST.addAggr("options.{name,value}", optionName, value);
}
}
}
return lexerGrammarST.render();
}
public String getImplicitlyGeneratedLexerFileName() {
return name+
IGNORE_STRING_IN_GRAMMAR_FILE_NAME +
LEXER_GRAMMAR_FILE_EXTENSION;
}
/** Get the name of the generated recognizer; may or may not be same
* as grammar name.
* Recognizer is TParser and TLexer from T if combined, else
* just use T regardless of grammar type.
*/
public String getRecognizerName() {
String suffix = "";
List<Grammar> grammarsFromRootToMe = composite.getDelegators(this);
//System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe);
String qualifiedName = name;
if ( grammarsFromRootToMe!=null ) {
StringBuilder buf = new StringBuilder();
for (Grammar g : grammarsFromRootToMe) {
buf.append(g.name);
buf.append('_');
}
buf.append(name);
qualifiedName = buf.toString();
}
if ( type==Grammar.COMBINED ||
(type==Grammar.LEXER && implicitLexer) )
{
suffix = Grammar.grammarTypeToFileNameSuffix[type];
}
return qualifiedName+suffix;
}
/** Parse a rule we add artificially that is a list of the other lexer
* rules like this: "Tokens : ID | INT | SEMI ;" nextToken() will invoke
* this to set the current token. Add char literals before
* the rule references.
*
* If in filter mode, we want every alt to backtrack and we need to
* do k=1 to force the "first token def wins" rule. Otherwise, the
* longest-match rule comes into play with LL(*).
*
* The ANTLRParser antlr.g file now invokes this when parsing a lexer
* grammar, which I think is proper even though it peeks at the info
* that later phases will (re)compute. It gets a list of lexer rules
* and builds a string representing the rule; then it creates a parser
* and adds the resulting tree to the grammar's tree.
*/
public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
List<String> ruleNames,
List<String> delegateNames,
boolean filterMode) {
ST matchTokenRuleST;
if ( filterMode ) {
matchTokenRuleST = new ST(
ARTIFICIAL_TOKENS_RULENAME+
" options {k=1; backtrack=true;} : <rules; separator=\"|\">;");
}
else {
matchTokenRuleST = new ST(
ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;");
}
// Now add token rule references
for (int i = 0; i < ruleNames.size(); i++) {
String rname = ruleNames.get(i);
matchTokenRuleST.add("rules", rname);
}
for (int i = 0; i < delegateNames.size(); i++) {
String dname = delegateNames.get(i);
matchTokenRuleST.add("rules", dname+".Tokens");
}
//System.out.println("tokens rule: "+matchTokenRuleST.toString());
GrammarAST r = parseArtificialRule(matchTokenRuleST.render());
addRule(grammarAST, r);
//addRule((GrammarAST)parser.getAST());
//return (GrammarAST)parser.getAST();
return r;
}
public GrammarAST parseArtificialRule(String ruleText) {
ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
parser.setGrammar(this);
parser.setGrammarType(this.type);
try {
ANTLRParser.rule_return result = parser.rule();
return result.getTree();
}
catch (Exception e) {
ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
e);
return null;
}
}
public void addRule(GrammarAST grammarTree, GrammarAST t) {
GrammarAST p = null;
for (int i = 0; i < grammarTree.getChildCount(); i++ ) {
p = (GrammarAST)grammarTree.getChild(i);
if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) {
break;
}
}
if (p != null) {
grammarTree.addChild(t);
}
}
/** for any syntactic predicates, we need to define rules for them; they will get
* defined automatically like any other rule. :)
*/
protected List<? extends GrammarAST> getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)
{
List<GrammarAST> rules = new ArrayList<GrammarAST>();
if ( nameToSynpredASTMap==null ) {
return rules;
}
boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
for (Map.Entry<String, GrammarAST> entry : nameToSynpredASTMap.entrySet()) {
String synpredName = entry.getKey();
GrammarAST fragmentAST = entry.getValue();
GrammarAST ruleAST =
ANTLRParser.createSimpleRuleAST(synpredName,
fragmentAST,
isLexer);
rules.add(ruleAST);
}
return rules;
}
public void addRulesForSyntacticPredicates() {
// Get syn pred rules and add to existing tree
List<? extends GrammarAST> synpredRules =
getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap);
for (int i = 0; i < synpredRules.size(); i++) {
GrammarAST rAST = (GrammarAST) synpredRules.get(i);
grammarTree.addChild(rAST);
}
}
/** Walk the list of options, altering this Grammar object according
* to any I recognize.
protected void processOptions() {
Iterator optionNames = options.keySet().iterator();
while (optionNames.hasNext()) {
String optionName = (String) optionNames.next();
Object value = options.get(optionName);
if ( optionName.equals("tokenVocab") ) {
}
}
}
*/
/** Define all the rule begin/end NFAStates to solve forward reference
* issues. Critical for composite grammars too.