Skip to content

Commit cfe2d8c

Browse files
feat: JavaCC 8 keyword utils
Signed-off-by: Andreas Reichel <[email protected]>
1 parent 00bb126 commit cfe2d8c

File tree

1 file changed

+211
-0
lines changed

1 file changed

+211
-0
lines changed
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
/*-
2+
* #%L
3+
* JSQLParser library
4+
* %%
5+
* Copyright (C) 2004 - 2022 JSQLParser
6+
* %%
7+
* Dual licensed under GNU LGPL 2.1 or Apache License 2.0
8+
* #L%
9+
*/
10+
package net.sf.jsqlparser.parser;
11+
12+
import net.sf.jsqlparser.JSQLParserException;
13+
import net.sf.jsqlparser.test.TestUtils;
14+
import org.javacc.jjtree.JJTree;
15+
import org.javacc.parser.Context;
16+
import org.javacc.parser.JavaCCErrors;
17+
import org.javacc.parser.JavaCCGlobals;
18+
import org.javacc.parser.JavaCCParser;
19+
import org.javacc.parser.RCharacterList;
20+
import org.javacc.parser.RChoice;
21+
import org.javacc.parser.RJustName;
22+
import org.javacc.parser.ROneOrMore;
23+
import org.javacc.parser.RSequence;
24+
import org.javacc.parser.RStringLiteral;
25+
import org.javacc.parser.RZeroOrMore;
26+
import org.javacc.parser.RZeroOrOne;
27+
import org.javacc.parser.RegularExpression;
28+
import org.javacc.parser.Semanticize;
29+
import org.javacc.parser.Token;
30+
import org.junit.jupiter.api.Assertions;
31+
import org.junit.jupiter.api.Test;
32+
33+
import java.io.File;
34+
import java.io.IOException;
35+
import java.io.InvalidClassException;
36+
import java.nio.charset.CharsetEncoder;
37+
import java.nio.charset.StandardCharsets;
38+
import java.nio.file.Files;
39+
import java.nio.file.Path;
40+
import java.util.Arrays;
41+
import java.util.List;
42+
import java.util.Map;
43+
import java.util.ServiceLoader;
44+
import java.util.Set;
45+
import java.util.TreeSet;
46+
import java.util.logging.Logger;
47+
48+
49+
class ParserKeywordsUtilsTest {
50+
public final static CharsetEncoder CHARSET_ENCODER = StandardCharsets.US_ASCII.newEncoder();
51+
52+
final static File FILE = new File("src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt");
53+
final static Logger LOGGER = Logger.getLogger(ParserKeywordsUtilsTest.class.getName());
54+
55+
56+
private static void addTokenImage(TreeSet<String> allKeywords, RStringLiteral literal) {
57+
if (CHARSET_ENCODER.canEncode(literal.image) && literal.image.matches("\\w+")) {
58+
allKeywords.add(literal.image);
59+
}
60+
}
61+
62+
@SuppressWarnings({"PMD.EmptyIfStmt", "PMD.CyclomaticComplexity"})
63+
private static void addTokenImage(TreeSet<String> allKeywords, Object o) throws Exception {
64+
if (o instanceof RStringLiteral) {
65+
RStringLiteral literal = (RStringLiteral) o;
66+
addTokenImage(allKeywords, literal);
67+
} else if (o instanceof RChoice) {
68+
RChoice choice = (RChoice) o;
69+
addTokenImage(allKeywords, choice);
70+
} else if (o instanceof RSequence) {
71+
RSequence sequence1 = (RSequence) o;
72+
addTokenImage(allKeywords, sequence1);
73+
} else if (o instanceof ROneOrMore) {
74+
ROneOrMore oneOrMore = (ROneOrMore) o;
75+
addTokenImage(allKeywords, oneOrMore);
76+
} else if (o instanceof RZeroOrMore) {
77+
RZeroOrMore zeroOrMore = (RZeroOrMore) o;
78+
addTokenImage(allKeywords, zeroOrMore);
79+
} else if (o instanceof RZeroOrOne) {
80+
RZeroOrOne zeroOrOne = (RZeroOrOne) o;
81+
addTokenImage(allKeywords, zeroOrOne);
82+
} else if (o instanceof RJustName) {
83+
RJustName zeroOrOne = (RJustName) o;
84+
addTokenImage(allKeywords, zeroOrOne);
85+
} else if (o instanceof RCharacterList) {
86+
// do nothing, we are not interested in those
87+
} else {
88+
throw new InvalidClassException(
89+
"Unknown Type: " + o.getClass().getName() + " " + o.toString());
90+
}
91+
}
92+
93+
private static void addTokenImage(TreeSet<String> allKeywords, RSequence sequence)
94+
throws Exception {
95+
for (Object o : sequence.units) {
96+
addTokenImage(allKeywords, o);
97+
}
98+
}
99+
100+
private static void addTokenImage(TreeSet<String> allKeywords, ROneOrMore oneOrMore) {
101+
for (Token token : oneOrMore.lhsTokens) {
102+
if (CHARSET_ENCODER.canEncode(token.image)) {
103+
allKeywords.add(token.image);
104+
}
105+
}
106+
}
107+
108+
private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrMore oneOrMore) {
109+
for (Token token : oneOrMore.lhsTokens) {
110+
if (CHARSET_ENCODER.canEncode(token.image)) {
111+
allKeywords.add(token.image);
112+
}
113+
}
114+
}
115+
116+
private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrOne oneOrMore) {
117+
for (Token token : oneOrMore.lhsTokens) {
118+
if (CHARSET_ENCODER.canEncode(token.image)) {
119+
allKeywords.add(token.image);
120+
}
121+
}
122+
}
123+
124+
private static void addTokenImage(TreeSet<String> allKeywords, RJustName oneOrMore) {
125+
for (Token token : oneOrMore.lhsTokens) {
126+
if (CHARSET_ENCODER.canEncode(token.image)) {
127+
allKeywords.add(token.image);
128+
}
129+
}
130+
}
131+
132+
private static void addTokenImage(TreeSet<String> allKeywords, RChoice choice)
133+
throws Exception {
134+
for (Object o : choice.getChoices()) {
135+
addTokenImage(allKeywords, o);
136+
}
137+
}
138+
139+
public static TreeSet<String> getAllKeywordsUsingJavaCC(File file) throws Exception {
140+
TreeSet<String> allKeywords = new TreeSet<>();
141+
142+
Path jjtGrammar = file.toPath();
143+
Path jjGrammarOutputDir = Files.createTempDirectory("jjgrammer");
144+
145+
new JJTree().main(new String[] {
146+
"-JJTREE_OUTPUT_DIRECTORY=" + jjGrammarOutputDir.toString(),
147+
"-CODE_GENERATOR=java",
148+
jjtGrammar.toString()
149+
});
150+
Path jjGrammarFile = jjGrammarOutputDir.resolve("JSqlParserCC.jj");
151+
152+
Context context = new Context();
153+
JavaCCParser parser = new JavaCCParser(new java.io.FileInputStream(jjGrammarFile.toFile()));
154+
parser.javacc_input(context);
155+
156+
// needed for filling JavaCCGlobals
157+
//JavaCCErrors.reInit();
158+
Semanticize.start(context);
159+
160+
// read all the Token and get the String image
161+
for (Map.Entry<Integer, RegularExpression> item : context.globals().rexps_of_tokens
162+
.entrySet()) {
163+
addTokenImage(allKeywords, item.getValue());
164+
}
165+
166+
// clean up
167+
if (jjGrammarOutputDir.toFile().exists()) {
168+
jjGrammarOutputDir.toFile().delete();
169+
}
170+
171+
return allKeywords;
172+
}
173+
174+
@Test
175+
void getAllKeywords() throws IOException {
176+
Set<String> allKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
177+
Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
178+
}
179+
180+
@Test
181+
void getAllKeywordsUsingJavaCC() throws Exception {
182+
Set<String> allKeywords = getAllKeywordsUsingJavaCC(FILE);
183+
Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
184+
}
185+
186+
// Test, if all Tokens found per RegEx are also found from the JavaCCParser
187+
@Test
188+
void compareKeywordLists() throws Exception {
189+
Set<String> allRegexKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
190+
Set<String> allJavaCCParserKeywords = getAllKeywordsUsingJavaCC(FILE);
191+
192+
// Exceptions, which should not have been found from the RegEx
193+
List<String> exceptions = Arrays.asList("0x");
194+
195+
// We expect all Keywords from the Regex to be found by the JavaCC Parser
196+
for (String s : allRegexKeywords) {
197+
Assertions.assertTrue(
198+
exceptions.contains(s) || allJavaCCParserKeywords.contains(s),
199+
"The Keywords from JavaCC do not contain Keyword: " + s);
200+
}
201+
202+
// The JavaCC Parser finds some more valid Keywords (where no explicit Token has been
203+
// defined
204+
for (String s : allJavaCCParserKeywords) {
205+
if (!(exceptions.contains(s) || allRegexKeywords.contains(s))) {
206+
LOGGER.fine("Found Additional Keywords from Parser: " + s);
207+
}
208+
}
209+
}
210+
211+
}

0 commit comments

Comments
 (0)