feat: JavaCC 8 keyword utils

manticore-projects · manticore-projects · commit cfe2d8ccaf7c · 2025-05-23T22:38:25.000+07:00
Signed-off-by: Andreas Reichel <andreas@manticore-projects.com>
diff --git a/src/test/java/net/sf/jsqlparser/parser/ParserKeywordsUtilsTest.java b/src/test/java/net/sf/jsqlparser/parser/ParserKeywordsUtilsTest.java
@@ -0,0 +1,211 @@
+/*-
+ * #%L
+ * JSQLParser library
+ * %%
+ * Copyright (C) 2004 - 2022 JSQLParser
+ * %%
+ * Dual licensed under GNU LGPL 2.1 or Apache License 2.0
+ * #L%
+ */
+package net.sf.jsqlparser.parser;
+
+import net.sf.jsqlparser.JSQLParserException;
+import net.sf.jsqlparser.test.TestUtils;
+import org.javacc.jjtree.JJTree;
+import org.javacc.parser.Context;
+import org.javacc.parser.JavaCCErrors;
+import org.javacc.parser.JavaCCGlobals;
+import org.javacc.parser.JavaCCParser;
+import org.javacc.parser.RCharacterList;
+import org.javacc.parser.RChoice;
+import org.javacc.parser.RJustName;
+import org.javacc.parser.ROneOrMore;
+import org.javacc.parser.RSequence;
+import org.javacc.parser.RStringLiteral;
+import org.javacc.parser.RZeroOrMore;
+import org.javacc.parser.RZeroOrOne;
+import org.javacc.parser.RegularExpression;
+import org.javacc.parser.Semanticize;
+import org.javacc.parser.Token;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InvalidClassException;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.ServiceLoader;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.logging.Logger;
+
+
+class ParserKeywordsUtilsTest {
+    /** US-ASCII encoder; the helpers below use it to skip token images with non-ASCII characters. */
+    public final static CharsetEncoder CHARSET_ENCODER = StandardCharsets.US_ASCII.newEncoder();
+
+    /** Relative path of the JJTree grammar ({@code .jjt}) from which the tests read keywords. */
+    final static File FILE = new File("src/main/jjtree/net/sf/jsqlparser/parser/JSqlParserCC.jjt");
+    /** {@code java.util.logging} logger named after this test class. */
+    final static Logger LOGGER = Logger.getLogger(ParserKeywordsUtilsTest.class.getName());
+
+
+    private static void addTokenImage(TreeSet<String> allKeywords, RStringLiteral literal) {
+        if (CHARSET_ENCODER.canEncode(literal.image) && literal.image.matches("\\w+")) {
+            allKeywords.add(literal.image);
+        }
+    }
+
+    @SuppressWarnings({"PMD.EmptyIfStmt", "PMD.CyclomaticComplexity"})
+    private static void addTokenImage(TreeSet<String> allKeywords, Object o) throws Exception {
+        if (o instanceof RStringLiteral) {
+            RStringLiteral literal = (RStringLiteral) o;
+            addTokenImage(allKeywords, literal);
+        } else if (o instanceof RChoice) {
+            RChoice choice = (RChoice) o;
+            addTokenImage(allKeywords, choice);
+        } else if (o instanceof RSequence) {
+            RSequence sequence1 = (RSequence) o;
+            addTokenImage(allKeywords, sequence1);
+        } else if (o instanceof ROneOrMore) {
+            ROneOrMore oneOrMore = (ROneOrMore) o;
+            addTokenImage(allKeywords, oneOrMore);
+        } else if (o instanceof RZeroOrMore) {
+            RZeroOrMore zeroOrMore = (RZeroOrMore) o;
+            addTokenImage(allKeywords, zeroOrMore);
+        } else if (o instanceof RZeroOrOne) {
+            RZeroOrOne zeroOrOne = (RZeroOrOne) o;
+            addTokenImage(allKeywords, zeroOrOne);
+        } else if (o instanceof RJustName) {
+            RJustName zeroOrOne = (RJustName) o;
+            addTokenImage(allKeywords, zeroOrOne);
+        } else if (o instanceof RCharacterList) {
+            // do nothing, we are not interested in those
+        } else {
+            throw new InvalidClassException(
+                    "Unknown Type: " + o.getClass().getName() + " " + o.toString());
+        }
+    }
+
+    /**
+     * Walks all units of a sequence and hands each one to the generic {@code addTokenImage}
+     * dispatcher.
+     *
+     * @param allKeywords the set collecting the keyword images
+     * @param sequence the sequence whose units are visited in order
+     * @throws Exception if one of the units cannot be processed
+     */
+    private static void addTokenImage(TreeSet<String> allKeywords, RSequence sequence)
+            throws Exception {
+        for (final Object unit : sequence.units) {
+            addTokenImage(allKeywords, unit);
+        }
+    }
+
+    private static void addTokenImage(TreeSet<String> allKeywords, ROneOrMore oneOrMore) {
+        for (Token token : oneOrMore.lhsTokens) {
+            if (CHARSET_ENCODER.canEncode(token.image)) {
+                allKeywords.add(token.image);
+            }
+        }
+    }
+
+    /**
+     * Adds the image of every token in {@code lhsTokens} of a zero-or-more expression, as long
+     * as the image is US-ASCII encodable.
+     *
+     * <p>NOTE(review): only {@code lhsTokens} is inspected here; if the wrapped expression is
+     * meant to contribute keywords as well, this would have to descend into it - confirm.
+     *
+     * @param allKeywords the set collecting the keyword images
+     * @param zeroOrMore the zero-or-more expression whose {@code lhsTokens} are read
+     */
+    private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrMore zeroOrMore) {
+        for (Token token : zeroOrMore.lhsTokens) {
+            if (CHARSET_ENCODER.canEncode(token.image)) {
+                allKeywords.add(token.image);
+            }
+        }
+    }
+
+    /**
+     * Adds the image of every token in {@code lhsTokens} of a zero-or-one expression, as long
+     * as the image is US-ASCII encodable.
+     *
+     * <p>NOTE(review): only {@code lhsTokens} is inspected here; if the wrapped expression is
+     * meant to contribute keywords as well, this would have to descend into it - confirm.
+     *
+     * @param allKeywords the set collecting the keyword images
+     * @param zeroOrOne the zero-or-one expression whose {@code lhsTokens} are read
+     */
+    private static void addTokenImage(TreeSet<String> allKeywords, RZeroOrOne zeroOrOne) {
+        for (Token token : zeroOrOne.lhsTokens) {
+            if (CHARSET_ENCODER.canEncode(token.image)) {
+                allKeywords.add(token.image);
+            }
+        }
+    }
+
+    /**
+     * Adds the image of every token in {@code lhsTokens} of a just-name reference, as long as
+     * the image is US-ASCII encodable.
+     *
+     * <p>NOTE(review): only {@code lhsTokens} is inspected here; the expression the name refers
+     * to is not followed - confirm that this is intended.
+     *
+     * @param allKeywords the set collecting the keyword images
+     * @param justName the just-name reference whose {@code lhsTokens} are read
+     */
+    private static void addTokenImage(TreeSet<String> allKeywords, RJustName justName) {
+        for (Token token : justName.lhsTokens) {
+            if (CHARSET_ENCODER.canEncode(token.image)) {
+                allKeywords.add(token.image);
+            }
+        }
+    }
+
+    /**
+     * Visits every alternative of a choice and hands it to the generic {@code addTokenImage}
+     * dispatcher.
+     *
+     * @param allKeywords the set collecting the keyword images
+     * @param choice the choice whose alternatives are visited
+     * @throws Exception if one of the alternatives cannot be processed
+     */
+    private static void addTokenImage(TreeSet<String> allKeywords, RChoice choice)
+            throws Exception {
+        for (final Object alternative : choice.getChoices()) {
+            addTokenImage(allKeywords, alternative);
+        }
+    }
+
+    /**
+     * Collects the keyword images of a JJTree grammar by running the JavaCC tool chain on it.
+     *
+     * <p>JJTree first writes a plain JavaCC grammar into a temporary directory. That grammar is
+     * parsed and semanticized, and the regular expression of every token is then walked by
+     * {@code addTokenImage}. The temporary directory is removed again, even when one of the
+     * steps fails.
+     *
+     * @param file the JJTree grammar file ({@code .jjt}) to analyse
+     * @return the sorted set of collected token images
+     * @throws Exception if the grammar cannot be generated, read or analysed
+     */
+    public static TreeSet<String> getAllKeywordsUsingJavaCC(File file) throws Exception {
+        TreeSet<String> allKeywords = new TreeSet<>();
+
+        Path jjtGrammar = file.toPath();
+        Path jjGrammarOutputDir = Files.createTempDirectory("jjgrammer");
+        try {
+            new JJTree().main(new String[] {
+                    "-JJTREE_OUTPUT_DIRECTORY=" + jjGrammarOutputDir.toString(),
+                    "-CODE_GENERATOR=java",
+                    jjtGrammar.toString()
+            });
+            Path jjGrammarFile = jjGrammarOutputDir.resolve("JSqlParserCC.jj");
+
+            Context context = new Context();
+            // the stream is only needed while parsing; closing it here also releases the file
+            // handle before the temporary directory gets deleted
+            try (java.io.FileInputStream grammarStream =
+                    new java.io.FileInputStream(jjGrammarFile.toFile())) {
+                JavaCCParser parser = new JavaCCParser(grammarStream);
+                parser.javacc_input(context);
+            }
+
+            // needed for filling the globals which are read below
+            Semanticize.start(context);
+
+            // read all the Token and get the String image
+            for (RegularExpression expression : context.globals().rexps_of_tokens.values()) {
+                addTokenImage(allKeywords, expression);
+            }
+        } finally {
+            // clean up, also when one of the steps above has thrown
+            deleteRecursively(jjGrammarOutputDir);
+        }
+
+        return allKeywords;
+    }
+
+    /**
+     * Removes a directory tree on a best-effort basis; failures are logged and never thrown, so
+     * that a cleanup problem cannot hide the exception of the actual work.
+     *
+     * @param root the directory (or file) to remove together with everything below it
+     */
+    private static void deleteRecursively(Path root) {
+        if (!Files.exists(root)) {
+            return;
+        }
+        try (java.util.stream.Stream<Path> paths = Files.walk(root)) {
+            // a plain delete fails on non-empty directories; reverse order visits the content
+            // of a directory before the directory itself
+            paths.sorted(java.util.Comparator.reverseOrder()).forEach(path -> {
+                if (!path.toFile().delete()) {
+                    LOGGER.warning("Could not delete temporary file: " + path);
+                }
+            });
+        } catch (IOException | java.io.UncheckedIOException ex) {
+            LOGGER.warning("Could not clean up temporary directory " + root + ": " + ex);
+        }
+    }
+
+    @Test
+    void getAllKeywords() throws IOException {
+        Set<String> allKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
+        Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
+    }
+
+    @Test
+    void getAllKeywordsUsingJavaCC() throws Exception {
+        Set<String> allKeywords = getAllKeywordsUsingJavaCC(FILE);
+        Assertions.assertFalse(allKeywords.isEmpty(), "Keyword List must not be empty!");
+    }
+
+    // Test, if all Tokens found per RegEx are also found from the JavaCCParser
+    @Test
+    void compareKeywordLists() throws Exception {
+        Set<String> allRegexKeywords = ParserKeywordsUtils.getAllKeywordsUsingRegex(FILE);
+        Set<String> allJavaCCParserKeywords = getAllKeywordsUsingJavaCC(FILE);
+
+        // Exceptions, which should not have been found from the RegEx
+        List<String> exceptions = Arrays.asList("0x");
+
+        // We expect all Keywords from the Regex to be found by the JavaCC Parser
+        for (String s : allRegexKeywords) {
+            Assertions.assertTrue(
+                    exceptions.contains(s) || allJavaCCParserKeywords.contains(s),
+                    "The Keywords from JavaCC do not contain Keyword: " + s);
+        }
+
+        // The JavaCC Parser finds some more valid Keywords (where no explicit Token has been
+        // defined
+        for (String s : allJavaCCParserKeywords) {
+            if (!(exceptions.contains(s) || allRegexKeywords.contains(s))) {
+                LOGGER.fine("Found Additional Keywords from Parser: " + s);
+            }
+        }
+    }
+
+}