diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 3790ff1..157c73a 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -40,6 +40,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] @@ -62,9 +63,7 @@ jobs: - name: clone javacc-8-java uses: actions/checkout@v4 with: - repository: javacc/javacc-8-java.git path: javacc-8-java - ref: release - name: Set up JDK 11 uses: actions/setup-java@v4 @@ -73,13 +72,6 @@ jobs: java-version: '11' cache: 'maven' - - name: Cache Maven packages - uses: actions/cache@v4 - with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} - restore-keys: ${{ runner.os }}-m2 - # -B: batch mode; -e: produce execution error messages; # -X: debug mode; -N: non-recursive (don't build submodules) @@ -88,6 +80,21 @@ jobs: - name: Run install on javacc-8-core run: mvn install -B --show-version --file javacc-8-core/pom.xml - - - name: Run install on javacc-8-java + - name: Run install on javacc-8-java (posix) + if: ${{ matrix.os != 'windows-latest' }} + run: USERPROFILE=$HOME mvn install -B --show-version --file javacc-8-java/pom.xml + - name: Run install on javacc-8-java (windows) + env: + MAVEN_OPTS: -Dfile.encoding=utf-8 + if: ${{ matrix.os == 'windows-latest' }} run: mvn install -B --show-version --file javacc-8-java/pom.xml + + - name: Upload output files for review + uses: actions/upload-artifact@v4 + if: always() + with: + name: outputs-${{ matrix.os }} + path: | + **/*.out + **/*.err + **/build.log \ No newline at end of file diff --git a/examples/GUIParsing/TokenMgrVersion/src/main/java/CharCollector.java b/examples/GUIParsing/TokenMgrVersion/src/main/java/CharCollector.java index a544f89..36d44c7 100644 --- a/examples/GUIParsing/TokenMgrVersion/src/main/java/CharCollector.java +++ b/examples/GUIParsing/TokenMgrVersion/src/main/java/CharCollector.java @@ -47,6 +47,7 @@ public class CharCollector 
implements CharStream { protected boolean trackLineColumn = true; protected int tabSize = 1; + private char nextChar; /** Puts a character into the buffer. Called by the GUI. */ public final synchronized void put(char c) { @@ -207,4 +208,41 @@ public int getTabSize() { public void setTabSize(int i) { tabSize = i; } + + /** + * Returns next character. + * + * @return next character in the input + */ + public char getNextChar() { + return nextChar; + } + + /** + * Checks next character. + * + * @return whether next character is available + */ + public boolean hasNextChar() { + try { + nextChar = readChar(); + return true; + } catch (java.io.IOException ex) { + return false; + } + } + + /** + * Checks next character and marks new token. + * + * @return whether next character is available + */ + public boolean hasNextToken() { + try { + nextChar = BeginToken(); + return true; + } catch (java.io.IOException ex) { + return false; + } + } } diff --git a/examples/Gwt/pom.xml b/examples/Gwt/pom.xml index 8ec94bc..613e79b 100644 --- a/examples/Gwt/pom.xml +++ b/examples/Gwt/pom.xml @@ -88,8 +88,8 @@ THE POSSIBILITY OF SUCH DAMAGE. GwtSimpleParser - ${project.basedir}/simple/out - ${project.basedir}/simple/err + ${project.basedir}/simple/compiler.out + ${project.basedir}/simple/compiler.err @@ -102,8 +102,8 @@ THE POSSIBILITY OF SUCH DAMAGE. GwtUnicodeParser - ${project.basedir}/unicode/out - ${project.basedir}/unicode/err + ${project.basedir}/unicode/compiler.out + ${project.basedir}/unicode/compiler.err @@ -122,18 +122,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/simple/compiler.out does not exist!" /> + + message="${project.basedir}/simple/compiler.ref does not exist!" /> - + + message="${project.basedir}/simple/compiler.out does not match ${project.basedir}/simple/compiler.ref!" /> @@ -145,18 +145,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/unicode/compiler.out does not exist!" 
/> + + message="${project.basedir}/unicode/compiler.ref does not exist!" /> - + + message="${project.basedir}/unicode/compiler.out does not match ${project.basedir}/unicode/compiler.ref!" /> diff --git a/examples/Gwt/simple/ref b/examples/Gwt/simple/compiler.ref similarity index 100% rename from examples/Gwt/simple/ref rename to examples/Gwt/simple/compiler.ref diff --git a/examples/Gwt/simple/err b/examples/Gwt/simple/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Gwt/simple/out b/examples/Gwt/simple/out deleted file mode 100644 index ce01362..0000000 --- a/examples/Gwt/simple/out +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/examples/Gwt/unicode/ref b/examples/Gwt/unicode/compiler.ref similarity index 100% rename from examples/Gwt/unicode/ref rename to examples/Gwt/unicode/compiler.ref diff --git a/examples/Gwt/unicode/err b/examples/Gwt/unicode/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Gwt/unicode/out b/examples/Gwt/unicode/out deleted file mode 100644 index ce01362..0000000 --- a/examples/Gwt/unicode/out +++ /dev/null @@ -1 +0,0 @@ -hello diff --git a/examples/Simple/idlist/in b/examples/Simple/idlist/compiler.in similarity index 100% rename from examples/Simple/idlist/in rename to examples/Simple/idlist/compiler.in diff --git a/examples/Simple/idlist/out b/examples/Simple/idlist/compiler.ref similarity index 100% rename from examples/Simple/idlist/out rename to examples/Simple/idlist/compiler.ref diff --git a/examples/Simple/idlist/err b/examples/Simple/idlist/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Simple/idlist/ref b/examples/Simple/idlist/ref deleted file mode 100644 index 93720f3..0000000 --- a/examples/Simple/idlist/ref +++ /dev/null @@ -1,6 +0,0 @@ -abc -xyz123 -A -B -C -aaa diff --git a/examples/Simple/pom.xml b/examples/Simple/pom.xml index d058a32..dc93aa6 100644 --- a/examples/Simple/pom.xml +++ b/examples/Simple/pom.xml @@ -61,9 +61,9 @@ THE POSSIBILITY OF 
SUCH DAMAGE. simple1.Simple1 - ${project.basedir}/simple1/in - ${project.basedir}/simple1/out - ${project.basedir}/simple1/err + ${project.basedir}/simple1/compiler.in + ${project.basedir}/simple1/compiler.out + ${project.basedir}/simple1/compiler.err @@ -76,9 +76,9 @@ THE POSSIBILITY OF SUCH DAMAGE. simple2.Simple2 - ${project.basedir}/simple2/in - ${project.basedir}/simple2/out - ${project.basedir}/simple2/err + ${project.basedir}/simple2/compiler.in + ${project.basedir}/simple2/compiler.out + ${project.basedir}/simple2/compiler.err @@ -91,9 +91,9 @@ THE POSSIBILITY OF SUCH DAMAGE. simple3.Simple3 - ${project.basedir}/simple3/in - ${project.basedir}/simple3/out - ${project.basedir}/simple3/err + ${project.basedir}/simple3/compiler.in + ${project.basedir}/simple3/compiler.out + ${project.basedir}/simple3/compiler.err @@ -106,9 +106,9 @@ THE POSSIBILITY OF SUCH DAMAGE. xlator.NL_Xlator - ${project.basedir}/xlator/in - ${project.basedir}/xlator/out - ${project.basedir}/xlator/err + ${project.basedir}/xlator/compiler.in + ${project.basedir}/xlator/compiler.out + ${project.basedir}/xlator/compiler.err @@ -121,9 +121,9 @@ THE POSSIBILITY OF SUCH DAMAGE. idlist.IdList - ${project.basedir}/idlist/in - ${project.basedir}/idlist/out - ${project.basedir}/idlist/err + ${project.basedir}/idlist/compiler.in + ${project.basedir}/idlist/compiler.out + ${project.basedir}/idlist/compiler.err @@ -142,18 +142,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/simple1/compiler.out does not exist!" /> + + message="${project.basedir}/simple1/compiler.ref does not exist!" /> - + + message="${project.basedir}/simple1/compiler.out does not match ${project.basedir}/simple1/compiler.ref!" /> @@ -166,15 +166,15 @@ THE POSSIBILITY OF SUCH DAMAGE. - - - - + + + + - + - + @@ -187,18 +187,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/simple3/compiler.out does not exist!" /> + + message="${project.basedir}/simple3/compiler.ref does not exist!" 
/> - + + message="${project.basedir}/simple3/compiler.out does not match ${project.basedir}/simple3/compiler.ref!" /> @@ -210,18 +210,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/xlator/compiler.out does not exist!" /> + + message="${project.basedir}/xlator/compiler.ref does not exist!" /> - + + message="${project.basedir}/xlator/compiler.out does not match ${project.basedir}/xlator/compiler.ref!" /> @@ -233,18 +233,18 @@ THE POSSIBILITY OF SUCH DAMAGE. - + - + message="${project.basedir}/idlist/compiler.out does not exist!" /> + + message="${project.basedir}/idlist/compiler.ref does not exist!" /> - + + message="${project.basedir}/idlist/compiler.out does not match ${project.basedir}/idlist/compiler.ref!" /> diff --git a/examples/Simple/simple1/in b/examples/Simple/simple1/compiler.in similarity index 100% rename from examples/Simple/simple1/in rename to examples/Simple/simple1/compiler.in diff --git a/examples/Simple/simple1/out b/examples/Simple/simple1/compiler.ref similarity index 100% rename from examples/Simple/simple1/out rename to examples/Simple/simple1/compiler.ref diff --git a/examples/Simple/simple1/err b/examples/Simple/simple1/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Simple/simple1/ref b/examples/Simple/simple1/ref deleted file mode 100644 index e99f59e..0000000 --- a/examples/Simple/simple1/ref +++ /dev/null @@ -1 +0,0 @@ -matched diff --git a/examples/Simple/simple2/in b/examples/Simple/simple2/compiler.in similarity index 100% rename from examples/Simple/simple2/in rename to examples/Simple/simple2/compiler.in diff --git a/examples/Simple/simple2/ref b/examples/Simple/simple2/compiler.ref similarity index 100% rename from examples/Simple/simple2/ref rename to examples/Simple/simple2/compiler.ref diff --git a/examples/Simple/simple2/err b/examples/Simple/simple2/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Simple/simple2/out b/examples/Simple/simple2/out 
deleted file mode 100644 index 3c5bd8b..0000000 --- a/examples/Simple/simple2/out +++ /dev/null @@ -1,12 +0,0 @@ -Call: Input - Call: MatchedBraces - Consumed token: <5, "{"> at 1:1 - Call: MatchedBraces - Consumed token: <5, "{"> at 1:2 - Consumed token: <6, "}"> at 1:5 - Return: MatchedBraces - Consumed token: <6, "}"> at 2:1 - Return: MatchedBraces - Consumed token: <0, > at 2:3 -Return: Input -matched diff --git a/examples/Simple/simple3/in b/examples/Simple/simple3/compiler.in similarity index 100% rename from examples/Simple/simple3/in rename to examples/Simple/simple3/compiler.in diff --git a/examples/Simple/simple3/out b/examples/Simple/simple3/compiler.ref similarity index 100% rename from examples/Simple/simple3/out rename to examples/Simple/simple3/compiler.ref diff --git a/examples/Simple/simple3/err b/examples/Simple/simple3/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Simple/simple3/ref b/examples/Simple/simple3/ref deleted file mode 100644 index 75bedbb..0000000 --- a/examples/Simple/simple3/ref +++ /dev/null @@ -1,2 +0,0 @@ -The levels of nesting is 5 -matched diff --git a/examples/Simple/xlator/in b/examples/Simple/xlator/compiler.in similarity index 100% rename from examples/Simple/xlator/in rename to examples/Simple/xlator/compiler.in diff --git a/examples/Simple/xlator/out b/examples/Simple/xlator/compiler.ref similarity index 100% rename from examples/Simple/xlator/out rename to examples/Simple/xlator/compiler.ref diff --git a/examples/Simple/xlator/err b/examples/Simple/xlator/err deleted file mode 100644 index e69de29..0000000 diff --git a/examples/Simple/xlator/ref b/examples/Simple/xlator/ref deleted file mode 100644 index 3d4f20b..0000000 --- a/examples/Simple/xlator/ref +++ /dev/null @@ -1,6 +0,0 @@ -Please type in an expression followed by a ";" or ^D to quit: - -AbCDE - -Please type in another expression followed by a ";" or ^D to quit: - diff --git a/examples/Transformer/src/main/java/Node.java 
b/examples/Transformer/src/main/java/Node.java index dc1e51b..d076326 100644 --- a/examples/Transformer/src/main/java/Node.java +++ b/examples/Transformer/src/main/java/Node.java @@ -51,6 +51,10 @@ public int jjtGetNumChildren() { return (children == null) ? 0 : children.length; } + public Node[] jjtGetChildren() { + return children; + } + public void jjtSetValue(Object value) { this.value = value; } diff --git a/examples/VTransformer/src/main/java/VTransformer/Node.java b/examples/VTransformer/src/main/java/VTransformer/Node.java index 82a84e8..e6870f4 100644 --- a/examples/VTransformer/src/main/java/VTransformer/Node.java +++ b/examples/VTransformer/src/main/java/VTransformer/Node.java @@ -87,6 +87,10 @@ public int jjtGetNumChildren() { return (children == null) ? 0 : children.length; } + public Node[] jjtGetChildren() { + return children; + } + /** Accept the visitor. **/ public Object jjtAccept(JavaParserVisitor visitor, Object data) { return visitor.visit(this, data); diff --git a/grammars/ecma/src/main/jjtree/EcmaScript.jjt b/grammars/ecma/src/main/jjtree/EcmaScript.jjt index a51f913..43e0181 100644 --- a/grammars/ecma/src/main/jjtree/EcmaScript.jjt +++ b/grammars/ecma/src/main/jjtree/EcmaScript.jjt @@ -2317,10 +2317,10 @@ TOKEN: } if(! 
foundLineSeparator){ - throw generateParseException(); + throw generateParseException("Expected line separator"); } } else { - throw generateParseException(); + throw generateParseException("Could not insert semicolon"); } } } diff --git a/issues/bas01/files/clc1.exp_err b/issues/bas01/files/clc1.exp_err index b2b437f..b23eece 100644 --- a/issues/bas01/files/clc1.exp_err +++ b/issues/bas01/files/clc1.exp_err @@ -212,18 +212,15 @@ Read 1st char for any kind: `32 / ' '´, @ 1:17 (tm) Looking to move from state: <,>-5, for token: (tm) Looking to move from state: <,>-3, for token: <9 / > (tm) Found a match of kind: 9 using the first: 17 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-7, for token: (tm) Looking to move from state: <,>-6, for token: <9 / > (tm) Found a match of kind: 9 using the first: 18 characters (tm) Looking to move from state: <,>-5, for token: (tm) Looking to move from state: <,>-3, for token: <9 / > (tm) - Cur char: `10 / '\n'´ (tm) - Looking to move from state: <,>-4, for token: <9 / > (tm) - Found a match of kind: 9 using the first: 19 characters (tm) - Done with NFA, at pos: 19 (tm) + Done with NFA, at pos: 18 (tm) Put back: 0 looked ahead chars (tm) -Found a SPECIAL_TOKEN match: <9 / " Not yet supported\r\n"> at lexical state: 1; continuing (tm) +Found a SPECIAL_TOKEN match: <9 / " Not yet supported\n"> at lexical state: 1; continuing (tm) Read 1st char for any kind: `116 / 't'´, @ 2:1 (tm) -0 Current input char: `116 / 't'´, @ 2:1 (tm) No string literal start with char: `116 / 't'´ (tm) @@ -311,20 +308,13 @@ Read 1st char for any kind: `111 / 'o'´, @ 2:10 (tm) Cur char: `110 / 'n'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Found a match of kind: 10 using the first: 8 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Done with NFA, at pos: 9 (tm) Put back: 1 looked ahead chars (tm) 
Found a TOKEN MATCH ***: <10 / / "other-ten"> at lexical state: 0; returning (tm) -Read 1st char for any kind: `13 / '\r'´, @ 2:19 (tm) --0 Current input char: `13 / '\r'´, @ 2:19 (tm) - Looking for string literal match of token: <4 / "\r"> (tm) - Cur char: `13 / '\r'´ (tm) - Currently matched the first: 1 chars as token: <4 / "\r"> (tm) -Put back: 0 looked ahead chars (tm) -Found a SKIP match: <4> at lexical state: 0; continuing (tm) -Read 1st char for any kind: `10 / '\n'´, @ 2:20 (tm) --0 Current input char: `10 / '\n'´, @ 2:20 (tm) +Read 1st char for any kind: `10 / '\n'´, @ 2:19 (tm) +-0 Current input char: `10 / '\n'´, @ 2:19 (tm) Looking for string literal match of token: <3 / "\n"> (tm) Cur char: `10 / '\n'´ (tm) Currently matched the first: 1 chars as token: <3 / "\n"> (tm) @@ -391,38 +381,24 @@ Read 1st char for any kind: `73 / 'I'´, @ 3:11 (tm) Cur char: `50 / '2'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Found a match of kind: 10 using the first: 3 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Done with NFA, at pos: 4 (tm) Put back: 1 looked ahead chars (tm) Found a TOKEN MATCH ***: <10 / / "Int2"> at lexical state: 0; returning (tm) -Read 1st char for any kind: `13 / '\r'´, @ 3:15 (tm) --0 Current input char: `13 / '\r'´, @ 3:15 (tm) - Looking for string literal match of token: <4 / "\r"> (tm) - Cur char: `13 / '\r'´ (tm) - Currently matched the first: 1 chars as token: <4 / "\r"> (tm) -Put back: 0 looked ahead chars (tm) -Found a SKIP match: <4> at lexical state: 0; continuing (tm) -Read 1st char for any kind: `10 / '\n'´, @ 3:16 (tm) --0 Current input char: `10 / '\n'´, @ 3:16 (tm) +Read 1st char for any kind: `10 / '\n'´, @ 3:15 (tm) +-0 Current input char: `10 / '\n'´, @ 3:15 (tm) Looking for string literal match of token: <3 / "\n"> (tm) Cur char: `10 / '\n'´ (tm) Currently matched the first: 1 chars as token: <3 / "\n"> (tm) Put back: 0 
looked ahead chars (tm) Found a SKIP match: <3> at lexical state: 0; continuing (tm) -Read 1st char for any kind: `13 / '\r'´, @ 4:1 (tm) --0 Current input char: `13 / '\r'´, @ 4:1 (tm) - Looking for string literal match of token: <4 / "\r"> (tm) - Cur char: `13 / '\r'´ (tm) - Currently matched the first: 1 chars as token: <4 / "\r"> (tm) -Put back: 0 looked ahead chars (tm) -Found a SKIP match: <4> at lexical state: 0; continuing (tm) -Read 1st char for any kind: `10 / '\n'´, @ 4:2 (tm) --0 Current input char: `10 / '\n'´, @ 4:2 (tm) +Read 1st char for any kind: `10 / '\n'´, @ 4:1 (tm) +-0 Current input char: `10 / '\n'´, @ 4:1 (tm) Looking for string literal match of token: <3 / "\n"> (tm) Cur char: `10 / '\n'´ (tm) Currently matched the first: 1 chars as token: <3 / "\n"> (tm) Put back: 0 looked ahead chars (tm) Found a SKIP match: <3> at lexical state: 0; continuing (tm) --0 Reached EOF (1st), @ 4:2 (tm) +-0 Reached EOF (1st), @ 4:1 (tm) Input file parsed successfully diff --git a/issues/bas01/files/clc2.exp_err b/issues/bas01/files/clc2.exp_err index bc8fb0b..42afd32 100644 --- a/issues/bas01/files/clc2.exp_err +++ b/issues/bas01/files/clc2.exp_err @@ -95,20 +95,13 @@ Read 1st char for any kind: `98 / 'b'´, @ 1:14 (tm) Cur char: `98 / 'b'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Found a match of kind: 10 using the first: 1 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Done with NFA, at pos: 2 (tm) Put back: 1 looked ahead chars (tm) Found a TOKEN MATCH ***: <10 / / "bb"> at lexical state: 0; returning (tm) -Read 1st char for any kind: `13 / '\r'´, @ 1:16 (tm) --0 Current input char: `13 / '\r'´, @ 1:16 (tm) - Looking for string literal match of token: <4 / "\r"> (tm) - Cur char: `13 / '\r'´ (tm) - Currently matched the first: 1 chars as token: <4 / "\r"> (tm) -Put back: 0 looked ahead chars (tm) -Found a SKIP match: <4> at lexical state: 0; continuing 
(tm) -Read 1st char for any kind: `10 / '\n'´, @ 1:17 (tm) --0 Current input char: `10 / '\n'´, @ 1:17 (tm) +Read 1st char for any kind: `10 / '\n'´, @ 1:16 (tm) +-0 Current input char: `10 / '\n'´, @ 1:16 (tm) Looking for string literal match of token: <3 / "\n"> (tm) Cur char: `10 / '\n'´ (tm) Currently matched the first: 1 chars as token: <3 / "\n"> (tm) @@ -125,7 +118,7 @@ Read 1st char for any kind: `52 / '4'´, @ 2:1 (tm) Cur char: `53 / '5'´ (tm) Looking to move from state: -0, for token: <7 / > (tm) Found a match of kind: 7 using the first: 1 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: -0, for token: <7 / > (tm) Done with NFA, at pos: 2 (tm) Put back: 1 looked ahead chars (tm) diff --git a/issues/bas01/files/clc3.exp_err b/issues/bas01/files/clc3.exp_err index 6ae5b45..646c45e 100644 --- a/issues/bas01/files/clc3.exp_err +++ b/issues/bas01/files/clc3.exp_err @@ -95,20 +95,13 @@ Read 1st char for any kind: `98 / 'b'´, @ 1:14 (tm) Cur char: `98 / 'b'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Found a match of kind: 10 using the first: 1 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-2, for token: <10 / > (tm) Done with NFA, at pos: 2 (tm) Put back: 1 looked ahead chars (tm) Found a TOKEN MATCH ***: <10 / / "bb"> at lexical state: 0; returning (tm) -Read 1st char for any kind: `13 / '\r'´, @ 1:16 (tm) --0 Current input char: `13 / '\r'´, @ 1:16 (tm) - Looking for string literal match of token: <4 / "\r"> (tm) - Cur char: `13 / '\r'´ (tm) - Currently matched the first: 1 chars as token: <4 / "\r"> (tm) -Put back: 0 looked ahead chars (tm) -Found a SKIP match: <4> at lexical state: 0; continuing (tm) -Read 1st char for any kind: `10 / '\n'´, @ 1:17 (tm) --0 Current input char: `10 / '\n'´, @ 1:17 (tm) +Read 1st char for any kind: `10 / '\n'´, @ 1:16 (tm) +-0 Current input char: `10 / '\n'´, @ 1:16 (tm) Looking for string 
literal match of token: <3 / "\n"> (tm) Cur char: `10 / '\n'´ (tm) Currently matched the first: 1 chars as token: <3 / "\n"> (tm) @@ -145,18 +138,15 @@ Read 1st char for any kind: `32 / ' '´, @ 2:3 (tm) Looking to move from state: <,>-5, for token: (tm) Looking to move from state: <,>-3, for token: <9 / > (tm) Found a match of kind: 9 using the first: 2 characters (tm) - Cur char: `13 / '\r'´ (tm) + Cur char: `10 / '\n'´ (tm) Looking to move from state: <,>-7, for token: (tm) Looking to move from state: <,>-6, for token: <9 / > (tm) Found a match of kind: 9 using the first: 3 characters (tm) Looking to move from state: <,>-5, for token: (tm) Looking to move from state: <,>-3, for token: <9 / > (tm) - Cur char: `10 / '\n'´ (tm) - Looking to move from state: <,>-4, for token: <9 / > (tm) - Found a match of kind: 9 using the first: 4 characters (tm) - Done with NFA, at pos: 4 (tm) + Done with NFA, at pos: 3 (tm) Put back: 0 looked ahead chars (tm) -Found a SPECIAL_TOKEN match: <9 / " kk\r\n"> at lexical state: 1; continuing (tm) +Found a SPECIAL_TOKEN match: <9 / " kk\n"> at lexical state: 1; continuing (tm) Read 1st char for any kind: `44 / ','´, @ 3:1 (tm) -0 Current input char: `44 / ','´, @ 3:1 (tm) No string literal start with char: `44 / ','´ (tm) diff --git a/issues/bas01/pom.xml b/issues/bas01/pom.xml index 2aefa5b..2a195f4 100644 --- a/issues/bas01/pom.xml +++ b/issues/bas01/pom.xml @@ -152,6 +152,18 @@ THE POSSIBILITY OF SUCH DAMAGE. 
org.apache.maven.plugins maven-antrun-plugin + + crlf-test + compile + + + + + + + run + + check-res-test-1 test diff --git a/issues/bas02/files/bug1.exp_out b/issues/bas02/files/bug1.exp_out index 8c5ba8e..3f3d343 100644 --- a/issues/bas02/files/bug1.exp_out +++ b/issues/bas02/files/bug1.exp_out @@ -1,13 +1,13 @@ Call: 0: EnumerationItem-140 (pa) - Consumed token: <7 / / "{"> (in jj_consume_token()) (pa) + Consumed token: <7 / / "{">, @ 1:1 (in jj_consume_token()) (pa) Call: 2: Entering LOOKAHEAD (2) (at 145:5 in EnumerationItem-140) (la) Call: 4: NamedNumber-156: looking ahead (2)... (la) - Visited token (la=1): <17 / / "c">; Expected token: <17 / > (la) - Visited token (la=0): <8 / / "}">; Expected token: <9 / > (la) + Visited token (la=1): <17 / / "c">, @ 1:3; Expected token: <17 / > (la) + Visited token (la=0): <8 / / "}">, @ 1:5; Expected token: <9 / > (la) Return: 4: NamedNumber-156: look ahead scan (0) FAILED (la) Return: 2: Exiting FAILED LOOKAHEAD (2/0) (at 145:5 in EnumerationItem-140) (la) Call: 2: identifier-167 (pa) - Consumed token: <17 / / "c"> (in jj_consume_token()) (pa) + Consumed token: <17 / / "c">, @ 1:3 (in jj_consume_token()) (pa) Return: 2: identifier-167 (pa) - Consumed token: <8 / / "}"> (in jj_consume_token()) (pa) + Consumed token: <8 / / "}">, @ 1:5 (in jj_consume_token()) (pa) Return: 0: EnumerationItem-140 (pa) diff --git a/issues/bas02/files/bug2.exp_err b/issues/bas02/files/bug2.exp_err index 1d77d3c..f9264b0 100644 --- a/issues/bas02/files/bug2.exp_err +++ b/issues/bas02/files/bug2.exp_err @@ -1,5 +1,5 @@ ParseException parsing input file -Encountered: / "{". +Encountered: / "{", at line 1, column 2. Was expecting one of these terminals within expansion starting at 145:5: (inside 145:5) ... (inside 160:5) ... 
diff --git a/issues/bas02/files/bug2.exp_out b/issues/bas02/files/bug2.exp_out index 5d2923b..6d1271b 100644 --- a/issues/bas02/files/bug2.exp_out +++ b/issues/bas02/files/bug2.exp_out @@ -1,9 +1,9 @@ Call: 0: EnumerationItem-140 (pa) - Consumed token: <7 / / "{"> (in jj_consume_token()) (pa) + Consumed token: <7 / / "{">, @ 1:1 (in jj_consume_token()) (pa) Call: 2: Entering LOOKAHEAD (2) (at 145:5 in EnumerationItem-140) (la) Call: 4: NamedNumber-156: looking ahead (2)... (la) - Visited token (la=1): <7 / / "{">; Expected token: <17 / > (la) - Visited token (la=1): <7 / / "{">; Expected token: <18 / > (la) + Visited token (la=1): <7 / / "{">, @ 1:2; Expected token: <17 / > (la) + Visited token (la=1): <7 / / "{">, @ 1:2; Expected token: <18 / > (la) Return: 4: NamedNumber-156: look ahead scan (1) FAILED (la) Return: 2: Exiting FAILED LOOKAHEAD (2/1) (at 145:5 in EnumerationItem-140) (la) Return: 0: EnumerationItem-140 (pa) diff --git a/issues/bas02/files/bug3.exp_out b/issues/bas02/files/bug3.exp_out index 690a3fb..e0d9197 100644 --- a/issues/bas02/files/bug3.exp_out +++ b/issues/bas02/files/bug3.exp_out @@ -1,18 +1,18 @@ Call: 0: EnumerationItem-140 (pa) - Consumed token: <7 / / "{"> (in jj_consume_token()) (pa) + Consumed token: <7 / / "{">, @ 1:1 (in jj_consume_token()) (pa) Call: 2: Entering LOOKAHEAD (2) (at 145:5 in EnumerationItem-140) (la) Call: 4: NamedNumber-156: looking ahead (2)... 
(la) - Visited token (la=1): <17 / / "n">; Expected token: <17 / > (la) - Visited token (la=0): <9 / / "(">; Expected token: <9 / > (la) + Visited token (la=1): <17 / / "n">, @ 1:3; Expected token: <17 / > (la) + Visited token (la=0): <9 / / "(">, @ 1:4; Expected token: <9 / > (la) Return: 4: NamedNumber-156: look ahead SUCCESSFUL (la) Return: 2: Caught SUCCESSFUL LOOKAHEAD (2/0) (at 145:5 in EnumerationItem-140) (la) Call: 2: NamedNumber-156 (pa) Call: 4: identifier-167 (pa) - Consumed token: <17 / / "n"> (in jj_consume_token()) (pa) + Consumed token: <17 / / "n">, @ 1:3 (in jj_consume_token()) (pa) Return: 4: identifier-167 (pa) - Consumed token: <9 / / "("> (in jj_consume_token()) (pa) - Consumed token: <14 / / "2"> (in jj_consume_token()) (pa) - Consumed token: <10 / / ")"> (in jj_consume_token()) (pa) + Consumed token: <9 / / "(">, @ 1:4 (in jj_consume_token()) (pa) + Consumed token: <14 / / "2">, @ 1:5 (in jj_consume_token()) (pa) + Consumed token: <10 / / ")">, @ 1:6 (in jj_consume_token()) (pa) Return: 2: NamedNumber-156 (pa) - Consumed token: <8 / / "}"> (in jj_consume_token()) (pa) + Consumed token: <8 / / "}">, @ 1:8 (in jj_consume_token()) (pa) Return: 0: EnumerationItem-140 (pa) diff --git a/issues/bas02/src/main/javacc/Bug002.jj b/issues/bas02/src/main/javacc/Bug002.jj index 0b15c8e..b2477ea 100644 --- a/issues/bas02/src/main/javacc/Bug002.jj +++ b/issues/bas02/src/main/javacc/Bug002.jj @@ -36,7 +36,7 @@ options DEBUG_PARSER = true; // DEBUG_TOKEN_MANAGER = true; // JAVA_UNICODE_ESCAPE = true; - KEEP_LINE_COLUMN = false; + KEEP_LINE_COLUMN = true; NO_DFA = true; // PARSER_INCLUDE = "stdio.h"; // TOKEN_INCLUDE = ""; diff --git a/pom.xml b/pom.xml index e0febcc..aa8d667 100644 --- a/pom.xml +++ b/pom.xml @@ -122,6 +122,9 @@ THE POSSIBILITY OF SUCH DAMAGE. 
${project.basedir}/it/settings.xml true + + !no_local_java_generator_version + diff --git a/src/main/resources/templates/gwt/JavaCharStream.template b/src/main/resources/templates/gwt/JavaCharStream.template index a66a044..1f5bdd2 100644 --- a/src/main/resources/templates/gwt/JavaCharStream.template +++ b/src/main/resources/templates/gwt/JavaCharStream.template @@ -29,6 +29,12 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ #fi +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; + /** * The implementation (but not declared as such) of interface CharStream generated by Javacc/java * (by JavaCodeGenerator.java & JavaTemplates.java with gwt/JavaCharStream.template) where the stream @@ -52,52 +58,39 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { protected char[] nextCharBuf; protected int nextCharInd = -1; + private char nextByte; + private char nextChar; #if KEEP_LINE_COLUMN - protected boolean trackLineColumn = true; protected int[] bufline; protected int[] bufcolumn; protected int column = 0; protected int line = 1; protected boolean prevCharIsCR = false; protected boolean prevCharIsLF = false; -#else - protected boolean trackLineColumn = false; #fi protected int tabSize = 1; - /** Get starting character for token. */ - // Don't know why this method is different than in SimpleCharStream. Makes EOF handling different. - public char BeginToken() throws java.io.IOException { - if (inBuf > 0) { - --inBuf; - if (++bufpos == bufsize) { - bufpos = 0; - } - tokenBegin = bufpos; - return buffer[bufpos]; - } - tokenBegin = 0; - bufpos = -1; - return readChar(); - } - /** Read a character. 
*/ - public char readChar() throws java.io.IOException { + public boolean hasNextChar() { if (inBuf > 0) { --inBuf; if (++bufpos == bufsize) { bufpos = 0; } - return buffer[bufpos]; + nextChar = buffer[bufpos]; + return true; } char c; if (++bufpos == available) { AdjustBuffSize(); } - if ((buffer[bufpos] = c = ReadByte()) == '\\') { + if (!hasNextByte()) { + return false; + } + if ((buffer[bufpos] = c = nextByte) == '\\') { #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi @@ -107,29 +100,31 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { if (++bufpos == available) { AdjustBuffSize(); } - try { - if ((buffer[bufpos] = c = ReadByte()) != '\\') { -#if KEEP_LINE_COLUMN - UpdateLineColumn(c); -#fi - // found a non-backslash char. - if ((c == 'u') && ((backSlashCnt & 1) == 1)) { - if (--bufpos < 0) { - bufpos = bufsize - 1; - } - break; - } - backup(backSlashCnt); - return '\\'; - } - } catch (java.io.IOException e) { + + if (!hasNextByte()) { // We are returning one backslash so we should only backup (count-1) if (backSlashCnt > 1) { backup(backSlashCnt - 1); } - return '\\'; + nextChar = '\\'; + return true; } + if ((buffer[bufpos] = c = nextByte) != '\\') { +#if KEEP_LINE_COLUMN + UpdateLineColumn(c); +#fi + // found a non-backslash char. 
+ if ((c == 'u') && ((backSlashCnt & 1) == 1)) { + if (--bufpos < 0) { + bufpos = bufsize - 1; + } + break; + } + backup(backSlashCnt); + nextChar = '\\'; + return true; + } #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi @@ -137,41 +132,44 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { } // Here, we have seen an odd number of backslash's followed by a 'u' - try { - while ((c = ReadByte()) == 'u') { + while (hasNextByte() && nextByte == 'u') { #if KEEP_LINE_COLUMN - ++column; + ++column; #else - //; -#fi - } - buffer[bufpos] = c = (char) ( hexval(c) << 12 - | hexval(ReadByte()) << 8 - | hexval(ReadByte()) << 4 - | hexval(ReadByte()) ); -#if KEEP_LINE_COLUMN - column += 4; + ; #fi - } catch (java.io.IOException e) { + } + c = (char) (hexval(nextByte) << 12); + for (int shift = 8; shift >= 0; shift -= 4) { + if (!hasNextByte()) { #if KEEP_LINE_COLUMN - throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}("Invalid escape character at line " + line + throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}("Invalid escape character at line " + line + " column " + column + "."); #else - throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}(\"Invalid escape character in input\"); + throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}(\"Invalid escape character in input\"); #fi - } + } + c |= hexval(nextByte) << shift; + } + buffer[bufpos] = c; +#if KEEP_LINE_COLUMN + column += 4; +#fi if (backSlashCnt == 1) { - return c; + nextChar = c; + return true; } else { backup(backSlashCnt - 1); - return '\\'; + nextChar = '\\'; + return true; } } else { #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi - return c; + nextChar = c; + return true; } } @@ -205,22 +203,18 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos); buffer = newbuffer; #if KEEP_LINE_COLUMN - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); 
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos); bufline = newbufline; - System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin); System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos); bufcolumn = newbufcolumn; #fi - bufpos += (bufsize - tokenBegin); } else { System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin); buffer = newbuffer; #if KEEP_LINE_COLUMN - System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin); bufline = newbufline; @@ -238,14 +232,7 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { tokenBegin = 0; } - protected char ReadByte() throws java.io.IOException { - if (++nextCharInd >= maxNextCharInd) { - FillBuff(); - } - return nextCharBuf[nextCharInd]; - } - - protected void FillBuff() throws java.io.IOException { + protected boolean FillBuff() { int i; if (maxNextCharInd == 4096) { maxNextCharInd = nextCharInd = 0; @@ -253,24 +240,72 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { try { if ((i = inputStream.read(nextCharBuf, maxNextCharInd, 4096 - maxNextCharInd)) == -1) { inputStream.close(); - throw new java.io.IOException("End of file"); + reset(); + return false; } else { maxNextCharInd += i; + return true; } - } catch (java.io.IOException e) { - if (bufpos != 0) { - --bufpos; - backup(0); -#if KEEP_LINE_COLUMN - } else { - bufline[bufpos] = line; - bufcolumn[bufpos] = column; -#fi + } catch (IOException e) { + reset(); + return false; + } + } + + private void reset() { + if (bufpos != 0) { + --bufpos; + backup(0); + #if KEEP_LINE_COLUMN + } else { + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + #fi + } + } + + protected boolean hasNextByte() { + if (++nextCharInd >= maxNextCharInd) { + if (!FillBuff()) { + return false; } - throw e; } + nextByte = nextCharBuf[nextCharInd]; + return true; + } + + /** + * Returns next character. 
+ * + * @return next character in the input + */ + public char getNextChar() { + return nextChar; } + /** + * Checks next character and marks new token. + * + * @return whether next character is available + */ + public boolean hasNextToken() { + if (inBuf > 0) { + --inBuf; + + if (++bufpos == bufsize) { + bufpos = 0; + } + tokenBegin = bufpos; + nextChar = buffer[bufpos]; + return true; + } + + tokenBegin = 0; + bufpos = -1; + return hasNextChar(); + } + + #if KEEP_LINE_COLUMN protected void UpdateLineColumn(char c) { column++; @@ -307,7 +342,7 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { } #fi - static int hexval(char c) throws java.io.IOException { + static int hexval(char c) { switch (c) { case '0': return 0; @@ -348,8 +383,8 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { case 'f': case 'F': return 15; - default: - throw new java.io.IOException(); // Should never come here + default: // Should never come here + throw new IllegalArgumentException("Invalid hax character :" + c); } } @@ -361,6 +396,16 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { } } + /** Set buffers back to null when finished. */ + public void Done() { + nextCharBuf = null; + buffer = null; +#if KEEP_LINE_COLUMN + bufline = null; + bufcolumn = null; +#fi + } + /** * Get the token image. * @@ -392,16 +437,6 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { return ret; } - /** Set buffers back to null when finished. */ - public void Done() { - nextCharBuf = null; - buffer = null; -#if KEEP_LINE_COLUMN - bufline = null; - bufcolumn = null; -#fi - } - /** Constructor on Provider. 
*/ public JavaCharStream(Provider dstream, int startline, int startcolumn, int buffersize) { inputStream = dstream; @@ -542,14 +577,6 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { column = bufcolumn[j]; } - boolean getTrackLineColumn() { - return trackLineColumn; - } - - void setTrackLineColumn(boolean tlc) { - trackLineColumn = tlc; - } - #fi public int getTabSize() { return tabSize; } diff --git a/src/main/resources/templates/gwt/SimpleCharStream.template b/src/main/resources/templates/gwt/SimpleCharStream.template index 8b2e368..4e26e69 100644 --- a/src/main/resources/templates/gwt/SimpleCharStream.template +++ b/src/main/resources/templates/gwt/SimpleCharStream.template @@ -29,6 +29,12 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ #fi +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; + /** * The implementation (but not declared as such) of the interface CharStream generated by Javacc/java * (by JavaCodeGenerator.java with gwt/SimpleCharStream.template) where the stream is assumed to @@ -49,49 +55,70 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { protected char[] buffer; protected int maxNextCharInd = 0; protected int inBuf = 0; + private char nextChar; #if KEEP_LINE_COLUMN - protected boolean trackLineColumn = true; protected int[] bufline; protected int[] bufcolumn; protected int line = 1; protected int column = 0; protected boolean prevCharIsCR = false; protected boolean prevCharIsLF = false; -#else - protected boolean trackLineColumn = false; #fi protected int tabSize = 1; - /** Start. */ - public char BeginToken() throws java.io.IOException { + /** + * Checks next character and marks token start. 
+ * + * @return whether next character is available + */ + public boolean hasNextToken() { tokenBegin = -1; - char c = readChar(); - tokenBegin = bufpos; - return c; + if (hasNextChar()) { + tokenBegin = bufpos; + return true; + } + return false; } - /** Read a character. */ - public char readChar() throws java.io.IOException { + /** + * Gets next character from the input. + * + * @return next character in the input + */ + public char getNextChar() { + return nextChar; + } + + /** + * Checks if there is another character in the input. + * + * @return whether next character exists + */ + public boolean hasNextChar() { if (inBuf > 0) { --inBuf; if (++bufpos == bufsize) { bufpos = 0; } - return buffer[bufpos]; + nextChar = buffer[bufpos]; + return true; } if (++bufpos >= maxNextCharInd) { - FillBuff(); + if (!FillBuff()) { + return false; + } } char c = buffer[bufpos]; #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi - return c; + nextChar = c; + return true; } - protected void FillBuff() throws java.io.IOException { + protected boolean FillBuff() { if (maxNextCharInd == available) { if (available == bufsize) { if (tokenBegin > 2048) { @@ -110,22 +137,27 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { available = tokenBegin; } } - - int i; try { - if ((i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd)) == -1) { + final int i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd); + if (i <= 0) { inputStream.close(); - throw new java.io.IOException("End of file"); + reset(); + return false; } else { maxNextCharInd += i; + return true; } - } catch (java.io.IOException e) { - --bufpos; - backup(0); - if (tokenBegin == -1) { - tokenBegin = bufpos; - } - throw e; + } catch (IOException e) { + reset(); + return false; + } + } + + private void reset() { + --bufpos; + backup(0); + if (tokenBegin == -1) { + tokenBegin = bufpos; } } @@ -386,14 +418,6 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { 
column = bufcolumn[j]; } - boolean getTrackLineColumn() { - return trackLineColumn; - } - - void setTrackLineColumn(boolean tlc) { - trackLineColumn = tlc; - } - #fi public int getTabSize() { return tabSize; } diff --git a/src/main/resources/templates/java/CharStream.template b/src/main/resources/templates/java/CharStream.template index e9d0bc7..44ce17b 100644 --- a/src/main/resources/templates/java/CharStream.template +++ b/src/main/resources/templates/java/CharStream.template @@ -45,18 +45,27 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}interface CharStream { /** - * Returns the next character that marks the beginning of the next token.
- * All characters must remain in the buffer between two successive calls - * to this method to implement backup correctly. + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Should be called only after hasNextChar + * or hasNextToken returned true. In other cases the behavior is not defined + * and a runtime exception such as IndexOutOfBoundsException may be thrown. */ - char BeginToken() throws java.io.IOException; + char getNextChar(); /** - * Returns the next character from the selected input.
- * The method of selecting the input is the responsibility of the class implementing - * this interface. Can throw any java.io.IOException. + * Checks whether there are unread characters in the input. + * + * @return whether next character exists */ - char readChar() throws java.io.IOException; + boolean hasNextChar(); + + /** + * Similar to hasNextChar, but may have additional side effects (such as marking the start of a new token). + * + * @return whether next character exists + */ + boolean hasNextToken(); /** * Backs up the input stream by amount steps.
@@ -98,25 +107,25 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}interface CharStream { /** * Returns the line number of the first character for current token - * (being matched after the last call to BeginToken). + * (being matched after the last call to hasNextToken). */ int getBeginLine(); /** * Returns the column number of the first character for current token - * (being matched after the last call to BeginToken). + * (being matched after the last call to hasNextToken). */ int getBeginColumn(); /** * Returns the line number of the last character for current token - * (being matched after the last call to BeginToken). + * (being matched after the last call to hasNextToken). */ int getEndLine(); /** * Returns the column number of the last character for current token - * (being matched after the last call to BeginToken). + * (being matched after the last call to hasNextToken). */ int getEndColumn(); diff --git a/src/main/resources/templates/java/JavaCharStream.template b/src/main/resources/templates/java/JavaCharStream.template index 3d90a92..bfeb3ca 100644 --- a/src/main/resources/templates/java/JavaCharStream.template +++ b/src/main/resources/templates/java/JavaCharStream.template @@ -58,51 +58,38 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { protected char[] nextCharBuf; protected int nextCharInd = -1; + private char nextChar; + private char nextByte; #if KEEP_LINE_COLUMN - //protected boolean trackLineColumn = true; protected int[] bufline; protected int[] bufcolumn; protected int line = 1; protected int column = 0; protected boolean prevCharIsCR = false; protected boolean prevCharIsLF = false; -#else - //protected boolean trackLineColumn = false; #fi protected int tabSize = 1; - /** Get starting character for token. */ - // Don't know why this method is different than in SimpleCharStream. Makes EOF handling different. 
- public char BeginToken() throws IOException { - if (inBuf > 0) { - --inBuf; - if (++bufpos == bufsize) { - bufpos = 0; - } - tokenBegin = bufpos; - return buffer[bufpos]; - } - tokenBegin = 0; - bufpos = -1; - return readChar(); - } - /** Read a character. */ - public char readChar() throws IOException { + public boolean hasNextChar() { if (inBuf > 0) { --inBuf; if (++bufpos == bufsize) { bufpos = 0; } - return buffer[bufpos]; + nextChar = buffer[bufpos]; + return true; } char c; if (++bufpos == available) { AdjustBuffSize(); } - if ((buffer[bufpos] = c = ReadByte()) == '\\') { + if (!hasNextByte()) { + return false; + } + if ((buffer[bufpos] = c = nextByte) == '\\') { #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi @@ -112,29 +99,31 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { if (++bufpos == available) { AdjustBuffSize(); } - try { - if ((buffer[bufpos] = c = ReadByte()) != '\\') { -#if KEEP_LINE_COLUMN - UpdateLineColumn(c); -#fi - // found a non-backslash char. - if ((c == 'u') && ((backSlashCnt & 1) == 1)) { - if (--bufpos < 0) { - bufpos = bufsize - 1; - } - break; - } - backup(backSlashCnt); - return '\\'; - } - } catch (IOException e) { + + if (!hasNextByte()) { // We are returning one backslash so we should only backup (count-1) if (backSlashCnt > 1) { backup(backSlashCnt - 1); } - return '\\'; + nextChar = '\\'; + return true; } + if ((buffer[bufpos] = c = nextByte) != '\\') { +#if KEEP_LINE_COLUMN + UpdateLineColumn(c); +#fi + // found a non-backslash char. 
+ if ((c == 'u') && ((backSlashCnt & 1) == 1)) { + if (--bufpos < 0) { + bufpos = bufsize - 1; + } + break; + } + backup(backSlashCnt); + nextChar = '\\'; + return true; + } #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi @@ -142,42 +131,44 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { } // Here, we have seen an odd number of backslash's followed by a 'u' - try { - while ((c = ReadByte()) == 'u') { + while (hasNextByte() && nextByte == 'u') { #if KEEP_LINE_COLUMN - ++column; + ++column; #else - //; + ; #fi - } - buffer[bufpos] = c = (char) ( hexval(c) << 12 - | hexval(ReadByte()) << 8 - | hexval(ReadByte()) << 4 - | hexval(ReadByte()) ); -#if KEEP_LINE_COLUMN - column += 4; -#fi - } catch (IOException e) { + } + c = (char) (hexval(nextByte) << 12); + for (int shift = 8; shift >= 0; shift -= 4) { + if (!hasNextByte()) { #if KEEP_LINE_COLUMN - throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}( - "Invalid escape character at line " + line + " column " + column + "."); + throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}("Invalid escape character at line " + line + + " column " + column + "."); #else - throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}( - "Invalid escape character in input."); + throw new ${LEGACY_EXCEPTION_HANDLING?Error:RuntimeException}(\"Invalid escape character in input\"); #fi - } + } + c |= hexval(nextByte) << shift; + } + buffer[bufpos] = c; +#if KEEP_LINE_COLUMN + column += 4; +#fi if (backSlashCnt == 1) { - return c; + nextChar = c; + return true; } else { backup(backSlashCnt - 1); - return '\\'; + nextChar = '\\'; + return true; } } else { #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi - return c; + nextChar = c; + return true; } } @@ -239,14 +230,7 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { tokenBegin = 0; } - protected char ReadByte() throws IOException { - if (++nextCharInd >= maxNextCharInd) { - FillBuff(); - } - return nextCharBuf[nextCharInd]; - } - - 
protected void FillBuff() throws IOException { + protected boolean FillBuff() { int i; if (maxNextCharInd == 4096) { maxNextCharInd = nextCharInd = 0; @@ -254,24 +238,72 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { try { if ((i = inputStream.read(nextCharBuf, maxNextCharInd, 4096 - maxNextCharInd)) == -1) { inputStream.close(); - throw new IOException("End of file"); + reset(); + return false; } else { maxNextCharInd += i; + return true; } } catch (IOException e) { - if (bufpos != 0) { - --bufpos; - backup(0); -#if KEEP_LINE_COLUMN - } else { - bufline[bufpos] = line; - bufcolumn[bufpos] = column; -#fi + reset(); + return false; + } + } + + private void reset() { + if (bufpos != 0) { + --bufpos; + backup(0); + #if KEEP_LINE_COLUMN + } else { + bufline[bufpos] = line; + bufcolumn[bufpos] = column; + #fi + } + } + + protected boolean hasNextByte() { + if (++nextCharInd >= maxNextCharInd) { + if (!FillBuff()) { + return false; + } + } + nextByte = nextCharBuf[nextCharInd]; + return true; + } + + /** + * Returns next character. + * + * @return next character in the input + */ + public char getNextChar() { + return nextChar; + } + + /** + * Checks next character and marks new token. 
+ * + * @return whether next character is available + */ + public boolean hasNextToken() { + if (inBuf > 0) { + --inBuf; + + if (++bufpos == bufsize) { + bufpos = 0; } - throw e; + tokenBegin = bufpos; + nextChar = buffer[bufpos]; + return true; } + + tokenBegin = 0; + bufpos = -1; + return hasNextChar(); } + #if KEEP_LINE_COLUMN protected void UpdateLineColumn(char c) { column++; @@ -308,7 +340,7 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { } #fi - static int hexval(char c) throws IOException { + static int hexval(char c) { switch (c) { case '0': return 0; @@ -349,8 +381,8 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { case 'f': case 'F': return 15; - default: - throw new IOException(); // Should never come here + default: // Should never come here + throw new IllegalArgumentException("Invalid hax character :" + c); } } @@ -691,10 +723,6 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class JavaCharStream { column = bufcolumn[j]; } - //boolean getTrackLineColumn() { return trackLineColumn; } - - //void setTrackLineColumn(boolean tlc) { trackLineColumn = tlc; } - #fi public int getTabSize() { return tabSize; } diff --git a/src/main/resources/templates/java/SimpleCharStream.template b/src/main/resources/templates/java/SimpleCharStream.template index 8551dde..35b1f9f 100644 --- a/src/main/resources/templates/java/SimpleCharStream.template +++ b/src/main/resources/templates/java/SimpleCharStream.template @@ -55,49 +55,70 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { protected char[] buffer; protected int maxNextCharInd = 0; protected int inBuf = 0; + private char nextChar; #if KEEP_LINE_COLUMN - //protected boolean trackLineColumn = true; protected int[] bufline; protected int[] bufcolumn; protected int line = 1; protected int column = 0; protected boolean prevCharIsCR = false; protected boolean prevCharIsLF = false; -#else - //protected boolean trackLineColumn = false; #fi protected int tabSize = 1; 
- /** Start. */ - public char BeginToken() throws IOException { + /** + * Checks next character and marks token start. + * + * @return whether next character is available + */ + public boolean hasNextToken() { tokenBegin = -1; - char c = readChar(); - tokenBegin = bufpos; - return c; + if (hasNextChar()) { + tokenBegin = bufpos; + return true; + } + return false; + } + + /** + * Gets next character from the input. + * + * @return next character in the input + */ + public char getNextChar() { + return nextChar; } - /** Read a character. */ - public char readChar() throws IOException { + /** + * Checks if there is another character in the input. + * + * @return whether next character exists + */ + public boolean hasNextChar() { if (inBuf > 0) { --inBuf; if (++bufpos == bufsize) { bufpos = 0; } - return buffer[bufpos]; + nextChar = buffer[bufpos]; + return true; } if (++bufpos >= maxNextCharInd) { - FillBuff(); + if (!FillBuff()) { + return false; + } } char c = buffer[bufpos]; #if KEEP_LINE_COLUMN UpdateLineColumn(c); #fi - return c; + nextChar = c; + return true; } - protected void FillBuff() throws IOException { + protected boolean FillBuff() { if (maxNextCharInd == available) { if (available == bufsize) { if (tokenBegin > 2048) { @@ -120,17 +141,23 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { final int i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd); if (i <= 0) { inputStream.close(); - throw new IOException("End of file"); + reset(); + return false; } else { maxNextCharInd += i; + return true; } } catch (IOException e) { - --bufpos; - backup(0); - if (tokenBegin == -1) { - tokenBegin = bufpos; - } - throw e; + reset(); + return false; + } + } + + private void reset() { + --bufpos; + backup(0); + if (tokenBegin == -1) { + tokenBegin = bufpos; } } @@ -457,10 +484,6 @@ ${SUPPORT_CLASS_VISIBILITY_PUBLIC?public :}class SimpleCharStream { column = bufcolumn[j]; } - //boolean getTrackLineColumn() { return 
trackLineColumn; } - - //void setTrackLineColumn(final boolean tlc) { trackLineColumn = tlc; } - #fi public int getTabSize() { return tabSize; } diff --git a/src/main/resources/templates/java/TokenManagerDriver.template b/src/main/resources/templates/java/TokenManagerDriver.template index 9a8fd34..6590027 100644 --- a/src/main/resources/templates/java/TokenManagerDriver.template +++ b/src/main/resources/templates/java/TokenManagerDriver.template @@ -134,8 +134,8 @@ ${decls} EOFLoop: for (;;) { // First see if we have any input at all. - try { - curChar = input_stream.BeginToken(); + if (input_stream.hasNextToken()) { + curChar = input_stream.getNextChar(); #if DEBUG_TOKEN_MANAGER if (trace_enabled()) System.err.println( "Read 1st char for any kind: " + disp_char(curChar) + @@ -147,7 +147,7 @@ ${decls} #if IGNORE_CASE curChar = Character.toLowerCase(curChar); #fi - } catch (Exception e) { + } else { #if DEBUG_TOKEN_MANAGER if (trace_enabled()) System.err.println( disp_state(curLexState, lexStateNames) + @@ -285,8 +285,8 @@ ${decls} #fi lastReadPosition = 0; jjmatchedKind = Integer.MAX_VALUE; - try { - curChar = input_stream.readChar(); + if (input_stream.hasNextChar()) { + curChar = input_stream.getNextChar(); #if DEBUG_TOKEN_MANAGER if (trace_enabled()) System.err.println( "Read next char for a MORE: " + disp_char(curChar) + @@ -299,7 +299,7 @@ ${decls} curChar = (int) Character.toLowerCase((char) curChar); #fi continue; - } catch (java.io.IOException e1) { + } else { #if DEBUG_TOKEN_MANAGER if (trace_enabled()) System.err.println( disp_state(curLexState, lexStateNames) + @@ -434,12 +434,12 @@ ${decls} #fi protected ${STATIC?static :}boolean moveToNextChar() { - try { - curChar = input_stream.readChar(); + if (input_stream.hasNextChar()) { + curChar = input_stream.getNextChar(); #if IGNORE_CASE curChar = (int) Character.toLowerCase((char) curChar); #fi - } catch (java.io.IOException e) { + } else { return false; } return true; @@ -620,10 +620,9 @@ ${decls} int 
errorColumn = input_stream.getEndColumn(); String errorAfter = null; boolean eofSeen = false; - try { - input_stream.readChar(); + if (input_stream.hasNextChar()) { input_stream.backup(1); - } catch (java.io.IOException e1) { + } else { eofSeen = true; errorAfter = lastReadPosition <= 1 ? "" : input_stream.GetImage(); if (curChar == '\n' || curChar == '\r') {