Skip to content

Commit c82d78f

Browse files
authored
[java20] New grammar, Java version 20 (#3659)
* Initial version of Java20. * Add EOF-terminated rule. * Fix list of targets that should work. Remove tests that fail. * Fix lexer grammar for refactoring via transformGrammar.py.
1 parent 768b12e commit c82d78f

18 files changed

+2891
-0
lines changed
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/*
2+
* [The "BSD license"]
3+
* Copyright (c) 2014 Terence Parr
4+
* Copyright (c) 2014 Sam Harwell
5+
* Copyright (c) 2017 Chan Chung Kwong
6+
* All rights reserved.
7+
*
8+
* Redistribution and use in source and binary forms, with or without
9+
* modification, are permitted provided that the following conditions
10+
* are met:
11+
*
12+
* 1. Redistributions of source code must retain the above copyright
13+
* notice, this list of conditions and the following disclaimer.
14+
* 2. Redistributions in binary form must reproduce the above copyright
15+
* notice, this list of conditions and the following disclaimer in the
16+
* documentation and/or other materials provided with the distribution.
17+
* 3. The name of the author may not be used to endorse or promote products
18+
* derived from this software without specific prior written permission.
19+
*
20+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23+
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
*/
31+
32+
using Antlr4.Runtime;
33+
using System;
34+
using System.IO;
35+
using System.Linq;
36+
using System.Text.RegularExpressions;
37+
38+
/// <summary>
/// Base class for the Java lexer. It supplies the look-behind checks
/// <see cref="Check1"/> .. <see cref="Check4"/>, which classify the character(s)
/// immediately before the current position of the input stream.
/// </summary>
/// <remarks>
/// NOTE(review): the checks are presumably called from semantic predicates in the
/// lexer grammar - confirm against the .g4 file.
/// </remarks>
public abstract class JavaLexerBase : Lexer
{
    private readonly ICharStream _input;

    /// <summary>
    /// Forwards all arguments to the base lexer and keeps the stream for the
    /// look-behind checks.
    /// </summary>
    /// <param name="input">Character stream being tokenized.</param>
    /// <param name="output">Writer passed through to the base lexer.</param>
    /// <param name="errorOutput">Writer passed through to the base lexer.</param>
    protected JavaLexerBase(ICharStream input, TextWriter output, TextWriter errorOutput)
        : base(input, output, errorOutput)
    {
        _input = input;
    }

    /// <summary>
    /// Small stand-in for the java.lang.Character helpers used by the checks.
    /// All classification methods take a full Unicode code point.
    /// </summary>
    private static class Character
    {
        private const int MaxCodePoint = 0x10FFFF;

        /// <summary>
        /// True when <paramref name="c"/> is a letter, '$', '_' or a number character.
        /// </summary>
        public static bool isJavaIdentifierPart(int c)
        {
            // Char.IsNumber accepts every character Char.IsDigit accepts, so a
            // separate digit test is not needed.
            return isJavaIdentifierStart(c) || IsNumber(c);
        }

        /// <summary>
        /// True when <paramref name="c"/> is a letter, '$' or '_'.
        /// </summary>
        public static bool isJavaIdentifierStart(int c)
        {
            return IsLetter(c) || c == '$' || c == '_';
        }

        /// <summary>
        /// True when the two values form a UTF-16 high/low surrogate pair.
        /// </summary>
        public static bool isSurrogatePair(int high, int low)
        {
            return high >= 0xD800 && high <= 0xDBFF
                && low >= 0xDC00 && low <= 0xDFFF;
        }

        /// <summary>
        /// Combines a UTF-16 surrogate pair into the code point it encodes.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The arguments are not a valid high/low surrogate pair.
        /// </exception>
        public static int toCodePoint(int high, int low)
        {
            return Char.ConvertToUtf32((char)high, (char)low);
        }

        // Letter test that works for code points above U+FFFF. Casting such a
        // value to char would drop the upper bits and test an unrelated character.
        private static bool IsLetter(int c)
        {
            if (c < 0 || c > MaxCodePoint)
            {
                return false;
            }
            if (c <= Char.MaxValue)
            {
                return Char.IsLetter((char)c);
            }
            return Char.IsLetter(Char.ConvertFromUtf32(c), 0);
        }

        // Number test with the same code point handling as IsLetter.
        private static bool IsNumber(int c)
        {
            if (c < 0 || c > MaxCodePoint)
            {
                return false;
            }
            if (c <= Char.MaxValue)
            {
                return Char.IsNumber((char)c);
            }
            return Char.IsNumber(Char.ConvertFromUtf32(c), 0);
        }
    }

    /// <summary>
    /// True when the value just before the current position can start an identifier.
    /// </summary>
    public bool Check1()
    {
        return Character.isJavaIdentifierStart(_input.LA(-1));
    }

    /// <summary>
    /// True when the two values just before the current position are a surrogate
    /// pair whose code point can start an identifier.
    /// </summary>
    public bool Check2()
    {
        int high = _input.LA(-2);
        int low = _input.LA(-1);

        // Anything that is not a real surrogate pair (including EOF) is simply not
        // an identifier character; do not let ConvertToUtf32 throw from the lexer.
        return Character.isSurrogatePair(high, low)
            && Character.isJavaIdentifierStart(Character.toCodePoint(high, low));
    }

    /// <summary>
    /// True when the value just before the current position can continue an identifier.
    /// </summary>
    public bool Check3()
    {
        return Character.isJavaIdentifierPart(_input.LA(-1));
    }

    /// <summary>
    /// True when the two values just before the current position are a surrogate
    /// pair whose code point can continue an identifier.
    /// </summary>
    public bool Check4()
    {
        int high = _input.LA(-2);
        int low = _input.LA(-1);

        // Same guard as Check2: a non-pair yields false instead of an exception.
        return Character.isSurrogatePair(high, low)
            && Character.isJavaIdentifierPart(Character.toCodePoint(high, low));
    }
}

java/java20/Cpp/JavaLexerBase.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* [The "BSD license"]
3+
* Copyright (c) 2014 Terence Parr
4+
* Copyright (c) 2014 Sam Harwell
5+
* Copyright (c) 2017 Chan Chung Kwong
6+
* All rights reserved.
7+
*
8+
* Redistribution and use in source and binary forms, with or without
9+
* modification, are permitted provided that the following conditions
10+
* are met:
11+
*
12+
* 1. Redistributions of source code must retain the above copyright
13+
* notice, this list of conditions and the following disclaimer.
14+
* 2. Redistributions in binary form must reproduce the above copyright
15+
* notice, this list of conditions and the following disclaimer in the
16+
* documentation and/or other materials provided with the distribution.
17+
* 3. The name of the author may not be used to endorse or promote products
18+
* derived from this software without specific prior written permission.
19+
*
20+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23+
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
*/
31+
32+
#include "antlr4-runtime.h"
33+
#include "JavaLexerBase.h"
34+
35+
// Builds the lexer on top of antlr4::Lexer and keeps the stream pointer in
// _input for the look-behind checks (Check1 .. Check4).
JavaLexerBase::JavaLexerBase(antlr4::CharStream * input)
    : antlr4::Lexer(input), _input(input)
{
}
39+
40+
// Returns true when `c` is an ASCII letter, an ASCII digit, '$' or '_'.
// Every other value, including all non-ASCII code points, yields false.
bool JavaLexerBase::Character::isJavaIdentifierPart(int c)
{
    const bool isLower = c >= 'a' && c <= 'z';
    const bool isUpper = c >= 'A' && c <= 'Z';
    const bool isDigit = c >= '0' && c <= '9';
    return isLower || isUpper || isDigit || c == '$' || c == '_';
}
52+
53+
// Returns true when `c` is an ASCII letter, '$' or '_'.
// Every other value, including all non-ASCII code points, yields false.
bool JavaLexerBase::Character::isJavaIdentifierStart(int c)
{
    const bool isLower = c >= 'a' && c <= 'z';
    const bool isUpper = c >= 'A' && c <= 'Z';
    return isLower || isUpper || c == '$' || c == '_';
}
63+
64+
// Combines a UTF-16 high/low surrogate pair into the code point it encodes:
// 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00).
// The result is only meaningful when both arguments really are surrogates.
char32_t surrogate_to_utf32(char16_t high, char16_t low) {
    const int highOffset = static_cast<int>(high) - 0xD800;
    const int lowOffset = static_cast<int>(low) - 0xDC00;
    // Multiplication instead of a shift so a negative offset (non-surrogate
    // input) is still plain integer arithmetic.
    return 0x10000 + highOffset * 0x400 + lowOffset;
}
67+
68+
int JavaLexerBase::Character::toCodePoint(int high, int low)
69+
{
70+
return surrogate_to_utf32(high, low);
71+
}
72+
73+
bool JavaLexerBase::Check1()
74+
{
75+
return JavaLexerBase::Character::isJavaIdentifierStart(_input->LA(-1));
76+
}
77+
78+
bool JavaLexerBase::Check2()
79+
{
80+
return JavaLexerBase::Character::isJavaIdentifierStart(JavaLexerBase::Character::toCodePoint((char)_input->LA(-2), (char)_input->LA(-1)));
81+
}
82+
83+
bool JavaLexerBase::Check3()
84+
{
85+
return JavaLexerBase::Character::isJavaIdentifierPart(_input->LA(-1));
86+
}
87+
88+
bool JavaLexerBase::Check4()
89+
{
90+
return JavaLexerBase::Character::isJavaIdentifierPart(JavaLexerBase::Character::toCodePoint((char)_input->LA(-2), (char)_input->LA(-1)));
91+
}

java/java20/Cpp/JavaLexerBase.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#pragma once
2+
#include "antlr4-runtime.h"
3+
4+
class JavaLexerBase : public antlr4::Lexer
5+
{
6+
private:
7+
antlr4::CharStream * _input;
8+
9+
public:
10+
JavaLexerBase(antlr4::CharStream * input);
11+
12+
class Character
13+
{
14+
public:
15+
static bool isJavaIdentifierPart(int c);
16+
static bool isJavaIdentifierStart(int c);
17+
static int toCodePoint(int high, int low);
18+
};
19+
20+
bool Check1();
21+
bool Check2();
22+
bool Check3();
23+
bool Check4();
24+
};
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import sys, os, re, shutil
2+
from glob import glob
3+
from pathlib import Path
4+
5+
def main(argv):
    """Run fix() on every ``.g4`` file in the current directory.

    ``argv`` is accepted but not used.
    """
    for grammar_path in glob("./*.g4"):
        fix(grammar_path)
8+
9+
def fix(file_path):
    """Rewrite one grammar file in place for the C++ target.

    The original file is moved to ``file_path + ".bak"`` and a new file is
    written at ``file_path`` in which, line by line:

    * the ``// Insert here @header for C++ lexer.`` and
      ``// Insert here @header for C++ parser.`` markers are replaced by the
      matching ``@header {#include ...}`` block, and
    * every ``this.`` is replaced by ``this->``.

    Exits the process with status 1 when ``file_path`` does not exist.
    """
    print("Altering " + file_path)
    if not os.path.exists(file_path):
        print(f"Could not find file: {file_path}")
        sys.exit(1)

    backup_path = file_path + ".bak"
    shutil.move(file_path, backup_path)

    # Applied in this order to every line; str.replace leaves the line unchanged
    # when the text is absent, so no membership test is needed first.
    replacements = (
        ('// Insert here @header for C++ lexer.', '@header {#include "JavaLexerBase.h"}'),
        ('// Insert here @header for C++ parser.', '@header {#include "JavaParserBase.h"}'),
        ('this.', 'this->'),
    )

    # Context managers close both files even if reading or writing fails.
    with open(backup_path, 'r') as input_file, open(file_path, 'w') as output_file:
        for line in input_file:
            for old, new in replacements:
                line = line.replace(old, new)
            output_file.write(line)

    print("Writing ...")
32+
33+
# Entry point: only run the transformation when executed as a script.
if __name__ == "__main__":
    main(sys.argv)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* [The "BSD license"]
3+
* Copyright (c) 2014 Terence Parr
4+
* Copyright (c) 2014 Sam Harwell
5+
* Copyright (c) 2017 Chan Chung Kwong
6+
* All rights reserved.
7+
*
8+
* Redistribution and use in source and binary forms, with or without
9+
* modification, are permitted provided that the following conditions
10+
* are met:
11+
*
12+
* 1. Redistributions of source code must retain the above copyright
13+
* notice, this list of conditions and the following disclaimer.
14+
* 2. Redistributions in binary form must reproduce the above copyright
15+
* notice, this list of conditions and the following disclaimer in the
16+
* documentation and/or other materials provided with the distribution.
17+
* 3. The name of the author may not be used to endorse or promote products
18+
* derived from this software without specific prior written permission.
19+
*
20+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23+
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
*/
31+
import 'package:antlr4/antlr4.dart';
32+
import 'dart:io';
33+
import 'dart:convert';
34+
35+
/// Base class for the Java lexer. Adds look-behind identifier checks that
/// classify the value(s) just before the current position of [inputStream].
abstract class JavaLexerBase extends Lexer
{
  JavaLexerBase(CharStream input) : super(input);

  /// True when the value before the current position can start an identifier.
  bool Check1() => Character.isJavaIdentifierStart(inputStream.LA(-1)!);

  /// True when the two values before the current position, combined as a
  /// surrogate pair, can start an identifier.
  bool Check2()
  {
    final int high = inputStream.LA(-2)!;
    final int low = inputStream.LA(-1)!;
    return Character.isJavaIdentifierStart(Character.toCodePoint(high, low));
  }

  /// True when the value before the current position can continue an identifier.
  bool Check3() => Character.isJavaIdentifierPart(inputStream.LA(-1)!);

  /// True when the two values before the current position, combined as a
  /// surrogate pair, can continue an identifier.
  bool Check4()
  {
    final int high = inputStream.LA(-2)!;
    final int low = inputStream.LA(-1)!;
    return Character.isJavaIdentifierPart(Character.toCodePoint(high, low));
  }
}
61+
62+
/// Character classification helpers named after their `java.lang.Character`
/// counterparts. Classification is ASCII-only: letters `a-z`/`A-Z`, digits
/// `0-9`, `$` and `_`.
class Character
{
  static const int _dollar = 0x24; // '$'
  static const int _underscore = 0x5F; // '_'

  static bool _isAsciiLetter(int c)
  {
    return (0x61 <= c && c <= 0x7A) || (0x41 <= c && c <= 0x5A);
  }

  static bool _isAsciiDigit(int c)
  {
    return 0x30 <= c && c <= 0x39;
  }

  /// True when [c] is an ASCII letter, an ASCII digit, `$` or `_`.
  static bool isJavaIdentifierPart(int c)
  {
    return isJavaIdentifierStart(c) || _isAsciiDigit(c);
  }

  /// True when [c] is an ASCII letter, `$` or `_`.
  static bool isJavaIdentifierStart(int c)
  {
    return _isAsciiLetter(c) || c == _dollar || c == _underscore;
  }

  /// Combines the UTF-16 surrogate pair ([high], [low]) into the code point it
  /// encodes. The result is only meaningful when [high] is in 0xD800..0xDBFF
  /// and [low] is in 0xDC00..0xDFFF.
  static int toCodePoint(int high, int low)
  {
    // Multiplication rather than `<<` keeps the arithmetic identical on every
    // Dart platform, including ones where shifts are limited to 32 bits.
    return 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00);
  }
}
102+

0 commit comments

Comments
 (0)