Skip to content

Commit a0e876b

Browse files
authored
[java] Fix for #4583: update readme's. (#4588)
* Fix for #4583. Addresses other documentation issues for the Java grammars, e.g., mentioned in #4530. * Update readme.md * Update java/java/README.md. * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update readme.md * Update README.md * Update readme, update Trash toolkit to 0.23.26. The readme now references trunfoldlit, consequently we should just update to the latest toolkit. * Update README.md
1 parent c40b921 commit a0e876b

File tree

3 files changed

+121
-98
lines changed

3 files changed

+121
-98
lines changed

.config/dotnet-tools.json

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,186 +3,193 @@
33
"isRoot": true,
44
"tools": {
55
"trcaret": {
6-
"version": "0.23.24",
6+
"version": "0.23.26",
77
"commands": [
88
"trcaret"
99
],
1010
"rollForward": false
1111
},
1212
"trcover": {
13-
"version": "0.23.24",
13+
"version": "0.23.26",
1414
"commands": [
1515
"trcover"
1616
],
1717
"rollForward": false
1818
},
1919
"trgen": {
20-
"version": "0.23.24",
20+
"version": "0.23.26",
2121
"commands": [
2222
"trgen"
2323
],
2424
"rollForward": false
2525
},
2626
"trglob": {
27-
"version": "0.23.24",
27+
"version": "0.23.26",
2828
"commands": [
2929
"trglob"
3030
],
3131
"rollForward": false
3232
},
3333
"triconv": {
34-
"version": "0.23.24",
34+
"version": "0.23.26",
3535
"commands": [
3636
"triconv"
3737
],
3838
"rollForward": false
3939
},
4040
"trparse": {
41-
"version": "0.23.24",
41+
"version": "0.23.26",
4242
"commands": [
4343
"trparse"
4444
],
4545
"rollForward": false
4646
},
4747
"trquery": {
48-
"version": "0.23.24",
48+
"version": "0.23.26",
4949
"commands": [
5050
"trquery"
5151
],
5252
"rollForward": false
5353
},
5454
"trtext": {
55-
"version": "0.23.24",
55+
"version": "0.23.26",
5656
"commands": [
5757
"trtext"
5858
],
5959
"rollForward": false
6060
},
6161
"trwdog": {
62-
"version": "0.23.24",
62+
"version": "0.23.26",
6363
"commands": [
6464
"trwdog"
6565
],
6666
"rollForward": false
6767
},
6868
"trxgrep": {
69-
"version": "0.23.24",
69+
"version": "0.23.26",
7070
"commands": [
7171
"trxgrep"
7272
],
7373
"rollForward": false
7474
},
7575
"trxml": {
76-
"version": "0.23.24",
76+
"version": "0.23.26",
7777
"commands": [
7878
"trxml"
7979
],
8080
"rollForward": false
8181
},
8282
"trxml2": {
83-
"version": "0.23.24",
83+
"version": "0.23.26",
8484
"commands": [
8585
"trxml2"
8686
],
8787
"rollForward": false
8888
},
8989
"trclonereplace": {
90-
"version": "0.23.24",
90+
"version": "0.23.26",
9191
"commands": [
9292
"trclonereplace"
9393
],
9494
"rollForward": false
9595
},
9696
"trcombine": {
97-
"version": "0.23.24",
97+
"version": "0.23.26",
9898
"commands": [
9999
"trcombine"
100100
],
101101
"rollForward": false
102102
},
103103
"trconvert": {
104-
"version": "0.23.24",
104+
"version": "0.23.26",
105105
"commands": [
106106
"trconvert"
107107
],
108108
"rollForward": false
109109
},
110110
"trfoldlit": {
111-
"version": "0.23.24",
111+
"version": "0.23.26",
112112
"commands": [
113113
"trfoldlit"
114114
],
115115
"rollForward": false
116116
},
117117
"tritext": {
118-
"version": "0.23.24",
118+
"version": "0.23.26",
119119
"commands": [
120120
"tritext"
121121
],
122122
"rollForward": false
123123
},
124124
"trjson": {
125-
"version": "0.23.24",
125+
"version": "0.23.26",
126126
"commands": [
127127
"trjson"
128128
],
129129
"rollForward": false
130130
},
131131
"trperf": {
132-
"version": "0.23.24",
132+
"version": "0.23.26",
133133
"commands": [
134134
"trperf"
135135
],
136136
"rollForward": false
137137
},
138138
"trrename": {
139-
"version": "0.23.24",
139+
"version": "0.23.26",
140140
"commands": [
141141
"trrename"
142142
],
143143
"rollForward": false
144144
},
145145
"trsort": {
146-
"version": "0.23.24",
146+
"version": "0.23.26",
147147
"commands": [
148148
"trsort"
149149
],
150150
"rollForward": false
151151
},
152152
"trsplit": {
153-
"version": "0.23.24",
153+
"version": "0.23.26",
154154
"commands": [
155155
"trsplit"
156156
],
157157
"rollForward": false
158158
},
159159
"trsponge": {
160-
"version": "0.23.24",
160+
"version": "0.23.26",
161161
"commands": [
162162
"trsponge"
163163
],
164164
"rollForward": false
165165
},
166166
"trtokens": {
167-
"version": "0.23.24",
167+
"version": "0.23.26",
168168
"commands": [
169169
"trtokens"
170170
],
171171
"rollForward": false
172172
},
173173
"trtree": {
174-
"version": "0.23.24",
174+
"version": "0.23.26",
175175
"commands": [
176176
"trtree"
177177
],
178178
"rollForward": false
179179
},
180180
"trunfold": {
181-
"version": "0.23.24",
181+
"version": "0.23.26",
182182
"commands": [
183183
"trunfold"
184184
],
185185
"rollForward": false
186+
},
187+
"trunfoldlit": {
188+
"version": "0.23.26",
189+
"commands": [
190+
"trunfoldlit"
191+
],
192+
"rollForward": false
186193
}
187194
}
188195
}

java/java/README.md

Lines changed: 44 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,68 @@
1-
# Java Antlr Grammar
1+
# The optimized Java grammar
22

3-
Based on the previous optimized [Java7](../java) grammar by Terence Parr and Sam Harwell
4-
with the same BSD license. This grammar does not exactly corresponds to the formal
5-
Java specification unlike usual [Java8](../java8) grammar, but passes tests such as
6-
[AllInOne7.Java](examples/AllInOne7.java) and [AllInOne8.java](examples/AllInOne8.java).
7-
Performance, practical usage and clarity in priority.
3+
This grammar, based on the optimized Java7 grammar by Terence Parr and Sam Harwell,
4+
is meant to parse the latest version of the Java language, and is optimized for
5+
performance, practical usage, and clarity.
6+
7+
It does not correspond
8+
exactly to the [Java Language Specification](https://docs.oracle.com/javase/specs/).
9+
The [java8](../java8), [java9](../java9), and [java20](../java20) grammars
10+
follow the JLS, but are slower than this grammar due to ambiguity and max-k problems
11+
in the published JLS EBNF.
812

913
This grammar parses the file [ManyStringsConcat.java](examples/ManyStringsConcat.java)
10-
much more faster than original grammar without left recursion expressions.
14+
faster than the unoptimized java grammars. It implements operator precedence
15+
using Antlr4-style alt ordering instead of operator-precedence rules. Thus, it avoids
16+
creating parse trees with long, single-child chains for each string literal constant in
17+
[ManyStringsConcat.java](examples/ManyStringsConcat.java). In addition, it is faster
18+
because it avoids the large ATN-config set construction in the
19+
`AdaptivePredict()` parsing engine.
1120

12-
## Supported Java versions
21+
[JDK Enhancement Proposals (JEPs)](https://openjdk.org/jeps/0)
22+
are not implemented in this grammar.
1323

14-
* Java 7
15-
* Java 8
16-
* Java 11
17-
* Java 17
24+
## Currently supported Java version
25+
* Java 24 (latest)
1826

1927
## Main contributors
20-
2128
* Terence Parr, 2013
2229
* Sam Harwell, 2013
2330
* Ivan Kochurkin ([Positive Technologies](https://github.com/PositiveTechnologies)), 2017
2431
* Michał Lorek, 2021
2532

2633
## Tests
27-
28-
The grammar contains [AllInOne7.java](examples/AllInOne7.java) and
29-
[AllInOne8.java](examples/AllInOne8.java) files that almost fully covered Java syntax.
34+
* See examples/
35+
* OpenJDK 24, `src/**/*.java` (using [Trash trgen to create app](https://github.com/kaby76/Trash/tree/main/src/trgen), then `find ~/jdk-jdk-24-ga/src/ -name '*.java' | cygpath -w -f - | ./Test -x`)
3036

3137
## Benchmarks
32-
33-
Grammar performance has been tested on the following projects:
34-
35-
* jdk8
38+
Grammar performance has been tested on the following Java projects:
39+
* [OpenJDK 24](https://github.com/openjdk/jdk/archive/refs/tags/jdk-24-ga.zip)
3640
* Spring Framework
3741
* Elasticsearch
3842
* RxJava
3943
* JUnit4
4044
* Guava
4145
* Log4j
4246

43-
See [Benchmarks](Benchmarks.md) page for details.
47+
See the [benchmarks](Benchmarks.md) page for details.
4448

4549
## Grammar style
46-
47-
### Parse rules
48-
49-
```ANTLR
50-
parserRule
51-
: token1 (token2* OPERATOR token3?)
52-
;
53-
```
54-
55-
### Tokens
56-
57-
```ANTLR
58-
INT: 'int';
59-
INTERFACE: 'interface';
60-
```
61-
62-
### Fragments
63-
64-
```ANTLR
65-
fragment
66-
HexDigit
67-
: [0-9a-fA-F]
68-
;
69-
```
70-
71-
### Tokens using
72-
73-
Please use token names instead of literal names if possible and justified.
74-
It's more convenient during parse tree bypass.
75-
76-
```ANTLR
77-
modifier
78-
: classOrInterfaceModifier
79-
| NATIVE
80-
| SYNCHRONIZED
81-
| TRANSIENT
82-
| VOLATILE
83-
;
84-
```
85-
86-
instead of
87-
88-
```ANTLR
89-
modifier
90-
: classOrInterfaceModifier
91-
| 'native'
92-
| 'synchronized'
93-
| 'transient'
94-
| 'volatile'
95-
;
96-
```
50+
Please use [antlr-format](https://github.com/antlr-ng/antlr-format) and
51+
[formatting style config](https://github.com/antlr/grammars-v4/blob/master/_scripts/repo_coding_style.json)
52+
to reformat in the [coding standard format for the repo](https://github.com/antlr/grammars-v4/wiki#is-there-a-coding-standard-for-antlr4-grammars).
53+
54+
### String literals
55+
Generally, you can use either a string literal or the corresponding lexer rule name
56+
(`TOKEN_REF`) directly in a parser rule for a token. It makes no difference because the
57+
[java/java/ grammar](https://github.com/antlr/grammars-v4/tree/master/java/java)
58+
is a split Antlr4 grammar, and the Antlr Tool prevents you from defining a token using
59+
a string literal in a parser rule (it outputs
60+
`cannot create implicit token for string literal in non-combined grammar` if you try).
61+
When writing an Antlr listener or visitor, use the corresponding lexer rule name for the
62+
string literal used in the parser rule.
63+
64+
Currently, the grammar contains a mixture of string literals
65+
and lexer rule names in parser rules. If you want a parser grammar that removes all string literals
66+
from parser rules, use [Trash trfoldlit](https://github.com/kaby76/Trash/tree/main/src/trfoldlit).
67+
If you want a parser grammar that uses string literals where a lexer rule exists for the string
68+
literal, use [Trash trunfoldlit](https://github.com/kaby76/Trash/tree/main/src/trunfoldlit).

0 commit comments

Comments
 (0)