Skip to content

Commit 5454473

Browse files
committed
Use JFlex scanners for SourceSplitter and LineBreaker
Also, tweak STANDARD_EOL so that each alternative of the pattern begins with a definite (non-optional) match.
1 parent eeb9592 commit 5454473

File tree

5 files changed

+199
-85
lines changed

5 files changed

+199
-85
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/util/LineBreaker.java

Lines changed: 5 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -71,43 +71,11 @@ private void reset(Reader reader) throws IOException {
7171
lineOffsets = null;
7272

7373
List<Integer> newOffsets = new ArrayList<>();
74-
newOffsets.add(0);
75-
76-
int c;
77-
while ((c = reader.read()) != -1) {
78-
++length;
79-
80-
redo_c:
81-
while (true) {
82-
switch (c) {
83-
case '\r':
84-
c = reader.read();
85-
if (c == -1) {
86-
newOffsets.add(length);
87-
break redo_c;
88-
}
89-
++length;
90-
if (c == '\n') {
91-
newOffsets.add(length);
92-
break redo_c;
93-
}
94-
newOffsets.add(length - 1);
95-
continue redo_c;
96-
case '\n':
97-
newOffsets.add(length);
98-
break redo_c;
99-
default:
100-
// pass
101-
}
102-
break;
103-
}
104-
}
105-
106-
count = newOffsets.size();
107-
if (newOffsets.get(newOffsets.size() - 1) < length) {
108-
newOffsets.add(length);
109-
// Do not increment count.
110-
}
74+
LineBreakerScanner scanner = new LineBreakerScanner(reader);
75+
scanner.setTarget(newOffsets);
76+
scanner.consume();
77+
length = scanner.getLength();
78+
count = newOffsets.size() - 1;
11179

11280
lineOffsets = new int[newOffsets.size()];
11381
for (int i = 0; i < lineOffsets.length; ++i) {

opengrok-indexer/src/main/java/org/opengrok/indexer/util/SourceSplitter.java

Lines changed: 4 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -159,52 +159,10 @@ private void reset(Reader reader) throws IOException {
159159
lineOffsets = null;
160160

161161
List<String> slist = new ArrayList<>();
162-
StringBuilder bld = new StringBuilder();
163-
int c;
164-
while ((c = reader.read()) != -1) {
165-
++length;
166-
167-
redo_c:
168-
while (true) {
169-
bld.append((char) c);
170-
switch (c) {
171-
case '\r':
172-
c = reader.read();
173-
if (c == -1) {
174-
slist.add(bld.toString());
175-
bld.setLength(0);
176-
break redo_c;
177-
}
178-
++length;
179-
if (c == '\n') {
180-
bld.append((char) c);
181-
slist.add(bld.toString());
182-
bld.setLength(0);
183-
break redo_c;
184-
}
185-
slist.add(bld.toString());
186-
bld.setLength(0);
187-
continue redo_c;
188-
case '\n':
189-
slist.add(bld.toString());
190-
bld.setLength(0);
191-
break redo_c;
192-
default:
193-
// pass
194-
}
195-
break;
196-
}
197-
}
198-
if (bld.length() > 0) {
199-
slist.add(bld.toString());
200-
bld.setLength(0);
201-
} else {
202-
/*
203-
* Following JFlexXref's custom, an empty file or a file ending
204-
* with LF produces an additional line of length zero.
205-
*/
206-
slist.add("");
207-
}
162+
SourceSplitterScanner scanner = new SourceSplitterScanner(reader);
163+
scanner.setTarget(slist);
164+
scanner.consume();
165+
length = scanner.getLength();
208166

209167
lines = slist.toArray(new String[0]);
210168
setLineOffsets();

opengrok-indexer/src/main/java/org/opengrok/indexer/util/StringUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2019, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2020, Chris Fraire <[email protected]>.
2323
*/
2424

2525
package org.opengrok.indexer.util;
@@ -37,7 +37,7 @@ public final class StringUtils {
3737
/**
3838
* Matches a standard end-of-line indicator, identical to Common.lexh's {EOL}.
3939
*/
40-
public static final Pattern STANDARD_EOL = Pattern.compile("\\r?\\n|\\r");
40+
public static final Pattern STANDARD_EOL = Pattern.compile("\\r\\n?|\\n");
4141

4242
/**
4343
* Matches an apostrophe not following a backslash escape or following an
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2020, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opengrok.indexer.util;
25+
26+
import java.io.IOException;
27+
import java.util.List;
28+
%%
29+
%public
30+
%class LineBreakerScanner
31+
%char
32+
%unicode
33+
%type boolean
34+
%eofval{
35+
return false;
36+
%eofval}
37+
%eof{
38+
length = yychar;
39+
40+
/*
41+
* Following JFlexXref's custom, an empty file or a file ending with EOL
42+
* produces an additional line of length zero. We also ensure there are two
43+
* entries to describe the boundaries.
44+
*/
45+
if (lastHadEOL || offsets.size() <= 1) {
46+
offsets.add(yychar);
47+
}
48+
%eof}
49+
%{
50+
private int length;
51+
52+
private boolean lastHadEOL;
53+
54+
private List<Integer> offsets;
55+
56+
/**
 * Gets the length recorded when the scanner reached end of input.
 * @return the value of {@code yychar} captured by the {@code %eof} block;
 * zero until end of input is reached, and reset to zero by
 * {@link #setTarget(List)}
 */
public int getLength() {
57+
return length;
58+
}
59+
60+
/**
61+
* Sets the required target to write.
62+
* @param offsets a required instance
63+
*/
64+
public void setTarget(List<Integer> offsets) {
65+
// Reset the state that the actions and the %eof block maintain.
this.length = 0;
66+
this.lastHadEOL = false;
67+
this.offsets = offsets;
68+
// Seed the list with offset 0; the rules then append the offset just past
// each match, so this entry marks where the first line begins.
offsets.add(0);
69+
}
70+
71+
/**
72+
* Call {@link #yylex()} until {@code false}, which consumes all input so
73+
* that the argument to {@link #setTarget(List)} contains the entire
74+
* transformation.
75+
*/
76+
public void consume() throws IOException {
77+
// yylex() yields false at end of input (see %eofval). All recording is
// done by the rule actions and the %eof block, so the loop body has
// nothing to do.
while (yylex()) {
78+
//noinspection UnnecessaryContinue
79+
continue;
80+
}
81+
}
82+
%}
83+
84+
%include Common.lexh
85+
%%
86+
87+
[^\n\r]* {EOL} {
88+
// A (possibly empty) run of non-EOL characters followed by an EOL:
// record the offset just past the terminator.
offsets.add(yychar + yylength());
89+
lastHadEOL = true;
90+
}
91+
92+
[^\n\r]+ {
93+
// Non-EOL characters matched without a terminator: record the offset just
// past the match. NOTE(review): presumably this only fires for a final,
// unterminated line, assuming {EOL} in Common.lexh covers both \r and \n.
offsets.add(yychar + yylength());
94+
lastHadEOL = false;
95+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2020, Chris Fraire <[email protected]>.
22+
*/
23+
24+
package org.opengrok.indexer.util;
25+
26+
import java.io.IOException;
27+
import java.util.List;
28+
%%
29+
%public
30+
%class SourceSplitterScanner
31+
%char
32+
%unicode
33+
%type boolean
34+
%eofval{
35+
return false;
36+
%eofval}
37+
%eof{
38+
length = yychar;
39+
40+
/*
41+
* Following JFlexXref's custom, an empty file or a file ending with EOL
42+
* produces an additional line of length zero.
43+
*/
44+
if (lastHadEOL || lines.size() < 1) {
45+
lines.add("");
46+
}
47+
%eof}
48+
%{
49+
private int length;
50+
51+
private boolean lastHadEOL;
52+
53+
private List<String> lines;
54+
55+
/**
 * Gets the length recorded when the scanner reached end of input.
 * @return the value of {@code yychar} captured by the {@code %eof} block;
 * zero until end of input is reached, and reset to zero by
 * {@link #setTarget(List)}
 */
public int getLength() {
56+
return length;
57+
}
58+
59+
/**
60+
* Sets the required target to write.
61+
* @param lines a required instance
62+
*/
63+
public void setTarget(List<String> lines) {
64+
// Reset the state that the actions and the %eof block maintain. The
// supplied list is stored as-is; it is neither cleared nor added to here.
this.length = 0;
65+
this.lastHadEOL = false;
66+
this.lines = lines;
67+
}
68+
69+
/**
70+
* Call {@link #yylex()} until {@code false}, which consumes all input so
71+
* that the argument to {@link #setTarget(List)} contains the entire
72+
* transformation.
73+
*/
74+
public void consume() throws IOException {
75+
// yylex() yields false at end of input (see %eofval). All recording is
// done by the rule actions and the %eof block, so the loop body has
// nothing to do.
while (yylex()) {
76+
//noinspection UnnecessaryContinue
77+
continue;
78+
}
79+
}
80+
%}
81+
82+
%include Common.lexh
83+
%%
84+
85+
[^\n\r]* {EOL} {
86+
// A (possibly empty) run of non-EOL characters followed by an EOL:
// store the matched text, terminator included, as one line.
lines.add(yytext());
87+
lastHadEOL = true;
88+
}
89+
90+
[^\n\r]+ {
91+
// Non-EOL characters matched without a terminator: store them as a line.
// NOTE(review): presumably this only fires for a final, unterminated
// line, assuming {EOL} in Common.lexh covers both \r and \n.
lines.add(yytext());
92+
lastHadEOL = false;
93+
}

0 commit comments

Comments
 (0)