Skip to content

Commit e663498

Browse files
jkronegg, mpkorstanje, Julien Kronegg
authored
java: Optimize GherkinLine performance (cucumber#372)
Co-authored-by: M.P. Korstanje <[email protected]>
Co-authored-by: Julien Kronegg <[email protected]>
1 parent 0586a71 commit e663498

File tree

13 files changed

+306
-170
lines changed

13 files changed

+306
-170
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt
88

99
## [Unreleased]
1010
### Changed
11+
- [Java] Optimize GherkinLine performance ([#361](https://github.com/cucumber/gherkin/issues/361))
1112
- [Java] Optimize number of array copies ([#388](https://github.com/cucumber/gherkin/pull/388))
1213
- [Java] Optimize Location performance ([#385](https://github.com/cucumber/gherkin/pull/385))
1314
- [Java] Optimize AstNode performance ([#383](https://github.com/cucumber/gherkin/pull/383))

java/src/main/java/io/cucumber/gherkin/GherkinLanguageConstants.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
interface GherkinLanguageConstants {
44
String TAG_PREFIX = "@";
5-
String COMMENT_PREFIX = "#";
5+
char COMMENT_PREFIX_CHAR = '#';
6+
String COMMENT_PREFIX = "" + COMMENT_PREFIX_CHAR;
67
String TITLE_KEYWORD_SEPARATOR = ":";
78
String TABLE_CELL_SEPARATOR = "|";
89
String DOCSTRING_SEPARATOR = "\"\"\"";

java/src/main/java/io/cucumber/gherkin/GherkinLine.java

Lines changed: 70 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,86 +4,113 @@
44

55
import java.util.ArrayList;
66
import java.util.List;
7+
import java.util.Map.Entry;
78
import java.util.PrimitiveIterator;
89

9-
import static io.cucumber.gherkin.GherkinLanguageConstants.COMMENT_PREFIX;
1010
import static io.cucumber.gherkin.GherkinLanguageConstants.TAG_PREFIX;
11-
import static io.cucumber.gherkin.StringUtils.ltrim;
12-
import static io.cucumber.gherkin.StringUtils.ltrimKeepNewLines;
11+
import static io.cucumber.gherkin.GherkinLanguageConstants.TITLE_KEYWORD_SEPARATOR;
12+
import static io.cucumber.gherkin.Locations.COLUMN_OFFSET;
13+
import static io.cucumber.gherkin.StringUtils.containsWhiteSpace;
1314
import static io.cucumber.gherkin.StringUtils.rtrim;
14-
import static io.cucumber.gherkin.StringUtils.rtrimKeepNewLines;
15-
import static io.cucumber.gherkin.StringUtils.symbolCount;
16-
import static io.cucumber.gherkin.StringUtils.trim;
15+
import static io.cucumber.gherkin.StringUtils.trimAndIndent;
16+
import static io.cucumber.gherkin.StringUtils.trimAndIndentKeepNewLines;
17+
import static java.util.Collections.emptyList;
1718
import static java.util.Objects.requireNonNull;
1819

1920
class GherkinLine {
20-
// TODO: set this to 0 when/if we change to 0-indexed columns
21-
private static final int OFFSET = 1;
22-
private final String lineText;
23-
private final String trimmedLineText;
21+
22+
/**
23+
* The line text, including all leading and trailing whitespace characters.
24+
*/
25+
private final String rawText;
26+
private final Location location;
27+
private final boolean empty;
28+
29+
/**
30+
* The line text with leading and trailing whitespace characters trimmed.
31+
*/
32+
private final String text;
33+
34+
/**
35+
* The offset in code-points of the first non-whitespace character in this
36+
* line.
37+
*/
2438
private final int indent;
25-
private final Location line;
2639

27-
public GherkinLine(String lineText, Location line) {
28-
this.lineText = requireNonNull(lineText);
29-
this.trimmedLineText = trim(lineText);
30-
this.line = requireNonNull(line);
31-
indent = symbolCount(lineText) - symbolCount(ltrim(lineText));
40+
GherkinLine(String rawText, Location location) {
41+
this.rawText = requireNonNull(rawText);
42+
this.location = requireNonNull(location);
43+
Entry<String, Integer> trimmedIndent = trimAndIndent(rawText);
44+
this.text = trimmedIndent.getKey();
45+
this.indent = trimmedIndent.getValue();
46+
this.empty = text.isEmpty();
3247
}
3348

34-
public int indent() {
49+
int getIndent() {
3550
return indent;
3651
}
3752

38-
public String getLineText(int indentToRemove) {
39-
if (indentToRemove < 0 || indentToRemove > indent())
40-
return trimmedLineText;
41-
return lineText.substring(indentToRemove);
53+
String getText() {
54+
return text;
55+
}
56+
57+
String getRawText() {
58+
return rawText;
4259
}
4360

44-
public boolean isEmpty() {
45-
return trimmedLineText.isEmpty();
61+
String getRawTextSubstring(int beginIndex) {
62+
return rawText.substring(beginIndex);
4663
}
4764

48-
public boolean startsWith(String prefix) {
49-
return trimmedLineText.startsWith(prefix);
65+
boolean isEmpty() {
66+
return empty;
5067
}
5168

52-
public String getRestTrimmed(int length) {
53-
return trimmedLineText.substring(length).trim();
69+
boolean startsWith(String prefix) {
70+
return text.startsWith(prefix);
5471
}
5572

56-
public List<GherkinLineSpan> getTags() {
73+
String substringTrimmed(int beginIndex) {
74+
return text.substring(beginIndex).trim();
75+
}
5776

58-
String uncommentedLine = trimmedLineText.split("\\s" + COMMENT_PREFIX, 2)[0];
59-
List<GherkinLineSpan> tags = new ArrayList<>();
77+
List<GherkinLineSpan> parseTags() {
78+
// in most cases, the line contains no tag, so the code is optimized for this situation
79+
if (empty) {
80+
return emptyList();
81+
}
82+
String uncommentedLine = StringUtils.removeComments(text);
6083
int indexInUncommentedLine = 0;
6184

6285
String[] elements = uncommentedLine.split(TAG_PREFIX);
86+
if (elements.length == 0) {
87+
return emptyList();
88+
}
89+
List<GherkinLineSpan> tags = new ArrayList<>(elements.length);
6390
for (String element : elements) {
6491
String token = rtrim(element);
6592
if (token.isEmpty()) {
6693
continue;
6794
}
6895
int symbolLength = uncommentedLine.codePointCount(0, indexInUncommentedLine);
69-
int column = indent() + symbolLength + 1;
70-
if (!token.matches("^\\S+$")) {
71-
throw new ParserException("A tag may not contain whitespace", Locations.atColumn(line, column));
96+
int column = indent + symbolLength + COLUMN_OFFSET;
97+
if (containsWhiteSpace(token)) {
98+
throw new ParserException("A tag may not contain whitespace", Locations.atColumn(location, column));
7299
}
73100
tags.add(new GherkinLineSpan(column, TAG_PREFIX + token));
74101
indexInUncommentedLine += element.length() + 1;
75102
}
76103
return tags;
77104
}
78105

79-
public List<GherkinLineSpan> getTableCells() {
106+
List<GherkinLineSpan> parseTableCells() {
80107
List<GherkinLineSpan> lineSpans = new ArrayList<>();
81108
StringBuilder cellBuilder = new StringBuilder();
82109
boolean beforeFirst = true;
83110
int col = 0;
84111
int cellStart = 0;
85112
boolean escape = false;
86-
PrimitiveIterator.OfInt iterator = lineText.codePoints().iterator();
113+
PrimitiveIterator.OfInt iterator = text.codePoints().iterator();
87114
while (iterator.hasNext()) {
88115
int c = iterator.next();
89116
if (escape) {
@@ -112,10 +139,9 @@ public List<GherkinLineSpan> getTableCells() {
112139
// Skip the first empty span
113140
beforeFirst = false;
114141
} else {
115-
String cell = cellBuilder.toString();
116-
String leftTrimmedCell = ltrimKeepNewLines(cell);
117-
int cellIndent = symbolCount(cell) - symbolCount(leftTrimmedCell);
118-
lineSpans.add(new GherkinLineSpan(cellStart + cellIndent + OFFSET, rtrimKeepNewLines(leftTrimmedCell)));
142+
Entry<String, Integer> trimmedCellIndent = trimAndIndentKeepNewLines(cellBuilder.toString());
143+
int column = indent + cellStart + trimmedCellIndent.getValue() + COLUMN_OFFSET;
144+
lineSpans.add(new GherkinLineSpan(column, trimmedCellIndent.getKey()));
119145
}
120146
cellBuilder = new StringBuilder();
121147
cellStart = col + 1;
@@ -128,11 +154,11 @@ public List<GherkinLineSpan> getTableCells() {
128154
return lineSpans;
129155
}
130156

131-
public boolean startsWithTitleKeyword(String text) {
132-
int textLength = text.length();
133-
return trimmedLineText.length() > textLength &&
134-
trimmedLineText.startsWith(text) &&
135-
trimmedLineText.startsWith(GherkinLanguageConstants.TITLE_KEYWORD_SEPARATOR, textLength);
157+
boolean startsWithTitleKeyword(String keyword) {
158+
int keywordLength = keyword.length();
159+
return text.length() > keywordLength &&
160+
text.startsWith(keyword) &&
161+
text.startsWith(TITLE_KEYWORD_SEPARATOR, keywordLength);
136162
}
137163

138164
}

java/src/main/java/io/cucumber/gherkin/GherkinLineSpan.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
package io.cucumber.gherkin;
22

33
class GherkinLineSpan {
4-
// One-based line position
5-
public final int column;
4+
/**
5+
* One-based position in code points.
6+
*/
7+
final int column;
68

7-
// text part of the line
8-
public final String text;
9+
/**
10+
* Text part of the line
11+
*/
12+
final String text;
913

10-
public GherkinLineSpan(int column, String text) {
14+
GherkinLineSpan(int column, String text) {
1115
this.column = column;
1216
this.text = text;
1317
}

java/src/main/java/io/cucumber/gherkin/Locations.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66

77
class Locations {
88

9+
/**
10+
* Columns are one-based.
11+
*/
12+
static final int COLUMN_OFFSET = 1;
13+
914
/**
1015
* Cache of Long objects for the range 0-4095. This is used
1116
* to avoid creating a huge amount of Long objects in getLocation().

java/src/main/java/io/cucumber/gherkin/ParserException.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import io.cucumber.messages.types.Location;
88

9+
import static io.cucumber.gherkin.Locations.COLUMN_OFFSET;
910
import static io.cucumber.gherkin.Locations.atColumn;
1011

1112
class ParserException extends RuntimeException {
@@ -58,13 +59,16 @@ static class UnexpectedTokenException extends ParserException {
5859
private static String getMessage(Token receivedToken, List<String> expectedTokenTypes) {
5960
return String.format("expected: %s, got '%s'",
6061
String.join(", ", expectedTokenTypes),
61-
receivedToken.getTokenValue().trim());
62+
receivedToken.getTokenValue()
63+
);
6264
}
6365

6466
private static Location getLocation(Token receivedToken) {
65-
return receivedToken.location.getColumn().isPresent()
66-
? receivedToken.location
67-
: atColumn(receivedToken.location, receivedToken.line.indent() + 1);
67+
if (receivedToken.location.getColumn().isPresent()) {
68+
return receivedToken.location;
69+
}
70+
int column = COLUMN_OFFSET + receivedToken.line.getIndent();
71+
return atColumn(receivedToken.location, column);
6872
}
6973
}
7074

java/src/main/java/io/cucumber/gherkin/PickleCompiler.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
import java.util.ArrayList;
2929
import java.util.Arrays;
30-
import java.util.Collections;
3130
import java.util.EnumMap;
3231
import java.util.List;
3332
import java.util.Map;

0 commit comments

Comments
 (0)