Skip to content

Commit 3c080c5

Browse files
[Java] Improve cucumber expression creation performance (#202)
Co-authored-by: M.P. Korstanje <[email protected]> Co-authored-by: M.P. Korstanje <[email protected]>
1 parent 793ddda commit 3c080c5

File tree

4 files changed

+112
-6
lines changed

4 files changed

+112
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
77

88
## [Unreleased]
99

10+
### Fixed
11+
- [Java] Improve cucumber expression creation performance ([#202](https://github.com/cucumber/cucumber-expressions/pull/202))
12+
1013
## [16.1.1] - 2022-12-08
1114
### Fixed
1215
- [Java] Improve expression creation performance ([#187](https://github.com/cucumber/cucumber-expressions/pull/187), [#189](https://github.com/cucumber/cucumber-expressions/pull/189))

java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919
import static io.cucumber.cucumberexpressions.CucumberExpressionException.createOptionalMayNotBeEmpty;
2020
import static io.cucumber.cucumberexpressions.CucumberExpressionException.createParameterIsNotAllowedInOptional;
2121
import static io.cucumber.cucumberexpressions.ParameterType.isValidParameterTypeName;
22+
import static io.cucumber.cucumberexpressions.RegexpUtils.escapeRegex;
2223
import static io.cucumber.cucumberexpressions.UndefinedParameterTypeException.createUndefinedParameterType;
2324
import static java.util.stream.Collectors.joining;
2425

2526
@API(status = API.Status.STABLE)
2627
public final class CucumberExpression implements Expression {
27-
private static final Pattern ESCAPE_PATTERN = Pattern.compile("[\\\\^\\[({$.|?*+})\\]]");
2828
private final List<ParameterType<?>> parameterTypes = new ArrayList<>();
2929
private final String source;
3030
private final TreeRegexp treeRegexp;
@@ -60,11 +60,6 @@ private String rewriteToRegex(Node node) {
6060
}
6161
}
6262

63-
private static String escapeRegex(String text) {
64-
return ESCAPE_PATTERN.matcher(text).replaceAll("\\\\$0");
65-
}
66-
67-
6863
private String rewriteOptional(Node node) {
6964
assertNoParameters(node, astNode -> createParameterIsNotAllowedInOptional(astNode, source));
7065
assertNoOptionals(node, astNode -> createOptionalIsNotAllowedInOptional(astNode, source));
java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package io.cucumber.cucumberexpressions;
2+
3+
class RegexpUtils {
4+
/**
5+
* List of characters to be escaped.
6+
* The last char is '}' with index 125, so we need only 126 characters.
7+
*/
8+
private static final boolean[] CHAR_TO_ESCAPE = new boolean[126];
9+
10+
static {
11+
CHAR_TO_ESCAPE['^'] = true;
12+
CHAR_TO_ESCAPE['$'] = true;
13+
CHAR_TO_ESCAPE['['] = true;
14+
CHAR_TO_ESCAPE[']'] = true;
15+
CHAR_TO_ESCAPE['('] = true;
16+
CHAR_TO_ESCAPE[')'] = true;
17+
CHAR_TO_ESCAPE['{'] = true;
18+
CHAR_TO_ESCAPE['}'] = true;
19+
CHAR_TO_ESCAPE['.'] = true;
20+
CHAR_TO_ESCAPE['|'] = true;
21+
CHAR_TO_ESCAPE['?'] = true;
22+
CHAR_TO_ESCAPE['*'] = true;
23+
CHAR_TO_ESCAPE['+'] = true;
24+
CHAR_TO_ESCAPE['\\'] = true;
25+
}
26+
27+
/**
28+
* Escapes the regexp characters (the ones from "^$(){}[].|+*?\")
29+
* from the given text, so that they are not considered as regexp
30+
* characters.
31+
*
32+
* @param text the non-null input text
33+
* @return the input text with escaped regexp characters
34+
*/
35+
public static String escapeRegex(String text) {
36+
/*
37+
Note on performance: this code has been benchmarked for
38+
escaping frequencies of 100%, 50%, 20%, 10%, 1%, 0.1%.
39+
Amongst 4 other variants (including Pattern matching),
40+
this variant is the fastest on all escaping frequencies.
41+
*/
42+
int length = text.length();
43+
StringBuilder sb = null; // lazy initialization
44+
int blockStart = 0;
45+
int maxChar = CHAR_TO_ESCAPE.length;
46+
for (int i = 0; i < length; i++) {
47+
char currentChar = text.charAt(i);
48+
if (currentChar < maxChar && CHAR_TO_ESCAPE[currentChar]) {
49+
if (sb == null) {
50+
sb = new StringBuilder(length * 2);
51+
}
52+
if (blockStart < i) {
53+
// flush previous block
54+
sb.append(text, blockStart, i);
55+
}
56+
sb.append('\\');
57+
sb.append(currentChar);
58+
blockStart = i + 1;
59+
}
60+
}
61+
if (sb != null) {
62+
// finalizing character escaping
63+
if (blockStart < length) {
64+
// flush remaining characters
65+
sb.append(text, blockStart, length);
66+
}
67+
return sb.toString();
68+
}
69+
return text;
70+
}
71+
72+
}
java/src/test/java/io/cucumber/cucumberexpressions/RegexpUtilsTest.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package io.cucumber.cucumberexpressions;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import static io.cucumber.cucumberexpressions.RegexpUtils.escapeRegex;
6+
import static org.junit.jupiter.api.Assertions.assertEquals;
7+
8+
class RegexpUtilsTest {
9+
10+
@Test
11+
void escape_regex_characters(){
12+
assertEquals("hello \\$world", escapeRegex("hello $world"));
13+
}
14+
15+
@Test
16+
void escape_all_regexp_characters() {
17+
assertEquals("\\^\\$\\[\\]\\(\\)\\{\\}\\.\\|\\?\\*\\+\\\\", escapeRegex("^$[](){}.|?*+\\"));
18+
}
19+
20+
@Test
21+
void escape_escaped_regexp_characters() {
22+
assertEquals("\\^\\$\\[\\]\\\\\\(\\\\\\)\\{\\}\\\\\\\\\\.\\|\\?\\*\\+", escapeRegex("^$[]\\(\\){}\\\\.|?*+"));
23+
}
24+
25+
26+
@Test
27+
void do_not_escape_when_there_is_nothing_to_escape() {
28+
assertEquals("hello world", escapeRegex("hello world"));
29+
}
30+
31+
@Test
32+
void gives_no_error_for_unicode_characters() {
33+
assertEquals("🥒", escapeRegex("🥒"));
34+
}
35+
36+
}

0 commit comments

Comments
 (0)