-
-
Notifications
You must be signed in to change notification settings - Fork 57
[Java] Improve cucumber expression creation performance #202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 12 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
5481450
Update CucumberExpression.java
jkronegg 73a51e1
Update CucumberExpressionTest.java
jkronegg 531d380
Update CHANGELOG.md
jkronegg a53b0e5
Update CHANGELOG.md
jkronegg f264cf2
Created private package class `RegexpUtils`
jkronegg 20cc322
Moved tests to `RegexpUtilsTest`
jkronegg fb2e664
Reverted to original code
jkronegg 7f5c171
Moved `escapeRegex` to `RegexpUtils`
jkronegg b6f51b4
Delete RegexpUtilsTest.java
jkronegg 68999a0
Added tests for `RegexpUtils`
jkronegg 99dd95d
Minor edits based on PR comments
jkronegg 7aef82c
Added static import on `escapeRegex`
jkronegg 6c1f3ec
Update java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils…
mpkorstanje afc42c8
Update java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils…
mpkorstanje b2febb7
Add coverage for flush
mpkorstanje File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
package io.cucumber.cucumberexpressions; | ||
|
||
/**
 * Package-private helpers for working with regular expression source text.
 */
final class RegexpUtils {

    /**
     * Lookup table of the characters to be escaped, indexed by char code.
     * The highest character that needs escaping is '}' (code 125), so the
     * table only has to cover the codes 0 to 125.
     */
    private static final boolean[] CHAR_TO_ESCAPE = new boolean['}' + 1];

    static {
        CHAR_TO_ESCAPE['^'] = true;
        CHAR_TO_ESCAPE['$'] = true;
        CHAR_TO_ESCAPE['['] = true;
        CHAR_TO_ESCAPE[']'] = true;
        CHAR_TO_ESCAPE['('] = true;
        CHAR_TO_ESCAPE[')'] = true;
        CHAR_TO_ESCAPE['{'] = true;
        CHAR_TO_ESCAPE['}'] = true;
        CHAR_TO_ESCAPE['.'] = true;
        CHAR_TO_ESCAPE['|'] = true;
        CHAR_TO_ESCAPE['?'] = true;
        CHAR_TO_ESCAPE['*'] = true;
        CHAR_TO_ESCAPE['+'] = true;
        CHAR_TO_ESCAPE['\\'] = true;
    }

    private RegexpUtils() {
        // static utility class, not meant to be instantiated
    }

    /**
     * Escapes the regexp characters (the ones from "^$[](){}.|?*+\")
     * in the given text by prefixing each of them with a backslash,
     * so that they are not considered as regexp characters.
     * All other characters are copied unchanged.
     *
     * @param text the non-null input text
     * @return the input text with escaped regexp characters; the very same
     *         instance is returned when there is nothing to escape
     */
    public static String escapeRegex(String text) {
        /*
        Note on performance: this code has been benchmarked for
        escaping frequencies of 100%, 50%, 20%, 10%, 1%, 0.1%.
        Amongst 4 other variants (including Pattern matching),
        this variant is the fastest on all escaping frequencies.
        */
        int length = text.length();
        StringBuilder sb = null; // lazy initialization: no allocation when nothing is escaped
        int blockStart = 0; // start of the pending run of characters that need no escaping
        int maxChar = CHAR_TO_ESCAPE.length;
        for (int i = 0; i < length; i++) {
            char currentChar = text.charAt(i);
            // the range check keeps chars above '}' (e.g. surrogates) out of the table
            if (currentChar < maxChar && CHAR_TO_ESCAPE[currentChar]) {
                if (sb == null) {
                    // worst case: every character is escaped, doubling the length
                    sb = new StringBuilder(length * 2);
                }
                if (i > blockStart) {
                    // flush previous block
                    sb.append(text, blockStart, i);
                }
                sb.append('\\');
                sb.append(currentChar);
                blockStart = i + 1;
            }
        }
        if (sb != null) {
            // finalizing character escaping
            if (length > blockStart) {
                // flush remaining characters
                sb.append(text, blockStart, length);
            }
            return sb.toString();
        }
        return text;
    }

}
30 changes: 30 additions & 0 deletions
30
java/src/test/java/io/cucumber/cucumberexpressions/RegexpUtilsTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package io.cucumber.cucumberexpressions; | ||
|
||
import org.junit.jupiter.api.Test; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
class RegexpUtilsTest { | ||
|
||
@Test | ||
void escape_all_regexp_characters() { | ||
assertEquals("\\^\\$\\[\\]\\(\\)\\{\\}\\.\\|\\?\\*\\+\\\\", RegexpUtils.escapeRegex("^$[](){}.|?*+\\")); | ||
} | ||
|
||
@Test | ||
void escape_escaped_regexp_characters() { | ||
assertEquals("\\^\\$\\[\\]\\\\\\(\\\\\\)\\{\\}\\\\\\\\\\.\\|\\?\\*\\+", RegexpUtils.escapeRegex("^$[]\\(\\){}\\\\.|?*+")); | ||
} | ||
|
||
|
||
@Test | ||
void do_not_escape_when_there_is_nothing_to_escape() { | ||
assertEquals("dummy", RegexpUtils.escapeRegex("dummy")); | ||
} | ||
|
||
@Test | ||
void escapeRegex_gives_no_error_for_unicode_characters() { | ||
assertEquals("🥒", RegexpUtils.escapeRegex("🥒")); | ||
} | ||
|
||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.