diff --git a/CHANGELOG.md b/CHANGELOG.md index b791844cd..2b2bea27c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Fixed +- [Java] Improve cucumber expression creation performance ([#202](https://github.com/cucumber/cucumber-expressions/pull/202)) + ## [16.1.1] - 2022-12-08 ### Fixed - [Java] Improve expression creation performance ([#187](https://github.com/cucumber/cucumber-expressions/pull/187), [#189](https://github.com/cucumber/cucumber-expressions/pull/189)) diff --git a/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java b/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java index dd5a8124f..c0ea66155 100644 --- a/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java +++ b/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java @@ -19,12 +19,12 @@ import static io.cucumber.cucumberexpressions.CucumberExpressionException.createOptionalMayNotBeEmpty; import static io.cucumber.cucumberexpressions.CucumberExpressionException.createParameterIsNotAllowedInOptional; import static io.cucumber.cucumberexpressions.ParameterType.isValidParameterTypeName; +import static io.cucumber.cucumberexpressions.RegexpUtils.escapeRegex; import static io.cucumber.cucumberexpressions.UndefinedParameterTypeException.createUndefinedParameterType; import static java.util.stream.Collectors.joining; @API(status = API.Status.STABLE) public final class CucumberExpression implements Expression { - private static final Pattern ESCAPE_PATTERN = Pattern.compile("[\\\\^\\[({$.|?*+})\\]]"); private final List<ParameterType<?>> parameterTypes = new ArrayList<>(); private final String source; private final TreeRegexp treeRegexp; @@ -60,11 +60,6 @@ private String rewriteToRegex(Node node) { } } - private static String escapeRegex(String text) { - return ESCAPE_PATTERN.matcher(text).replaceAll("\\\\$0"); - } - - private String 
rewriteOptional(Node node) { assertNoParameters(node, astNode -> createParameterIsNotAllowedInOptional(astNode, source)); assertNoOptionals(node, astNode -> createOptionalIsNotAllowedInOptional(astNode, source)); diff --git a/java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils.java b/java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils.java new file mode 100644 index 000000000..0c8c1767e --- /dev/null +++ b/java/src/main/java/io/cucumber/cucumberexpressions/RegexpUtils.java @@ -0,0 +1,72 @@ +package io.cucumber.cucumberexpressions; + +class RegexpUtils { + /** + * Lookup table, indexed by character code, of the characters to be escaped. + * The highest such character is '}' (code 125), so 126 entries are enough. + */ + private static final boolean[] CHAR_TO_ESCAPE = new boolean[126]; + + static { + CHAR_TO_ESCAPE['^'] = true; + CHAR_TO_ESCAPE['$'] = true; + CHAR_TO_ESCAPE['['] = true; + CHAR_TO_ESCAPE[']'] = true; + CHAR_TO_ESCAPE['('] = true; + CHAR_TO_ESCAPE[')'] = true; + CHAR_TO_ESCAPE['{'] = true; + CHAR_TO_ESCAPE['}'] = true; + CHAR_TO_ESCAPE['.'] = true; + CHAR_TO_ESCAPE['|'] = true; + CHAR_TO_ESCAPE['?'] = true; + CHAR_TO_ESCAPE['*'] = true; + CHAR_TO_ESCAPE['+'] = true; + CHAR_TO_ESCAPE['\\'] = true; + } + + /** + * Escapes the regexp characters (the ones from "^$(){}[].|+*?\") + * in the given text by prefixing each with a backslash, so that + * they are matched literally. + * + * @param text the non-null input text + * @return the escaped text, or the input itself when nothing needed escaping + */ + public static String escapeRegex(String text) { + /* + Note on performance: this code has been benchmarked for + escaping frequencies of 100%, 50%, 20%, 10%, 1%, 0.1%. + Amongst 4 other variants (including Pattern matching), + this variant is the fastest at all escaping frequencies. 
+ */ + int length = text.length(); + StringBuilder sb = null; // lazy initialization + int blockStart = 0; + int maxChar = CHAR_TO_ESCAPE.length; + for (int i = 0; i < length; i++) { + char currentChar = text.charAt(i); + if (currentChar < maxChar && CHAR_TO_ESCAPE[currentChar]) { + if (sb == null) { + sb = new StringBuilder(length * 2); + } + if (blockStart < i) { + // flush previous block + sb.append(text, blockStart, i); + } + sb.append('\\'); + sb.append(currentChar); + blockStart = i + 1; + } + } + if (sb != null) { + // finalizing character escaping + if (blockStart < length) { + // flush remaining characters + sb.append(text, blockStart, length); + } + return sb.toString(); + } + return text; + } + +} diff --git a/java/src/test/java/io/cucumber/cucumberexpressions/RegexpUtilsTest.java b/java/src/test/java/io/cucumber/cucumberexpressions/RegexpUtilsTest.java new file mode 100644 index 000000000..7edb02136 --- /dev/null +++ b/java/src/test/java/io/cucumber/cucumberexpressions/RegexpUtilsTest.java @@ -0,0 +1,36 @@ +package io.cucumber.cucumberexpressions; + +import org.junit.jupiter.api.Test; + +import static io.cucumber.cucumberexpressions.RegexpUtils.escapeRegex; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class RegexpUtilsTest { + + @Test + void escape_regex_characters(){ + assertEquals("hello \\$world", escapeRegex("hello $world")); + } + + @Test + void escape_all_regexp_characters() { + assertEquals("\\^\\$\\[\\]\\(\\)\\{\\}\\.\\|\\?\\*\\+\\\\", escapeRegex("^$[](){}.|?*+\\")); + } + + @Test + void escape_escaped_regexp_characters() { + assertEquals("\\^\\$\\[\\]\\\\\\(\\\\\\)\\{\\}\\\\\\\\\\.\\|\\?\\*\\+", escapeRegex("^$[]\\(\\){}\\\\.|?*+")); + } + + + @Test + void do_not_escape_when_there_is_nothing_to_escape() { + assertEquals("hello world", escapeRegex("hello world")); + } + + @Test + void gives_no_error_for_unicode_characters() { + assertEquals("🥒", escapeRegex("🥒")); + } + +}