Skip to content

Commit b1e9d1a

Browse files
Enhance regex performance with duplicate wildcards (#98176) (#98193)
This change avoids unnecessary substring allocations and recursive calls when more than two consecutive wildcards (`*`) are detected. Instead of skipping a single `*` and calling the method recursively each time, we now skip all consecutive `*` characters at once.
1 parent 466ec8e commit b1e9d1a

File tree

3 files changed

+33
-2
lines changed

3 files changed

+33
-2
lines changed

docs/changelog/98176.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 98176
2+
summary: Enhance regex performance with duplicate wildcards
3+
area: Infra/Core
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/common/regex/Regex.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,12 @@ private static boolean simpleMatchWithNormalizedStrings(String pattern, String s
177177
// str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1):
178178
return str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1);
179179
} else if (nextIndex == 1) {
180-
// Double wildcard "**" - skipping the first "*"
181-
return simpleMatchWithNormalizedStrings(pattern.substring(1), str);
180+
// Double wildcard "**" detected - skipping all "*"
181+
int wildcards = nextIndex + 1;
182+
while (wildcards < pattern.length() && pattern.charAt(wildcards) == '*') {
183+
wildcards++;
184+
}
185+
return simpleMatchWithNormalizedStrings(pattern.substring(wildcards - 1), str);
182186
}
183187
final String part = pattern.substring(1, nextIndex);
184188
int partIndex = str.indexOf(part);

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,28 @@ public void testDoubleWildcardMatch() {
7878
assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd"));
7979
}
8080

81+
public void testArbitraryWildcardMatch() {
82+
final String prefix = randomAlphaOfLengthBetween(1, 20);
83+
final String suffix = randomAlphaOfLengthBetween(1, 20);
84+
final String pattern1 = "*".repeat(randomIntBetween(1, 1000));
85+
// dd***
86+
assertTrue(Regex.simpleMatch(prefix + pattern1, prefix + randomAlphaOfLengthBetween(10, 20), randomBoolean()));
87+
// ***dd
88+
assertTrue(Regex.simpleMatch(pattern1 + suffix, randomAlphaOfLengthBetween(10, 20) + suffix, randomBoolean()));
89+
// dd***dd
90+
assertTrue(Regex.simpleMatch(prefix + pattern1 + suffix, prefix + randomAlphaOfLengthBetween(10, 20) + suffix, randomBoolean()));
91+
// dd***dd***dd
92+
final String middle = randomAlphaOfLengthBetween(1, 20);
93+
final String pattern2 = "*".repeat(randomIntBetween(1, 1000));
94+
assertTrue(
95+
Regex.simpleMatch(
96+
prefix + pattern1 + middle + pattern2 + suffix,
97+
prefix + randomAlphaOfLengthBetween(10, 20) + middle + randomAlphaOfLengthBetween(10, 20) + suffix,
98+
randomBoolean()
99+
)
100+
);
101+
}
102+
81103
public void testSimpleMatch() {
82104
for (int i = 0; i < 1000; i++) {
83105
final String matchingString = randomAlphaOfLength(between(0, 50));

0 commit comments

Comments
 (0)