Skip to content

Commit 6f66c75

Browse files
[7.17] Enhance regex performance with duplicate wildcards (#98176) (#98277)
This change avoids unnecessary substring allocations and recursive calls when more than two consecutive wildcards (`*`) are detected. Instead of skipping a single `*` and calling the method recursively for each one, we now skip all consecutive `*` characters at once.
1 parent 842e8a4 commit 6f66c75

File tree

3 files changed

+41
-2
lines changed

3 files changed

+41
-2
lines changed

docs/changelog/98176.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 98176
2+
summary: Enhance regex performance with duplicate wildcards
3+
area: Infra/Core
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/common/regex/Regex.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,12 @@ private static boolean simpleMatchWithNormalizedStrings(String pattern, String s
135135
// str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1):
136136
return str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1);
137137
} else if (nextIndex == 1) {
138-
// Double wildcard "**" - skipping the first "*"
139-
return simpleMatchWithNormalizedStrings(pattern.substring(1), str);
138+
// Double wildcard "**" detected - skipping all "*"
139+
int wildcards = nextIndex + 1;
140+
while (wildcards < pattern.length() && pattern.charAt(wildcards) == '*') {
141+
wildcards++;
142+
}
143+
return simpleMatchWithNormalizedStrings(pattern.substring(wildcards - 1), str);
140144
}
141145
final String part = pattern.substring(1, nextIndex);
142146
int partIndex = str.indexOf(part);

server/src/test/java/org/elasticsearch/common/regex/RegexTests.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,28 @@ public void testDoubleWildcardMatch() {
7878
assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd"));
7979
}
8080

81+
public void testArbitraryWildcardMatch() {
82+
final String prefix = randomAlphaOfLengthBetween(1, 20);
83+
final String suffix = randomAlphaOfLengthBetween(1, 20);
84+
final String pattern1 = repeat("*", randomIntBetween(1, 1000));
85+
// dd***
86+
assertTrue(Regex.simpleMatch(prefix + pattern1, prefix + randomAlphaOfLengthBetween(10, 20), randomBoolean()));
87+
// ***dd
88+
assertTrue(Regex.simpleMatch(pattern1 + suffix, randomAlphaOfLengthBetween(10, 20) + suffix, randomBoolean()));
89+
// dd***dd
90+
assertTrue(Regex.simpleMatch(prefix + pattern1 + suffix, prefix + randomAlphaOfLengthBetween(10, 20) + suffix, randomBoolean()));
91+
// dd***dd***dd
92+
final String middle = randomAlphaOfLengthBetween(1, 20);
93+
final String pattern2 = repeat("*", randomIntBetween(1, 1000));
94+
assertTrue(
95+
Regex.simpleMatch(
96+
prefix + pattern1 + middle + pattern2 + suffix,
97+
prefix + randomAlphaOfLengthBetween(10, 20) + middle + randomAlphaOfLengthBetween(10, 20) + suffix,
98+
randomBoolean()
99+
)
100+
);
101+
}
102+
81103
public void testSimpleMatch() {
82104
for (int i = 0; i < 1000; i++) {
83105
final String matchingString = randomAlphaOfLength(between(0, 50));
@@ -189,4 +211,12 @@ private void assertMatchesNone(Automaton automaton, String... strings) {
189211
assertFalse(run.run(s));
190212
}
191213
}
214+
215+
private String repeat(String str, int count) {
216+
StringBuilder sb = new StringBuilder(str.length() * count);
217+
for (int i = 0; i < count; i++) {
218+
sb.append(str);
219+
}
220+
return sb.toString();
221+
}
192222
}

0 commit comments

Comments
 (0)