Skip to content

Commit a6f883a

Browse files
committed
ESQL: Fix Replace function memory usage
1 parent 3f5f899 commit a6f883a

File tree

7 files changed

+69
-18
lines changed

7 files changed

+69
-18
lines changed

modules/lang-painless/src/main/java/org/elasticsearch/painless/api/Augmentation.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,10 @@ public static <K, V, T> Map<T, Map<K, V>> groupBy(Map<K, V> receiver, BiFunction
456456
public static String replaceAll(CharSequence receiver, Pattern pattern, Function<Matcher, String> replacementBuilder) {
457457
Matcher m = pattern.matcher(receiver);
458458
if (false == m.find()) {
459-
// CharSequqence's toString is *supposed* to always return the characters in the sequence as a String
459+
// CharSequence's toString is *supposed* to always return the characters in the sequence as a String
460460
return receiver.toString();
461461
}
462-
StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
462+
StringBuilder result = new StringBuilder(initialBufferForReplaceWith(receiver));
463463
do {
464464
m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
465465
} while (m.find());
@@ -477,7 +477,7 @@ public static String replaceFirst(CharSequence receiver, Pattern pattern, Functi
477477
// CharSequqence's toString is *supposed* to always return the characters in the sequence as a String
478478
return receiver.toString();
479479
}
480-
StringBuffer result = new StringBuffer(initialBufferForReplaceWith(receiver));
480+
StringBuilder result = new StringBuilder(initialBufferForReplaceWith(receiver));
481481
m.appendReplacement(result, Matcher.quoteReplacement(replacementBuilder.apply(m)));
482482
m.appendTail(result);
483483
return result.toString();

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -459,9 +459,6 @@ tests:
459459
- class: org.elasticsearch.xpack.inference.qa.mixed.OpenAIServiceMixedIT
460460
method: testOpenAiCompletions
461461
issue: https://github.com/elastic/elasticsearch/issues/127878
462-
- class: org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT
463-
method: test {p0=esql/120_profile/avg 8.14 or after}
464-
issue: https://github.com/elastic/elasticsearch/issues/127879
465462
- class: org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDeciderIT
466463
method: testRestoreSnapshotAllocationDoesNotExceedWatermarkWithMultipleRestores
467464
issue: https://github.com/elastic/elasticsearch/issues/127787

x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828
import java.io.IOException;
2929
import java.util.Arrays;
3030
import java.util.List;
31+
import java.util.regex.Matcher;
3132
import java.util.regex.Pattern;
3233
import java.util.regex.PatternSyntaxException;
3334

35+
import static org.elasticsearch.common.unit.ByteSizeUnit.MB;
3436
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
3537
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
3638
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
@@ -39,6 +41,8 @@
3941
public class Replace extends EsqlScalarFunction {
4042
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Replace", Replace::new);
4143

44+
static final long MAX_RESULT_LENGTH = MB.toBytes(1);
45+
4246
private final Expression str;
4347
private final Expression regex;
4448
private final Expression newStr;
@@ -121,15 +125,15 @@ public boolean foldable() {
121125
return str.foldable() && regex.foldable() && newStr.foldable();
122126
}
123127

124-
@Evaluator(extraName = "Constant", warnExceptions = PatternSyntaxException.class)
128+
@Evaluator(extraName = "Constant", warnExceptions = IllegalArgumentException.class)
125129
static BytesRef process(BytesRef str, @Fixed Pattern regex, BytesRef newStr) {
126130
if (str == null || regex == null || newStr == null) {
127131
return null;
128132
}
129-
return new BytesRef(regex.matcher(str.utf8ToString()).replaceAll(newStr.utf8ToString()));
133+
return safeReplace(str, regex, newStr);
130134
}
131135

132-
@Evaluator(warnExceptions = PatternSyntaxException.class)
136+
@Evaluator(warnExceptions = IllegalArgumentException.class)
133137
static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
134138
if (str == null) {
135139
return null;
@@ -138,7 +142,30 @@ static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
138142
if (regex == null || newStr == null) {
139143
return str;
140144
}
141-
return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString()));
145+
return safeReplace(str, Pattern.compile(regex.utf8ToString()), newStr);
146+
}
147+
148+
/**
149+
* Executes a Replace without surpassing the memory limit.
150+
*/
151+
private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRef newStrBytesRef) {
152+
String str = strBytesRef.utf8ToString();
153+
Matcher m = regex.matcher(str);
154+
if (false == m.find()) {
155+
return strBytesRef;
156+
}
157+
String newStr = newStrBytesRef.utf8ToString();
158+
// Initialize the buffer with an approximate size for the first replacement
159+
StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8);
160+
do {
161+
m.appendReplacement(result, newStr);
162+
163+
if (result.length() > MAX_RESULT_LENGTH) {
164+
throw new IllegalArgumentException("Creating strings with more than [" + MAX_RESULT_LENGTH + "] bytes is not supported");
165+
}
166+
} while (m.find());
167+
m.appendTail(result);
168+
return new BytesRef(result.toString());
142169
}
143170

144171
@Override

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import java.util.stream.Collectors;
4343
import java.util.stream.IntStream;
4444

45+
import static org.elasticsearch.common.unit.ByteSizeUnit.GB;
4546
import static org.elasticsearch.xpack.esql.EsqlTestUtils.unboundLogicalOptimizerContext;
4647
import static org.hamcrest.Matchers.either;
4748
import static org.hamcrest.Matchers.equalTo;
@@ -304,11 +305,17 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru
304305
if (testCase.getExpectedBuildEvaluatorWarnings() != null) {
305306
assertWarnings(testCase.getExpectedBuildEvaluatorWarnings());
306307
}
308+
309+
List<Object> simpleData = testCase.getDataValues();
310+
// Ensure we don't run this test with too much data that could take too long to process.
311+
// The calculation "ramUsed * count" is just a hint of how much data will the function process,
312+
// and the limit is arbitrary
313+
assumeTrue("Input data too big", row(simpleData).ramBytesUsedByBlocks() * count < GB.toBytes(1));
314+
307315
ExecutorService exec = Executors.newFixedThreadPool(threads);
308316
try {
309317
List<Future<?>> futures = new ArrayList<>();
310318
for (int i = 0; i < threads; i++) {
311-
List<Object> simpleData = testCase.getDataValues();
312319
Page page = row(simpleData);
313320

314321
futures.add(exec.submit(() -> {

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.function.Supplier;
2323
import java.util.regex.PatternSyntaxException;
2424

25+
import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace.MAX_RESULT_LENGTH;
2526
import static org.hamcrest.Matchers.equalTo;
2627

2728
public class ReplaceTests extends AbstractScalarFunctionTestCase {
@@ -85,7 +86,7 @@ public static Iterable<Object[]> parameters() {
8586
return new TestCaseSupplier.TestCase(
8687
List.of(
8788
new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str"),
88-
new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "oldStr"),
89+
new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "regex"),
8990
new TestCaseSupplier.TypedData(new BytesRef(newStr), DataType.KEYWORD, "newStr")
9091
),
9192
"ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",
@@ -103,6 +104,27 @@ public static Iterable<Object[]> parameters() {
103104
"Unclosed character class near index 0\n[\n^".replaceAll("\n", System.lineSeparator())
104105
);
105106
}));
107+
108+
suppliers.add(new TestCaseSupplier("result too big", List.of(DataType.KEYWORD, DataType.KEYWORD, DataType.KEYWORD), () -> {
109+
String textAndNewStr = randomAlphaOfLength((int) (MAX_RESULT_LENGTH / 10));
110+
String regex = ".";
111+
return new TestCaseSupplier.TestCase(
112+
List.of(
113+
new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "str"),
114+
new TestCaseSupplier.TypedData(new BytesRef(regex), DataType.KEYWORD, "regex"),
115+
new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "newStr")
116+
),
117+
"ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",
118+
DataType.KEYWORD,
119+
equalTo(null)
120+
).withWarning("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded.")
121+
.withWarning(
122+
"Line 1:1: java.lang.IllegalArgumentException: "
123+
+ "Creating strings with more than ["
124+
+ MAX_RESULT_LENGTH
125+
+ "] bytes is not supported"
126+
);
127+
}));
106128
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(false, suppliers);
107129
}
108130

0 commit comments

Comments
 (0)