Skip to content

Commit dd0b7cc

Browse files
authored
ESQL: Limit Replace function memory usage (#127924) (#128195)
The Replace string result limit was fixed to 1MB, same as Repeat
1 parent 76becd0 commit dd0b7cc

File tree

10 files changed

+225
-19
lines changed

10 files changed

+225
-19
lines changed

docs/changelog/127924.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127924
2+
summary: Limit Replace function memory usage
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.List;
1414

1515
import static java.util.Collections.emptyList;
16+
import static org.elasticsearch.common.unit.ByteSizeUnit.MB;
1617

1718
/**
1819
* A {@code ScalarFunction} is a {@code Function} that takes values from some
@@ -22,6 +23,14 @@
2223
*/
2324
public abstract class ScalarFunction extends Function {
2425

26+
/**
27+
* Limit for the BytesRef return of functions.
28+
* <p>
29+
* To be used when there's no CircuitBreaking, as an arbitrary measure to limit memory usage.
30+
* </p>
31+
*/
32+
public static final long MAX_BYTES_REF_RESULT_SIZE = MB.toBytes(1);
33+
2534
protected ScalarFunction(Source source) {
2635
super(source, emptyList());
2736
}

x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java

Lines changed: 2 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
import java.util.Arrays;
3131
import java.util.List;
3232

33-
import static org.elasticsearch.common.unit.ByteSizeUnit.MB;
3433
import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL;
3534
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
3635
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
@@ -40,8 +39,6 @@
4039
public class Repeat extends EsqlScalarFunction implements OptionalArgument {
4140
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Repeat", Repeat::new);
4241

43-
static final long MAX_REPEATED_LENGTH = MB.toBytes(1);
44-
4542
private final Expression str;
4643
private final Expression number;
4744

@@ -123,9 +120,9 @@ static BytesRef process(
123120

124121
static BytesRef processInner(BreakingBytesRefBuilder scratch, BytesRef str, int number) {
125122
int repeatedLen = str.length * number;
126-
if (repeatedLen > MAX_REPEATED_LENGTH) {
123+
if (repeatedLen > MAX_BYTES_REF_RESULT_SIZE) {
127124
throw new IllegalArgumentException(
128-
"Creating repeated strings with more than [" + MAX_REPEATED_LENGTH + "] bytes is not supported"
125+
"Creating repeated strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported"
129126
);
130127
}
131128
scratch.grow(repeatedLen);

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import java.io.IOException;
2727
import java.util.Arrays;
2828
import java.util.List;
29+
import java.util.regex.Matcher;
2930
import java.util.regex.Pattern;
3031
import java.util.regex.PatternSyntaxException;
3132

@@ -114,24 +115,63 @@ public boolean foldable() {
114115
return str.foldable() && regex.foldable() && newStr.foldable();
115116
}
116117

117-
@Evaluator(extraName = "Constant", warnExceptions = PatternSyntaxException.class)
118+
@Evaluator(extraName = "Constant", warnExceptions = IllegalArgumentException.class)
118119
static BytesRef process(BytesRef str, @Fixed Pattern regex, BytesRef newStr) {
119120
if (str == null || regex == null || newStr == null) {
120121
return null;
121122
}
122-
return new BytesRef(regex.matcher(str.utf8ToString()).replaceAll(newStr.utf8ToString()));
123+
return safeReplace(str, regex, newStr);
123124
}
124125

125-
@Evaluator(warnExceptions = PatternSyntaxException.class)
126+
@Evaluator(warnExceptions = IllegalArgumentException.class)
126127
static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
127128
if (str == null) {
128129
return null;
129130
}
130-
131131
if (regex == null || newStr == null) {
132132
return str;
133133
}
134-
return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString()));
134+
return safeReplace(str, Pattern.compile(regex.utf8ToString()), newStr);
135+
}
136+
137+
/**
138+
* Executes a Replace without surpassing the memory limit.
139+
*/
140+
private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRef newStrBytesRef) {
141+
String str = strBytesRef.utf8ToString();
142+
Matcher m = regex.matcher(str);
143+
if (false == m.find()) {
144+
return strBytesRef;
145+
}
146+
String newStr = newStrBytesRef.utf8ToString();
147+
148+
// Count potential groups (E.g. "$1") used in the replacement
149+
int constantReplacementLength = newStr.length();
150+
int groupsInReplacement = 0;
151+
for (int i = 0; i < newStr.length(); i++) {
152+
if (newStr.charAt(i) == '$') {
153+
groupsInReplacement++;
154+
constantReplacementLength -= 2;
155+
i++;
156+
}
157+
}
158+
159+
// Initialize the buffer with an approximate size for the first replacement
160+
StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8);
161+
do {
162+
int matchSize = m.end() - m.start();
163+
int potentialReplacementSize = constantReplacementLength + groupsInReplacement * matchSize;
164+
int remainingStr = str.length() - m.end();
165+
if (result.length() + potentialReplacementSize + remainingStr > MAX_BYTES_REF_RESULT_SIZE) {
166+
throw new IllegalArgumentException(
167+
"Creating strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported"
168+
);
169+
}
170+
171+
m.appendReplacement(result, newStr);
172+
} while (m.find());
173+
m.appendTail(result);
174+
return new BytesRef(result.toString());
135175
}
136176

137177
@Override

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru
303303
if (testCase.getExpectedBuildEvaluatorWarnings() != null) {
304304
assertWarnings(testCase.getExpectedBuildEvaluatorWarnings());
305305
}
306+
306307
ExecutorService exec = Executors.newFixedThreadPool(threads);
307308
try {
308309
List<Future<?>> futures = new ArrayList<>();

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.elasticsearch.compute.test.TestBlockFactory;
2222
import org.elasticsearch.test.ESTestCase;
2323
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
24+
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction;
2425
import org.elasticsearch.xpack.esql.core.tree.Source;
2526
import org.elasticsearch.xpack.esql.core.type.DataType;
2627
import org.elasticsearch.xpack.esql.core.type.EsField;
@@ -45,14 +46,14 @@ public class RepeatStaticTests extends ESTestCase {
4546

4647
public void testAlmostTooBig() {
4748
String str = randomAlphaOfLength(1);
48-
int number = (int) Repeat.MAX_REPEATED_LENGTH;
49+
int number = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE;
4950
String repeated = process(str, number);
5051
assertThat(repeated, equalTo(str.repeat(number)));
5152
}
5253

5354
public void testTooBig() {
5455
String str = randomAlphaOfLength(1);
55-
int number = (int) Repeat.MAX_REPEATED_LENGTH + 1;
56+
int number = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + 1;
5657
String repeated = process(str, number);
5758
assertNull(repeated);
5859
assertWarnings(
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
9+
10+
import org.apache.lucene.util.BytesRef;
11+
import org.elasticsearch.common.breaker.CircuitBreaker;
12+
import org.elasticsearch.common.unit.ByteSizeValue;
13+
import org.elasticsearch.common.util.BigArrays;
14+
import org.elasticsearch.common.util.MockBigArrays;
15+
import org.elasticsearch.common.util.PageCacheRecycler;
16+
import org.elasticsearch.compute.data.Block;
17+
import org.elasticsearch.compute.data.BlockFactory;
18+
import org.elasticsearch.compute.data.BlockUtils;
19+
import org.elasticsearch.compute.data.Page;
20+
import org.elasticsearch.compute.operator.DriverContext;
21+
import org.elasticsearch.compute.test.TestBlockFactory;
22+
import org.elasticsearch.test.ESTestCase;
23+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
24+
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction;
25+
import org.elasticsearch.xpack.esql.core.tree.Source;
26+
import org.elasticsearch.xpack.esql.core.type.DataType;
27+
import org.elasticsearch.xpack.esql.core.type.EsField;
28+
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
29+
import org.junit.After;
30+
31+
import java.util.ArrayList;
32+
import java.util.Collections;
33+
import java.util.List;
34+
import java.util.Map;
35+
36+
import static org.hamcrest.Matchers.equalTo;
37+
38+
/**
39+
* These tests create rows that are 1MB in size. Test classes
40+
* which extend AbstractScalarFunctionTestCase rerun test cases with
41+
* many randomized inputs. Unfortunately, tests are run with
42+
* limited memory, and instantiating many copies of these
43+
* tests with large rows causes out of memory.
44+
*/
45+
public class ReplaceStaticTests extends ESTestCase {
46+
47+
public void testLimit() {
48+
int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10;
49+
String text = randomAlphaOfLength((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10);
50+
String regex = "^(.+)$";
51+
52+
// 10 times the original text + the remainder
53+
String extraString = "a".repeat((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10);
54+
assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE;
55+
String newStr = "$0$0$0$0$0$0$0$0$0$0" + extraString;
56+
57+
String result = process(text, regex, newStr);
58+
assertThat(result, equalTo(newStr.replaceAll("\\$\\d", text)));
59+
}
60+
61+
public void testTooBig() {
62+
String textAndNewStr = randomAlphaOfLength((int) (ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10));
63+
String regex = ".";
64+
65+
String result = process(textAndNewStr, regex, textAndNewStr);
66+
assertNull(result);
67+
assertWarnings(
68+
"Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.",
69+
"Line -1:-1: java.lang.IllegalArgumentException: "
70+
+ "Creating strings with more than ["
71+
+ ScalarFunction.MAX_BYTES_REF_RESULT_SIZE
72+
+ "] bytes is not supported"
73+
);
74+
}
75+
76+
public void testTooBigWithGroups() {
77+
int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10;
78+
String text = randomAlphaOfLength(textLength);
79+
String regex = "(.+)";
80+
81+
// 10 times the original text + the remainder + 1
82+
String extraString = "a".repeat(1 + (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10);
83+
assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + 1;
84+
String newStr = "$0$1$0$1$0$1$0$1$0$1" + extraString;
85+
86+
String result = process(text, regex, newStr);
87+
assertNull(result);
88+
assertWarnings(
89+
"Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.",
90+
"Line -1:-1: java.lang.IllegalArgumentException: "
91+
+ "Creating strings with more than ["
92+
+ ScalarFunction.MAX_BYTES_REF_RESULT_SIZE
93+
+ "] bytes is not supported"
94+
);
95+
}
96+
97+
public String process(String text, String regex, String newStr) {
98+
try (
99+
var eval = AbstractScalarFunctionTestCase.evaluator(
100+
new Replace(
101+
Source.EMPTY,
102+
field("text", DataType.KEYWORD),
103+
field("regex", DataType.KEYWORD),
104+
field("newStr", DataType.KEYWORD)
105+
)
106+
).get(driverContext());
107+
Block block = eval.eval(row(List.of(new BytesRef(text), new BytesRef(regex), new BytesRef(newStr))));
108+
) {
109+
return block.isNull(0) ? null : ((BytesRef) BlockUtils.toJavaObject(block, 0)).utf8ToString();
110+
}
111+
}
112+
113+
/**
114+
* The following fields and methods were borrowed from AbstractScalarFunctionTestCase
115+
*/
116+
private final List<CircuitBreaker> breakers = Collections.synchronizedList(new ArrayList<>());
117+
118+
private static Page row(List<Object> values) {
119+
return new Page(1, BlockUtils.fromListRow(TestBlockFactory.getNonBreakingInstance(), values));
120+
}
121+
122+
private static FieldAttribute field(String name, DataType type) {
123+
return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true));
124+
}
125+
126+
private DriverContext driverContext() {
127+
BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking();
128+
CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
129+
breakers.add(breaker);
130+
return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays));
131+
}
132+
133+
@After
134+
public void allMemoryReleased() {
135+
for (CircuitBreaker breaker : breakers) {
136+
assertThat(breaker.getUsed(), equalTo(0L));
137+
}
138+
}
139+
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,30 @@ public static Iterable<Object[]> parameters() {
7878
)
7979
);
8080

81+
// Groups
82+
suppliers.add(fixedCase("Full group", "Cats are awesome", ".+", "<$0>", "<Cats are awesome>"));
83+
suppliers.add(
84+
fixedCase("Nested groups", "A cat is great, a cat is awesome", "\\b([Aa] (\\w+)) is (\\w+)\\b", "$1$2", "A catcat, a catcat")
85+
);
86+
suppliers.add(
87+
fixedCase(
88+
"Multiple groups",
89+
"Cats are awesome",
90+
"(\\w+) (.+)",
91+
"$0 -> $1 and dogs $2",
92+
"Cats are awesome -> Cats and dogs are awesome"
93+
)
94+
);
95+
96+
// Errors
8197
suppliers.add(new TestCaseSupplier("syntax error", List.of(DataType.KEYWORD, DataType.KEYWORD, DataType.KEYWORD), () -> {
8298
String text = randomAlphaOfLength(10);
8399
String invalidRegex = "[";
84100
String newStr = randomAlphaOfLength(5);
85101
return new TestCaseSupplier.TestCase(
86102
List.of(
87103
new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str"),
88-
new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "oldStr"),
104+
new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "regex"),
89105
new TestCaseSupplier.TypedData(new BytesRef(newStr), DataType.KEYWORD, "newStr")
90106
),
91107
"ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",

0 commit comments

Comments
 (0)