Skip to content

Commit 455b1fd

Browse files
committed
Updated limiting logic to take groups into account and throw before doing the actual replacement
1 parent 162c91a commit 455b1fd

File tree

4 files changed

+158
-32
lines changed

4 files changed

+158
-32
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,16 +151,31 @@ private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRe
151151
return strBytesRef;
152152
}
153153
String newStr = newStrBytesRef.utf8ToString();
154+
155+
// Count potential groups (E.g. "$1") used in the replacement
156+
int constantReplacementLength = newStr.length();
157+
int groupsInReplacement = 0;
158+
for (int i = 0; i < newStr.length(); i++) {
159+
if (newStr.charAt(i) == '$') {
160+
groupsInReplacement++;
161+
constantReplacementLength -= 2;
162+
i++;
163+
}
164+
}
165+
154166
// Initialize the buffer with an approximate size for the first replacement
155167
StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8);
156168
do {
157-
m.appendReplacement(result, newStr);
158-
159-
if (result.length() > MAX_BYTES_REF_RESULT_SIZE) {
169+
int matchSize = m.end() - m.start();
170+
int potentialReplacementSize = constantReplacementLength + groupsInReplacement * matchSize;
171+
int remainingStr = str.length() - m.end();
172+
if (result.length() + potentialReplacementSize + remainingStr > MAX_BYTES_REF_RESULT_SIZE) {
160173
throw new IllegalArgumentException(
161174
"Creating strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported"
162175
);
163176
}
177+
178+
m.appendReplacement(result, newStr);
164179
} while (m.find());
165180
m.appendTail(result);
166181
return new BytesRef(result.toString());

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242
import java.util.stream.Collectors;
4343
import java.util.stream.IntStream;
4444

45-
import static org.elasticsearch.common.unit.ByteSizeUnit.GB;
4645
import static org.elasticsearch.xpack.esql.EsqlTestUtils.unboundLogicalOptimizerContext;
4746
import static org.hamcrest.Matchers.either;
4847
import static org.hamcrest.Matchers.equalTo;
@@ -306,16 +305,11 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru
306305
assertWarnings(testCase.getExpectedBuildEvaluatorWarnings());
307306
}
308307

309-
List<Object> simpleData = testCase.getDataValues();
310-
// Ensure we don't run this test with too much data that could take too long to process.
311-
// The calculation "ramUsed * count" is just a hint of how much data will the function process,
312-
// and the limit is arbitrary
313-
assumeTrue("Input data too big", row(simpleData).ramBytesUsedByBlocks() * count < GB.toBytes(1));
314-
315308
ExecutorService exec = Executors.newFixedThreadPool(threads);
316309
try {
317310
List<Future<?>> futures = new ArrayList<>();
318311
for (int i = 0; i < threads; i++) {
312+
List<Object> simpleData = testCase.getDataValues();
319313
Page page = row(simpleData);
320314

321315
futures.add(exec.submit(() -> {
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.scalar.string;
9+
10+
import org.apache.lucene.util.BytesRef;
11+
import org.elasticsearch.common.breaker.CircuitBreaker;
12+
import org.elasticsearch.common.unit.ByteSizeValue;
13+
import org.elasticsearch.common.util.BigArrays;
14+
import org.elasticsearch.common.util.MockBigArrays;
15+
import org.elasticsearch.common.util.PageCacheRecycler;
16+
import org.elasticsearch.compute.data.Block;
17+
import org.elasticsearch.compute.data.BlockFactory;
18+
import org.elasticsearch.compute.data.BlockUtils;
19+
import org.elasticsearch.compute.data.Page;
20+
import org.elasticsearch.compute.operator.DriverContext;
21+
import org.elasticsearch.compute.test.TestBlockFactory;
22+
import org.elasticsearch.test.ESTestCase;
23+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
24+
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction;
25+
import org.elasticsearch.xpack.esql.core.tree.Source;
26+
import org.elasticsearch.xpack.esql.core.type.DataType;
27+
import org.elasticsearch.xpack.esql.core.type.EsField;
28+
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
29+
import org.junit.After;
30+
31+
import java.util.ArrayList;
32+
import java.util.Collections;
33+
import java.util.List;
34+
import java.util.Map;
35+
36+
import static org.hamcrest.Matchers.equalTo;
37+
38+
/**
39+
* These tests create rows that are 1MB in size. Test classes
40+
* which extend AbstractScalarFunctionTestCase rerun test cases with
41+
* many randomized inputs. Unfortunately, tests are run with
42+
* limited memory, and instantiating many copies of these
43+
* tests with large rows causes out of memory.
44+
*/
45+
public class ReplaceStaticTests extends ESTestCase {
46+
47+
public void testLimit() {
48+
int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10;
49+
String text = randomAlphaOfLength((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10);
50+
String regex = "^(.+)$";
51+
52+
// 10 times the original text + the remainder
53+
String extraString = "a".repeat((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10);
54+
assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE;
55+
String newStr = "$0$0$0$0$0$0$0$0$0$0" + extraString;
56+
57+
String result = process(text, regex, newStr);
58+
assertThat(result, equalTo(newStr.replaceAll("\\$\\d", text)));
59+
}
60+
61+
public void testTooBig() {
62+
String textAndNewStr = randomAlphaOfLength((int) (ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10));
63+
String regex = ".";
64+
65+
String result = process(textAndNewStr, regex, textAndNewStr);
66+
assertNull(result);
67+
assertWarnings(
68+
"Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.",
69+
"Line -1:-1: java.lang.IllegalArgumentException: "
70+
+ "Creating strings with more than ["
71+
+ ScalarFunction.MAX_BYTES_REF_RESULT_SIZE
72+
+ "] bytes is not supported"
73+
);
74+
}
75+
76+
public void testTooBigWithGroups() {
77+
int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10;
78+
String text = randomAlphaOfLength(textLength);
79+
String regex = "(.+)";
80+
81+
// 10 times the original text + the remainder + 1
82+
String extraString = "a".repeat(1 + (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10);
83+
assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + 1;
84+
String newStr = "$0$1$0$1$0$1$0$1$0$1" + extraString;
85+
86+
String result = process(text, regex, newStr);
87+
assertNull(result);
88+
assertWarnings(
89+
"Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.",
90+
"Line -1:-1: java.lang.IllegalArgumentException: "
91+
+ "Creating strings with more than ["
92+
+ ScalarFunction.MAX_BYTES_REF_RESULT_SIZE
93+
+ "] bytes is not supported"
94+
);
95+
}
96+
97+
public String process(String text, String regex, String newStr) {
98+
try (
99+
var eval = AbstractScalarFunctionTestCase.evaluator(
100+
new Replace(
101+
Source.EMPTY,
102+
field("text", DataType.KEYWORD),
103+
field("regex", DataType.KEYWORD),
104+
field("newStr", DataType.KEYWORD)
105+
)
106+
).get(driverContext());
107+
Block block = eval.eval(row(List.of(new BytesRef(text), new BytesRef(regex), new BytesRef(newStr))));
108+
) {
109+
return block.isNull(0) ? null : ((BytesRef) BlockUtils.toJavaObject(block, 0)).utf8ToString();
110+
}
111+
}
112+
113+
/**
114+
* The following fields and methods were borrowed from AbstractScalarFunctionTestCase
115+
*/
116+
private final List<CircuitBreaker> breakers = Collections.synchronizedList(new ArrayList<>());
117+
118+
private static Page row(List<Object> values) {
119+
return new Page(1, BlockUtils.fromListRow(TestBlockFactory.getNonBreakingInstance(), values));
120+
}
121+
122+
private static FieldAttribute field(String name, DataType type) {
123+
return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true));
124+
}
125+
126+
private DriverContext driverContext() {
127+
BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking();
128+
CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST);
129+
breakers.add(breaker);
130+
return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays));
131+
}
132+
133+
@After
134+
public void allMemoryReleased() {
135+
for (CircuitBreaker breaker : breakers) {
136+
assertThat(breaker.getUsed(), equalTo(0L));
137+
}
138+
}
139+
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
import org.apache.lucene.util.BytesRef;
1414
import org.elasticsearch.xpack.esql.core.expression.Expression;
15-
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction;
1615
import org.elasticsearch.xpack.esql.core.tree.Source;
1716
import org.elasticsearch.xpack.esql.core.type.DataType;
1817
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
@@ -120,27 +119,6 @@ public static Iterable<Object[]> parameters() {
120119
"Unclosed character class near index 0\n[\n^".replaceAll("\n", System.lineSeparator())
121120
);
122121
}));
123-
124-
suppliers.add(new TestCaseSupplier("result too big", List.of(DataType.KEYWORD, DataType.KEYWORD, DataType.KEYWORD), () -> {
125-
String textAndNewStr = randomAlphaOfLength((int) (ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10));
126-
String regex = ".";
127-
return new TestCaseSupplier.TestCase(
128-
List.of(
129-
new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "str"),
130-
new TestCaseSupplier.TypedData(new BytesRef(regex), DataType.KEYWORD, "regex"),
131-
new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "newStr")
132-
),
133-
"ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",
134-
DataType.KEYWORD,
135-
equalTo(null)
136-
).withWarning("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded.")
137-
.withWarning(
138-
"Line 1:1: java.lang.IllegalArgumentException: "
139-
+ "Creating strings with more than ["
140-
+ ScalarFunction.MAX_BYTES_REF_RESULT_SIZE
141-
+ "] bytes is not supported"
142-
);
143-
}));
144122
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(false, suppliers);
145123
}
146124

0 commit comments

Comments
 (0)