diff --git a/docs/changelog/127924.yaml b/docs/changelog/127924.yaml new file mode 100644 index 0000000000000..4aaaa710563ab --- /dev/null +++ b/docs/changelog/127924.yaml @@ -0,0 +1,5 @@ +pr: 127924 +summary: Limit Replace function memory usage +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java index 09359943684b5..2a59b21a4c022 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java @@ -13,6 +13,7 @@ import java.util.List; import static java.util.Collections.emptyList; +import static org.elasticsearch.common.unit.ByteSizeUnit.MB; /** * A {@code ScalarFunction} is a {@code Function} that takes values from some @@ -22,6 +23,14 @@ */ public abstract class ScalarFunction extends Function { + /** + * Limit for the BytesRef return of functions. + *

+ * To be used when there's no CircuitBreaking, as an arbitrary measure to limit memory usage. + *

+ */ + public static final long MAX_BYTES_REF_RESULT_SIZE = MB.toBytes(1); + protected ScalarFunction(Source source) { super(source, emptyList()); } diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java index a5aa37a0db56e..f63966810a5fe 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java @@ -8,7 +8,6 @@ import java.lang.Override; import java.lang.String; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; @@ -92,7 +91,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlo } try { result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), this.regex, newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } @@ -109,7 +108,7 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, position: for (int p = 0; p < positionCount; p++) { try { result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), this.regex, newStrVector.getBytesRef(p, newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java index 7a7a947453d0a..6eb3aa898b79c 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java @@ -7,7 +7,6 @@ import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; -import java.util.regex.PatternSyntaxException; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; @@ -111,7 +110,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlo } try { result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), regexBlock.getBytesRef(regexBlock.getFirstValueIndex(p), regexScratch), newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } @@ -129,7 +128,7 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, BytesRefV position: for (int p = 0; p < positionCount; p++) { try { result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), regexVector.getBytesRef(p, regexScratch), newStrVector.getBytesRef(p, newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java index 363991d1556f1..faa7ddbf63266 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java @@ -30,7 +30,6 @@ import java.util.Arrays; import java.util.List; -import static org.elasticsearch.common.unit.ByteSizeUnit.MB; import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; @@ -40,8 +39,6 @@ public class Repeat extends EsqlScalarFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Repeat", Repeat::new); - static final long MAX_REPEATED_LENGTH = MB.toBytes(1); - private final Expression str; private final Expression number; @@ -123,9 +120,9 @@ static BytesRef process( static BytesRef processInner(BreakingBytesRefBuilder scratch, BytesRef str, int number) { int repeatedLen = str.length * number; - if (repeatedLen > MAX_REPEATED_LENGTH) { + if (repeatedLen > MAX_BYTES_REF_RESULT_SIZE) { throw new IllegalArgumentException( - "Creating repeated strings with more than [" + MAX_REPEATED_LENGTH + "] bytes is not supported" + "Creating repeated strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported" ); } scratch.grow(repeatedLen); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java index 4b963b794aef0..eaa2b47601d96 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -114,24 +115,63 @@ public boolean foldable() { return str.foldable() && regex.foldable() && newStr.foldable(); } - @Evaluator(extraName = "Constant", warnExceptions = PatternSyntaxException.class) + @Evaluator(extraName = "Constant", warnExceptions = IllegalArgumentException.class) static BytesRef process(BytesRef str, @Fixed Pattern regex, BytesRef newStr) { if (str == null || regex == null || newStr == null) { return null; } - return new BytesRef(regex.matcher(str.utf8ToString()).replaceAll(newStr.utf8ToString())); + return safeReplace(str, regex, newStr); } - @Evaluator(warnExceptions = PatternSyntaxException.class) + @Evaluator(warnExceptions = IllegalArgumentException.class) static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) { if (str == null) { return null; } - if (regex == null || newStr == null) { return str; } - return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString())); + return safeReplace(str, Pattern.compile(regex.utf8ToString()), newStr); + } + + /** + * Executes a Replace without surpassing the memory limit. + */ + private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRef newStrBytesRef) { + String str = strBytesRef.utf8ToString(); + Matcher m = regex.matcher(str); + if (false == m.find()) { + return strBytesRef; + } + String newStr = newStrBytesRef.utf8ToString(); + + // Count potential groups (E.g. "$1") used in the replacement + int constantReplacementLength = newStr.length(); + int groupsInReplacement = 0; + for (int i = 0; i < newStr.length(); i++) { + if (newStr.charAt(i) == '$') { + groupsInReplacement++; + constantReplacementLength -= 2; + i++; + } + } + + // Initialize the buffer with an approximate size for the first replacement + StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8); + do { + int matchSize = m.end() - m.start(); + int potentialReplacementSize = constantReplacementLength + groupsInReplacement * matchSize; + int remainingStr = str.length() - m.end(); + if (result.length() + potentialReplacementSize + remainingStr > MAX_BYTES_REF_RESULT_SIZE) { + throw new IllegalArgumentException( + "Creating strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported" + ); + } + + m.appendReplacement(result, newStr); + } while (m.find()); + m.appendTail(result); + return new BytesRef(result.toString()); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java index eb7d61dff8d19..53b748647f61f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java @@ -269,6 +269,7 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru if (testCase.getExpectedBuildEvaluatorWarnings() != null) { assertWarnings(testCase.getExpectedBuildEvaluatorWarnings()); } + ExecutorService exec = Executors.newFixedThreadPool(threads); try { List> futures = new ArrayList<>(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java index 33a490fbde3be..95db9daa21283 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/RepeatStaticTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.compute.test.TestBlockFactory; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -45,14 +46,14 @@ public class RepeatStaticTests extends ESTestCase { public void testAlmostTooBig() { String str = randomAlphaOfLength(1); - int number = (int) Repeat.MAX_REPEATED_LENGTH; + int number = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE; String repeated = process(str, number); assertThat(repeated, equalTo(str.repeat(number))); } public void testTooBig() { String str = randomAlphaOfLength(1); - int number = (int) Repeat.MAX_REPEATED_LENGTH + 1; + int number = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + 1; String repeated = process(str, number); assertNull(repeated); assertWarnings( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java new file mode 100644 index 0000000000000..cac4b5acfa320 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceStaticTests.java @@ -0,0 +1,139 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BlockUtils; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.test.TestBlockFactory; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.junit.After; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.equalTo; + +/** + * These tests create rows that are 1MB in size. Test classes + * which extend AbstractScalarFunctionTestCase rerun test cases with + * many randomized inputs. Unfortunately, tests are run with + * limited memory, and instantiating many copies of these + * tests with large rows causes out of memory. + */ +public class ReplaceStaticTests extends ESTestCase { + + public void testLimit() { + int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10; + String text = randomAlphaOfLength((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10); + String regex = "^(.+)$"; + + // 10 times the original text + the remainder + String extraString = "a".repeat((int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10); + assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE; + String newStr = "$0$0$0$0$0$0$0$0$0$0" + extraString; + + String result = process(text, regex, newStr); + assertThat(result, equalTo(newStr.replaceAll("\\$\\d", text))); + } + + public void testTooBig() { + String textAndNewStr = randomAlphaOfLength((int) (ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10)); + String regex = "."; + + String result = process(textAndNewStr, regex, textAndNewStr); + assertNull(result); + assertWarnings( + "Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.", + "Line -1:-1: java.lang.IllegalArgumentException: " + + "Creating strings with more than [" + + ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + + "] bytes is not supported" + ); + } + + public void testTooBigWithGroups() { + int textLength = (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE / 10; + String text = randomAlphaOfLength(textLength); + String regex = "(.+)"; + + // 10 times the original text + the remainder + 1 + String extraString = "a".repeat(1 + (int) ScalarFunction.MAX_BYTES_REF_RESULT_SIZE % 10); + assert textLength * 10 + extraString.length() == ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + 1; + String newStr = "$0$1$0$1$0$1$0$1$0$1" + extraString; + + String result = process(text, regex, newStr); + assertNull(result); + assertWarnings( + "Line -1:-1: evaluation of [] failed, treating result as null. Only first 20 failures recorded.", + "Line -1:-1: java.lang.IllegalArgumentException: " + + "Creating strings with more than [" + + ScalarFunction.MAX_BYTES_REF_RESULT_SIZE + + "] bytes is not supported" + ); + } + + public String process(String text, String regex, String newStr) { + try ( + var eval = AbstractScalarFunctionTestCase.evaluator( + new Replace( + Source.EMPTY, + field("text", DataType.KEYWORD), + field("regex", DataType.KEYWORD), + field("newStr", DataType.KEYWORD) + ) + ).get(driverContext()); + Block block = eval.eval(row(List.of(new BytesRef(text), new BytesRef(regex), new BytesRef(newStr)))); + ) { + return block.isNull(0) ? null : ((BytesRef) BlockUtils.toJavaObject(block, 0)).utf8ToString(); + } + } + + /** + * The following fields and methods were borrowed from AbstractScalarFunctionTestCase + */ + private final List breakers = Collections.synchronizedList(new ArrayList<>()); + + private static Page row(List values) { + return new Page(1, BlockUtils.fromListRow(TestBlockFactory.getNonBreakingInstance(), values)); + } + + private static FieldAttribute field(String name, DataType type) { + return new FieldAttribute(Source.synthetic(name), name, new EsField(name, type, Map.of(), true)); + } + + private DriverContext driverContext() { + BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + breakers.add(breaker); + return new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); + } + + @After + public void allMemoryReleased() { + for (CircuitBreaker breaker : breakers) { + assertThat(breaker.getUsed(), equalTo(0L)); + } + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java index bb27be3f67d3e..2deb2677e0ce0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java @@ -78,6 +78,22 @@ public static Iterable parameters() { ) ); + // Groups + suppliers.add(fixedCase("Full group", "Cats are awesome", ".+", "<$0>", "")); + suppliers.add( + fixedCase("Nested groups", "A cat is great, a cat is awesome", "\\b([Aa] (\\w+)) is (\\w+)\\b", "$1$2", "A catcat, a catcat") + ); + suppliers.add( + fixedCase( + "Multiple groups", + "Cats are awesome", + "(\\w+) (.+)", + "$0 -> $1 and dogs $2", + "Cats are awesome -> Cats and dogs are awesome" + ) + ); + + // Errors suppliers.add(new TestCaseSupplier("syntax error", List.of(DataType.KEYWORD, DataType.KEYWORD, DataType.KEYWORD), () -> { String text = randomAlphaOfLength(10); String invalidRegex = "["; @@ -85,7 +101,7 @@ public static Iterable parameters() { return new TestCaseSupplier.TestCase( List.of( new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str"), - new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "oldStr"), + new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "regex"), new TestCaseSupplier.TypedData(new BytesRef(newStr), DataType.KEYWORD, "newStr") ), "ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",