diff --git a/docs/changelog/127924.yaml b/docs/changelog/127924.yaml new file mode 100644 index 0000000000000..4aaaa710563ab --- /dev/null +++ b/docs/changelog/127924.yaml @@ -0,0 +1,5 @@ +pr: 127924 +summary: Limit Replace function memory usage +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java index 09359943684b5..2a59b21a4c022 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/scalar/ScalarFunction.java @@ -13,6 +13,7 @@ import java.util.List; import static java.util.Collections.emptyList; +import static org.elasticsearch.common.unit.ByteSizeUnit.MB; /** * A {@code ScalarFunction} is a {@code Function} that takes values from some @@ -22,6 +23,14 @@ */ public abstract class ScalarFunction extends Function { + /** + * Limit for the BytesRef return of functions. + *
+ * To be used when there's no CircuitBreaking, as an arbitrary measure to limit memory usage. + *
+ */ + public static final long MAX_BYTES_REF_RESULT_SIZE = MB.toBytes(1); + protected ScalarFunction(Source source) { super(source, emptyList()); } diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java index a5aa37a0db56e..f63966810a5fe 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java @@ -8,7 +8,6 @@ import java.lang.Override; import java.lang.String; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; @@ -92,7 +91,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlo } try { result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), this.regex, newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } @@ -109,7 +108,7 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, position: for (int p = 0; p < positionCount; p++) { try { result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), this.regex, newStrVector.getBytesRef(p, newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } diff --git 
a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java index 7a7a947453d0a..6eb3aa898b79c 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java @@ -7,7 +7,6 @@ import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; -import java.util.regex.PatternSyntaxException; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; @@ -111,7 +110,7 @@ public BytesRefBlock eval(int positionCount, BytesRefBlock strBlock, BytesRefBlo } try { result.appendBytesRef(Replace.process(strBlock.getBytesRef(strBlock.getFirstValueIndex(p), strScratch), regexBlock.getBytesRef(regexBlock.getFirstValueIndex(p), regexScratch), newStrBlock.getBytesRef(newStrBlock.getFirstValueIndex(p), newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } @@ -129,7 +128,7 @@ public BytesRefBlock eval(int positionCount, BytesRefVector strVector, BytesRefV position: for (int p = 0; p < positionCount; p++) { try { result.appendBytesRef(Replace.process(strVector.getBytesRef(p, strScratch), regexVector.getBytesRef(p, regexScratch), newStrVector.getBytesRef(p, newStrScratch))); - } catch (PatternSyntaxException e) { + } catch (IllegalArgumentException e) { warnings().registerException(e); result.appendNull(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java index 363991d1556f1..faa7ddbf63266 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Repeat.java @@ -30,7 +30,6 @@ import java.util.Arrays; import java.util.List; -import static org.elasticsearch.common.unit.ByteSizeUnit.MB; import static org.elasticsearch.compute.ann.Fixed.Scope.THREAD_LOCAL; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; @@ -40,8 +39,6 @@ public class Repeat extends EsqlScalarFunction implements OptionalArgument { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Repeat", Repeat::new); - static final long MAX_REPEATED_LENGTH = MB.toBytes(1); - private final Expression str; private final Expression number; @@ -123,9 +120,9 @@ static BytesRef process( static BytesRef processInner(BreakingBytesRefBuilder scratch, BytesRef str, int number) { int repeatedLen = str.length * number; - if (repeatedLen > MAX_REPEATED_LENGTH) { + if (repeatedLen > MAX_BYTES_REF_RESULT_SIZE) { throw new IllegalArgumentException( - "Creating repeated strings with more than [" + MAX_REPEATED_LENGTH + "] bytes is not supported" + "Creating repeated strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported" ); } scratch.grow(repeatedLen); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java index 4b963b794aef0..eaa2b47601d96 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java
@@ -26,6 +26,7 @@
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -114,24 +115,72 @@ public boolean foldable() {
         return str.foldable() && regex.foldable() && newStr.foldable();
     }
 
-    @Evaluator(extraName = "Constant", warnExceptions = PatternSyntaxException.class)
+    @Evaluator(extraName = "Constant", warnExceptions = IllegalArgumentException.class)
     static BytesRef process(BytesRef str, @Fixed Pattern regex, BytesRef newStr) {
         if (str == null || regex == null || newStr == null) {
             return null;
         }
-        return new BytesRef(regex.matcher(str.utf8ToString()).replaceAll(newStr.utf8ToString()));
+        return safeReplace(str, regex, newStr);
     }
 
-    @Evaluator(warnExceptions = PatternSyntaxException.class)
+    @Evaluator(warnExceptions = IllegalArgumentException.class)
     static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
         if (str == null) {
             return null;
         }
-
         if (regex == null || newStr == null) {
             return str;
         }
-        return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString()));
+        return safeReplace(str, Pattern.compile(regex.utf8ToString()), newStr);
+    }
+
+    /**
+     * Executes a Replace without surpassing the memory limit.
+     * <p>
+     *     Before each replacement is appended, the final length is estimated as: what was already built, plus the
+     *     replacement (assuming every {@code $} reference expands to the whole match), plus the rest of the input.
+     *     Lengths are counted in UTF-16 chars, so the estimate only approximates the UTF-8 size of the result.
+     * </p>
+     *
+     * @return {@code strBytesRef} itself if the regex never matches, otherwise a new {@link BytesRef} with the replacements
+     * @throws IllegalArgumentException if the estimated length exceeds {@code MAX_BYTES_REF_RESULT_SIZE}
+     */
+    private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRef newStrBytesRef) {
+        String str = strBytesRef.utf8ToString();
+        Matcher m = regex.matcher(str);
+        if (false == m.find()) {
+            return strBytesRef;
+        }
+        String newStr = newStrBytesRef.utf8ToString();
+
+        // Count potential groups (E.g. "$1") used in the replacement
+        int constantReplacementLength = newStr.length();
+        int groupsInReplacement = 0;
+        for (int i = 0; i < newStr.length(); i++) {
+            if (newStr.charAt(i) == '$') {
+                groupsInReplacement++;
+                constantReplacementLength -= 2;
+                i++;
+            }
+        }
+
+        // Initialize the buffer with an approximate size for the first replacement
+        StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8);
+        do {
+            // Estimate in long: groupsInReplacement * matchSize may not fit in an int, and a wrapped value would pass the check
+            long matchSize = m.end() - m.start();
+            long potentialReplacementSize = constantReplacementLength + groupsInReplacement * matchSize;
+            long remainingStr = str.length() - m.end();
+            if (result.length() + potentialReplacementSize + remainingStr > MAX_BYTES_REF_RESULT_SIZE) {
+                throw new IllegalArgumentException(
+                    "Creating strings with more than [" + MAX_BYTES_REF_RESULT_SIZE + "] bytes is not supported"
+                );
+            }
+
+            m.appendReplacement(result, newStr);
+        } while (m.find());
+        m.appendTail(result);
+        return new BytesRef(result.toString());
     }
 
     @Override
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
index eb7d61dff8d19..53b748647f61f 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
@@ -269,6 +269,7 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru
         if (testCase.getExpectedBuildEvaluatorWarnings() != null) {
             assertWarnings(testCase.getExpectedBuildEvaluatorWarnings());
         }
+
         ExecutorService exec = Executors.newFixedThreadPool(threads);
         try {
             List