elastic · ivancea · May 20, 2025 · May 8, 2025 · May 8, 2025 · May 8, 2025
diff --git a/docs/changelog/127924.yaml b/docs/changelog/127924.yaml
@@ -0,0 +1,5 @@
+pr: 127924
+summary: Limit Replace function memory usage
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/.../elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java b/.../elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceConstantEvaluator.java
diff --git a/...ated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java b/...ated/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceEvaluator.java
diff --git a/...src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java b/...src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/Replace.java
@@ -28,9 +28,11 @@
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
+import static org.elasticsearch.common.unit.ByteSizeUnit.MB;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND;
 import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.THIRD;
@@ -39,6 +41,8 @@
 public class Replace extends EsqlScalarFunction {
     public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Replace", Replace::new);
 
+    static final long MAX_RESULT_LENGTH = MB.toBytes(1);
+
     private final Expression str;
     private final Expression regex;
     private final Expression newStr;
@@ -121,15 +125,15 @@ public boolean foldable() {
         return str.foldable() && regex.foldable() && newStr.foldable();
     }
 
-    @Evaluator(extraName = "Constant", warnExceptions = PatternSyntaxException.class)
+    @Evaluator(extraName = "Constant", warnExceptions = IllegalArgumentException.class)
     static BytesRef process(BytesRef str, @Fixed Pattern regex, BytesRef newStr) {
         if (str == null || regex == null || newStr == null) {
             return null;
         }
-        return new BytesRef(regex.matcher(str.utf8ToString()).replaceAll(newStr.utf8ToString()));
+        return safeReplace(str, regex, newStr);
     }
 
-    @Evaluator(warnExceptions = PatternSyntaxException.class)
+    @Evaluator(warnExceptions = IllegalArgumentException.class)
     static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
         if (str == null) {
             return null;
@@ -138,7 +142,47 @@ static BytesRef process(BytesRef str, BytesRef regex, BytesRef newStr) {
         if (regex == null || newStr == null) {
             return str;
         }
-        return new BytesRef(str.utf8ToString().replaceAll(regex.utf8ToString(), newStr.utf8ToString()));
+        return safeReplace(str, Pattern.compile(regex.utf8ToString()), newStr);
+    }
+
+    /**
+     * Executes a Replace without surpassing the memory limit.
+     * <p>
+     *     The limit is compared against the length in chars of the string being built, both while the replacements
+     *     are appended and once more after the unmatched tail is added, so the outcome doesn't depend on where in
+     *     {@code str} the matches are located.
+     * </p>
+     *
+     * @return {@code strBytesRef} itself if the regex doesn't match; a new {@link BytesRef} with the replacements otherwise
+     * @throws IllegalArgumentException if the resulting string surpasses {@link #MAX_RESULT_LENGTH}
+     */
+    private static BytesRef safeReplace(BytesRef strBytesRef, Pattern regex, BytesRef newStrBytesRef) {
+        String str = strBytesRef.utf8ToString();
+        Matcher m = regex.matcher(str);
+        if (false == m.find()) {
+            return strBytesRef;
+        }
+        String newStr = newStrBytesRef.utf8ToString();
+        // Initialize the buffer with an approximate size for the first replacement
+        StringBuilder result = new StringBuilder(str.length() + newStr.length() + 8);
+        do {
+            m.appendReplacement(result, newStr);
+            // Fail fast: stop as soon as the partial result is already too long
+            checkResultLength(result);
+        } while (m.find());
+        m.appendTail(result);
+        // The tail may push the result over the limit even if no replacement did
+        checkResultLength(result);
+        return new BytesRef(result.toString());
+    }
+
+    /**
+     * Throws if {@code result} is longer than {@link #MAX_RESULT_LENGTH}.
+     */
+    private static void checkResultLength(StringBuilder result) {
+        if (result.length() > MAX_RESULT_LENGTH) {
+            throw new IllegalArgumentException("Creating strings with more than [" + MAX_RESULT_LENGTH + "] bytes is not supported");
+        }
     }
 
     @Override

diff --git a/...java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java b/...java/org/elasticsearch/xpack/esql/expression/function/AbstractScalarFunctionTestCase.java
@@ -42,6 +42,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
+import static org.elasticsearch.common.unit.ByteSizeUnit.GB;
 import static org.elasticsearch.xpack.esql.EsqlTestUtils.unboundLogicalOptimizerContext;
 import static org.hamcrest.Matchers.either;
 import static org.hamcrest.Matchers.equalTo;
@@ -304,11 +305,17 @@ public final void testEvaluateInManyThreads() throws ExecutionException, Interru
         if (testCase.getExpectedBuildEvaluatorWarnings() != null) {
             assertWarnings(testCase.getExpectedBuildEvaluatorWarnings());
         }
+
+        List<Object> simpleData = testCase.getDataValues();
+        // Ensure we don't run this test with too much data that could take too long to process.
+        // The calculation "ramUsed * count" is just a hint of how much data the function will process,
+        // and the limit is arbitrary.
+        assumeTrue("Input data too big", row(simpleData).ramBytesUsedByBlocks() * count < GB.toBytes(1));
+
         ExecutorService exec = Executors.newFixedThreadPool(threads);
         try {
             List<Future<?>> futures = new ArrayList<>();
             for (int i = 0; i < threads; i++) {
-                List<Object> simpleData = testCase.getDataValues();
                 Page page = row(simpleData);
 
                 futures.add(exec.submit(() -> {

diff --git a/...est/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java b/...est/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/ReplaceTests.java
@@ -22,6 +22,7 @@
 import java.util.function.Supplier;
 import java.util.regex.PatternSyntaxException;
 
+import static org.elasticsearch.xpack.esql.expression.function.scalar.string.Replace.MAX_RESULT_LENGTH;
 import static org.hamcrest.Matchers.equalTo;
 
 public class ReplaceTests extends AbstractScalarFunctionTestCase {
@@ -85,7 +86,7 @@ public static Iterable<Object[]> parameters() {
             return new TestCaseSupplier.TestCase(
                 List.of(
                     new TestCaseSupplier.TypedData(new BytesRef(text), DataType.KEYWORD, "str"),
-                    new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "oldStr"),
+                    new TestCaseSupplier.TypedData(new BytesRef(invalidRegex), DataType.KEYWORD, "regex"),
                     new TestCaseSupplier.TypedData(new BytesRef(newStr), DataType.KEYWORD, "newStr")
                 ),
                 "ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",
@@ -103,6 +104,27 @@ public static Iterable<Object[]> parameters() {
                     "Unclosed character class near index 0\n[\n^".replaceAll("\n", System.lineSeparator())
                 );
         }));
+
+        suppliers.add(new TestCaseSupplier("result too big", List.of(DataType.KEYWORD, DataType.KEYWORD, DataType.KEYWORD), () -> {
+            String textAndNewStr = randomAlphaOfLength((int) (MAX_RESULT_LENGTH / 10));
+            String regex = ".";
+            return new TestCaseSupplier.TestCase(
+                List.of(
+                    new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "str"),
+                    new TestCaseSupplier.TypedData(new BytesRef(regex), DataType.KEYWORD, "regex"),
+                    new TestCaseSupplier.TypedData(new BytesRef(textAndNewStr), DataType.KEYWORD, "newStr")
+                ),
+                "ReplaceEvaluator[str=Attribute[channel=0], regex=Attribute[channel=1], newStr=Attribute[channel=2]]",
+                DataType.KEYWORD,
+                equalTo(null)
+            ).withWarning("Line 1:1: evaluation of [source] failed, treating result as null. Only first 20 failures recorded.")
+                .withWarning(
+                    "Line 1:1: java.lang.IllegalArgumentException: "
+                        + "Creating strings with more than ["
+                        + MAX_RESULT_LENGTH
+                        + "] bytes is not supported"
+                );
+        }));
         return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(false, suppliers);
     }