elastic · bpintea · May 30, 2025 · May 23, 2025 · May 23, 2025 · May 23, 2025
diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/EvalBenchmark.java
@@ -48,9 +48,9 @@
 import org.elasticsearch.xpack.esql.expression.function.scalar.math.RoundTo;
 import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvMin;
 import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
-import org.elasticsearch.xpack.esql.expression.function.scalar.string.RLike;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToLower;
 import org.elasticsearch.xpack.esql.expression.function.scalar.string.ToUpper;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.RLike;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan;

diff --git a/docs/changelog/128393.yaml b/docs/changelog/128393.yaml
@@ -0,0 +1,6 @@
+pr: 128393
+summary: Pushdown constructs doing case-insensitive regexes
+area: ES|QL
+type: enhancement
+issues:
+ - 127479
diff --git a/docs/reference/query-languages/esql/_snippets/functions/appendix/values.md b/docs/reference/query-languages/esql/_snippets/functions/appendix/values.md
diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/values.json b/docs/reference/query-languages/esql/kibana/definition/functions/values.json
diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/values.md b/docs/reference/query-languages/esql/kibana/docs/functions/values.md
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -266,6 +266,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ML_INFERENCE_HUGGING_FACE_RERANK_ADDED = def(9_080_0_00);
     public static final TransportVersion SETTINGS_IN_DATA_STREAMS_DRY_RUN = def(9_081_0_00);
     public static final TransportVersion ML_INFERENCE_SAGEMAKER_CHAT_COMPLETION = def(9_082_0_00);
+    public static final TransportVersion ESQL_REGEX_MATCH_WITH_CASE_INSENSITIVITY = def(9_083_0_00);
     /*
      * STOP! READ THIS FIRST! No, really,
      *        ____ _____ ___  ____  _        ____  _____    _    ____    _____ _   _ ___ ____    _____ ___ ____  ____ _____ _

diff --git a/...a/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java b/...a/org/elasticsearch/xpack/esql/core/expression/predicate/regex/AbstractStringPattern.java
@@ -16,11 +16,11 @@ public abstract class AbstractStringPattern implements StringPattern {
 
     private Automaton automaton;
 
-    public abstract Automaton createAutomaton();
+    public abstract Automaton createAutomaton(boolean ignoreCase); // ignoreCase: request a case-insensitive automaton
 
     private Automaton automaton() {
         if (automaton == null) {
-            automaton = createAutomaton();
+            automaton = createAutomaton(false); // the lazily built, cached automaton is always the case-sensitive one
         }
         return automaton;
     }

diff --git a/...ore/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLike.java b/...ore/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLike.java
diff --git a/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java b/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/RLikePattern.java
@@ -21,9 +21,10 @@ public RLikePattern(String regexpPattern) {
     }
 
     @Override
-    public Automaton createAutomaton() {
+    public Automaton createAutomaton(boolean ignoreCase) { // builds the determinized automaton of regexpPattern
+        int matchFlags = ignoreCase ? RegExp.CASE_INSENSITIVE : 0; // 0: no match flag set, i.e. case-sensitive matching
         return Operations.determinize(
-            new RegExp(regexpPattern, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT).toAutomaton(),
+            new RegExp(regexpPattern, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, matchFlags).toAutomaton(),
             Operations.DEFAULT_DETERMINIZE_WORK_LIMIT
         );
     }

diff --git a/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardLike.java b/.../main/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardLike.java
diff --git a/...in/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java b/...in/java/org/elasticsearch/xpack/esql/core/expression/predicate/regex/WildcardPattern.java
@@ -10,10 +10,13 @@
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
+import org.apache.lucene.util.automaton.RegExp;
 import org.elasticsearch.xpack.esql.core.util.StringUtils;
 
 import java.util.Objects;
 
+import static org.elasticsearch.xpack.esql.core.util.StringUtils.luceneWildcardToRegExp;
+
 /**
  * Similar to basic regex, supporting '?' wildcard for single character (same as regex  ".")
  * and '*' wildcard for multiple characters (same as regex ".*")
@@ -37,8 +40,14 @@ public String pattern() {
     }
 
     @Override
-    public Automaton createAutomaton() {
-        return WildcardQuery.toAutomaton(new Term(null, wildcard), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+    public Automaton createAutomaton(boolean ignoreCase) {
+        if (ignoreCase == false) {
+            return WildcardQuery.toAutomaton(new Term(null, wildcard), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+        }
+        // ignoring the case: translate the wildcard to a RegExp, which is then compiled with the CASE_INSENSITIVE match flag
+        String regex = luceneWildcardToRegExp(wildcard);
+        RegExp caseInsensitive = new RegExp(regex, RegExp.ALL | RegExp.DEPRECATED_COMPLEMENT, RegExp.CASE_INSENSITIVE);
+        return Operations.determinize(caseInsensitive.toAutomaton(), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
     }
 
     @Override

diff --git a/...ck/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java b/...ck/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java
@@ -7,6 +7,7 @@
 package org.elasticsearch.xpack.esql.core.util;
 
 import org.apache.lucene.document.InetAddressPoint;
+import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spell.LevenshteinDistance;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
@@ -178,6 +179,44 @@ public static String wildcardToJavaPattern(String pattern, char escape) {
         return regex.toString();
     }
 
+    /**
+     * Translates a Lucene wildcard pattern to a Lucene RegExp one. Chars that are literals in the wildcard, but reserved in
+     * RegExp (including the optional syntax, like {@code #} or {@code ~}), are escaped, so that both match the same strings.
+     * @param wildcard Lucene wildcard pattern
+     * @return Lucene RegExp pattern
+     */
+    public static String luceneWildcardToRegExp(String wildcard) {
+        StringBuilder regex = new StringBuilder();
+
+        for (int i = 0, wcLen = wildcard.length(); i < wcLen; i++) {
+            char c = wildcard.charAt(i); // this will work chunking through Unicode as long as all values matched are ASCII
+            switch (c) {
+                case WildcardQuery.WILDCARD_STRING -> regex.append(".*");
+                case WildcardQuery.WILDCARD_CHAR -> regex.append(".");
+                case WildcardQuery.WILDCARD_ESCAPE -> {
+                    if (i + 1 < wcLen) {
+                        // consume the wildcard escaping: the next char is a literal, whatever it is
+                        char next = wildcard.charAt(++i);
+                        // keep the escaping only if RegExp reserves the char (`*`, `?`, `\` included); drop it for plain chars
+                        if ("\"#&()*+.<>?@[\\]{|}~".indexOf(next) >= 0) {
+                            regex.append("\\");
+                        }
+                        regex.append(next);
+                    } else {
+                        // "else fallthru, lenient parsing with a trailing \" -- according to WildcardQuery#toAutomaton
+                        regex.append("\\\\");
+                    }
+                }
+                // unescaped wildcard literals that RegExp reserves, optional syntax included (`^` and `$` kept escaped as before)
+                case '"', '#', '$', '&', '(', ')', '+', '.', '<', '>', '@', '[', ']', '^', '{', '|', '}', '~' -> regex.append("\\")
+                    .append(c);
+                default -> regex.append(c);
+            }
+        }
+
+        return regex.toString();
+    }
+
     /**
      * Translates a like pattern to a Lucene wildcard.
      * This methods pays attention to the custom escape char which gets converted into \ (used by Lucene).

diff --git a/...ugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/StringUtilsTests.java b/...ugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/StringUtilsTests.java
@@ -9,7 +9,9 @@
 
 import org.elasticsearch.test.ESTestCase;
 
+import static org.elasticsearch.xpack.esql.core.util.StringUtils.luceneWildcardToRegExp;
 import static org.elasticsearch.xpack.esql.core.util.StringUtils.wildcardToJavaPattern;
+import static org.hamcrest.Matchers.is;
 
 public class StringUtilsTests extends ESTestCase {
 
@@ -55,4 +57,21 @@ public void testWildcard() {
     public void testEscapedEscape() {
         assertEquals("^\\\\\\\\$", wildcardToJavaPattern("\\\\\\\\", '\\'));
     }
+
+    public void testLuceneWildcardToRegExp() { // checks the wildcard -> RegExp translation: operators, escapes, reserved chars
+        assertThat(luceneWildcardToRegExp(""), is(""));
+        assertThat(luceneWildcardToRegExp("*"), is(".*")); // `*`: any string
+        assertThat(luceneWildcardToRegExp("?"), is(".")); // `?`: any single char
+        assertThat(luceneWildcardToRegExp("\\\\"), is("\\\\")); // an escaped backslash stays an escaped backslash
+        assertThat(luceneWildcardToRegExp("foo?bar"), is("foo.bar"));
+        assertThat(luceneWildcardToRegExp("foo*bar"), is("foo.*bar"));
+        assertThat(luceneWildcardToRegExp("foo\\\\bar"), is("foo\\\\bar"));
+        assertThat(luceneWildcardToRegExp("foo*bar?baz"), is("foo.*bar.baz"));
+        assertThat(luceneWildcardToRegExp("foo\\*bar"), is("foo\\*bar")); // escaped wildcard operators stay escaped
+        assertThat(luceneWildcardToRegExp("foo\\?bar\\?"), is("foo\\?bar\\?"));
+        assertThat(luceneWildcardToRegExp("foo\\?bar\\"), is("foo\\?bar\\\\")); // a trailing lone backslash is taken literally
+        assertThat(luceneWildcardToRegExp("[](){}^$.|+"), is("\\[\\]\\(\\)\\{\\}\\^\\$\\.\\|\\+")); // plain in wildcard, escaped in RegExp
+        assertThat(luceneWildcardToRegExp("foo\\\uD83D\uDC14bar"), is("foo\uD83D\uDC14bar")); // unneeded escaping (of a surrogate pair) is dropped
+        assertThat(luceneWildcardToRegExp("foo\uD83D\uDC14bar"), is("foo\uD83D\uDC14bar")); // surrogate pairs pass through unchanged
+    }
 }
diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/util/TestUtils.java
@@ -13,6 +13,7 @@
 import org.elasticsearch.xpack.esql.core.type.DataType;
 import org.elasticsearch.xpack.esql.core.type.EsField;
 
+import java.util.Locale;
 import java.util.regex.Pattern;
 
 import static java.util.Collections.emptyMap;
@@ -61,4 +62,15 @@ public static FieldAttribute getFieldAttribute(String name, DataType dataType) {
     public static String stripThrough(String input) {
         return WS_PATTERN.matcher(input).replaceAll(StringUtils.EMPTY);
     }
+
+    /** Returns the input re-cased in chunks of at most floor(sqrt(length)) chars, each randomly made all lower or all upper case. */
+    public static String randomCasing(String input) {
+        int inputLen = input.length(), step = (int) Math.sqrt(inputLen); // step is 0 only for an empty input
+        StringBuilder sb = new StringBuilder(inputLen);
+        for (int from = 0; from < inputLen; from += step) {
+            String chunk = input.substring(from, Math.min(from + step, inputLen));
+            sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
+        }
+        return sb.toString();
+    }
 }
diff --git a/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/...plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java
@@ -63,8 +63,8 @@
 import org.elasticsearch.xpack.esql.core.util.DateUtils;
 import org.elasticsearch.xpack.esql.core.util.StringUtils;
 import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
-import org.elasticsearch.xpack.esql.expression.function.scalar.string.RLike;
-import org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLike;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.RLike;
+import org.elasticsearch.xpack.esql.expression.function.scalar.string.regex.WildcardLike;
 import org.elasticsearch.xpack.esql.expression.predicate.Range;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
 import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.GreaterThan;