From 94585c95e106bc3d00c43fa7140c0c13e308266f Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 19 Nov 2025 19:37:26 -0800 Subject: [PATCH 1/4] Common pattern validation for all REGEXP* functions for nicer error message Move the compilePattern() utility and wire it up to all the REGEXP* functions so any invalid regex pattern will return nicer error messages to users. Otherwise, a user would get a cryptic error message "Illegal character range near index.." --- .../query/expression/RegexpExprUtils.java | 51 +++++++++++++++++++ .../expression/RegexpExtractExprMacro.java | 26 +--------- .../query/expression/RegexpLikeExprMacro.java | 2 +- .../expression/RegexpReplaceExprMacro.java | 2 +- .../RegexpExtractExprMacroTest.java | 20 ++++++++ .../expression/RegexpLikeExprMacroTest.java | 17 +++++++ .../RegexpReplaceExprMacroTest.java | 17 +++++++ 7 files changed, 108 insertions(+), 27 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java new file mode 100644 index 000000000000..8a16602b4096 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +public class RegexpExprUtils +{ + /** + * Compile the provided pattern, or provide a nice error message if it cannot be compiled. + */ + public static Pattern compilePattern(@Nullable String pattern, String functionName) + { + try { + return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); + } + catch (PatternSyntaxException e) { + throw InvalidInput.exception( + e, + StringUtils.format( + "An invalid pattern [%s] was provided for the %s function, error: [%s]", + e.getPattern(), + functionName, + e.getMessage() + ) + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 6a690787735d..d7102936c147 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -19,8 +19,6 @@ package org.apache.druid.query.expression; -import org.apache.druid.error.InvalidInput; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; @@ -31,7 +29,6 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro { @@ -61,7 +58,7 @@ public Expr apply(final List args) } // Precompile the pattern. - final Pattern pattern = compilePattern((String) patternExpr.getLiteralValue()); + final Pattern pattern = RegexpExprUtils.compilePattern((String) patternExpr.getLiteralValue(), FN_NAME); final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue(); @@ -97,25 +94,4 @@ public ExpressionType getOutputType(InputBindingInspector inspector) } return new RegexpExtractExpr(args); } - - /** - * Compile the provided pattern, or provide a nice error message if it cannot be compiled. - */ - private static Pattern compilePattern(@Nullable String pattern) - { - try { - return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); - } - catch (PatternSyntaxException e) { - throw InvalidInput.exception( - e, - StringUtils.format( - "An invalid pattern [%s] was provided for the %s function, error: [%s]", - e.getPattern(), - FN_NAME, - e.getMessage() - ) - ); - } - } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java index 3073cee447d0..a2c8bdc3e9a1 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java @@ -86,7 +86,7 @@ private RegexpLikeExpr(List args) final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString)); + this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); } @Nonnull diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java index 835fdcd6c915..a839d4bda71b 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -96,7 +96,7 @@ private RegexpReplaceExpr(List args) final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = patternString != null ? Pattern.compile(patternString) : null; + this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); this.replacement = (String) replacementExpr.getLiteralValue(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java index 8a0212f33d5d..0149bb8f347e 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java @@ -19,9 +19,12 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +49,23 @@ public void testErrorFourArguments() eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpExtractPattern() + { + MatcherAssert.assertThat( + Assert.assertThrows(DruidException.class, () -> + eval( + "regexp_extract('pod-1234-node', '[ab-0-9]')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[ab-0-9]] was provided for the regexp_extract function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java index d6e31baad0b2..b1658d0839a9 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java @@ -19,9 +19,12 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +49,20 @@ public void testErrorThreeArguments() eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpLikePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_like('a', '[Ab-C]')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-C]] was provided for the regexp_like function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index 870bd5c8db8f..a3faa7ab751e 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -20,9 +20,12 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -40,6 +43,20 @@ public void testErrorZeroArguments() eval("regexp_replace()", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpReplacePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-cd-0]] was provided for the regexp_replace function," + + " error: [Illegal character range near index 7" + ) + ); + } + @Test public void testErrorFourArguments() { From 190fb91af74d42331d3b2f258541d940287a2037 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Wed, 19 Nov 2025 20:01:52 -0800 Subject: [PATCH 2/4] regexp_replace null semantics --- .../apache/druid/query/expression/RegexpReplaceExprMacro.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java index a839d4bda71b..e8afa62ff0c9 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -96,7 +96,7 @@ private RegexpReplaceExpr(List args) final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); + this.pattern = patternString != null ? RegexpExprUtils.compilePattern(patternString, FN_NAME) : null; this.replacement = (String) replacementExpr.getLiteralValue(); } From 4ad2fe9b9dd5f68be7991599a68b0c34a2d12e74 Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Thu, 20 Nov 2025 11:22:53 -0800 Subject: [PATCH 3/4] Interpolate functionName --- .../java/org/apache/druid/query/expression/RegexpExprUtils.java | 2 +- .../druid/query/expression/RegexpExtractExprMacroTest.java | 2 +- .../apache/druid/query/expression/RegexpLikeExprMacroTest.java | 2 +- .../druid/query/expression/RegexpReplaceExprMacroTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java index 8a16602b4096..0e0878045ad8 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java @@ -40,7 +40,7 @@ public static Pattern compilePattern(@Nullable String pattern, String functionNa throw InvalidInput.exception( e, StringUtils.format( - "An invalid pattern [%s] was provided for the %s function, error: [%s]", + "An invalid pattern [%s] was provided for the [%s] function, error: [%s]", e.getPattern(), functionName, e.getMessage() diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java index 0149bb8f347e..c88e3d77cd0f 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java @@ -60,7 +60,7 @@ public void testInvalidRegexpExtractPattern() ) ), DruidExceptionMatcher.invalidInput().expectMessageContains( - "An invalid pattern [[ab-0-9]] was provided for the regexp_extract function," + "An invalid pattern [[ab-0-9]] was provided for the [regexp_extract] function," + " error: [Illegal character range near index 4" ) ); diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java index b1658d0839a9..75ea9b6a21e3 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java @@ -57,7 +57,7 @@ public void testInvalidRegexpLikePattern() DruidException.class, () -> eval("regexp_like('a', '[Ab-C]')", InputBindings.nilBindings())), DruidExceptionMatcher.invalidInput().expectMessageContains( - "An invalid pattern [[Ab-C]] was provided for the regexp_like function," + "An invalid pattern [[Ab-C]] was provided for the [regexp_like] function," + " error: [Illegal character range near index 4" ) ); diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index a3faa7ab751e..57b7c3c9a890 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -51,7 +51,7 @@ public void testInvalidRegexpReplacePattern() DruidException.class, () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", InputBindings.nilBindings())), DruidExceptionMatcher.invalidInput().expectMessageContains( - "An invalid pattern [[Ab-cd-0]] was provided for the regexp_replace function," + "An invalid pattern [[Ab-cd-0]] was provided for the [regexp_replace] function," + " error: [Illegal character range near index 7" ) ); From 9ee19a8bfecd4160d846c3c490513efe96cfed1e Mon Sep 17 00:00:00 2001 From: Abhishek Balaji Radhakrishnan Date: Thu, 20 Nov 2025 12:08:12 -0800 Subject: [PATCH 4/4] One more test fix for interpolation --- .../java/org/apache/druid/sql/calcite/CalciteQueryTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 269f58633741..d86f6f941341 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -8411,7 +8411,7 @@ public void testRegexpExtractWithBadRegexPattern() + " REGEXP_EXTRACT(dim1, '^(.))', 1)\n" + "FROM foo", DruidExceptionMatcher.invalidInput().expectMessageContains( - "An invalid pattern [^(.))] was provided for the regexp_extract function, " + + "An invalid pattern [^(.))] was provided for the [regexp_extract] function, " + "error: [Unmatched closing ')' near index 3\n^(.))\n ^]" ) );