diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java new file mode 100644 index 000000000000..0e0878045ad8 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +public class RegexpExprUtils +{ + /** + * Compile the provided pattern, or provide a nice error message if it cannot be compiled. + */ + public static Pattern compilePattern(@Nullable String pattern, String functionName) + { + try { + return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); + } + catch (PatternSyntaxException e) { + throw InvalidInput.exception( + e, + StringUtils.format( + "An invalid pattern [%s] was provided for the [%s] function, error: [%s]", + e.getPattern(), + functionName, + e.getMessage() + ) + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 6a690787735d..d7102936c147 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -19,8 +19,6 @@ package org.apache.druid.query.expression; -import org.apache.druid.error.InvalidInput; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; @@ -31,7 +29,6 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro { @@ -61,7 +58,7 @@ public Expr apply(final List args) } // Precompile the pattern. - final Pattern pattern = compilePattern((String) patternExpr.getLiteralValue()); + final Pattern pattern = RegexpExprUtils.compilePattern((String) patternExpr.getLiteralValue(), FN_NAME); final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue(); @@ -97,25 +94,4 @@ public ExpressionType getOutputType(InputBindingInspector inspector) } return new RegexpExtractExpr(args); } - - /** - * Compile the provided pattern, or provide a nice error message if it cannot be compiled. - */ - private static Pattern compilePattern(@Nullable String pattern) - { - try { - return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); - } - catch (PatternSyntaxException e) { - throw InvalidInput.exception( - e, - StringUtils.format( - "An invalid pattern [%s] was provided for the %s function, error: [%s]", - e.getPattern(), - FN_NAME, - e.getMessage() - ) - ); - } - } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java index 3073cee447d0..a2c8bdc3e9a1 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java @@ -86,7 +86,7 @@ private RegexpLikeExpr(List args) final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString)); + this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); } @Nonnull diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java index 835fdcd6c915..e8afa62ff0c9 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -96,7 +96,7 @@ private RegexpReplaceExpr(List args) final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = patternString != null ? Pattern.compile(patternString) : null; + this.pattern = patternString != null ? RegexpExprUtils.compilePattern(patternString, FN_NAME) : null; this.replacement = (String) replacementExpr.getLiteralValue(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java index 8a0212f33d5d..c88e3d77cd0f 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java @@ -19,9 +19,12 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +49,23 @@ public void testErrorFourArguments() eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpExtractPattern() + { + MatcherAssert.assertThat( + Assert.assertThrows(DruidException.class, () -> + eval( + "regexp_extract('pod-1234-node', '[ab-0-9]')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[ab-0-9]] was provided for the [regexp_extract] function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java index d6e31baad0b2..75ea9b6a21e3 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java @@ -19,9 +19,12 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +49,20 @@ public void testErrorThreeArguments() eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpLikePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_like('a', '[Ab-C]')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-C]] was provided for the [regexp_like] function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index 870bd5c8db8f..57b7c3c9a890 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -20,9 +20,12 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -40,6 +43,20 @@ public void testErrorZeroArguments() eval("regexp_replace()", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpReplacePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-cd-0]] was provided for the [regexp_replace] function," + + " error: [Illegal character range near index 7" + ) + ); + } + @Test public void testErrorFourArguments() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 269f58633741..d86f6f941341 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -8411,7 +8411,7 @@ public void testRegexpExtractWithBadRegexPattern() + " REGEXP_EXTRACT(dim1, '^(.))', 1)\n" + "FROM foo", DruidExceptionMatcher.invalidInput().expectMessageContains( - "An invalid pattern [^(.))] was provided for the regexp_extract function, " + + "An invalid pattern [^(.))] was provided for the [regexp_extract] function, " + "error: [Unmatched closing ')' near index 3\n^(.))\n ^]" ) );