Skip to content

Commit c89ec0c

Browse files
authored
Common pattern validation for all regexp* functions (#18762)
Nicer user-facing error messages for invalid patterns used in the regexp* functions. Move existing compilePattern() utility to a shared place so the functions use them.
1 parent be10abd commit c89ec0c

File tree

8 files changed

+109
-28
lines changed

8 files changed

+109
-28
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.druid.query.expression;
21+
22+
import org.apache.druid.error.InvalidInput;
23+
import org.apache.druid.java.util.common.StringUtils;
24+
25+
import javax.annotation.Nullable;
26+
import java.util.regex.Pattern;
27+
import java.util.regex.PatternSyntaxException;
28+
29+
public class RegexpExprUtils
30+
{
31+
/**
32+
* Compile the provided pattern, or provide a nice error message if it cannot be compiled.
33+
*/
34+
public static Pattern compilePattern(@Nullable String pattern, String functionName)
35+
{
36+
try {
37+
return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
38+
}
39+
catch (PatternSyntaxException e) {
40+
throw InvalidInput.exception(
41+
e,
42+
StringUtils.format(
43+
"An invalid pattern [%s] was provided for the [%s] function, error: [%s]",
44+
e.getPattern(),
45+
functionName,
46+
e.getMessage()
47+
)
48+
);
49+
}
50+
}
51+
}

processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919

2020
package org.apache.druid.query.expression;
2121

22-
import org.apache.druid.error.InvalidInput;
23-
import org.apache.druid.java.util.common.StringUtils;
2422
import org.apache.druid.math.expr.Expr;
2523
import org.apache.druid.math.expr.ExprEval;
2624
import org.apache.druid.math.expr.ExprMacroTable;
@@ -31,7 +29,6 @@
3129
import java.util.List;
3230
import java.util.regex.Matcher;
3331
import java.util.regex.Pattern;
34-
import java.util.regex.PatternSyntaxException;
3532

3633
public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro
3734
{
@@ -61,7 +58,7 @@ public Expr apply(final List<Expr> args)
6158
}
6259

6360
// Precompile the pattern.
64-
final Pattern pattern = compilePattern((String) patternExpr.getLiteralValue());
61+
final Pattern pattern = RegexpExprUtils.compilePattern((String) patternExpr.getLiteralValue(), FN_NAME);
6562

6663
final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue();
6764

@@ -97,25 +94,4 @@ public ExpressionType getOutputType(InputBindingInspector inspector)
9794
}
9895
return new RegexpExtractExpr(args);
9996
}
100-
101-
/**
102-
* Compile the provided pattern, or provide a nice error message if it cannot be compiled.
103-
*/
104-
private static Pattern compilePattern(@Nullable String pattern)
105-
{
106-
try {
107-
return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
108-
}
109-
catch (PatternSyntaxException e) {
110-
throw InvalidInput.exception(
111-
e,
112-
StringUtils.format(
113-
"An invalid pattern [%s] was provided for the %s function, error: [%s]",
114-
e.getPattern(),
115-
FN_NAME,
116-
e.getMessage()
117-
)
118-
);
119-
}
120-
}
12197
}

processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ private RegexpLikeExpr(List<Expr> args)
8686

8787
final String patternString = (String) patternExpr.getLiteralValue();
8888
this.arg = args.get(0);
89-
this.pattern = Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString));
89+
this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME);
9090
}
9191

9292
@Nonnull

processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ private RegexpReplaceExpr(List<Expr> args)
9696
final String patternString = (String) patternExpr.getLiteralValue();
9797

9898
this.arg = args.get(0);
99-
this.pattern = patternString != null ? Pattern.compile(patternString) : null;
99+
this.pattern = patternString != null ? RegexpExprUtils.compilePattern(patternString, FN_NAME) : null;
100100
this.replacement = (String) replacementExpr.getLiteralValue();
101101
}
102102

processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919

2020
package org.apache.druid.query.expression;
2121

22+
import org.apache.druid.error.DruidException;
23+
import org.apache.druid.error.DruidExceptionMatcher;
2224
import org.apache.druid.math.expr.ExprEval;
2325
import org.apache.druid.math.expr.ExpressionType;
2426
import org.apache.druid.math.expr.InputBindings;
27+
import org.hamcrest.MatcherAssert;
2528
import org.junit.Assert;
2629
import org.junit.Test;
2730

@@ -46,6 +49,23 @@ public void testErrorFourArguments()
4649
eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings());
4750
}
4851

52+
@Test
53+
public void testInvalidRegexpExtractPattern()
54+
{
55+
MatcherAssert.assertThat(
56+
Assert.assertThrows(DruidException.class, () ->
57+
eval(
58+
"regexp_extract('pod-1234-node', '[ab-0-9]')",
59+
InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo")
60+
)
61+
),
62+
DruidExceptionMatcher.invalidInput().expectMessageContains(
63+
"An invalid pattern [[ab-0-9]] was provided for the [regexp_extract] function,"
64+
+ " error: [Illegal character range near index 4"
65+
)
66+
);
67+
}
68+
4969
@Test
5070
public void testMatch()
5171
{

processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919

2020
package org.apache.druid.query.expression;
2121

22+
import org.apache.druid.error.DruidException;
23+
import org.apache.druid.error.DruidExceptionMatcher;
2224
import org.apache.druid.math.expr.ExprEval;
2325
import org.apache.druid.math.expr.ExpressionType;
2426
import org.apache.druid.math.expr.InputBindings;
27+
import org.hamcrest.MatcherAssert;
2528
import org.junit.Assert;
2629
import org.junit.Test;
2730

@@ -46,6 +49,20 @@ public void testErrorThreeArguments()
4649
eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings());
4750
}
4851

52+
@Test
53+
public void testInvalidRegexpLikePattern()
54+
{
55+
MatcherAssert.assertThat(
56+
Assert.assertThrows(
57+
DruidException.class,
58+
() -> eval("regexp_like('a', '[Ab-C]')", InputBindings.nilBindings())),
59+
DruidExceptionMatcher.invalidInput().expectMessageContains(
60+
"An invalid pattern [[Ab-C]] was provided for the [regexp_like] function,"
61+
+ " error: [Illegal character range near index 4"
62+
)
63+
);
64+
}
65+
4966
@Test
5067
public void testMatch()
5168
{

processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@
2020
package org.apache.druid.query.expression;
2121

2222
import com.google.common.collect.ImmutableMap;
23+
import org.apache.druid.error.DruidException;
24+
import org.apache.druid.error.DruidExceptionMatcher;
2325
import org.apache.druid.math.expr.ExprEval;
2426
import org.apache.druid.math.expr.ExpressionType;
2527
import org.apache.druid.math.expr.InputBindings;
28+
import org.hamcrest.MatcherAssert;
2629
import org.junit.Assert;
2730
import org.junit.Test;
2831

@@ -40,6 +43,20 @@ public void testErrorZeroArguments()
4043
eval("regexp_replace()", InputBindings.nilBindings());
4144
}
4245

46+
@Test
47+
public void testInvalidRegexpReplacePattern()
48+
{
49+
MatcherAssert.assertThat(
50+
Assert.assertThrows(
51+
DruidException.class,
52+
() -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", InputBindings.nilBindings())),
53+
DruidExceptionMatcher.invalidInput().expectMessageContains(
54+
"An invalid pattern [[Ab-cd-0]] was provided for the [regexp_replace] function,"
55+
+ " error: [Illegal character range near index 7"
56+
)
57+
);
58+
}
59+
4360
@Test
4461
public void testErrorFourArguments()
4562
{

sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8413,7 +8413,7 @@ public void testRegexpExtractWithBadRegexPattern()
84138413
+ " REGEXP_EXTRACT(dim1, '^(.))', 1)\n"
84148414
+ "FROM foo",
84158415
DruidExceptionMatcher.invalidInput().expectMessageContains(
8416-
"An invalid pattern [^(.))] was provided for the regexp_extract function, " +
8416+
"An invalid pattern [^(.))] was provided for the [regexp_extract] function, " +
84178417
"error: [Unmatched closing ')' near index 3\n^(.))\n ^]"
84188418
)
84198419
);

0 commit comments

Comments
 (0)