Skip to content

Commit e8e9a5b

Browse files
authored
Add regexp_replace() function as alias of replace() (opensearch-project#4765)
* Support regexp_replace() function Signed-off-by: Lantao Jin <[email protected]> * Fix IT Signed-off-by: Lantao Jin <[email protected]> * revert import collapse Signed-off-by: Lantao Jin <[email protected]> * change regexp_replace() as an alias of replace() Signed-off-by: Lantao Jin <[email protected]> * fix doctest Signed-off-by: Lantao Jin <[email protected]> * remove invalid link in doc Signed-off-by: Lantao Jin <[email protected]> --------- Signed-off-by: Lantao Jin <[email protected]>
1 parent 3a3c8c8 commit e8e9a5b

File tree

22 files changed

+237
-83
lines changed

22 files changed

+237
-83
lines changed

core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ private boolean isCalciteOnlyFunction(FunctionName functionName) {
233233
// Set of functions that are only supported with Calcite engine
234234
Set<String> calciteOnlyFunctions =
235235
ImmutableSet.of(
236-
BuiltinFunctionName.REGEX_MATCH.getName().getFunctionName(),
236+
BuiltinFunctionName.REGEXP_MATCH.getName().getFunctionName(),
237237
BuiltinFunctionName.STRFTIME.getName().getFunctionName());
238238

239239
return calciteOnlyFunctions.stream()

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3434,7 +3434,7 @@ private RexNode createOptimizedSubstitution(
34343434
// 3-parameter REGEXP_REPLACE
34353435
return PPLFuncImpTable.INSTANCE.resolve(
34363436
context.rexBuilder,
3437-
BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3,
3437+
BuiltinFunctionName.REPLACE,
34383438
fieldRex,
34393439
context.rexBuilder.makeLiteral(pattern),
34403440
context.rexBuilder.makeLiteral(javaReplacement));

core/src/main/java/org/opensearch/sql/executor/QueryService.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ public void executeWithCalcite(
112112
} else {
113113
if (t instanceof Exception) {
114114
listener.onFailure((Exception) t);
115+
} else if (t instanceof ExceptionInInitializerError
116+
&& ((ExceptionInInitializerError) t).getException() instanceof Exception) {
117+
listener.onFailure((Exception) ((ExceptionInInitializerError) t).getException());
115118
} else if (t instanceof VirtualMachineError) {
116119
// throw and fast fail the VM errors such as OOM (same with v2).
117120
throw t;

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ public enum BuiltinFunctionName {
235235
LTRIM(FunctionName.of("ltrim")),
236236
POSITION(FunctionName.of("position")),
237237
REGEXP(FunctionName.of("regexp")),
238-
REGEX_MATCH(FunctionName.of("regex_match")),
238+
REGEXP_MATCH(FunctionName.of("regexp_match")),
239239
REX_EXTRACT(FunctionName.of("REX_EXTRACT")),
240240
REX_EXTRACT_MULTI(FunctionName.of("REX_EXTRACT_MULTI")),
241241
REX_OFFSET(FunctionName.of("REX_OFFSET")),
@@ -336,7 +336,6 @@ public enum BuiltinFunctionName {
336336
INTERNAL_UNCOLLECT_PATTERNS(FunctionName.of("uncollect_patterns")),
337337
INTERNAL_GROK(FunctionName.of("grok"), true),
338338
INTERNAL_PARSE(FunctionName.of("parse"), true),
339-
INTERNAL_REGEXP_REPLACE_3(FunctionName.of("regexp_replace_3"), true),
340339
INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true),
341340
INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true),
342341
INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true);

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@
8383
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PARSE;
8484
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN;
8585
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_PATTERN_PARSER;
86-
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3;
8786
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5;
8887
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4;
8988
import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3;
@@ -170,7 +169,7 @@
170169
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RANGE_BUCKET;
171170
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE;
172171
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP;
173-
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH;
172+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP_MATCH;
174173
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
175174
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
176175
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT;
@@ -832,7 +831,7 @@ void populate() {
832831

833832
// Register library operator
834833
registerOperator(REGEXP, SqlLibraryOperators.REGEXP);
835-
registerOperator(REGEX_MATCH, SqlLibraryOperators.REGEXP_CONTAINS);
834+
registerOperator(REGEXP_MATCH, SqlLibraryOperators.REGEXP_CONTAINS);
836835
registerOperator(CONCAT, SqlLibraryOperators.CONCAT_FUNCTION);
837836
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
838837
registerOperator(CONCAT_WS, SqlLibraryOperators.CONCAT_WS);
@@ -843,7 +842,6 @@ void populate() {
843842
registerOperator(MD5, SqlLibraryOperators.MD5);
844843
registerOperator(SHA1, SqlLibraryOperators.SHA1);
845844
registerOperator(CRC32, SqlLibraryOperators.CRC32);
846-
registerOperator(INTERNAL_REGEXP_REPLACE_3, SqlLibraryOperators.REGEXP_REPLACE_3);
847845
registerOperator(INTERNAL_REGEXP_REPLACE_PG_4, SqlLibraryOperators.REGEXP_REPLACE_PG_4);
848846
registerOperator(INTERNAL_REGEXP_REPLACE_5, SqlLibraryOperators.REGEXP_REPLACE_5);
849847
registerOperator(INTERNAL_TRANSLATE3, SqlLibraryOperators.TRANSLATE3);

core/src/main/java/org/opensearch/sql/utils/ParseUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public class ParseUtils {
3232
ImmutableMap.of(
3333
ParseMethod.REGEX, BuiltinFunctionName.INTERNAL_PARSE,
3434
ParseMethod.GROK, BuiltinFunctionName.INTERNAL_GROK,
35-
ParseMethod.PATTERNS, BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3);
35+
ParseMethod.PATTERNS, BuiltinFunctionName.REPLACE);
3636

3737
/**
3838
* Construct corresponding ParseExpression by {@link ParseMethod}.

docs/user/ppl/functions/condition.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -571,15 +571,15 @@ Example::
571571
| 969 |
572572
+-----+
573573

574-
REGEX_MATCH
574+
REGEXP_MATCH
575575
------------
576576

577577
Description
578578
>>>>>>>>>>>
579579

580580
Version: 3.3.0
581581

582-
Usage: regex_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false.
582+
Usage: regexp_match(string, pattern) returns true if the regular expression pattern finds a match against any substring of the string value, otherwise returns false.
583583

584584
The function uses Java regular expression syntax for the pattern.
585585

@@ -589,7 +589,7 @@ Return type: BOOLEAN
589589

590590
Example::
591591

592-
#os> source=logs | where regex_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message
592+
#os> source=logs | where regexp_match(message, 'ERROR|WARN|FATAL') | fields timestamp, message
593593
fetched rows / total rows = 3/100
594594
+---------------------+------------------------------------------+
595595
| timestamp | message |
@@ -599,7 +599,7 @@ Example::
599599
| 2024-01-15 10:25:33 | FATAL: System crashed unexpectedly |
600600
+---------------------+------------------------------------------+
601601

602-
#os> source=users | where regex_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email
602+
#os> source=users | where regexp_match(email, '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') | fields name, email
603603
fetched rows / total rows = 2/3
604604
+-------+----------------------+
605605
| name | email |
@@ -608,7 +608,7 @@ Example::
608608
| Alice | [email protected] |
609609
+-------+----------------------+
610610

611-
#os> source=network | where regex_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regex_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status
611+
#os> source=network | where regexp_match(ip_address, '^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') AND NOT regexp_match(ip_address, '^(10\.|172\.(1[6-9]|2[0-9]|3[01])\.|192\.168\.)') | fields ip_address, status
612612
fetched rows / total rows = 2/10
613613
+---------------+--------+
614614
| ip_address | status |
@@ -617,7 +617,7 @@ Example::
617617
| 1.1.1.1 | active |
618618
+---------------+--------+
619619

620-
#os> source=products | eval category = if(regex_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regex_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category
620+
#os> source=products | eval category = if(regexp_match(name, '(?i)(laptop|computer|desktop)'), 'Computing', if(regexp_match(name, '(?i)(phone|tablet|mobile)'), 'Mobile', 'Other')) | fields name, category
621621
fetched rows / total rows = 4/4
622622
+------------------------+----------+
623623
| name | category |

docs/user/ppl/functions/string.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,3 +415,29 @@ Example::
415415
|---------------------+---------------------|
416416
| HELLOWORLD | HELLOWORLD |
417417
+---------------------+---------------------+
418+
419+
420+
REGEXP_REPLACE
421+
--------------
422+
423+
Description
424+
>>>>>>>>>>>
425+
426+
Usage: regexp_replace(str, pattern, replacement) replaces all substrings of the string value that match pattern with replacement and returns the modified string value.
427+
428+
Argument type: STRING, STRING, STRING
429+
430+
Return type: STRING
431+
432+
Synonyms: `REPLACE`_
433+
434+
Example::
435+
436+
os> source=people | eval `DOMAIN` = REGEXP_REPLACE('https://opensearch.org/downloads/', '^https?://(?:www\.)?([^/]+)/.*$', '\1') | fields `DOMAIN`
437+
fetched rows / total rows = 1/1
438+
+----------------+
439+
| DOMAIN |
440+
|----------------|
441+
| opensearch.org |
442+
+----------------+
443+

integ-test/src/test/java/org/opensearch/sql/calcite/clickbench/PPLClickBenchIT.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import static org.opensearch.sql.util.MatcherUtils.assertYamlEqualsIgnoreId;
99

1010
import java.io.IOException;
11+
import java.util.HashSet;
1112
import java.util.Locale;
1213
import java.util.Map;
1314
import java.util.Set;
@@ -51,15 +52,19 @@ public static void reset() throws IOException {
5152
System.out.println();
5253
}
5354

54-
/** Ignore queries that are not supported by Calcite. */
55-
protected Set<Integer> ignored() {
56-
if (GCedMemoryUsage.initialized()) {
57-
return Set.of(29);
58-
} else {
55+
/** Ignore queries that are not supported. */
56+
protected Set<Integer> ignored() throws IOException {
57+
Set ignored = new HashSet();
58+
if (!isCalciteEnabled()) {
59+
// regexp_replace() is not supported in v2
60+
ignored.add(29);
61+
}
62+
if (!GCedMemoryUsage.initialized()) {
5963
// Ignore q30 when use RuntimeMemoryUsage,
6064
// because of too much script push down, which will cause ResourceMonitor restriction.
61-
return Set.of(29, 30);
65+
ignored.add(30);
6266
}
67+
return ignored;
6368
}
6469

6570
@Test

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public void init() throws Exception {
3232
super.init();
3333
enableCalcite();
3434
setQueryBucketSize(1000);
35-
loadIndex(Index.BANK_WITH_STRING_VALUES);
35+
loadIndex(Index.STRINGS);
3636
loadIndex(Index.BANK_WITH_NULL_VALUES);
3737
loadIndex(Index.NESTED_SIMPLE);
3838
loadIndex(Index.TIME_TEST_DATA);
@@ -687,9 +687,9 @@ public void testStatsDistinctCountApproxFunctionExplainWithPushDown() throws IOE
687687
public void testExplainRegexMatchInWhereWithScriptPushdown() throws IOException {
688688
enabledOnlyWhenPushdownIsEnabled();
689689
String query =
690-
String.format("source=%s | where regex_match(name, 'hello')", TEST_INDEX_STRINGS);
690+
String.format("source=%s | where regexp_match(name, 'hello')", TEST_INDEX_STRINGS);
691691
var result = explainQueryToString(query);
692-
String expected = loadFromFile("expectedOutput/calcite/explain_regex_match_in_where.json");
692+
String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_where.json");
693693
assertJsonEqualsIgnoreId(expected, result);
694694
}
695695

@@ -698,10 +698,10 @@ public void testExplainRegexMatchInEvalWithOutScriptPushdown() throws IOExceptio
698698
enabledOnlyWhenPushdownIsEnabled();
699699
String query =
700700
String.format(
701-
"source=%s |eval has_hello = regex_match(name, 'hello') | fields has_hello",
701+
"source=%s |eval has_hello = regexp_match(name, 'hello') | fields has_hello",
702702
TEST_INDEX_STRINGS);
703703
var result = explainQueryToString(query);
704-
String expected = loadFromFile("expectedOutput/calcite/explain_regex_match_in_eval.json");
704+
String expected = loadFromFile("expectedOutput/calcite/explain_regexp_match_in_eval.json");
705705
assertJsonEqualsIgnoreId(expected, result);
706706
}
707707

0 commit comments

Comments
 (0)