Skip to content

Commit fe08167

Browse files
committed
feat: Support regexp_replace default empty replacer
1 parent 6b006e5 commit fe08167

File tree

4 files changed

+47
-1
lines changed

4 files changed

+47
-1
lines changed

datafusion/core/src/physical_plan/functions.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@ fn signature(fun: &BuiltinScalarFunction) -> Signature {
609609
}
610610
BuiltinScalarFunction::RegexpReplace => Signature::one_of(
611611
vec![
612+
TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]),
612613
TypeSignature::Exact(vec![
613614
DataType::Utf8,
614615
DataType::Utf8,
@@ -2266,6 +2267,18 @@ mod tests {
22662267
Utf8,
22672268
StringArray
22682269
);
2270+
#[cfg(feature = "regex_expressions")]
2271+
test_function!(
2272+
RegexpReplace,
2273+
&[
2274+
lit(ScalarValue::Utf8(Some("ABCabcABC".to_string()))),
2275+
lit(ScalarValue::Utf8(Some("(abc)".to_string()))),
2276+
],
2277+
Ok(Some("ABCABC")),
2278+
&str,
2279+
Utf8,
2280+
StringArray
2281+
);
22692282
#[cfg(not(feature = "regex_expressions"))]
22702283
test_function!(
22712284
RegexpReplace,

datafusion/core/tests/sql/expr.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ async fn test_regex_expressions() -> Result<()> {
837837
test_expression!("regexp_replace('ABCabcABC', '(abc)', 'X', 'i')", "XabcABC");
838838
test_expression!("regexp_replace('foobarbaz', 'b..', 'X', 'g')", "fooXX");
839839
test_expression!("regexp_replace('foobarbaz', 'b..', 'X')", "fooXbaz");
840+
test_expression!("regexp_replace('foobarbaz', 'b..')", "foobaz");
840841
test_expression!(
841842
"regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g')",
842843
"fooXarYXazY"

datafusion/expr/src/expr_fn.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ mod test {
443443
replacement,
444444
flags
445445
);
446+
test_nary_scalar_expr!(RegexpReplace, regexp_replace, string, pattern);
446447
test_scalar_expr!(Replace, replace, string, from, to);
447448
test_scalar_expr!(Repeat, repeat, string, count);
448449
test_scalar_expr!(Reverse, reverse, string);

datafusion/physical-expr/src/regex_expressions.rs

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,37 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<Arr
8484
let mut patterns: HashMap<String, Regex> = HashMap::new();
8585

8686
match args.len() {
87+
2 => {
88+
let string_array = downcast_string_arg!(args[0], "string", T);
89+
let pattern_array = downcast_string_arg!(args[1], "pattern", T);
90+
91+
let result = string_array
92+
.iter()
93+
.zip(pattern_array.iter())
94+
.map(|(string, pattern)| match (string, pattern) {
95+
(Some(string), Some(pattern)) => {
96+
// if the patterns hashmap already has this regexp then reuse it, else create, cache, and return it
97+
let re = match patterns.get(pattern) {
98+
Some(re) => Ok(re.clone()),
99+
None => {
100+
match Regex::new(pattern) {
101+
Ok(re) => {
102+
patterns.insert(pattern.to_string(), re.clone());
103+
Ok(re)
104+
},
105+
Err(err) => Err(DataFusionError::Execution(err.to_string())),
106+
}
107+
}
108+
};
109+
110+
Some(re.map(|re| re.replace(string, ""))).transpose()
111+
}
112+
_ => Ok(None)
113+
})
114+
.collect::<Result<GenericStringArray<T>>>()?;
115+
116+
Ok(Arc::new(result) as ArrayRef)
117+
}
87118
3 => {
88119
let string_array = downcast_string_arg!(args[0], "string", T);
89120
let pattern_array = downcast_string_arg!(args[1], "pattern", T);
@@ -172,7 +203,7 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<Arr
172203
Ok(Arc::new(result) as ArrayRef)
173204
}
174205
other => Err(DataFusionError::Internal(format!(
175-
"regexp_replace was called with {} arguments. It requires at least 3 and at most 4.",
206+
"regexp_replace was called with {} arguments. It requires at least 2 and at most 4.",
176207
other
177208
))),
178209
}

0 commit comments

Comments
 (0)