Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions src/expr/src/scalar.proto
Original file line number Diff line number Diff line change
Expand Up @@ -471,12 +471,6 @@ message ProtoBinaryFunc {
mz_repr.adt.regex.ProtoRegex regex = 1;
uint64 limit = 2;
}
message ProtoRegexpReplaceResult {
oneof result {
ProtoRegexpReplace ok = 1;
ProtoEvalError err = 2;
}
}

reserved 93; // timezone_time
reserved 108; // map_get_values
Expand Down Expand Up @@ -671,7 +665,7 @@ message ProtoBinaryFunc {
google.protobuf.Empty constant_time_eq_bytes = 189;
google.protobuf.Empty timezone_offset = 190;
google.protobuf.Empty pretty_sql = 191;
ProtoRegexpReplaceResult regexp_replace = 192;
ProtoRegexpReplace regexp_replace = 192;
bool list_contains_list = 193;
bool array_contains_array = 194;
google.protobuf.Empty starts_with = 195;
Expand Down
35 changes: 25 additions & 10 deletions src/expr/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1229,17 +1229,32 @@ impl MirScalarExpr {
.map_or("", |expr| expr.as_literal_str().unwrap());
let (limit, flags) = regexp_replace_parse_flags(flags);

// Defer errors until evaluation instead of eagerly returning them here
// to match the error behavior of the dynamic function (part of database-issues#4972).
let regex = match func::build_regex(pattern, &flags) {
Ok(regex) => Ok((regex, limit)),
Err(err) => Err(err),
// The behavior of `regexp_replace` is that if the data is `NULL`, the
// function returns `NULL`, independently of whether the pattern or
// flags are correct. We need to check for this case and introduce an
// if-then-else on the error path to only surface the error if the first
// input is non-NULL.
*e = match func::build_regex(pattern, &flags) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update the above comment. Or you could just remove that comment, since there is also a comment below in the error case.

Ok(regex) => {
let mut exprs = mem::take(exprs);
let replacement = exprs.swap_remove(2);
let source = exprs.swap_remove(0);
source.call_binary(
replacement,
BinaryFunc::RegexpReplace { regex, limit },
)
}
Err(err) => {
let mut exprs = mem::take(exprs);
let source = exprs.swap_remove(0);
let scalar_type = e.typ(column_types).scalar_type;
// We need to return `NULL` on `NULL` input, and error otherwise.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe extend the comment a bit. Something like:
"We'd like the same behaviors between the regex argument being dynamic or a literal. Therefore, we need to return NULL on NULL input, and error otherwise, because this is what the original, dynamic function does."

source.call_is_null().if_then_else(
MirScalarExpr::literal_null(scalar_type.clone()),
MirScalarExpr::literal(Err(err), scalar_type),
)
}
};
let mut exprs = mem::take(exprs);
let replacement = exprs.swap_remove(2);
let source = exprs.swap_remove(0);
*e = source
.call_binary(replacement, BinaryFunc::RegexpReplace { regex });
} else if *func == VariadicFunc::RegexpSplitToArray
&& exprs[1].is_literal()
&& exprs.get(2).map_or(true, |e| e.is_literal())
Expand Down
97 changes: 26 additions & 71 deletions src/expr/src/scalar/func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2384,12 +2384,8 @@ pub enum BinaryFunc {
Gt,
Gte,
LikeEscape,
IsLikeMatch {
case_insensitive: bool,
},
IsRegexpMatch {
case_insensitive: bool,
},
IsLikeMatch { case_insensitive: bool },
IsRegexpMatch { case_insensitive: bool },
ToCharTimestamp,
ToCharTimestampTz,
DateBinTimestamp,
Expand Down Expand Up @@ -2438,13 +2434,9 @@ pub enum BinaryFunc {
TrimLeading,
TrimTrailing,
EncodedBytesCharLength,
ListLengthMax {
max_layer: usize,
},
ListLengthMax { max_layer: usize },
ArrayContains,
ArrayContainsArray {
rev: bool,
},
ArrayContainsArray { rev: bool },
ArrayLength,
ArrayLower,
ArrayRemove,
Expand All @@ -2454,9 +2446,7 @@ pub enum BinaryFunc {
ListElementConcat,
ElementListConcat,
ListRemove,
ListContainsList {
rev: bool,
},
ListContainsList { rev: bool },
DigestString,
DigestBytes,
MzRenderTypmod,
Expand All @@ -2469,13 +2459,8 @@ pub enum BinaryFunc {
GetByte,
ConstantTimeEqBytes,
ConstantTimeEqString,
RangeContainsElem {
elem_type: ScalarType,
rev: bool,
},
RangeContainsRange {
rev: bool,
},
RangeContainsElem { elem_type: ScalarType, rev: bool },
RangeContainsRange { rev: bool },
RangeOverlaps,
RangeAfter,
RangeBefore,
Expand All @@ -2489,9 +2474,7 @@ pub enum BinaryFunc {
MzAclItemContainsPrivilege,
ParseIdent,
PrettySql,
RegexpReplace {
regex: Result<(Regex, usize), EvalError>,
},
RegexpReplace { regex: Regex, limit: usize },
StartsWith,
}

Expand Down Expand Up @@ -2757,10 +2740,9 @@ impl BinaryFunc {
BinaryFunc::MzAclItemContainsPrivilege => mz_acl_item_contains_privilege(a, b),
BinaryFunc::ParseIdent => parse_ident(a, b, temp_storage),
BinaryFunc::PrettySql => pretty_sql(a, b, temp_storage),
BinaryFunc::RegexpReplace { regex } => match regex {
Ok((regex, limit)) => regexp_replace_static(a, b, regex, *limit, temp_storage),
Err(err) => Err(err.clone()),
},
BinaryFunc::RegexpReplace { regex, limit } => {
regexp_replace_static(a, b, regex, *limit, temp_storage)
}
BinaryFunc::StartsWith => Ok(starts_with(a, b)),
}
}
Expand Down Expand Up @@ -3876,16 +3858,13 @@ impl fmt::Display for BinaryFunc {
BinaryFunc::MzAclItemContainsPrivilege => f.write_str("mz_aclitem_contains_privilege"),
BinaryFunc::ParseIdent => f.write_str("parse_ident"),
BinaryFunc::PrettySql => f.write_str("pretty_sql"),
BinaryFunc::RegexpReplace { regex } => match regex {
Ok((regex, limit)) => write!(
f,
"regexp_replace[{}, case_insensitive={}, limit={}]",
regex.pattern().escaped(),
regex.case_insensitive,
limit
),
Err(err) => write!(f, "regexp_replace[EvalError]: {err}"),
},
BinaryFunc::RegexpReplace { regex, limit } => write!(
f,
"regexp_replace[{}, case_insensitive={}, limit={}]",
regex.pattern().escaped(),
regex.case_insensitive,
limit
),
BinaryFunc::StartsWith => f.write_str("starts_with"),
}
}
Expand Down Expand Up @@ -4302,18 +4281,11 @@ impl RustType<ProtoBinaryFunc> for BinaryFunc {
BinaryFunc::ConstantTimeEqBytes => ConstantTimeEqBytes(()),
BinaryFunc::ConstantTimeEqString => ConstantTimeEqString(()),
BinaryFunc::PrettySql => PrettySql(()),
BinaryFunc::RegexpReplace { regex } => {
BinaryFunc::RegexpReplace { regex, limit } => {
use crate::scalar::proto_binary_func::*;
RegexpReplace(ProtoRegexpReplaceResult {
result: Some(match regex {
Ok((regex, limit)) => {
proto_regexp_replace_result::Result::Ok(ProtoRegexpReplace {
regex: Some(regex.into_proto()),
limit: limit.into_proto(),
})
}
Err(err) => proto_regexp_replace_result::Result::Err(err.into_proto()),
}),
RegexpReplace(ProtoRegexpReplace {
regex: Some(regex.into_proto()),
limit: limit.into_proto(),
})
}
BinaryFunc::StartsWith => StartsWith(()),
Expand Down Expand Up @@ -4526,27 +4498,10 @@ impl RustType<ProtoBinaryFunc> for BinaryFunc {
ConstantTimeEqBytes(()) => Ok(BinaryFunc::ConstantTimeEqBytes),
ConstantTimeEqString(()) => Ok(BinaryFunc::ConstantTimeEqString),
PrettySql(()) => Ok(BinaryFunc::PrettySql),
RegexpReplace(inner) => {
use crate::scalar::proto_binary_func::*;
Ok(BinaryFunc::RegexpReplace {
regex: match inner.result {
Some(proto_regexp_replace_result::Result::Ok(regexp_replace)) => Ok((
regexp_replace
.regex
.into_rust_if_some("ProtoRegexReplace::regex")?,
regexp_replace.limit.into_rust()?,
)),
Some(proto_regexp_replace_result::Result::Err(err)) => {
Err(err.into_rust()?)
}
None => {
return Err(TryFromProtoError::missing_field(
"ProtoRegexpReplaceResult::result",
))
}
},
})
}
RegexpReplace(inner) => Ok(BinaryFunc::RegexpReplace {
regex: inner.regex.into_rust_if_some("ProtoRegexReplace::regex")?,
limit: inner.limit.into_rust()?,
}),
StartsWith(()) => Ok(BinaryFunc::StartsWith),
}
} else {
Expand Down
10 changes: 6 additions & 4 deletions test/sqllogictest/regex.slt
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,11 @@ Target cluster: multiprocess

EOF

query T
SELECT regexp_replace(null, 'b(..', 'X', 'ig');
----
NULL

query error invalid regular expression: regex parse error:
SELECT regexp_replace('foobarbaz', 'b(..', 'X', 'ig');

Expand All @@ -735,10 +740,7 @@ SELECT s, regexp_replace(s, 'b(..', replacement, 'ig') FROM e;
----
Explained Query:
Project (#0{s}, #3)
Map (regexp_replace[EvalError]: invalid regular expression: regex parse error:
b(..
^
error: unclosed group(#0{s}, #2{replacement}))
Map (case when (#0{s}) IS NULL then null else error("invalid regular expression: regex parse error:\n b(..\n ^\nerror: unclosed group") end)
ReadStorage materialize.public.e

Source materialize.public.e
Expand Down