Skip to content

Commit d381ca3

Browse files
authored
Crisper parse_ident function definition (#34526)
Change the types and annotations for `parse_ident` to more specific variants. No behavior changes expected. We're using a `ArrayRustType<Cow<'_, str>>` for the output, which requires some additional scaffolding. This enables us to maintain the same memory footprint as the previous implementation. --------- Signed-off-by: Moritz Hoffmann <antiguru@gmail.com>
1 parent 12b93c1 commit d381ca3

File tree

4 files changed

+94
-55
lines changed

4 files changed

+94
-55
lines changed

src/expr/src/scalar/func.rs

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ use mz_repr::adt::range::{Range, RangeOps};
4444
use mz_repr::adt::regex::Regex;
4545
use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampLike};
4646
use mz_repr::{
47-
Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena, SqlColumnType,
48-
SqlScalarType, strconv,
47+
ArrayRustType, Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena,
48+
SqlColumnType, SqlScalarType, strconv,
4949
};
5050
use mz_sql_parser::ast::display::FormatMode;
5151
use mz_sql_pretty::{PrettyConfig, pretty_str};
@@ -2298,16 +2298,9 @@ fn mz_acl_item_contains_privilege(
22982298
Ok(contains)
22992299
}
23002300

2301-
#[sqlfunc(
2302-
output_type = "mz_repr::ArrayRustType<String>",
2303-
propagates_nulls = true
2304-
)]
2301+
#[sqlfunc]
23052302
// transliterated from postgres/src/backend/utils/adt/misc.c
2306-
fn parse_ident<'a>(
2307-
ident: &str,
2308-
strict: bool,
2309-
temp_storage: &'a RowArena,
2310-
) -> Result<Datum<'a>, EvalError> {
2303+
fn parse_ident<'a>(ident: &'a str, strict: bool) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {
23112304
fn is_ident_start(c: char) -> bool {
23122305
matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..=char::MAX)
23132306
}
@@ -2337,13 +2330,12 @@ fn parse_ident<'a>(
23372330
detail: Some("String has unclosed double quotes.".into()),
23382331
});
23392332
}
2340-
elems.push(Datum::String(s));
2333+
elems.push(Cow::Borrowed(s));
23412334
missing_ident = false;
23422335
} else if c.map(is_ident_start).unwrap_or(false) {
23432336
buf.prev();
23442337
let s = buf.take_while(is_ident_cont);
2345-
let s = temp_storage.push_string(s.to_ascii_lowercase());
2346-
elems.push(Datum::String(s));
2338+
elems.push(Cow::Owned(s.to_ascii_lowercase()));
23472339
missing_ident = false;
23482340
}
23492341

@@ -2384,15 +2376,7 @@ fn parse_ident<'a>(
23842376
}
23852377
}
23862378

2387-
Ok(temp_storage.try_make_datum(|packer| {
2388-
packer.try_push_array(
2389-
&[ArrayDimension {
2390-
lower_bound: 1,
2391-
length: elems.len(),
2392-
}],
2393-
elems,
2394-
)
2395-
})?)
2379+
Ok(elems.into())
23962380
}
23972381

23982382
fn regexp_split_to_array_re<'a>(

src/expr/src/scalar/snapshots/mz_expr__scalar__func__parse_ident.snap

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
---
22
source: src/expr/src/scalar/func.rs
3-
expression: "#[sqlfunc(output_type = \"mz_repr::ArrayRustType<String>\", propagates_nulls = true)]\nfn parse_ident<'a>(\n ident: &str,\n strict: bool,\n temp_storage: &'a RowArena,\n) -> Result<Datum<'a>, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Datum::String(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n let s = temp_storage.push_string(s.to_ascii_lowercase());\n elems.push(Datum::String(s));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(\n temp_storage\n .try_make_datum(|packer| {\n packer\n .try_push_array(\n &[\n ArrayDimension {\n lower_bound: 1,\n length: elems.len(),\n },\n ],\n elems,\n )\n })?,\n )\n}\n"
3+
expression: "#[sqlfunc()]\nfn parse_ident<'a>(\n ident: &'a str,\n strict: bool,\n) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Cow::Borrowed(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n elems.push(Cow::Owned(s.to_ascii_lowercase()));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(elems.into())\n}\n"
44
---
55
#[derive(
66
proptest_derive::Arbitrary,
@@ -19,22 +19,22 @@ pub struct ParseIdent;
1919
impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent {
2020
type Input1 = &'a str;
2121
type Input2 = bool;
22-
type Output = Result<Datum<'a>, EvalError>;
22+
type Output = Result<ArrayRustType<Cow<'a, str>>, EvalError>;
2323
fn call(
2424
&self,
2525
a: Self::Input1,
2626
b: Self::Input2,
2727
temp_storage: &'a mz_repr::RowArena,
2828
) -> Self::Output {
29-
parse_ident(a, b, temp_storage)
29+
parse_ident(a, b)
3030
}
3131
fn output_type(
3232
&self,
3333
input_type_a: mz_repr::SqlColumnType,
3434
input_type_b: mz_repr::SqlColumnType,
3535
) -> mz_repr::SqlColumnType {
3636
use mz_repr::AsColumnType;
37-
let output = <mz_repr::ArrayRustType<String>>::as_column_type();
37+
let output = Self::Output::as_column_type();
3838
let propagates_nulls = crate::func::binary::EagerBinaryFunc::propagates_nulls(
3939
self,
4040
);
@@ -46,23 +46,16 @@ impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent {
4646
&& (input_type_a.nullable || input_type_b.nullable)),
4747
)
4848
}
49-
fn introduces_nulls(&self) -> bool {
50-
<mz_repr::ArrayRustType<String> as ::mz_repr::DatumType<'_, ()>>::nullable()
51-
}
52-
fn propagates_nulls(&self) -> bool {
53-
true
54-
}
5549
}
5650
impl std::fmt::Display for ParseIdent {
5751
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
5852
f.write_str(stringify!(parse_ident))
5953
}
6054
}
6155
fn parse_ident<'a>(
62-
ident: &str,
56+
ident: &'a str,
6357
strict: bool,
64-
temp_storage: &'a RowArena,
65-
) -> Result<Datum<'a>, EvalError> {
58+
) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {
6659
fn is_ident_start(c: char) -> bool {
6760
matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..= char::MAX)
6861
}
@@ -84,13 +77,12 @@ fn parse_ident<'a>(
8477
detail: Some("String has unclosed double quotes.".into()),
8578
});
8679
}
87-
elems.push(Datum::String(s));
80+
elems.push(Cow::Borrowed(s));
8881
missing_ident = false;
8982
} else if c.map(is_ident_start).unwrap_or(false) {
9083
buf.prev();
9184
let s = buf.take_while(is_ident_cont);
92-
let s = temp_storage.push_string(s.to_ascii_lowercase());
93-
elems.push(Datum::String(s));
85+
elems.push(Cow::Owned(s.to_ascii_lowercase()));
9486
missing_ident = false;
9587
}
9688
if missing_ident {
@@ -126,19 +118,5 @@ fn parse_ident<'a>(
126118
_ => break,
127119
}
128120
}
129-
Ok(
130-
temp_storage
131-
.try_make_datum(|packer| {
132-
packer
133-
.try_push_array(
134-
&[
135-
ArrayDimension {
136-
lower_bound: 1,
137-
length: elems.len(),
138-
},
139-
],
140-
elems,
141-
)
142-
})?,
143-
)
121+
Ok(elems.into())
144122
}

src/repr/src/row.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2643,7 +2643,7 @@ impl<'a> DatumList<'a> {
26432643
}
26442644
}
26452645

2646-
impl<'a> IntoIterator for &'a DatumList<'a> {
2646+
impl<'a> IntoIterator for &'_ DatumList<'a> {
26472647
type Item = Datum<'a>;
26482648
type IntoIter = DatumListIter<'a>;
26492649
fn into_iter(self) -> DatumListIter<'a> {

src/repr/src/scalar.rs

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// the Business Source License, use of this software will be governed
88
// by the Apache License, Version 2.0.
99

10+
use std::borrow::Cow;
1011
use std::collections::BTreeMap;
1112
use std::fmt::{self, Debug};
1213
use std::hash::Hash;
@@ -1940,6 +1941,40 @@ pub trait DatumType<'a, E>: Sized {
19401941
#[derive(Debug)]
19411942
pub struct ArrayRustType<T>(pub Vec<T>);
19421943

1944+
impl<T> From<Vec<T>> for ArrayRustType<T> {
1945+
fn from(v: Vec<T>) -> Self {
1946+
Self(v)
1947+
}
1948+
}
1949+
1950+
impl<B: ToOwned<Owned: AsColumnType>> AsColumnType for Cow<'_, B> {
1951+
fn as_column_type() -> SqlColumnType {
1952+
<B::Owned>::as_column_type()
1953+
}
1954+
}
1955+
1956+
impl<'a, E, B: ToOwned> DatumType<'a, E> for Cow<'a, B>
1957+
where
1958+
B::Owned: DatumType<'a, E>,
1959+
for<'b> &'b B: DatumType<'a, E>,
1960+
{
1961+
fn nullable() -> bool {
1962+
B::Owned::nullable()
1963+
}
1964+
fn fallible() -> bool {
1965+
B::Owned::fallible()
1966+
}
1967+
fn try_from_result(res: Result<Datum<'a>, E>) -> Result<Self, Result<Datum<'a>, E>> {
1968+
<&B>::try_from_result(res).map(|b| Cow::Borrowed(b))
1969+
}
1970+
fn into_result(self, temp_storage: &'a RowArena) -> Result<Datum<'a>, E> {
1971+
match self {
1972+
Cow::Owned(b) => b.into_result(temp_storage),
1973+
Cow::Borrowed(b) => b.into_result(temp_storage),
1974+
}
1975+
}
1976+
}
1977+
19431978
impl<B: AsColumnType> AsColumnType for Option<B> {
19441979
fn as_column_type() -> SqlColumnType {
19451980
B::as_column_type().nullable(true)
@@ -2310,6 +2345,48 @@ impl<'a, E> DatumType<'a, E> for ArrayRustType<String> {
23102345
}
23112346
}
23122347

2348+
impl AsColumnType for ArrayRustType<Cow<'_, str>> {
2349+
fn as_column_type() -> SqlColumnType {
2350+
SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(false)
2351+
}
2352+
}
2353+
2354+
impl<'a, E> DatumType<'a, E> for ArrayRustType<Cow<'a, str>> {
2355+
fn nullable() -> bool {
2356+
false
2357+
}
2358+
2359+
fn fallible() -> bool {
2360+
false
2361+
}
2362+
2363+
fn try_from_result(res: Result<Datum<'a>, E>) -> Result<Self, Result<Datum<'a>, E>> {
2364+
match res {
2365+
Ok(Datum::Array(arr)) => Ok(ArrayRustType(
2366+
arr.elements()
2367+
.into_iter()
2368+
.map(|d| Cow::Borrowed(d.unwrap_str()))
2369+
.collect(),
2370+
)),
2371+
_ => Err(res),
2372+
}
2373+
}
2374+
2375+
fn into_result(self, temp_storage: &'a RowArena) -> Result<Datum<'a>, E> {
2376+
Ok(temp_storage.make_datum(|packer| {
2377+
packer
2378+
.try_push_array(
2379+
&[ArrayDimension {
2380+
lower_bound: 1,
2381+
length: self.0.len(),
2382+
}],
2383+
self.0.iter().map(|elem| Datum::String(elem.as_ref())),
2384+
)
2385+
.expect("self is 1 dimensional, and its length is used for the array length");
2386+
}))
2387+
}
2388+
}
2389+
23132390
impl AsColumnType for Vec<u8> {
23142391
fn as_column_type() -> SqlColumnType {
23152392
SqlScalarType::Bytes.nullable(false)

0 commit comments

Comments
 (0)