Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 7 additions & 23 deletions src/expr/src/scalar/func.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ use mz_repr::adt::range::{Range, RangeOps};
use mz_repr::adt::regex::Regex;
use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampLike};
use mz_repr::{
Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena, SqlColumnType,
SqlScalarType, strconv,
ArrayRustType, Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena,
SqlColumnType, SqlScalarType, strconv,
};
use mz_sql_parser::ast::display::FormatMode;
use mz_sql_pretty::{PrettyConfig, pretty_str};
Expand Down Expand Up @@ -2298,16 +2298,9 @@ fn mz_acl_item_contains_privilege(
Ok(contains)
}

#[sqlfunc(
output_type = "mz_repr::ArrayRustType<String>",
propagates_nulls = true
)]
#[sqlfunc]
// transliterated from postgres/src/backend/utils/adt/misc.c
fn parse_ident<'a>(
ident: &str,
strict: bool,
temp_storage: &'a RowArena,
) -> Result<Datum<'a>, EvalError> {
fn parse_ident<'a>(ident: &'a str, strict: bool) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {
fn is_ident_start(c: char) -> bool {
matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..=char::MAX)
}
Expand Down Expand Up @@ -2337,13 +2330,12 @@ fn parse_ident<'a>(
detail: Some("String has unclosed double quotes.".into()),
});
}
elems.push(Datum::String(s));
elems.push(Cow::Borrowed(s));
missing_ident = false;
} else if c.map(is_ident_start).unwrap_or(false) {
buf.prev();
let s = buf.take_while(is_ident_cont);
let s = temp_storage.push_string(s.to_ascii_lowercase());
elems.push(Datum::String(s));
elems.push(Cow::Owned(s.to_ascii_lowercase()));
missing_ident = false;
}

Expand Down Expand Up @@ -2384,15 +2376,7 @@ fn parse_ident<'a>(
}
}

Ok(temp_storage.try_make_datum(|packer| {
packer.try_push_array(
&[ArrayDimension {
lower_bound: 1,
length: elems.len(),
}],
elems,
)
})?)
Ok(elems.into())
}

fn regexp_split_to_array_re<'a>(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
source: src/expr/src/scalar/func.rs
expression: "#[sqlfunc(output_type = \"mz_repr::ArrayRustType<String>\", propagates_nulls = true)]\nfn parse_ident<'a>(\n ident: &str,\n strict: bool,\n temp_storage: &'a RowArena,\n) -> Result<Datum<'a>, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Datum::String(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n let s = temp_storage.push_string(s.to_ascii_lowercase());\n elems.push(Datum::String(s));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(\n temp_storage\n .try_make_datum(|packer| {\n packer\n .try_push_array(\n &[\n 
ArrayDimension {\n lower_bound: 1,\n length: elems.len(),\n },\n ],\n elems,\n )\n })?,\n )\n}\n"
expression: "#[sqlfunc()]\nfn parse_ident<'a>(\n ident: &'a str,\n strict: bool,\n) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Cow::Borrowed(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n elems.push(Cow::Owned(s.to_ascii_lowercase()));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(elems.into())\n}\n"
---
#[derive(
proptest_derive::Arbitrary,
Expand All @@ -19,22 +19,22 @@ pub struct ParseIdent;
impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent {
type Input1 = &'a str;
type Input2 = bool;
type Output = Result<Datum<'a>, EvalError>;
type Output = Result<ArrayRustType<Cow<'a, str>>, EvalError>;
fn call(
&self,
a: Self::Input1,
b: Self::Input2,
temp_storage: &'a mz_repr::RowArena,
) -> Self::Output {
parse_ident(a, b, temp_storage)
parse_ident(a, b)
}
fn output_type(
&self,
input_type_a: mz_repr::SqlColumnType,
input_type_b: mz_repr::SqlColumnType,
) -> mz_repr::SqlColumnType {
use mz_repr::AsColumnType;
let output = <mz_repr::ArrayRustType<String>>::as_column_type();
let output = Self::Output::as_column_type();
let propagates_nulls = crate::func::binary::EagerBinaryFunc::propagates_nulls(
self,
);
Expand All @@ -46,23 +46,16 @@ impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent {
&& (input_type_a.nullable || input_type_b.nullable)),
)
}
fn introduces_nulls(&self) -> bool {
<mz_repr::ArrayRustType<String> as ::mz_repr::DatumType<'_, ()>>::nullable()
}
fn propagates_nulls(&self) -> bool {
true
}
}
impl std::fmt::Display for ParseIdent {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.write_str(stringify!(parse_ident))
}
}
fn parse_ident<'a>(
ident: &str,
ident: &'a str,
strict: bool,
temp_storage: &'a RowArena,
) -> Result<Datum<'a>, EvalError> {
) -> Result<ArrayRustType<Cow<'a, str>>, EvalError> {
fn is_ident_start(c: char) -> bool {
matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..= char::MAX)
}
Expand All @@ -84,13 +77,12 @@ fn parse_ident<'a>(
detail: Some("String has unclosed double quotes.".into()),
});
}
elems.push(Datum::String(s));
elems.push(Cow::Borrowed(s));
missing_ident = false;
} else if c.map(is_ident_start).unwrap_or(false) {
buf.prev();
let s = buf.take_while(is_ident_cont);
let s = temp_storage.push_string(s.to_ascii_lowercase());
elems.push(Datum::String(s));
elems.push(Cow::Owned(s.to_ascii_lowercase()));
missing_ident = false;
}
if missing_ident {
Expand Down Expand Up @@ -126,19 +118,5 @@ fn parse_ident<'a>(
_ => break,
}
}
Ok(
temp_storage
.try_make_datum(|packer| {
packer
.try_push_array(
&[
ArrayDimension {
lower_bound: 1,
length: elems.len(),
},
],
elems,
)
})?,
)
Ok(elems.into())
}
2 changes: 1 addition & 1 deletion src/repr/src/row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2643,7 +2643,7 @@ impl<'a> DatumList<'a> {
}
}

impl<'a> IntoIterator for &'a DatumList<'a> {
impl<'a> IntoIterator for &'_ DatumList<'a> {
type Item = Datum<'a>;
type IntoIter = DatumListIter<'a>;
fn into_iter(self) -> DatumListIter<'a> {
Expand Down
77 changes: 77 additions & 0 deletions src/repr/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fmt::{self, Debug};
use std::hash::Hash;
Expand Down Expand Up @@ -1940,6 +1941,40 @@ pub trait DatumType<'a, E>: Sized {
#[derive(Debug)]
pub struct ArrayRustType<T>(pub Vec<T>);

impl<T> From<Vec<T>> for ArrayRustType<T> {
fn from(v: Vec<T>) -> Self {
Self(v)
}
}

/// A `Cow` reports the same SQL column type as the owned form of the data it wraps.
///
/// NOTE(review): `B` carries the implicit `Sized` bound here, so this impl presumably
/// does not apply to `Cow<'_, str>` — confirm whether that is intended.
impl<B> AsColumnType for Cow<'_, B>
where
    B: ToOwned,
    B::Owned: AsColumnType,
{
    fn as_column_type() -> SqlColumnType {
        <<B as ToOwned>::Owned as AsColumnType>::as_column_type()
    }
}

/// Lets `Cow<'a, B>` act as a datum type whenever both `&B` and `B::Owned` do.
///
/// Nullability and fallibility are taken from `B::Owned`. Decoding goes through `&B`
/// and therefore always yields `Cow::Borrowed`; encoding delegates to whichever
/// variant is held.
impl<'a, E, B> DatumType<'a, E> for Cow<'a, B>
where
    B: ToOwned,
    B::Owned: DatumType<'a, E>,
    for<'b> &'b B: DatumType<'a, E>,
{
    /// Same nullability as the owned representation.
    fn nullable() -> bool {
        <B::Owned as DatumType<'a, E>>::nullable()
    }

    /// Same fallibility as the owned representation.
    fn fallible() -> bool {
        <B::Owned as DatumType<'a, E>>::fallible()
    }

    /// Decodes via `&B` and wraps the reference as `Cow::Borrowed`; a rejected
    /// `res` is handed back unchanged in the `Err` variant.
    fn try_from_result(res: Result<Datum<'a>, E>) -> Result<Self, Result<Datum<'a>, E>> {
        <&B>::try_from_result(res).map(Cow::Borrowed)
    }

    /// Encodes the held value using the `DatumType` impl of the active variant.
    fn into_result(self, temp_storage: &'a RowArena) -> Result<Datum<'a>, E> {
        match self {
            Cow::Borrowed(borrowed) => borrowed.into_result(temp_storage),
            Cow::Owned(owned) => owned.into_result(temp_storage),
        }
    }
}

impl<B: AsColumnType> AsColumnType for Option<B> {
fn as_column_type() -> SqlColumnType {
B::as_column_type().nullable(true)
Expand Down Expand Up @@ -2310,6 +2345,48 @@ impl<'a, E> DatumType<'a, E> for ArrayRustType<String> {
}
}

/// Column type for an array of strings: `SqlScalarType::Array` over
/// `SqlScalarType::String`, built with `.nullable(false)`.
impl AsColumnType for ArrayRustType<Cow<'_, str>> {
    fn as_column_type() -> SqlColumnType {
        let element_type = Box::new(SqlScalarType::String);
        SqlScalarType::Array(element_type).nullable(false)
    }
}

/// Converts between `Datum::Array` and an [`ArrayRustType`] of possibly-borrowed strings.
impl<'a, E> DatumType<'a, E> for ArrayRustType<Cow<'a, str>> {
    /// This type reports itself as non-nullable.
    fn nullable() -> bool {
        false
    }

    /// This type reports itself as infallible.
    fn fallible() -> bool {
        false
    }

    /// Accepts only `Ok(Datum::Array(_))`; any other `res` is returned untouched as `Err`.
    ///
    /// Every element is read with `unwrap_str` and kept as `Cow::Borrowed`.
    /// NOTE(review): `unwrap_str` presumably panics on non-string (including NULL)
    /// elements — confirm callers only pass arrays of non-null strings.
    fn try_from_result(res: Result<Datum<'a>, E>) -> Result<Self, Result<Datum<'a>, E>> {
        let Ok(Datum::Array(array)) = res else {
            return Err(res);
        };
        let strings: Vec<Cow<'a, str>> = array
            .elements()
            .into_iter()
            .map(|datum| Cow::Borrowed(datum.unwrap_str()))
            .collect();
        Ok(ArrayRustType(strings))
    }

    /// Packs the strings into `temp_storage` as an array with a single dimension
    /// (lower bound 1, length equal to the number of strings).
    fn into_result(self, temp_storage: &'a RowArena) -> Result<Datum<'a>, E> {
        let strings = &self.0;
        let dims = [ArrayDimension {
            lower_bound: 1,
            length: strings.len(),
        }];
        let datum = temp_storage.make_datum(|packer| {
            let elements = strings.iter().map(|s| Datum::String(s.as_ref()));
            // The dimension above is derived from the very vector being pushed, so a
            // failure here would be a bug and not a recoverable error.
            packer
                .try_push_array(&dims, elements)
                .expect("self is 1 dimensional, and its length is used for the array length");
        });
        Ok(datum)
    }
}

impl AsColumnType for Vec<u8> {
fn as_column_type() -> SqlColumnType {
SqlScalarType::Bytes.nullable(false)
Expand Down