diff --git a/src/expr/src/scalar/func.rs b/src/expr/src/scalar/func.rs index d83a4ff3264ad..870f2537895ee 100644 --- a/src/expr/src/scalar/func.rs +++ b/src/expr/src/scalar/func.rs @@ -44,8 +44,8 @@ use mz_repr::adt::range::{Range, RangeOps}; use mz_repr::adt::regex::Regex; use mz_repr::adt::timestamp::{CheckedTimestamp, TimestampLike}; use mz_repr::{ - Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena, SqlColumnType, - SqlScalarType, strconv, + ArrayRustType, Datum, DatumList, DatumMap, DatumType, ExcludeNull, Row, RowArena, + SqlColumnType, SqlScalarType, strconv, }; use mz_sql_parser::ast::display::FormatMode; use mz_sql_pretty::{PrettyConfig, pretty_str}; @@ -2298,16 +2298,9 @@ fn mz_acl_item_contains_privilege( Ok(contains) } -#[sqlfunc( - output_type = "mz_repr::ArrayRustType", - propagates_nulls = true -)] +#[sqlfunc] // transliterated from postgres/src/backend/utils/adt/misc.c -fn parse_ident<'a>( - ident: &str, - strict: bool, - temp_storage: &'a RowArena, -) -> Result, EvalError> { +fn parse_ident<'a>(ident: &'a str, strict: bool) -> Result>, EvalError> { fn is_ident_start(c: char) -> bool { matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..=char::MAX) } @@ -2337,13 +2330,12 @@ fn parse_ident<'a>( detail: Some("String has unclosed double quotes.".into()), }); } - elems.push(Datum::String(s)); + elems.push(Cow::Borrowed(s)); missing_ident = false; } else if c.map(is_ident_start).unwrap_or(false) { buf.prev(); let s = buf.take_while(is_ident_cont); - let s = temp_storage.push_string(s.to_ascii_lowercase()); - elems.push(Datum::String(s)); + elems.push(Cow::Owned(s.to_ascii_lowercase())); missing_ident = false; } @@ -2384,15 +2376,7 @@ fn parse_ident<'a>( } } - Ok(temp_storage.try_make_datum(|packer| { - packer.try_push_array( - &[ArrayDimension { - lower_bound: 1, - length: elems.len(), - }], - elems, - ) - })?) + Ok(elems.into()) } fn regexp_split_to_array_re<'a>( diff --git a/src/expr/src/scalar/snapshots/mz_expr__scalar__func__parse_ident.snap b/src/expr/src/scalar/snapshots/mz_expr__scalar__func__parse_ident.snap index 05ebc2c6c99f8..431ef1191a2f7 100644 --- a/src/expr/src/scalar/snapshots/mz_expr__scalar__func__parse_ident.snap +++ b/src/expr/src/scalar/snapshots/mz_expr__scalar__func__parse_ident.snap @@ -1,6 +1,6 @@ --- source: src/expr/src/scalar/func.rs -expression: "#[sqlfunc(output_type = \"mz_repr::ArrayRustType\", propagates_nulls = true)]\nfn parse_ident<'a>(\n ident: &str,\n strict: bool,\n temp_storage: &'a RowArena,\n) -> Result, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Datum::String(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n let s = temp_storage.push_string(s.to_ascii_lowercase());\n elems.push(Datum::String(s));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(\n temp_storage\n .try_make_datum(|packer| {\n packer\n .try_push_array(\n &[\n ArrayDimension {\n lower_bound: 1,\n length: elems.len(),\n },\n ],\n elems,\n )\n })?,\n )\n}\n" +expression: "#[sqlfunc()]\nfn parse_ident<'a>(\n ident: &'a str,\n strict: bool,\n) -> Result>, EvalError> {\n fn is_ident_start(c: char) -> bool {\n matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\\u{80}'..= char::MAX)\n }\n fn is_ident_cont(c: char) -> bool {\n matches!(c, '0'..='9' | '$') || is_ident_start(c)\n }\n let mut elems = vec![];\n let buf = &mut LexBuf::new(ident);\n let mut after_dot = false;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n loop {\n let mut missing_ident = true;\n let c = buf.next();\n if c == Some('\"') {\n let s = buf.take_while(|ch| !matches!(ch, '\"'));\n if buf.next() != Some('\"') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"String has unclosed double quotes.\".into()),\n });\n }\n elems.push(Cow::Borrowed(s));\n missing_ident = false;\n } else if c.map(is_ident_start).unwrap_or(false) {\n buf.prev();\n let s = buf.take_while(is_ident_cont);\n elems.push(Cow::Owned(s.to_ascii_lowercase()));\n missing_ident = false;\n }\n if missing_ident {\n if c == Some('.') {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier before \\\".\\\".\".into()),\n });\n } else if after_dot {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: Some(\"No valid identifier after \\\".\\\".\".into()),\n });\n } else {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n }\n buf.take_while(|ch| ch.is_ascii_whitespace());\n match buf.next() {\n Some('.') => {\n after_dot = true;\n buf.take_while(|ch| ch.is_ascii_whitespace());\n }\n Some(_) if strict => {\n return Err(EvalError::InvalidIdentifier {\n ident: ident.into(),\n detail: None,\n });\n }\n _ => break,\n }\n }\n Ok(elems.into())\n}\n" --- #[derive( proptest_derive::Arbitrary, @@ -19,14 +19,14 @@ pub struct ParseIdent; impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent { type Input1 = &'a str; type Input2 = bool; - type Output = Result, EvalError>; + type Output = Result>, EvalError>; fn call( &self, a: Self::Input1, b: Self::Input2, temp_storage: &'a mz_repr::RowArena, ) -> Self::Output { - parse_ident(a, b, temp_storage) + parse_ident(a, b) } fn output_type( &self, @@ -34,7 +34,7 @@ impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent { input_type_b: mz_repr::SqlColumnType, ) -> mz_repr::SqlColumnType { use mz_repr::AsColumnType; - let output = >::as_column_type(); + let output = Self::Output::as_column_type(); let propagates_nulls = crate::func::binary::EagerBinaryFunc::propagates_nulls( self, ); @@ -46,12 +46,6 @@ impl<'a> crate::func::binary::EagerBinaryFunc<'a> for ParseIdent { && (input_type_a.nullable || input_type_b.nullable)), ) } - fn introduces_nulls(&self) -> bool { - as ::mz_repr::DatumType<'_, ()>>::nullable() - } - fn propagates_nulls(&self) -> bool { - true - } } impl std::fmt::Display for ParseIdent { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { @@ -59,10 +53,9 @@ impl std::fmt::Display for ParseIdent { } } fn parse_ident<'a>( - ident: &str, + ident: &'a str, strict: bool, - temp_storage: &'a RowArena, -) -> Result, EvalError> { +) -> Result>, EvalError> { fn is_ident_start(c: char) -> bool { matches!(c, 'A'..='Z' | 'a'..='z' | '_' | '\u{80}'..= char::MAX) } @@ -84,13 +77,12 @@ fn parse_ident<'a>( detail: Some("String has unclosed double quotes.".into()), }); } - elems.push(Datum::String(s)); + elems.push(Cow::Borrowed(s)); missing_ident = false; } else if c.map(is_ident_start).unwrap_or(false) { buf.prev(); let s = buf.take_while(is_ident_cont); - let s = temp_storage.push_string(s.to_ascii_lowercase()); - elems.push(Datum::String(s)); + elems.push(Cow::Owned(s.to_ascii_lowercase())); missing_ident = false; } if missing_ident { @@ -126,19 +118,5 @@ fn parse_ident<'a>( _ => break, } } - Ok( - temp_storage - .try_make_datum(|packer| { - packer - .try_push_array( - &[ - ArrayDimension { - lower_bound: 1, - length: elems.len(), - }, - ], - elems, - ) - })?, - ) + Ok(elems.into()) } diff --git a/src/repr/src/row.rs b/src/repr/src/row.rs index 041f8f8106080..50fbc9fceac7a 100644 --- a/src/repr/src/row.rs +++ b/src/repr/src/row.rs @@ -2643,7 +2643,7 @@ impl<'a> DatumList<'a> { } } -impl<'a> IntoIterator for &'a DatumList<'a> { +impl<'a> IntoIterator for &'_ DatumList<'a> { type Item = Datum<'a>; type IntoIter = DatumListIter<'a>; fn into_iter(self) -> DatumListIter<'a> { diff --git a/src/repr/src/scalar.rs b/src/repr/src/scalar.rs index 2adbd0c6dc82e..8e8bf7e47e581 100644 --- a/src/repr/src/scalar.rs +++ b/src/repr/src/scalar.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::borrow::Cow; use std::collections::BTreeMap; use std::fmt::{self, Debug}; use std::hash::Hash; @@ -1940,6 +1941,40 @@ pub trait DatumType<'a, E>: Sized { #[derive(Debug)] pub struct ArrayRustType(pub Vec); +impl From> for ArrayRustType { + fn from(v: Vec) -> Self { + Self(v) + } +} + +impl> AsColumnType for Cow<'_, B> { + fn as_column_type() -> SqlColumnType { + ::as_column_type() + } +} + +impl<'a, E, B: ToOwned> DatumType<'a, E> for Cow<'a, B> +where + B::Owned: DatumType<'a, E>, + for<'b> &'b B: DatumType<'a, E>, +{ + fn nullable() -> bool { + B::Owned::nullable() + } + fn fallible() -> bool { + B::Owned::fallible() + } + fn try_from_result(res: Result, E>) -> Result, E>> { + <&B>::try_from_result(res).map(|b| Cow::Borrowed(b)) + } + fn into_result(self, temp_storage: &'a RowArena) -> Result, E> { + match self { + Cow::Owned(b) => b.into_result(temp_storage), + Cow::Borrowed(b) => b.into_result(temp_storage), + } + } +} + impl AsColumnType for Option { fn as_column_type() -> SqlColumnType { B::as_column_type().nullable(true) @@ -2310,6 +2345,48 @@ impl<'a, E> DatumType<'a, E> for ArrayRustType { } } +impl AsColumnType for ArrayRustType> { + fn as_column_type() -> SqlColumnType { + SqlScalarType::Array(Box::new(SqlScalarType::String)).nullable(false) + } +} + +impl<'a, E> DatumType<'a, E> for ArrayRustType> { + fn nullable() -> bool { + false + } + + fn fallible() -> bool { + false + } + + fn try_from_result(res: Result, E>) -> Result, E>> { + match res { + Ok(Datum::Array(arr)) => Ok(ArrayRustType( + arr.elements() + .into_iter() + .map(|d| Cow::Borrowed(d.unwrap_str())) + .collect(), + )), + _ => Err(res), + } + } + + fn into_result(self, temp_storage: &'a RowArena) -> Result, E> { + Ok(temp_storage.make_datum(|packer| { + packer + .try_push_array( + &[ArrayDimension { + lower_bound: 1, + length: self.0.len(), + }], + self.0.iter().map(|elem| Datum::String(elem.as_ref())), + ) + .expect("self is 1 dimensional, and its length is used for the array length"); + })) + } +} + impl AsColumnType for Vec { fn as_column_type() -> SqlColumnType { SqlScalarType::Bytes.nullable(false)