diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 12e2c892f04..f97b2c6a8a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -112,7 +112,7 @@ jobs: - name: "wasm32 with default features" command: "build" target: wasm32-unknown-unknown - args: "--target wasm32-unknown-unknown --exclude vortex-roaring --exclude vortex-datafusion " + args: "--target wasm32-unknown-unknown --exclude vortex-datafusion" steps: - uses: actions/checkout@v4 - uses: ./.github/actions/cleanup diff --git a/Cargo.lock b/Cargo.lock index 1243e4eed5f..18ba9e4dec9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5149,6 +5149,7 @@ dependencies = [ "serde", "serde_json", "serde_test", + "static_assertions", "vortex-buffer", "vortex-error", "vortex-flatbuffers", diff --git a/vortex-array/src/array/chunked/variants.rs b/vortex-array/src/array/chunked/variants.rs index 44f0dd60aab..ad7cf866a8d 100644 --- a/vortex-array/src/array/chunked/variants.rs +++ b/vortex-array/src/array/chunked/variants.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use itertools::Itertools; use vortex_dtype::{DType, Field, FieldName}; use vortex_error::{vortex_err, vortex_panic, VortexExpect, VortexResult}; @@ -109,7 +111,7 @@ impl StructArrayTrait for ChunkedArray { )?; ChunkedArray::try_new( chunks, - DType::Struct(projected_dtype, self.dtype().nullability()), + DType::Struct(Arc::new(projected_dtype), self.dtype().nullability()), ) .map(|a| a.into_array()) } diff --git a/vortex-array/src/array/struct_/mod.rs b/vortex-array/src/array/struct_/mod.rs index 43bd5757d63..ad603fa397a 100644 --- a/vortex-array/src/array/struct_/mod.rs +++ b/vortex-array/src/array/struct_/mod.rs @@ -1,4 +1,5 @@ use std::fmt::{Debug, Display}; +use std::sync::Arc; use rkyv::from_bytes; use serde::{Deserialize, Serialize}; @@ -90,7 +91,7 @@ impl StructArray { } Self::try_from_parts( - DType::Struct(StructDType::new(names, field_dtypes), nullability), + DType::Struct(Arc::new(StructDType::new(names, field_dtypes)), nullability), length, RkyvMetadata(StructMetadata { validity: validity_metadata, diff --git a/vortex-array/src/arrow/dtype.rs b/vortex-array/src/arrow/dtype.rs index 53a60f67e1c..7e0a2be3b15 100644 --- a/vortex-array/src/arrow/dtype.rs +++ b/vortex-array/src/arrow/dtype.rs @@ -45,7 +45,7 @@ impl TryFromArrowType<&DataType> for PType { impl FromArrowType for DType { fn from_arrow(value: SchemaRef) -> Self { Self::Struct( - StructDType::from_arrow(value.fields()), + Arc::new(StructDType::from_arrow(value.fields())), Nullability::NonNullable, // Must match From for Array ) } @@ -87,7 +87,7 @@ impl FromArrowType<&Field> for DType { DataType::List(e) | DataType::LargeList(e) => { List(Arc::new(Self::from_arrow(e.as_ref())), nullability) } - DataType::Struct(f) => Struct(StructDType::from_arrow(f), nullability), + DataType::Struct(f) => Struct(Arc::new(StructDType::from_arrow(f)), nullability), _ => unimplemented!("Arrow data type not yet supported: {:?}", field.data_type()), } } @@ -200,10 +200,10 @@ mod test { assert_eq!( infer_data_type(&DType::Struct( - StructDType::from_iter([ + Arc::new(StructDType::from_iter([ ("field_a", DType::Bool(false.into())), ("field_b", DType::Utf8(true.into())) - ]), + ])), Nullability::NonNullable, )) .unwrap(), @@ -248,8 +248,8 @@ mod test { let _ = infer_schema(&schema_null).unwrap(); } - fn the_struct() -> StructDType { - StructDType::new( + fn the_struct() -> Arc { + Arc::new(StructDType::new( FieldNames::from([ FieldName::from("field_a"), FieldName::from("field_b"), @@ -260,6 +260,6 @@ mod test { DType::Utf8(Nullability::NonNullable), DType::Primitive(PType::I32, Nullability::Nullable), ], - ) + )) } } diff --git a/vortex-array/src/builders/struct_.rs b/vortex-array/src/builders/struct_.rs index 7ff652982bf..ec54fd267b1 100644 --- a/vortex-array/src/builders/struct_.rs +++ b/vortex-array/src/builders/struct_.rs @@ -1,4 +1,5 @@ use std::any::Any; +use std::sync::Arc; use itertools::Itertools; use vortex_dtype::{DType, Nullability, StructDType}; @@ -13,14 +14,14 @@ use crate::{ArrayData, IntoArrayData}; pub struct StructBuilder { builders: Vec>, validity: BoolBuilder, - struct_dtype: StructDType, + struct_dtype: Arc, nullability: Nullability, dtype: DType, } impl StructBuilder { pub fn with_capacity( - struct_dtype: StructDType, + struct_dtype: Arc, nullability: Nullability, capacity: usize, ) -> Self { @@ -127,10 +128,10 @@ mod tests { #[test] fn test_struct_builder() { - let sdt = StructDType::new( + let sdt = Arc::new(StructDType::new( vec![Arc::from("a"), Arc::from("b")].into(), vec![I32.into(), I32.into()], - ); + )); let dtype = DType::Struct(sdt.clone(), Nullability::NonNullable); let mut builder = StructBuilder::with_capacity(sdt, Nullability::NonNullable, 0); diff --git a/vortex-dtype/Cargo.toml b/vortex-dtype/Cargo.toml index 96081d53972..ca5ceb9d8b3 100644 --- a/vortex-dtype/Cargo.toml +++ b/vortex-dtype/Cargo.toml @@ -19,6 +19,7 @@ path = "src/lib.rs" bench = false [dependencies] +static_assertions = { workspace = true } arbitrary = { workspace = true, optional = true } flatbuffers = { workspace = true } half = { workspace = true, features = ["num-traits"] } diff --git a/vortex-dtype/src/arbitrary.rs b/vortex-dtype/src/arbitrary.rs index 98095d499dc..3a97eda881f 100644 --- a/vortex-dtype/src/arbitrary.rs +++ b/vortex-dtype/src/arbitrary.rs @@ -23,7 +23,7 @@ fn random_dtype(u: &mut Unstructured<'_>, depth: u8) -> Result { 2 => DType::Primitive(u.arbitrary()?, u.arbitrary()?), 3 => DType::Utf8(u.arbitrary()?), 4 => DType::Binary(u.arbitrary()?), - 5 => DType::Struct(random_struct_dtype(u, depth - 1)?, u.arbitrary()?), + 5 => DType::Struct(Arc::new(random_struct_dtype(u, depth - 1)?), u.arbitrary()?), 6 => DType::List(Arc::new(random_dtype(u, depth - 1)?), u.arbitrary()?), // Null, // Extension(ExtDType, Nullability), diff --git a/vortex-dtype/src/dtype.rs b/vortex-dtype/src/dtype.rs index c103ae26221..74f36b6f2ee 100644 --- a/vortex-dtype/src/dtype.rs +++ b/vortex-dtype/src/dtype.rs @@ -3,6 +3,7 @@ use std::hash::Hash; use std::sync::Arc; use itertools::Itertools; +use static_assertions::const_assert_eq; use DType::*; use crate::nullability::Nullability; @@ -31,13 +32,19 @@ pub enum DType { /// Binary data Binary(Nullability), /// A struct is composed of an ordered list of fields, each with a corresponding name and DType - Struct(StructDType, Nullability), + Struct(Arc, Nullability), /// A variable-length list type, parameterized by a single element DType List(Arc, Nullability), /// User-defined extension types Extension(Arc), } +#[cfg(not(target_arch = "wasm32"))] +const_assert_eq!(size_of::(), 16); + +#[cfg(target_arch = "wasm32")] +const_assert_eq!(size_of::(), 8); + impl DType { /// The default DType for bytes pub const BYTES: Self = Primitive(PType::U8, Nullability::NonNullable); @@ -197,15 +204,3 @@ impl Display for DType { } } } - -#[cfg(test)] -mod test { - use std::mem; - - use crate::dtype::DType; - - #[test] - fn size_of() { - assert_eq!(mem::size_of::(), 40); - } -} diff --git a/vortex-dtype/src/serde/flatbuffers/mod.rs b/vortex-dtype/src/serde/flatbuffers/mod.rs index 68f0053c653..144a17e3d93 100644 --- a/vortex-dtype/src/serde/flatbuffers/mod.rs +++ b/vortex-dtype/src/serde/flatbuffers/mod.rs @@ -127,7 +127,10 @@ impl TryFrom for DType { .ok_or_else(|| vortex_err!("failed to parse struct from flatbuffer"))?; let struct_dtype = StructDType::from_fb(fb_struct, vfdt.buffer().clone())?; - Ok(Self::Struct(struct_dtype, fb_struct.nullable().into())) + Ok(Self::Struct( + struct_dtype.into(), + fb_struct.nullable().into(), + )) } fb::Type::Extension => { let fb_ext = fb @@ -357,7 +360,8 @@ mod test { DType::Utf8(Nullability::NonNullable), DType::Primitive(PType::U16, Nullability::Nullable), ], - ), + ) + .into(), Nullability::NonNullable, )) } diff --git a/vortex-dtype/src/serde/flatbuffers/project.rs b/vortex-dtype/src/serde/flatbuffers/project.rs index 04982591d65..160cb921ac0 100644 --- a/vortex-dtype/src/serde/flatbuffers/project.rs +++ b/vortex-dtype/src/serde/flatbuffers/project.rs @@ -56,7 +56,7 @@ pub fn project_and_deserialize( .collect::>>()?; Ok(DType::Struct( - StructDType::from_iter(struct_dtype), + StructDType::from_iter(struct_dtype).into(), nullability, )) } diff --git a/vortex-dtype/src/serde/proto.rs b/vortex-dtype/src/serde/proto.rs index d294f17f625..c19d17eaee1 100644 --- a/vortex-dtype/src/serde/proto.rs +++ b/vortex-dtype/src/serde/proto.rs @@ -30,7 +30,7 @@ impl TryFrom<&pb::DType> for DType { .iter() .map(TryInto::::try_into) .collect::>>()?, - ), + ).into(), s.nullable.into(), )), DtypeType::List(l) => { diff --git a/vortex-dtype/src/struct_.rs b/vortex-dtype/src/struct_.rs index f0b13fdfec6..dd6d7c6c9ab 100644 --- a/vortex-dtype/src/struct_.rs +++ b/vortex-dtype/src/struct_.rs @@ -321,7 +321,7 @@ mod test { #[test] fn nullability() { assert!(!DType::Struct( - StructDType::new(vec![].into(), Vec::new()), + StructDType::new(vec![].into(), Vec::new()).into(), Nullability::NonNullable ) .is_nullable()); @@ -338,7 +338,7 @@ mod test { let b_type = DType::Bool(Nullability::NonNullable); let dtype = DType::Struct( - StructDType::from_iter([("A", a_type.clone()), ("B", b_type.clone())]), + StructDType::from_iter([("A", a_type.clone()), ("B", b_type.clone())]).into(), Nullability::Nullable, ); assert!(dtype.is_nullable()); diff --git a/vortex-expr/src/lib.rs b/vortex-expr/src/lib.rs index 151a5175c87..0a9818cefaf 100644 --- a/vortex-expr/src/lib.rs +++ b/vortex-expr/src/lib.rs @@ -131,11 +131,13 @@ dyn_hash::hash_trait_object!(VortexExpr); #[cfg(feature = "test-harness")] pub mod test_harness { + use std::sync::Arc; + use vortex_dtype::{DType, Nullability, PType, StructDType}; pub fn struct_dtype() -> DType { DType::Struct( - StructDType::new( + Arc::new(StructDType::new( [ "a".into(), "col1".into(), @@ -151,7 +153,7 @@ pub mod test_harness { DType::Bool(Nullability::NonNullable), DType::Bool(Nullability::NonNullable), ], - ), + )), Nullability::NonNullable, ) } @@ -270,13 +272,13 @@ mod tests { assert_eq!( lit(Scalar::struct_( DType::Struct( - StructDType::new( + Arc::new(StructDType::new( Arc::from([Arc::from("dog"), Arc::from("cat")]), vec![ DType::Primitive(PType::U32, Nullability::NonNullable), DType::Utf8(Nullability::NonNullable) ], - ), + )), Nullability::NonNullable ), vec![Scalar::from(32_u32), Scalar::from("rufus".to_string())] diff --git a/vortex-expr/src/literal.rs b/vortex-expr/src/literal.rs index 68102a0934e..2f632980627 100644 --- a/vortex-expr/src/literal.rs +++ b/vortex-expr/src/literal.rs @@ -114,13 +114,13 @@ mod tests { ); let sdtype = DType::Struct( - StructDType::new( + Arc::new(StructDType::new( Arc::from([Arc::from("dog"), Arc::from("cat")]), vec![ DType::Primitive(PType::U32, Nullability::NonNullable), DType::Utf8(Nullability::NonNullable), ], - ), + )), Nullability::NonNullable, ); assert_eq!( diff --git a/vortex-expr/src/select.rs b/vortex-expr/src/select.rs index 8ddcafd4544..2396d15d4c3 100644 --- a/vortex-expr/src/select.rs +++ b/vortex-expr/src/select.rs @@ -162,6 +162,8 @@ impl PartialEq for Select { #[cfg(test)] mod tests { + use std::sync::Arc; + use vortex_array::array::StructArray; use vortex_array::IntoArrayData; use vortex_buffer::buffer; @@ -201,11 +203,13 @@ mod tests { let select_expr = select(vec![FieldName::from("a")], ident()); let expected_dtype = DType::Struct( - dtype - .as_struct() - .unwrap() - .project(&[Field::from("a")]) - .unwrap(), + Arc::new( + dtype + .as_struct() + .unwrap() + .project(&[Field::from("a")]) + .unwrap(), + ), Nullability::NonNullable, ); assert_eq!(select_expr.return_dtype(&dtype).unwrap(), expected_dtype); @@ -231,11 +235,13 @@ mod tests { assert_eq!( select_expr_exclude.return_dtype(&dtype).unwrap(), DType::Struct( - dtype - .as_struct() - .unwrap() - .project(&[Field::from("a"), Field::from("bool1"), Field::from("bool2")]) - .unwrap(), + Arc::new( + dtype + .as_struct() + .unwrap() + .project(&[Field::from("a"), Field::from("bool1"), Field::from("bool2")]) + .unwrap() + ), Nullability::NonNullable ) ); diff --git a/vortex-expr/src/transform/field_mask.rs b/vortex-expr/src/transform/field_mask.rs index e3af6e425d6..49f832bf60c 100644 --- a/vortex-expr/src/transform/field_mask.rs +++ b/vortex-expr/src/transform/field_mask.rs @@ -60,6 +60,7 @@ impl<'a> Folder<'a> for FieldMaskFolder { #[cfg(test)] mod test { use std::iter; + use std::sync::Arc; use itertools::Itertools; use vortex_dtype::Nullability::NonNullable; @@ -70,12 +71,12 @@ mod test { fn dtype() -> DType { DType::Struct( - StructDType::new( + Arc::new(StructDType::new( ["A".into(), "B".into(), "C".into()].into(), iter::repeat(DType::Primitive(PType::I32, NonNullable)) .take(3) .collect(), - ), + )), NonNullable, ) } diff --git a/vortex-expr/src/transform/partition.rs b/vortex-expr/src/transform/partition.rs index 3a962e8fb73..2e1f675f3e6 100644 --- a/vortex-expr/src/transform/partition.rs +++ b/vortex-expr/src/transform/partition.rs @@ -249,6 +249,8 @@ impl MutNodeVisitor for ReplaceAccessesWithChild { #[cfg(test)] mod tests { + use std::sync::Arc; + use vortex_dtype::Nullability::NonNullable; use vortex_dtype::PType::I32; use vortex_dtype::{DType, StructDType}; @@ -260,17 +262,20 @@ mod tests { fn dtype() -> DType { DType::Struct( - StructDType::from_iter([ + Arc::new(StructDType::from_iter([ ( "a", DType::Struct( - StructDType::from_iter([("a", I32.into()), ("b", DType::from(I32))]), + Arc::new(StructDType::from_iter([ + ("a", I32.into()), + ("b", DType::from(I32)), + ])), NonNullable, ), ), ("b", I32.into()), ("c", I32.into()), - ]), + ])), NonNullable, ) } diff --git a/vortex-expr/src/transform/remove_select.rs b/vortex-expr/src/transform/remove_select.rs index 0c9f5616132..0b7f959fdae 100644 --- a/vortex-expr/src/transform/remove_select.rs +++ b/vortex-expr/src/transform/remove_select.rs @@ -55,6 +55,8 @@ impl MutNodeVisitor for RemoveSelectTransform<'_> { #[cfg(test)] mod tests { + use std::sync::Arc; + use vortex_dtype::Nullability::NonNullable; use vortex_dtype::PType::I32; use vortex_dtype::{DType, StructDType}; @@ -65,10 +67,10 @@ mod tests { #[test] fn test_remove_select() { let dtype = DType::Struct( - StructDType::new( + Arc::new(StructDType::new( ["a".into(), "b".into()].into(), vec![I32.into(), I32.into()], - ), + )), NonNullable, ); let e = select(["a".into(), "b".into()], ident()); diff --git a/vortex-file/src/tests.rs b/vortex-file/src/tests.rs index 19ec01abe43..5e28b6be20b 100644 --- a/vortex-file/src/tests.rs +++ b/vortex-file/src/tests.rs @@ -156,7 +156,10 @@ async fn test_read_projection() { assert_eq!( array.dtype(), &DType::Struct( - StructDType::new(vec!["strings".into()].into(), vec![strings_dtype.clone()]), + Arc::new(StructDType::new( + vec!["strings".into()].into(), + vec![strings_dtype.clone()] + )), Nullability::NonNullable, ) ); @@ -185,7 +188,10 @@ async fn test_read_projection() { assert_eq!( array.dtype(), &DType::Struct( - StructDType::new(vec!["numbers".into()].into(), vec![numbers_dtype.clone()]), + Arc::new(StructDType::new( + vec!["numbers".into()].into(), + vec![numbers_dtype.clone()] + )), Nullability::NonNullable, ) ); diff --git a/vortex-layout/src/layouts/chunked/stats_table.rs b/vortex-layout/src/layouts/chunked/stats_table.rs index ef3bb5962d1..443187d68ff 100644 --- a/vortex-layout/src/layouts/chunked/stats_table.rs +++ b/vortex-layout/src/layouts/chunked/stats_table.rs @@ -43,11 +43,9 @@ impl StatsTable { /// Returns the DType of the statistics table given a set of statistics and column [`DType`]. pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType { DType::Struct( - StructDType::from_iter( - present_stats - .iter() - .map(|stat| (stat.name(), stat.dtype(column_dtype).as_nullable())), - ), + Arc::new(StructDType::from_iter(present_stats.iter().map(|stat| { + (stat.name(), stat.dtype(column_dtype).as_nullable()) + }))), Nullability::NonNullable, ) } diff --git a/vortex-layout/src/layouts/struct_/eval_expr.rs b/vortex-layout/src/layouts/struct_/eval_expr.rs index 8e296e48822..452759bf5bb 100644 --- a/vortex-layout/src/layouts/struct_/eval_expr.rs +++ b/vortex-layout/src/layouts/struct_/eval_expr.rs @@ -78,10 +78,10 @@ mod tests { let layout = StructLayoutWriter::new( DType::Struct( - StructDType::new( + Arc::new(StructDType::new( vec!["a".into(), "b".into(), "c".into()].into(), vec![I32.into(), I32.into(), I32.into()], - ), + )), Nullability::NonNullable, ), vec![ diff --git a/vortex-scalar/src/display.rs b/vortex-scalar/src/display.rs index d177c09e67e..1ccb042fd76 100644 --- a/vortex-scalar/src/display.rs +++ b/vortex-scalar/src/display.rs @@ -180,7 +180,7 @@ mod tests { #[test] fn display_empty_struct() { fn dtype() -> DType { - DType::Struct(StructDType::new([].into(), vec![]), Nullable) + DType::Struct(Arc::new(StructDType::new([].into(), vec![])), Nullable) } assert_eq!(format!("{}", Scalar::null(dtype())), "null"); @@ -192,10 +192,10 @@ mod tests { fn display_one_field_struct() { fn dtype() -> DType { DType::Struct( - StructDType::new( + Arc::new(StructDType::new( [Arc::from("foo")].into(), vec![DType::Primitive(PType::U32, Nullable)], - ), + )), Nullable, ) } @@ -222,10 +222,10 @@ mod tests { let f1 = DType::Bool(Nullable); let f2 = DType::Primitive(PType::U32, Nullable); let dtype = DType::Struct( - StructDType::new( + Arc::new(StructDType::new( [Arc::from("foo"), Arc::from("bar")].into(), vec![f1.clone(), f2.clone()], - ), + )), Nullable, ); // } diff --git a/vortex-scalar/src/struct_.rs b/vortex-scalar/src/struct_.rs index d562a296923..7f61d873eea 100644 --- a/vortex-scalar/src/struct_.rs +++ b/vortex-scalar/src/struct_.rs @@ -147,7 +147,7 @@ impl<'a> StructScalar<'a> { ScalarValue(InnerScalarValue::Null) }; Ok(Scalar::new( - DType::Struct(projected_dtype, self.dtype().nullability()), + DType::Struct(Arc::new(projected_dtype), self.dtype().nullability()), new_fields, )) } @@ -190,7 +190,10 @@ mod tests { let f1_dt_null = f1_dt.with_nullability(Nullability::Nullable); let dtype = DType::Struct( - StructDType::new(vec!["a".into(), "b".into()].into(), vec![f0_dt, f1_dt]), + Arc::new(StructDType::new( + vec!["a".into(), "b".into()].into(), + vec![f0_dt, f1_dt], + )), Nullability::Nullable, ); diff --git a/vortex-scalar/src/value.rs b/vortex-scalar/src/value.rs index 62a5fb0ac79..343c498b5e1 100644 --- a/vortex-scalar/src/value.rs +++ b/vortex-scalar/src/value.rs @@ -219,6 +219,8 @@ impl InnerScalarValue { #[cfg(test)] mod test { + use std::sync::Arc; + use vortex_dtype::{DType, Nullability, PType, StructDType}; use crate::{InnerScalarValue, PValue, ScalarValue}; @@ -273,10 +275,10 @@ mod test { fn tstruct(left: &DType, right: &DType) -> DType { DType::Struct( - StructDType::new( + Arc::new(StructDType::new( vec!["left".into(), "right".into()].into(), vec![left.clone(), right.clone()], - ), + )), Nullability::NonNullable, ) } @@ -315,7 +317,7 @@ mod test { .is_instance_of(&DType::Binary(Nullability::Nullable))); assert!( ScalarValue(InnerScalarValue::Null).is_instance_of(&DType::Struct( - StructDType::new([].into(), [].into()), + Arc::new(StructDType::new([].into(), [].into())), Nullability::Nullable, )) ); diff --git a/vortex-scan/src/range_scan.rs b/vortex-scan/src/range_scan.rs index ca083704c0e..4552b069288 100644 --- a/vortex-scan/src/range_scan.rs +++ b/vortex-scan/src/range_scan.rs @@ -199,10 +199,10 @@ mod tests { fn dtype() -> DType { DType::Struct( - StructDType::new( + Arc::new(StructDType::new( vec!["a".into(), "b".into(), "c".into()].into(), vec![U64.into(), U64.into(), U64.into()], - ), + )), NonNullable, ) }