diff --git a/Cargo.lock b/Cargo.lock index cbf73b2a20c..608abd3ec69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9863,6 +9863,7 @@ dependencies = [ "bit-vec", "flatbuffers", "futures", + "insta", "itertools 0.14.0", "kanal", "moka", diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml index 61b1253ef43..daa96d4e138 100644 --- a/vortex-layout/Cargo.toml +++ b/vortex-layout/Cargo.toml @@ -55,6 +55,7 @@ vortex-utils = { workspace = true, features = ["dashmap"] } [dev-dependencies] futures = { workspace = true, features = ["executor"] } +insta = { workspace = true } rstest = { workspace = true } temp-env = { workspace = true } tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/vortex-layout/src/layouts/list/mod.rs b/vortex-layout/src/layouts/list/mod.rs new file mode 100644 index 00000000000..c8671bbb588 --- /dev/null +++ b/vortex-layout/src/layouts/list/mod.rs @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod reader; +pub mod writer; + +use std::sync::Arc; + +use reader::ListReader; +use vortex_array::DeserializeMetadata; +use vortex_array::ProstMetadata; +use vortex_array::dtype::DType; +use vortex_array::dtype::Nullability; +use vortex_array::dtype::PType; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; +use vortex_session::registry::ReadContext; + +use crate::LayoutChildType; +use crate::LayoutEncodingRef; +use crate::LayoutId; +use crate::LayoutReaderRef; +use crate::LayoutRef; +use crate::VTable; +use crate::children::LayoutChildren; +use crate::segments::SegmentId; +use crate::segments::SegmentSource; +use crate::vtable; + +/// Child index of the `elements` layout. +pub const ELEMENTS_CHILD_INDEX: usize = 0; +/// Child index of the `offsets` layout. +pub const OFFSETS_CHILD_INDEX: usize = 1; +/// Child index of the `validity` layout (only present when the list dtype is nullable). +pub const VALIDITY_CHILD_INDEX: usize = 2; + +/// Number of children when the list dtype is non-nullable. +pub const NUM_CHILDREN_NON_NULLABLE: usize = 2; +/// Number of children when the list dtype is nullable. +pub const NUM_CHILDREN_NULLABLE: usize = 3; + +vtable!(List); + +impl VTable for List { + type Layout = ListLayout; + type Encoding = ListLayoutEncoding; + type Metadata = ProstMetadata; + + fn id(_encoding: &Self::Encoding) -> LayoutId { + LayoutId::new("vortex.list") + } + + fn encoding(_layout: &Self::Layout) -> LayoutEncodingRef { + LayoutEncodingRef::new_ref(ListLayoutEncoding.as_ref()) + } + + fn row_count(layout: &Self::Layout) -> u64 { + layout.row_count() + } + + fn dtype(layout: &Self::Layout) -> &DType { + &layout.dtype + } + + fn metadata(layout: &Self::Layout) -> Self::Metadata { + ProstMetadata(ListLayoutMetadata::new(layout.offsets_ptype())) + } + + fn segment_ids(_layout: &Self::Layout) -> Vec { + vec![] + } + + fn nchildren(layout: &Self::Layout) -> usize { + if layout.dtype.is_nullable() { + NUM_CHILDREN_NULLABLE + } else { + NUM_CHILDREN_NON_NULLABLE + } + } + + fn child(layout: &Self::Layout, idx: usize) -> VortexResult { + match (idx, layout.validity.as_ref()) { + (ELEMENTS_CHILD_INDEX, _) => Ok(Arc::clone(&layout.elements)), + (OFFSETS_CHILD_INDEX, _) => Ok(Arc::clone(&layout.offsets)), + (VALIDITY_CHILD_INDEX, Some(validity)) => Ok(Arc::clone(validity)), + _ => vortex_bail!("Invalid child index {idx} for ListLayout"), + } + } + + fn child_type(layout: &Self::Layout, idx: usize) -> LayoutChildType { + match (idx, layout.validity.is_some()) { + (ELEMENTS_CHILD_INDEX, _) => LayoutChildType::Auxiliary("elements".into()), + (OFFSETS_CHILD_INDEX, _) => LayoutChildType::Auxiliary("offsets".into()), + (VALIDITY_CHILD_INDEX, true) => LayoutChildType::Auxiliary("validity".into()), + _ => vortex_panic!("Invalid child index {idx} for ListLayout"), + } + } + + fn new_reader( + layout: &Self::Layout, + name: Arc, + segment_source: Arc, + session: &VortexSession, + ) -> VortexResult { + Ok(Arc::new(ListReader::try_new( + layout.clone(), + name, + segment_source, + session.clone(), + )?)) + } + + fn build( + _encoding: &Self::Encoding, + dtype: &DType, + _row_count: u64, + metadata: &::Output, + _segment_ids: Vec, + children: &dyn LayoutChildren, + _ctx: &ReadContext, + ) -> VortexResult { + validate_children(dtype, children.nchildren())?; + + let elements_dtype = dtype + .as_list_element_opt() + .ok_or_else(|| vortex_err!("ListLayout requires a List dtype, got {dtype}"))?; + let elements = children.child(ELEMENTS_CHILD_INDEX, elements_dtype.as_ref())?; + + let offsets_dtype = DType::Primitive(metadata.offsets_ptype(), Nullability::NonNullable); + let offsets = children.child(OFFSETS_CHILD_INDEX, &offsets_dtype)?; + + let validity = if dtype.is_nullable() { + Some(children.child(VALIDITY_CHILD_INDEX, &DType::Bool(Nullability::NonNullable))?) + } else { + None + }; + + Ok(ListLayout { + dtype: dtype.clone(), + elements, + offsets, + validity, + }) + } + + fn with_children(layout: &mut Self::Layout, children: Vec) -> VortexResult<()> { + validate_children(layout.dtype(), children.len())?; + + let mut iter = children.into_iter(); + layout.elements = iter + .next() + .ok_or_else(|| vortex_err!("missing elements child"))?; + layout.offsets = iter + .next() + .ok_or_else(|| vortex_err!("missing offsets child"))?; + layout.validity = if layout.dtype.is_nullable() { + Some( + iter.next() + .ok_or_else(|| vortex_err!("missing validity child"))?, + ) + } else { + None + }; + Ok(()) + } +} + +/// Validates expected number of children based on `dtype` +#[inline] +fn validate_children(dtype: &DType, n_children: usize) -> VortexResult<()> { + let mut expected = NUM_CHILDREN_NON_NULLABLE; + + if dtype.is_nullable() { + expected += 1; + }; + + vortex_ensure!( + n_children == expected, + "ListLayout expects {expected} children, got {n_children}", + ); + + Ok(()) +} + +#[derive(Debug)] +pub struct ListLayoutEncoding; + +/// Stores a list-typed array by shredding `elements`, `offsets`, and optional `validity` children. +#[derive(Clone, Debug)] +pub struct ListLayout { + dtype: DType, + elements: LayoutRef, + offsets: LayoutRef, + validity: Option, +} + +impl ListLayout { + /// Construct a new `ListLayout` from its components. + /// + /// # Invariants + /// + /// - `dtype` must be a [`DType::List`]. + /// - `validity` must be `Some` iff `dtype.is_nullable()`. + /// - `offsets.dtype()` must be a non-nullable integer. + /// - `offsets.row_count()` is the Arrow-canonical `n+1` for `n` lists (or `0` for empty). + /// - When present, `validity.row_count() == offsets.row_count().saturating_sub(1)`. + pub fn new( + dtype: DType, + elements: LayoutRef, + offsets: LayoutRef, + validity: Option, + ) -> Self { + Self { + dtype, + elements, + offsets, + validity, + } + } + + /// Number of lists in this layout. + #[inline] + pub fn row_count(&self) -> u64 { + self.offsets.row_count().saturating_sub(1) + } + + #[inline] + pub fn elements(&self) -> &LayoutRef { + &self.elements + } + + #[inline] + pub fn offsets(&self) -> &LayoutRef { + &self.offsets + } + + #[inline] + pub fn validity(&self) -> Option<&LayoutRef> { + self.validity.as_ref() + } + + /// The integer type used for the `offsets` child layout. + #[inline] + pub fn offsets_ptype(&self) -> PType { + self.offsets.dtype().as_ptype() + } + + /// The dtype of the inner elements column. + pub fn elements_dtype(&self) -> &DType { + self.dtype + .as_list_element_opt() + .vortex_expect("ListLayout dtype must be a List") + } +} + +#[derive(prost::Message)] +pub struct ListLayoutMetadata { + #[prost(enumeration = "PType", tag = "1")] + offsets_ptype: i32, +} + +impl ListLayoutMetadata { + pub fn new(offsets_ptype: PType) -> Self { + let mut metadata = Self::default(); + metadata.set_offsets_ptype(offsets_ptype); + metadata + } +} diff --git a/vortex-layout/src/layouts/list/reader.rs b/vortex-layout/src/layouts/list/reader.rs new file mode 100644 index 00000000000..dcedafb5282 --- /dev/null +++ b/vortex-layout/src/layouts/list/reader.rs @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::collections::BTreeSet; +use std::ops::Range; +use std::sync::Arc; + +use futures::FutureExt; +use futures::try_join; +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::MaskFuture; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ConstantArray; +use vortex_array::arrays::ListArray; +use vortex_array::builtins::ArrayBuiltins; +use vortex_array::dtype::DType; +use vortex_array::dtype::FieldMask; +use vortex_array::dtype::Nullability; +use vortex_array::expr::Expression; +use vortex_array::expr::root; +use vortex_array::scalar_fn::fns::operators::Operator; +use vortex_array::validity::Validity; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_mask::Mask; +use vortex_session::VortexSession; + +use crate::ArrayFuture; +use crate::LayoutReader; +use crate::LayoutReaderRef; +use crate::SplitRange; +use crate::layouts::list::ListLayout; +use crate::segments::SegmentSource; + +/// Reader for [`ListLayout`]. +pub struct ListReader { + layout: ListLayout, + name: Arc, + session: VortexSession, + elements: LayoutReaderRef, + offsets: LayoutReaderRef, + validity: Option, +} + +impl ListReader { + pub(super) fn try_new( + layout: ListLayout, + name: Arc, + segment_source: Arc, + session: VortexSession, + ) -> VortexResult { + let elements = layout.elements().new_reader( + format!("{name}.elements").into(), + Arc::clone(&segment_source), + &session, + )?; + let offsets = layout.offsets().new_reader( + format!("{name}.offsets").into(), + Arc::clone(&segment_source), + &session, + )?; + let validity = layout + .validity() + .map(|v| { + v.new_reader( + format!("{name}.validity").into(), + Arc::clone(&segment_source), + &session, + ) + }) + .transpose()?; + + Ok(Self { + layout, + name, + session, + elements, + offsets, + validity, + }) + } + + fn fetch_offsets(&self, row_range: &Range) -> VortexResult { + // The offsets child has an extra entry, so reading `row_range` maps to offsets in + // `[row_range.start..row_range.end + 1)`. + let offsets_range = row_range.start..(row_range.end + 1); + let offsets_count = usize::try_from(offsets_range.end - offsets_range.start)?; + + self.offsets.projection_evaluation( + &offsets_range, + &root(), + MaskFuture::new_true(offsets_count), + ) + } +} + +/// Read `offsets[0]` and `offsets[-1]` and return the elements-buffer range they describe. +fn calculate_elements_range( + offsets: &ArrayRef, + session: &VortexSession, +) -> VortexResult> { + if offsets.is_empty() { + return Ok(0..0); + } + let mut exec_ctx = session.create_execution_ctx(); + let start = offsets + .execute_scalar(0, &mut exec_ctx)? + .as_primitive() + .as_::() + .vortex_expect("offset value fits in u64"); + let end = offsets + .execute_scalar(offsets.len() - 1, &mut exec_ctx)? + .as_primitive() + .as_::() + .vortex_expect("offset value fits in u64"); + Ok(start..end) +} + +/// Subtract `first` from every offset so the resulting offsets index into a sliced +/// `elements[first..]` buffer starting at zero. The constant array is cast to the offsets' dtype. +fn rebase_offsets(offsets: ArrayRef, first: u64) -> VortexResult { + let constant = ConstantArray::new(first, offsets.len()) + .into_array() + .cast(offsets.dtype().clone())?; + offsets.binary(constant, Operator::Sub) +} + +fn create_validity(validity_array: Option, nullability: Nullability) -> Validity { + match validity_array { + Some(arr) => Validity::Array(arr), + None => match nullability { + Nullability::Nullable => Validity::AllValid, + Nullability::NonNullable => Validity::NonNullable, + }, + } +} + +impl LayoutReader for ListReader { + fn name(&self) -> &Arc { + &self.name + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn dtype(&self) -> &DType { + self.layout.dtype() + } + + fn row_count(&self) -> u64 { + self.layout.row_count() + } + + fn register_splits( + &self, + field_mask: &[FieldMask], + split_range: &SplitRange, + splits: &mut BTreeSet, + ) -> VortexResult<()> { + self.offsets + .register_splits(field_mask, split_range, splits)?; + if let Some(validity) = &self.validity { + validity.register_splits(field_mask, split_range, splits)?; + } + Ok(()) + } + + fn pruning_evaluation( + &self, + _row_range: &Range, + _expr: &Expression, + mask: Mask, + ) -> VortexResult { + // All stats based pruning should already be done upstream + Ok(MaskFuture::ready(mask)) + } + + fn filter_evaluation( + &self, + _row_range: &Range, + _expr: &Expression, + _mask: MaskFuture, + ) -> VortexResult { + todo!() + } + + fn projection_evaluation( + &self, + row_range: &Range, + expr: &Expression, + mask: MaskFuture, + ) -> VortexResult { + let offsets_fut = self.fetch_offsets(row_range)?; + // Validity shares the list's row space, so the caller's mask is the right shape to + // push down. Children get to fold it into their reads however they like. + let validity_fut = self + .validity + .as_ref() + .map(|v| v.projection_evaluation(row_range, &root(), mask.clone())) + .transpose()?; + + let elements_reader = self.elements.clone(); + let session = self.session.clone(); + let nullability = self.layout.dtype().nullability(); + let expr = expr.clone(); + + Ok(async move { + // Fetch offsets and validity in parallel. Elements waits until we know + // exactly which slice of the elements buffer it actually needs. + let (offsets, validity_array) = try_join!(offsets_fut, async move { + match validity_fut { + Some(fut) => fut.await.map(Some), + None => Ok(None), + } + },)?; + + // Bound the elements read using offsets[0] and offsets[-1] + let elements_range = calculate_elements_range(&offsets, &session)?; + + // Rebase the offsets so they start at zero + let rebased_offsets = rebase_offsets(offsets, elements_range.start)?; + + // Fetch only the elements we actually need. + let elements_len = elements_range.end - elements_range.start; + let elements = elements_reader + .projection_evaluation( + &elements_range, + &root(), + MaskFuture::new_true(usize::try_from(elements_len)?), + )? + .await?; + + // Create ListArray + let validity = create_validity(validity_array, nullability); + let array = ListArray::try_new(elements, rebased_offsets, validity)?.into_array(); + + // Apply mask and expression + let mask = mask.await?; + let array = if !mask.all_true() { + array.filter(mask)? + } else { + array + }; + + array.apply(&expr) + } + .boxed()) + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use rstest::rstest; + use vortex_array::ArrayContext; + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ListArray; + use vortex_array::arrays::PrimitiveArray; + use vortex_array::assert_arrays_eq; + use vortex_array::dtype::Nullability::NonNullable; + use vortex_buffer::buffer; + + use super::*; + use crate::LayoutRef; + use crate::LayoutStrategy; + use crate::layouts::flat::writer::FlatLayoutStrategy; + use crate::layouts::list::writer::ListLayoutStrategy; + use crate::segments::SegmentSource; + use crate::segments::TestSegments; + use crate::sequence::SequenceId; + use crate::sequence::SequentialArrayStreamExt; + use crate::test::SESSION; + + fn flat_list_strategy() -> ListLayoutStrategy { + let flat: Arc = Arc::new(FlatLayoutStrategy::default()); + ListLayoutStrategy::new(Arc::clone(&flat), Arc::clone(&flat), Arc::clone(&flat)) + } + + async fn write_layout( + strategy: &S, + array: ArrayRef, + ) -> VortexResult<(Arc, LayoutRef)> { + let segments = Arc::new(TestSegments::default()); + let segments_ref: Arc = Arc::::clone(&segments); + let (ptr, eof) = SequenceId::root().split(); + let stream = array.to_array_stream().sequenced(ptr); + let layout = strategy + .write_stream(ArrayContext::empty(), segments, stream, eof, &SESSION) + .await?; + Ok((segments_ref, layout)) + } + + fn materialize_u32_array(array: ArrayRef) -> Vec { + let mut ctx = SESSION.create_execution_ctx(); + array + .execute::(&mut ctx) + .unwrap() + .as_slice::() + .to_vec() + } + + #[rstest] + #[case::full(buffer![0u32, 2, 5, 5].into_array(), 0..5)] + #[case::partial_slice(buffer![2u32, 5, 5, 8].into_array(), 2..8)] + #[case::single_offset_is_empty(buffer![7u32].into_array(), 7..7)] + #[case::u64_offsets(buffer![10u64, 12, 15, 15].into_array(), 10..15)] + fn test_calculate_elements_range( + #[case] offsets: ArrayRef, + #[case] expected: Range, + ) -> VortexResult<()> { + assert_eq!(calculate_elements_range(&offsets, &SESSION)?, expected); + Ok(()) + } + + #[test] + fn calculate_elements_range_empty_offsets() -> VortexResult<()> { + let offsets = PrimitiveArray::empty::(NonNullable).into_array(); + assert_eq!(calculate_elements_range(&offsets, &SESSION)?, 0..0); + Ok(()) + } + + #[rstest] + #[case::first_zero_is_identity(buffer![0u32, 2, 5, 5].into_array(), 0, vec![0, 2, 5, 5])] + #[case::subtracts_first(buffer![3u32, 5, 8].into_array(), 3, vec![0, 2, 5])] + fn test_rebase_offsets( + #[case] offsets: ArrayRef, + #[case] first: u64, + #[case] expected: Vec, + ) -> VortexResult<()> { + let rebased = rebase_offsets(offsets, first)?; + assert_eq!(materialize_u32_array(rebased), expected); + Ok(()) + } + + fn create_basic_list_array(nullable: bool) -> ArrayRef { + let validity = if nullable { + Validity::Array(BoolArray::from_iter([true, false, true]).into_array()) + } else { + Validity::NonNullable + }; + + ListArray::try_new( + buffer![1i32, 2, 3, 4, 5].into_array(), + buffer![0u32, 2, 4, 5].into_array(), + validity, + ) + .expect("array is valid") + .into_array() + } + + #[tokio::test] + async fn fetch_offsets_includes_extra_endpoint() -> VortexResult<()> { + let list = create_basic_list_array(false); + + let (segments, layout) = write_layout(&flat_list_strategy(), list).await?; + let reader = layout.new_reader("".into(), segments, &SESSION)?; + let reader = reader + .as_any() + .downcast_ref::() + .expect("ListReader"); + + let offsets = reader.fetch_offsets(&(1..3))?.await?; + assert_eq!(materialize_u32_array(offsets), vec![2, 4, 5]); + + Ok(()) + } + + #[rstest] + #[case::full_range(0..3, false)] + #[case::partial_start(0..2, false)] + #[case::partial_end(1..3, false)] + #[case::middle_single(1..2, false)] + #[case::empty_range(1..1, false)] + #[case::full_range_null(0..3, true)] + #[tokio::test] + async fn projection_evaluation_round_trips( + #[case] row_range: Range, + #[case] nullable: bool, + ) -> VortexResult<()> { + let list = create_basic_list_array(nullable); + + let len = usize::try_from(row_range.end - row_range.start)?; + let (segments, layout) = write_layout(&flat_list_strategy(), list.clone()).await?; + let reader = layout.new_reader("".into(), segments, &SESSION)?; + + let result = reader + .projection_evaluation(&row_range, &root(), MaskFuture::new_true(len))? + .await?; + + let expected = + list.slice(usize::try_from(row_range.start)?..usize::try_from(row_range.end)?)?; + assert_arrays_eq!(result, expected); + Ok(()) + } + + #[tokio::test] + async fn projection_evaluation_applies_mask() -> VortexResult<()> { + let list = create_basic_list_array(false); + let (segments, layout) = write_layout(&flat_list_strategy(), list.clone()).await?; + let reader = layout.new_reader("".into(), segments, &SESSION)?; + + let mask = Mask::from_iter([true, false, true]); + let result = reader + .projection_evaluation(&(0..3), &root(), MaskFuture::ready(mask.clone()))? + .await?; + + let expected = list.filter(mask)?; + assert_arrays_eq!(result, expected); + Ok(()) + } +} diff --git a/vortex-layout/src/layouts/list/writer.rs b/vortex-layout/src/layouts/list/writer.rs new file mode 100644 index 00000000000..47dd9b46ccd --- /dev/null +++ b/vortex-layout/src/layouts/list/writer.rs @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::sync::Arc; + +use async_trait::async_trait; +use futures::StreamExt; +use futures::stream; +use vortex_array::ArrayContext; +use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::ListViewArray; +use vortex_array::arrays::list::ListDataParts; +use vortex_array::arrays::listview::list_from_list_view; +use vortex_array::dtype::DType; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_io::session::RuntimeSessionExt; +use vortex_session::VortexSession; + +use crate::IntoLayout; +use crate::LayoutRef; +use crate::LayoutStrategy; +use crate::layouts::list::ListLayout; +use crate::segments::SegmentSinkRef; +use crate::sequence::SendableSequentialStream; +use crate::sequence::SequenceId; +use crate::sequence::SequencePointer; +use crate::sequence::SequentialStream; +use crate::sequence::SequentialStreamAdapter; +use crate::sequence::SequentialStreamExt; + +/// Strategy for writing list-typed arrays. +/// +/// Single-chunk only. The strategy: +/// 1. Canonicalizes the input chunk into a [`ListViewArray`]. +/// 2. Calls [`list_from_list_view`] to rebuild it into zero-copy-to-list form +/// (sorted, gapless, non-overlapping offsets) and produce a [`ListArray`]. +/// 3. Writes the `elements`, `offsets`, and (when nullable) `validity` columns into +/// separately configurable downstream strategies, producing a single [`ListLayout`]. +/// +/// # Chunking +/// +/// `ListLayoutStrategy` bails on empty or multi-chunk input, matching the convention used by +/// [`FlatLayoutStrategy`](crate::layouts::flat::writer::FlatLayoutStrategy). +/// +/// [`ListArray`]: vortex_array::arrays::ListArray +pub struct ListLayoutStrategy { + elements: Arc, + offsets: Arc, + validity: Arc, +} + +impl ListLayoutStrategy { + pub fn new( + elements: Arc, + offsets: Arc, + validity: Arc, + ) -> Self { + Self { + elements, + offsets, + validity, + } + } +} + +#[async_trait] +impl LayoutStrategy for ListLayoutStrategy { + async fn write_stream( + &self, + ctx: ArrayContext, + segment_sink: SegmentSinkRef, + mut stream: SendableSequentialStream, + mut eof: SequencePointer, + session: &VortexSession, + ) -> VortexResult { + let dtype = stream.dtype().clone(); + if !dtype.is_list() { + vortex_bail!("ListLayoutStrategy requires a List dtype, got {dtype}"); + } + + // Writer wants exactly one chunk + let Some(chunk) = stream.next().await else { + vortex_bail!("ListLayoutStrategy needs a single chunk"); + }; + let (sequence_id, array) = chunk?; + + // Canonicalize to ListView, then rebuild into zctl + let mut exec_ctx = session.create_execution_ctx(); + let ListDataParts { + elements, + offsets, + validity, + .. + } = list_from_list_view(array.execute::(&mut exec_ctx)?)?.into_data_parts(); + + // There is one extra element in `offsets` + let row_count = offsets.len().saturating_sub(1); + let validity_array = if dtype.is_nullable() { + Some( + validity + .execute_mask(row_count, &mut exec_ctx)? + .into_array(), + ) + } else { + None + }; + + // Spawn each child write onto the runtime so they run concurrently + let handle = session.handle(); + let (elements_task, offsets_task, validity_task) = { + let mut sp = sequence_id.descend(); + let mut spawn_layout_writer = |strategy: Arc, array: ArrayRef| { + let stream = single_chunk_stream(array.dtype().clone(), sp.advance(), array); + let child_eof = eof.split_off(); + let ctx = ctx.clone(); + let segment_sink = segment_sink.clone(); + let session = session.clone(); + handle.spawn_nested(move |h| async move { + let session = session.with_handle(h); + strategy + .write_stream(ctx, segment_sink, stream, child_eof, &session) + .await + }) + }; + ( + spawn_layout_writer(self.elements.clone(), elements), + spawn_layout_writer(self.offsets.clone(), offsets), + validity_array.map(|arr| spawn_layout_writer(self.validity.clone(), arr)), + ) + }; + + // Should not have more than one chunk + if stream.next().await.is_some() { + vortex_bail!("ListLayoutStrategy received more than a single chunk"); + } + + let (elements_layout, offsets_layout, validity_layout) = + futures::try_join!(elements_task, offsets_task, async move { + match validity_task { + Some(t) => t.await.map(Some), + None => Ok(None), + } + },)?; + + Ok(ListLayout::new(dtype, elements_layout, offsets_layout, validity_layout).into_layout()) + } + + fn buffered_bytes(&self) -> u64 { + self.elements.buffered_bytes() + + self.offsets.buffered_bytes() + + self.validity.buffered_bytes() + } +} + +/// Wrap a single array as a one-shot [`SendableSequentialStream`] for handoff to a child writer. +fn single_chunk_stream( + dtype: DType, + sequence_id: SequenceId, + array: ArrayRef, +) -> SendableSequentialStream { + SequentialStreamAdapter::new( + dtype, + stream::once(async move { Ok((sequence_id, array)) }).boxed(), + ) + .sendable() +} + +#[cfg(test)] +mod tests { + use vortex_array::arrays::BoolArray; + use vortex_array::arrays::ChunkedArray; + use vortex_array::arrays::ListArray; + use vortex_array::arrays::StructArray; + use vortex_array::dtype::Nullability; + use vortex_array::dtype::PType; + use vortex_array::validity::Validity; + use vortex_buffer::buffer; + + use super::*; + use crate::layouts::chunked::writer::ChunkedLayoutStrategy; + use crate::layouts::flat::writer::FlatLayoutStrategy; + use crate::layouts::table::TableStrategy; + use crate::segments::TestSegments; + use crate::sequence::SequentialArrayStreamExt; + use crate::test::SESSION; + + fn flat_list_strategy() -> ListLayoutStrategy { + let flat: Arc = Arc::new(FlatLayoutStrategy::default()); + ListLayoutStrategy::new(Arc::clone(&flat), Arc::clone(&flat), Arc::clone(&flat)) + } + + async fn write(strategy: &S, array: ArrayRef) -> VortexResult { + let segments = Arc::new(TestSegments::default()); + let (ptr, eof) = SequenceId::root().split(); + let stream = array.to_array_stream().sequenced(ptr); + strategy + .write_stream(ArrayContext::empty(), segments, stream, eof, &SESSION) + .await + } + + fn i32_list_dtype(nullable: bool) -> DType { + DType::List( + Arc::new(DType::Primitive(PType::I32, Nullability::NonNullable)), + if nullable { + Nullability::Nullable + } else { + Nullability::NonNullable + }, + ) + } + + fn create_basic_list(validity: Validity) -> ArrayRef { + ListArray::try_new( + buffer![1i32, 2, 3, 4, 5].into_array(), + buffer![0u32, 2, 5, 5].into_array(), + validity, + ) + .unwrap() + .into_array() + } + + #[tokio::test] + async fn basic_non_nullable_input() -> VortexResult<()> { + let list = create_basic_list(Validity::NonNullable); + + let layout = write(&flat_list_strategy(), list).await?; + assert_eq!(layout.row_count(), 3); + + insta::assert_snapshot!(layout.display_tree(), @" + vortex.list, dtype: list(i32), children: 2 + ├── elements: vortex.flat, dtype: i32, segment: 0 + └── offsets: vortex.flat, dtype: u32, segment: 1 + "); + Ok(()) + } + + #[tokio::test] + async fn basic_nullable_input() -> VortexResult<()> { + let list = create_basic_list(Validity::Array( + BoolArray::from_iter([true, false, true]).into_array(), + )); + + let layout = write(&flat_list_strategy(), list).await?; + assert_eq!(layout.row_count(), 3); + + insta::assert_snapshot!(layout.display_tree(), @" + vortex.list, dtype: list(i32)?, children: 3 + ├── elements: vortex.flat, dtype: i32, segment: 0 + ├── offsets: vortex.flat, dtype: u32, segment: 1 + └── validity: vortex.flat, dtype: bool, segment: 2 + "); + Ok(()) + } + + #[tokio::test] + async fn empty_stream_errors() { + let segments = Arc::new(TestSegments::default()); + let (_, eof) = SequenceId::root().split(); + let empty = stream::empty::>().boxed(); + let stream = SequentialStreamAdapter::new(i32_list_dtype(false), empty).sendable(); + + let err = flat_list_strategy() + .write_stream(ArrayContext::empty(), segments, stream, eof, &SESSION) + .await + .unwrap_err(); + insta::assert_snapshot!(err.to_string(), @"Other error: ListLayoutStrategy needs a single chunk"); + } + + #[tokio::test] + async fn chunked_list_input_without_chunked_strategy_fails() -> VortexResult<()> { + let chunk0 = ListArray::try_new( + buffer![1i32, 2].into_array(), + buffer![0u32, 2].into_array(), + Validity::NonNullable, + ) + .unwrap() + .into_array(); + let chunk1 = ListArray::try_new( + buffer![3i32, 4, 5].into_array(), + buffer![0u32, 3].into_array(), + Validity::NonNullable, + ) + .unwrap() + .into_array(); + let chunked = + ChunkedArray::try_new(vec![chunk0, chunk1], i32_list_dtype(false))?.into_array(); + + let err = write(&flat_list_strategy(), chunked).await.unwrap_err(); + insta::assert_snapshot!(err.to_string(), @"Other error: ListLayoutStrategy received more than a single chunk"); + Ok(()) + } + + #[tokio::test] + async fn list_of_struct_tree() -> VortexResult<()> { + let struct_array = StructArray::from_fields( + [ + ("a", buffer![1i32, 2, 3, 4, 5].into_array()), + ("b", buffer![10i32, 20, 30, 40, 50].into_array()), + ] + .as_slice(), + )? + .into_array(); + let list = ListArray::try_new( + struct_array, + buffer![0u32, 2, 5, 5].into_array(), + Validity::NonNullable, + )? + .into_array(); + + let flat: Arc = Arc::new(FlatLayoutStrategy::default()); + let table_strategy: Arc = + Arc::new(TableStrategy::new(Arc::clone(&flat), Arc::clone(&flat))); + let writer = ListLayoutStrategy::new(table_strategy, Arc::clone(&flat), Arc::clone(&flat)); + + let layout = write(&writer, list).await?; + insta::assert_snapshot!(layout.display_tree(), @" + vortex.list, dtype: list({a=i32, b=i32}), children: 2 + ├── elements: vortex.struct, dtype: {a=i32, b=i32}, children: 2 + │ ├── a: vortex.flat, dtype: i32, segment: 1 + │ └── b: vortex.flat, dtype: i32, segment: 2 + └── offsets: vortex.flat, dtype: u32, segment: 0 + "); + Ok(()) + } + + #[tokio::test] + async fn list_of_list_tree() -> VortexResult<()> { + let inner_list = ListArray::try_new( + buffer![1i32, 2, 3, 4, 5, 6].into_array(), + buffer![0u32, 2, 5, 5, 6].into_array(), + Validity::NonNullable, + )? + .into_array(); + let list = ListArray::try_new( + inner_list, + buffer![0u32, 2, 4].into_array(), + Validity::NonNullable, + )? + .into_array(); + + let flat: Arc = Arc::new(FlatLayoutStrategy::default()); + let inner_strategy: Arc = Arc::new(ListLayoutStrategy::new( + Arc::clone(&flat), + Arc::clone(&flat), + Arc::clone(&flat), + )); + let writer = ListLayoutStrategy::new(inner_strategy, Arc::clone(&flat), Arc::clone(&flat)); + + let layout = write(&writer, list).await?; + insta::assert_snapshot!(layout.display_tree(), @" + vortex.list, dtype: list(list(i32)), children: 2 + ├── elements: vortex.list, dtype: list(i32), children: 2 + │ ├── elements: vortex.flat, dtype: i32, segment: 1 + │ └── offsets: vortex.flat, dtype: u32, segment: 2 + └── offsets: vortex.flat, dtype: u32, segment: 0 + "); + Ok(()) + } + + #[tokio::test] + async fn chunked_list_input_with_chunked_strategy_succeeds() -> VortexResult<()> { + let chunk0 = ListArray::try_new( + buffer![1i32, 2, 3].into_array(), + buffer![0u32, 2, 3].into_array(), + Validity::NonNullable, + ) + .unwrap() + .into_array(); + let chunk1 = ListArray::try_new( + buffer![4i32, 5, 6, 7].into_array(), + buffer![0u32, 1, 4].into_array(), + Validity::NonNullable, + ) + .unwrap() + .into_array(); + + let chunked = + ChunkedArray::try_new(vec![chunk0, chunk1], i32_list_dtype(false))?.into_array(); + + let layout = write(&ChunkedLayoutStrategy::new(flat_list_strategy()), chunked).await?; + + insta::assert_snapshot!(layout.display_tree(), @" + vortex.chunked, dtype: list(i32), children: 2 + ├── [0]: vortex.list, dtype: list(i32), children: 2 + │ ├── elements: vortex.flat, dtype: i32, segment: 0 + │ └── offsets: vortex.flat, dtype: u32, segment: 1 + └── [1]: vortex.list, dtype: list(i32), children: 2 + ├── elements: vortex.flat, dtype: i32, segment: 2 + └── offsets: vortex.flat, dtype: u32, segment: 3 + "); + Ok(()) + } +} diff --git a/vortex-layout/src/layouts/mod.rs b/vortex-layout/src/layouts/mod.rs index 18df5b8f347..47fa31aa3d9 100644 --- a/vortex-layout/src/layouts/mod.rs +++ b/vortex-layout/src/layouts/mod.rs @@ -16,6 +16,7 @@ pub mod dict; pub mod file_stats; pub mod flat; pub(crate) mod foreign; +pub mod list; pub(crate) mod partitioned; pub mod repartition; pub mod row_idx; diff --git a/vortex-layout/src/session.rs b/vortex-layout/src/session.rs index 370fe391ca0..de2c1d54377 100644 --- a/vortex-layout/src/session.rs +++ b/vortex-layout/src/session.rs @@ -12,6 +12,7 @@ use crate::LayoutEncodingRef; use crate::layouts::chunked::ChunkedLayoutEncoding; use crate::layouts::dict::DictLayoutEncoding; use crate::layouts::flat::FlatLayoutEncoding; +use crate::layouts::list::ListLayoutEncoding; use crate::layouts::struct_::StructLayoutEncoding; use crate::layouts::zoned::ZonedLayoutEncoding; @@ -52,6 +53,7 @@ impl Default for LayoutSession { layouts.register(StructLayoutEncoding.id(), StructLayoutEncoding.as_ref()); layouts.register(ZonedLayoutEncoding.id(), ZonedLayoutEncoding.as_ref()); layouts.register(DictLayoutEncoding.id(), DictLayoutEncoding.as_ref()); + layouts.register(ListLayoutEncoding.id(), ListLayoutEncoding.as_ref()); Self { registry: layouts } }