Skip to content

Commit 66cde77

Browse files
committed
merge
Signed-off-by: Nicholas Gates <[email protected]>
1 parent c9e7553 commit 66cde77

File tree

3 files changed

+79
-127
lines changed

3 files changed

+79
-127
lines changed

encodings/alp/src/alp/operator.rs

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33

44
use crate::{match_each_alp_float_ptype, ALPArray, ALPFloat, ALPVTable, Exponents};
55
use std::marker::PhantomData;
6-
use vortex_array::pipeline::{
7-
BindContext, KernelContext, PipelineTransform, TransformKernel, VectorId,
8-
};
6+
use vortex_array::pipeline::{BindContext, PipelineTransform, TransformKernel};
97
use vortex_array::vtable::{OperatorVTable, PipelineNode};
108
use vortex_buffer::Buffer;
119
use vortex_dtype::{match_each_integer_ptype, NativePType, PTypeDowncastExt};
1210
use vortex_error::VortexResult;
1311
use vortex_vector::primitive::PVector;
14-
use vortex_vector::VectorMut;
12+
use vortex_vector::{Vector, VectorMut};
1513

1614
impl OperatorVTable<ALPVTable> for ALPVTable {
1715
fn pipeline_node(array: &ALPArray) -> Option<PipelineNode<'_>> {
@@ -20,22 +18,17 @@ impl OperatorVTable<ALPVTable> for ALPVTable {
2018
}
2119

2220
impl PipelineTransform for ALPArray {
23-
fn is_pipelined_child(&self, child_idx: usize) -> bool {
24-
match child_idx {
25-
0 => true, // encoded array
26-
_ => false, // patch indices + patch values
27-
}
21+
fn pipelined_child(&self) -> usize {
22+
0 // The encoded vector is the first child
2823
}
2924

3025
fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn TransformKernel>> {
31-
let encoded_vector_id = ctx.pipelined_input(0);
3226
let exponents = self.exponents();
3327

3428
match self.patches() {
3529
None => {
3630
match_each_alp_float_ptype!(self.ptype(), |A| {
3731
Ok(Box::new(ALPKernel::<A> {
38-
encoded_vector_id,
3932
exponents,
4033
_phantom: PhantomData,
4134
}))
@@ -50,7 +43,6 @@ impl PipelineTransform for ALPArray {
5043
let patch_indices: Buffer<P> = P::downcast(patch_idxs).into_buffer();
5144
let patch_values: PVector<A> = A::downcast(patch_vals);
5245
Ok(Box::new(PatchedALPKernel {
53-
encoded_vector_id,
5446
exponents,
5547
patch_indices,
5648
patch_values,
@@ -63,20 +55,14 @@ impl PipelineTransform for ALPArray {
6355
}
6456

6557
struct ALPKernel<A: ALPFloat> {
66-
// The encoded vector that returns `A::ALPInt` values
67-
encoded_vector_id: VectorId,
6858
// The ALP exponents
6959
exponents: Exponents,
7060
_phantom: PhantomData<A>,
7161
}
7262

7363
impl<A: ALPFloat> TransformKernel for ALPKernel<A> {
74-
fn step(&mut self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()> {
75-
let encoded = ctx
76-
.vector(self.encoded_vector_id)
77-
.into_primitive()
78-
.downcast::<A::ALPInt>()
79-
.into_buffer();
64+
fn step(&mut self, input: &Vector, out: &mut VectorMut) -> VortexResult<()> {
65+
let encoded = input.into_primitive().downcast::<A::ALPInt>().into_buffer();
8066

8167
let mut decoded = A::downcast(out.into_primitive());
8268
decoded.extend(
@@ -89,8 +75,6 @@ impl<A: ALPFloat> TransformKernel for ALPKernel<A> {
8975
}
9076

9177
struct PatchedALPKernel<A: ALPFloat, P: NativePType> {
92-
// The encoded vector that returns `A::ALPInt` values
93-
encoded_vector_id: VectorId,
9478
// The ALP exponents
9579
exponents: Exponents,
9680
// The patch indices and values
@@ -99,12 +83,8 @@ struct PatchedALPKernel<A: ALPFloat, P: NativePType> {
9983
}
10084

10185
impl<A: ALPFloat, P: NativePType> TransformKernel for PatchedALPKernel<A, P> {
102-
fn step(&mut self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()> {
103-
let encoded = ctx
104-
.vector(self.encoded_vector_id)
105-
.into_primitive()
106-
.downcast::<A::ALPInt>()
107-
.into_buffer();
86+
fn step(&mut self, input: &Vector, out: &mut VectorMut) -> VortexResult<()> {
87+
let encoded = input.into_primitive().downcast::<A::ALPInt>().into_buffer();
10888

10989
let mut decoded = out.into_primitive().downcast::<A>();
11090
decoded.extend(

vortex-array/src/pipeline/mod.rs

Lines changed: 66 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ pub mod source_driver;
66

77
use std::ops::Deref;
88

9-
use bit_view::BitView;
109
use vortex_error::VortexResult;
1110
use vortex_vector::{Vector, VectorMut, VectorMutOps};
1211

@@ -21,10 +20,21 @@ pub const N_BYTES: usize = N / 8;
2120
/// Number of usize words needed to store N bits
2221
pub const N_WORDS: usize = N / usize::BITS as usize;
2322

23+
/// A pipeline source node has zero pipelined inputs and produces data to feed into a pipeline.
24+
///
25+
/// All children of the array are considered to be batch inputs and will be fully computed before
26+
/// pipelined execution begins.
27+
pub trait PipelineSource: Deref<Target = dyn Array> {
28+
/// Bind the operator into a [`SourceKernel`] for pipelined execution.
29+
///
30+
/// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
31+
/// batch IDs for batch children. Each child can only be bound once.
32+
fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>>;
33+
}
34+
2435
/// Indicates that an array supports acting as a transformation node in a pipelined execution.
2536
///
26-
/// That is, it has one or more child arrays for which each input element produces a single output
27-
/// element. See [`PipelineSource`] for nodes that have zero pipelined children.
37+
/// Transform nodes have exactly one pipelined input, with zero or more batch inputs.
2838
pub trait PipelineTransform: Deref<Target = dyn Array> {
2939
// Whether this operator works by mutating its first child in-place.
3040
//
@@ -35,12 +45,8 @@ pub trait PipelineTransform: Deref<Target = dyn Array> {
3545
// false
3646
// }
3747

38-
/// Returns whether the nth child of this array should be passed to the kernel as a pipelined
39-
/// input vector, 1024 elements at a time.
40-
///
41-
/// Any child that reports `false` will be treated as a batch input, and the full vector will be
42-
/// computed before pipelined execution begins.
43-
fn is_pipelined_child(&self, child_idx: usize) -> bool;
48+
/// Returns the index of the array child that should be passed as a pipelined input
49+
fn pipelined_child(&self) -> usize;
4450

4551
/// Bind the operator into a [`TransformKernel`] for pipelined execution.
4652
///
@@ -49,125 +55,89 @@ pub trait PipelineTransform: Deref<Target = dyn Array> {
4955
fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn TransformKernel>>;
5056
}
5157

52-
/// Indicates that an array supports acting as a source node in a pipelined execution.
53-
pub trait PipelineSource: Deref<Target = dyn Array> {
54-
/// Bind the operator into a [`SourceKernel`] for pipelined execution.
58+
/// Indicates that an array supports acting as a transformation node in a pipelined execution
59+
/// with multiple pipelined inputs and zero or more batch inputs.
60+
pub trait PipelineZipTransform: Deref<Target = dyn Array> {
61+
/// Returns whether the given array child should be passed as a pipelined input
62+
fn is_pipelined_child(&self, child_idx: usize) -> bool;
63+
64+
/// Bind the operator into a [`TransformKernel`] for pipelined execution.
5565
///
5666
/// The provided [`BindContext`] can be used to obtain vector IDs for pipelined children and
5767
/// batch IDs for batch children. Each child can only be bound once.
58-
fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn SourceKernel>>;
68+
fn bind(&self, ctx: &mut dyn BindContext) -> VortexResult<Box<dyn ZipTransformKernel>>;
5969
}
6070

6171
/// The context used when binding an operator for execution.
6272
pub trait BindContext {
63-
/// Returns a [`VectorId`] that can be passed to the [`KernelContext`] within the body of
64-
/// the kernel to access the given child as a pipelined input vector.
65-
///
66-
/// # Panics
67-
///
68-
/// If the child index requested here was not marked as a pipelined child in
69-
/// [`PipelineTransform::is_pipelined_child`].
70-
fn pipelined_input(&self, child_idx: usize) -> VectorId;
71-
7273
/// Returns the batch input vector for the given child.
7374
///
7475
/// # Panics
7576
///
7677
/// If the child index requested here was marked as a pipelined child in
7778
/// [`PipelineTransform::pipelined_child`].
78-
fn batch_input(&self, child_idx: usize) -> BatchId;
79+
fn batch_input(&self, child_idx: usize) -> Vector;
7980
}
8081

81-
/// The ID of the vector to use.
82-
// TODO(ngates): make these opaque
83-
pub type VectorId = usize;
84-
pub type BatchId = usize;
85-
86-
/// A kernel implements the physical compute required for pipelined execution. It is driven in a
87-
/// push-based way, typically as part of a larger pipeline of kernels.
88-
///
89-
/// By passing multiple vector computations through the same operator, we can amortize
90-
/// the setup costs (such as DType validation, stats short-circuiting, etc.), and to make better
91-
/// use of CPU caches by performing all operations while the data is hot.
82+
/// A source kernel produces data to feed into pipelined execution.
9283
///
93-
/// The [`SourceKernel::step`] method will be invoked repeatedly to process chunks of data, [`N`]
94-
/// elements at a time. Each invocation is passed a selection mask indicating which elements of the
95-
/// chunk should be written to the start of the output vector.
84+
/// The kernel is provided with a mutable output vector that is guaranteed to have capacity for at
85+
/// least `2 * N` elements. Each invocation of the kernel is expected to append between `N` and
86+
/// `2 * N` elements to the output vector, except when the end of the data is reached.
9687
///
97-
/// The mutable output vector is **guaranteed** to have a capacity of at least [`N`] elements. The
98-
/// caller makes no guarantee about the initial length of the output vector; and the kernel is
99-
/// expected to append `selection.true_count()` elements.
88+
/// Vectors of `N` elements will be propagated throughout the pipeline, and any remaining elements
89+
/// will be passed back to the kernel on the next iteration and appear at the start of the output
90+
/// vector.
10091
///
101-
/// The pipeline may invoke the `SourceKernel::skip` method to skip over some number of chunks of data.
102-
/// The kernel should mutate any internal state as necessary to account for the skipped data.
92+
/// This kerfuffle allows kernels that are optimized for 1024-element chunks to operate efficiently,
93+
/// while avoiding passing very sparsely selected vectors throughout the pipeline.
10394
pub trait SourceKernel: Send {
104-
/// Skip over the given number of chunks of data.
105-
///
106-
/// For example, if `n` is 3, then the kernel should skip over `3 * N` elements of input data.
107-
fn skip(&mut self, n: usize);
108-
109-
/// Attempts to perform a single step of the operator, appending data to the output vector.
110-
///
111-
/// The provided selection mask indicates which elements of the current chunk should be
112-
/// appended to the output vector.
113-
///
114-
/// The provided output vector is guaranteed to have at least `N` elements of capacity.
115-
fn step(
116-
&mut self,
117-
ctx: &KernelContext,
118-
selection: &BitView,
119-
out: &mut VectorMut,
120-
) -> VortexResult<()>;
95+
/// Perform a single step of the kernel.
96+
fn step(&mut self, out: &mut VectorMut) -> VortexResult<()>;
12197
}
12298

99+
/// A transform kernel processes one or more input vectors and produces an output vector.
100+
///
101+
/// Besides the final chunk of data, each invocation of the kernel will be passed vectors of
102+
/// exactly `N` elements. The kernel **must** append exactly the same number of elements to its
103+
/// output vector.
104+
///
105+
/// The output vector is guaranteed to have at least `N` elements of capacity.
123106
pub trait TransformKernel: Send {
124-
/// Attempts to perform a single step of the operator, appending data to the output vector.
125-
///
126-
/// The input vectors can be accessed via the provided `KernelContext`.
127-
///
128-
/// The provided output vector is guaranteed to have at least `N` elements of capacity.
129-
fn step(&mut self, ctx: &KernelContext, out: &mut VectorMut) -> VortexResult<()>;
107+
/// Perform a single step of the kernel.
108+
fn step(&mut self, input: &Vector, out: &mut VectorMut) -> VortexResult<()>;
130109
}
131110

132-
/// Context passed to kernels during execution, providing access to vectors.
133-
pub struct KernelContext {
134-
/// The allocated vectors for intermediate results.
135-
pub(crate) vectors: Vec<Vector>,
136-
/// The batch input vectors.
137-
pub(crate) batch_inputs: Vec<Vector>,
111+
/// A transform kernel that takes multiple input vectors and produces an output vector.
112+
///
113+
/// The pipeline driver will ensure that each invocation of the kernel is passed vectors of equal
114+
/// length.
115+
///
116+
/// The output vector is guaranteed to have at least `N` elements of capacity.
117+
pub trait ZipTransformKernel: Send {
118+
/// Perform a single step of the kernel.
119+
fn step(&mut self, inputs: &[Vector], out: &mut VectorMut) -> VortexResult<()>;
138120
}
139121

140-
impl KernelContext {
141-
pub fn empty() -> Self {
142-
Self {
143-
vectors: Vec::new(),
144-
}
145-
}
146-
147-
/// Get a vector by its ID.
148-
pub fn vector(&self, vector_id: VectorId) -> &Vector {
149-
&self.vectors[vector_id]
150-
}
122+
/// A sink kernel consumes input vectors without producing output.
123+
pub trait SinkKernel: Send {
124+
/// Perform a single step of the kernel.
125+
fn step(&mut self, input: &Vector) -> VortexResult<()>;
151126

152-
/// Get a batch input by its ID.
153-
pub fn batch_input(&self, batch_id: BatchId) -> &Vector {
154-
&self.batch_inputs[batch_id]
155-
}
127+
/// Finalize the sink after all input has been processed.
128+
fn finalize(&mut self) -> VortexResult<Vector>;
156129
}
157130

158131
/// A general implementation of a source kernel that produces all null values.
159-
pub struct AllNullSourceKernel;
132+
pub struct AllNullSourceKernel {
133+
remaining: usize,
134+
}
160135

161136
impl SourceKernel for AllNullSourceKernel {
162-
fn skip(&mut self, _n: usize) {}
163-
164-
fn step(
165-
&mut self,
166-
_ctx: &KernelContext,
167-
selection: &BitView,
168-
out: &mut VectorMut,
169-
) -> VortexResult<()> {
170-
out.append_nulls(selection.true_count());
137+
fn step(&mut self, out: &mut VectorMut) -> VortexResult<()> {
138+
let to_produce = self.remaining.min(N);
139+
self.remaining -= to_produce;
140+
out.append_nulls(to_produce);
171141
Ok(())
172142
}
173143
}

vortex-array/src/vtable/operator.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex_error::{VortexResult, vortex_bail};
4+
use vortex_error::{vortex_bail, VortexResult};
55
use vortex_mask::Mask;
66
use vortex_vector::Vector;
77

8-
use crate::ArrayRef;
98
use crate::array::IntoArray;
109
use crate::execution::{BatchKernelRef, BindCtx, ExecutionCtx};
11-
use crate::pipeline::{PipelineSource, PipelineTransform};
10+
use crate::pipeline::{PipelineSource, PipelineTransform, PipelineZipTransform};
1211
use crate::vtable::{NotSupported, VTable};
12+
use crate::ArrayRef;
1313

1414
/// A vtable for the new operator-based array functionality. Eventually this vtable will be
1515
/// merged into the main `VTable`, but for now it is kept separate to allow for incremental
@@ -110,6 +110,8 @@ pub enum PipelineNode<'a> {
110110
Source(&'a dyn PipelineSource),
111111
/// This node is a transformation node in a pipeline.
112112
Transform(&'a dyn PipelineTransform),
113+
/// This node is a zip transformation node in a pipeline.
114+
ZipTransform(&'a dyn PipelineZipTransform),
113115
}
114116

115117
impl<V: VTable> OperatorVTable<V> for NotSupported {

0 commit comments

Comments
 (0)