vortex-data
diff --git a/.github/runs-on.yml b/.github/runs-on.yml
Lines changed: 0 additions & 17 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 1 addition & 6 deletions
diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/fuzzer-fix-automation.yml b/.github/workflows/fuzzer-fix-automation.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/report-fuzz-crash.yml b/.github/workflows/report-fuzz-crash.yml
Lines changed: 3 additions & 3 deletions
diff --git a/bench-vortex/src/random_access/take.rs b/bench-vortex/src/random_access/take.rs
Lines changed: 12 additions & 5 deletions
diff --git a/vortex-array/src/arrays/list/compute/take.rs b/vortex-array/src/arrays/list/compute/take.rs
Lines changed: 49 additions & 45 deletions
@@ -9,20 +9,3 @@ images:
     arch: "arm64"
     name: "vortex-ci-*"
     owner: "375504701696"
-
-pools:
-  # Windows pool - covers UK (8am-6pm GMT) and US-East (8am-6pm EST)
-  # Combined in UK timezone: 8am-11pm (UK 8-6 + NYC 8-6 = UK 8-11pm)
-  # Stopped instances only (~20-30s boot vs 2-3min cold-start)
-  # Cost: ~$5/month (2 × $2.40 EBS storage)
-  windows-x64:
-    runner: family=m7i/cpu=8/image=windows22-full-x64/tag=rust-test-windows
-    timezone: "Europe/London"
-    schedule:
-      - name: working-hours
-        match:
-          day: ["monday", "tuesday", "wednesday", "thursday", "friday"]
-          time: ["08:00", "23:00"]
-        stopped: 2
-      - name: default
-        stopped: 1
@@ -489,12 +489,7 @@ jobs:
       matrix:
         include:
           - os: windows-x64
-            runner:
-              - runs-on=${{ github.run_id }}
-              - family=m7i
-              - cpu=8
-              - image=windows22-full-x64
-              - tag=rust-test-windows
+            runner: runs-on=${{ github.run_id }}/pool=windows-x64
           - os: linux-arm64
             runner:
               - runs-on=${{ github.run_id }}
 
@@ -56,7 +56,7 @@ jobs:
       - name: Run fuzzing target
         id: fuzz
         run: |
-          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=1 cargo +nightly fuzz run --release --debug-assertions file_io -- -max_total_time=7200 2>&1 | tee fuzz_output.log
+          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=1 cargo +nightly fuzz run --release --debug-assertions file_io -- -max_total_time=7200 -rss_limit_mb=0 2>&1 | tee fuzz_output.log
         continue-on-error: true
       - name: Check for crashes
         id: check
@@ -189,7 +189,7 @@ jobs:
       - name: Run fuzzing target
         id: fuzz
         run: |
-          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=1 cargo +nightly fuzz run --release --debug-assertions array_ops -- -max_total_time=7200 2>&1 | tee fuzz_output.log
+          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=1 cargo +nightly fuzz run --release --debug-assertions array_ops -- -max_total_time=7200 -rss_limit_mb=0 2>&1 | tee fuzz_output.log
         continue-on-error: true
       - name: Check for crashes
         id: check
 
@@ -187,7 +187,7 @@ jobs:
           echo "Attempting to reproduce crash with fuzzer (debug mode)..."
 
           # Run fuzzer with crash file (debug mode, no sanitizer, full backtrace)
-          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=full timeout 30s cargo +nightly fuzz run --dev --sanitizer=none "${{ steps.extract.outputs.target }}" "${{ steps.download.outputs.crash_file_path }}" -- -runs=1 2>&1 | tee crash_reproduction.log
+          RUSTFLAGS="--cfg vortex_nightly" RUST_BACKTRACE=full timeout 30s cargo +nightly fuzz run --dev --sanitizer=none "${{ steps.extract.outputs.target }}" "${{ steps.download.outputs.crash_file_path }}" -- -runs=1 -rss_limit_mb=0 2>&1 | tee crash_reproduction.log
 
           FUZZ_EXIT_CODE=${PIPESTATUS[0]}
 
@@ -216,7 +216,7 @@ jobs:
 
           I ran:
           \`\`\`bash
-          cargo +nightly fuzz run --sanitizer=none ${{ steps.extract.outputs.target }} ${{ steps.download.outputs.crash_file_path }} -- -runs=1
+          cargo +nightly fuzz run --sanitizer=none ${{ steps.extract.outputs.target }} ${{ steps.download.outputs.crash_file_path }} -- -runs=1 -rss_limit_mb=0
           \`\`\`
 
           The fuzzer exited with code 0 (success).
@@ -275,7 +275,7 @@ jobs:
             - This ensures your work is visible and reviewable even if you hit the turn limit
             - Keep fixes minimal - only fix the specific bug
             - Follow CLAUDE.md code style guidelines
-            - **Use `--dev` flag** for faster builds: `cargo +nightly fuzz run --dev --sanitizer=none`
+            - **Use `--dev` flag** for faster builds: `cargo +nightly fuzz run --dev --sanitizer=none <target> <crash_file> -- -rss_limit_mb=0`
 
             ## Fixability Guidelines
 
 
@@ -155,7 +155,7 @@ jobs:
             ### Reproduction
 
             ```bash
-            cargo +nightly fuzz run --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE
+            cargo +nightly fuzz run -D --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE -- -rss_limit_mb=0
             ```
 
             ---
@@ -219,12 +219,12 @@ jobs:
             2. Reproduce locally:
             ```bash
             # The artifact contains $FUZZ_TARGET/$CRASH_FILE
-            cargo +nightly fuzz run --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE
+            cargo +nightly fuzz run -D --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE -- -rss_limit_mb=0
             ```
 
             3. Get full backtrace:
             ```bash
-            RUST_BACKTRACE=full cargo +nightly fuzz run --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE
+            RUST_BACKTRACE=full cargo +nightly fuzz run -D --sanitizer=none $FUZZ_TARGET $FUZZ_TARGET/$CRASH_FILE -- -rss_limit_mb=0
             ```
 
             ---
 
@@ -25,9 +25,12 @@ use stream::StreamExt;
 use vortex::array::Array;
 use vortex::array::ArrayRef;
 use vortex::array::IntoArray;
+use vortex::array::VectorExecutor;
 use vortex::array::stream::ArrayStreamExt;
+use vortex::array::vectors::VectorIntoArray;
 use vortex::buffer::Buffer;
 use vortex::file::OpenOptionsSessionExt;
+use vortex::layout::layouts::USE_VORTEX_OPERATORS;
 use vortex::utils::aliases::hash_map::HashMap;
 
 use crate::SESSION;
@@ -43,18 +46,22 @@ pub async fn take_vortex_tokio(
 }
 
 async fn take_vortex(reader: impl AsRef<Path>, indices: Buffer<u64>) -> anyhow::Result<ArrayRef> {
-    Ok(SESSION
+    let array = SESSION
         .open_options()
         .open(reader.as_ref())
         .await?
         .scan()?
         .with_row_indices(indices)
         .into_array_stream()?
         .read_all()
-        .await?
-        // We canonicalize / decompress for equivalence to Arrow's `RecordBatch`es.
-        .to_canonical()
-        .into_array())
+        .await?;
+
+    // We canonicalize / decompress for equivalence to Arrow's `RecordBatch`es.
+    Ok(if *USE_VORTEX_OPERATORS {
+        array.execute_vector(&SESSION)?.into_array(array.dtype())
+    } else {
+        array.to_canonical().into_array()
+    })
 }
 
 pub async fn take_parquet(path: &Path, indices: Buffer<u64>) -> anyhow::Result<RecordBatch> {
 
@@ -1,14 +1,12 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: Copyright the Vortex contributors
 
-use vortex_buffer::BitBufferMut;
 use vortex_dtype::IntegerPType;
 use vortex_dtype::Nullability;
 use vortex_dtype::match_each_integer_ptype;
 use vortex_dtype::match_smallest_offset_type;
 use vortex_error::VortexExpect;
 use vortex_error::VortexResult;
-use vortex_mask::Mask;
 
 use crate::Array;
 use crate::ArrayRef;
@@ -22,7 +20,6 @@ use crate::compute::TakeKernel;
 use crate::compute::TakeKernelAdapter;
 use crate::compute::take;
 use crate::register_kernel;
-use crate::validity::Validity;
 use crate::vtable::ValidityHelper;
 
 // TODO(connor)[ListView]: Re-revert to the version where we simply convert to a `ListView` and call
@@ -37,21 +34,13 @@ impl TakeKernel for ListVTable {
     #[expect(clippy::cognitive_complexity)]
     fn take(&self, array: &ListArray, indices: &dyn Array) -> VortexResult<ArrayRef> {
         let indices = indices.to_primitive();
-        let offsets = array.offsets().to_primitive();
         // This is an over-approximation of the total number of elements in the resulting array.
         let total_approx = array.elements().len().saturating_mul(indices.len());
 
-        match_each_integer_ptype!(offsets.dtype().as_ptype(), |O| {
-            let offsets_slice = offsets.as_slice::<O>();
+        match_each_integer_ptype!(array.offsets().dtype().as_ptype(), |O| {
             match_each_integer_ptype!(indices.ptype(), |I| {
                 match_smallest_offset_type!(total_approx, |OutputOffsetType| {
-                    _take::<I, O, OutputOffsetType>(
-                        array,
-                        offsets_slice,
-                        &indices,
-                        array.validity_mask(),
-                        indices.validity_mask(),
-                    )
+                    _take::<I, O, OutputOffsetType>(array, &indices)
                 })
             })
         })
@@ -62,23 +51,19 @@ register_kernel!(TakeKernelAdapter(ListVTable).lift());
 
 fn _take<I: IntegerPType, O: IntegerPType, OutputOffsetType: IntegerPType>(
     array: &ListArray,
-    offsets: &[O],
     indices_array: &PrimitiveArray,
-    data_validity: Mask,
-    indices_validity_mask: Mask,
 ) -> VortexResult<ArrayRef> {
-    let indices: &[I] = indices_array.as_slice::<I>();
-
-    if !indices_validity_mask.all_true() || !data_validity.all_true() {
-        return _take_nullable::<I, O, OutputOffsetType>(
-            array,
-            offsets,
-            indices,
-            data_validity,
-            indices_validity_mask,
-        );
+    let data_validity = array.validity_mask();
+    let indices_validity = indices_array.validity_mask();
+
+    if !indices_validity.all_true() || !data_validity.all_true() {
+        return _take_nullable::<I, O, OutputOffsetType>(array, indices_array);
     }
 
+    let offsets_array = array.offsets().to_primitive();
+    let offsets: &[O] = offsets_array.as_slice();
+    let indices: &[I] = indices_array.as_slice();
+
     let mut new_offsets = PrimitiveBuilder::<OutputOffsetType>::with_capacity(
         Nullability::NonNullable,
         indices.len(),
@@ -120,21 +105,21 @@ fn _take<I: IntegerPType, O: IntegerPType, OutputOffsetType: IntegerPType>(
     Ok(ListArray::try_new(
         new_elements,
         new_offsets,
-        indices_array
-            .validity()
-            .clone()
-            .and(array.validity().clone()),
+        array.validity().clone().take(indices_array.as_ref())?,
     )?
     .to_array())
 }
 
 fn _take_nullable<I: IntegerPType, O: IntegerPType, OutputOffsetType: IntegerPType>(
     array: &ListArray,
-    offsets: &[O],
-    indices: &[I],
-    data_validity: Mask,
-    indices_validity: Mask,
+    indices_array: &PrimitiveArray,
 ) -> VortexResult<ArrayRef> {
+    let offsets_array = array.offsets().to_primitive();
+    let offsets: &[O] = offsets_array.as_slice();
+    let indices: &[I] = indices_array.as_slice();
+    let data_validity = array.validity_mask();
+    let indices_validity = indices_array.validity_mask();
+
     let mut new_offsets = PrimitiveBuilder::<OutputOffsetType>::with_capacity(
         Nullability::NonNullable,
         indices.len(),
@@ -153,28 +138,23 @@ fn _take_nullable<I: IntegerPType, O: IntegerPType, OutputOffsetType: IntegerPTy
     let mut current_offset = OutputOffsetType::zero();
     new_offsets.append_zero();
 
-    // Set all bits to invalid and selectively set which values are valid.
-    let mut new_validity = BitBufferMut::new_unset(indices.len());
-
     for (idx, data_idx) in indices.iter().enumerate() {
         if !indices_validity.value(idx) {
             new_offsets.append_value(current_offset);
-            // Bit buffer already has this set to invalid.
             continue;
         }
 
         let data_idx: usize = data_idx.as_();
 
         if !data_validity.value(data_idx) {
             new_offsets.append_value(current_offset);
-            // Bit buffer already has this set to invalid.
             continue;
         }
 
         let start = offsets[data_idx];
         let stop = offsets[data_idx + 1];
 
-        // See the note it the `take` on the reasoning
+        // See the note in `_take` on the reasoning.
         let additional: usize = (stop - start).as_();
 
         elements_to_take.reserve_exact(additional);
@@ -184,17 +164,18 @@ fn _take_nullable<I: IntegerPType, O: IntegerPType, OutputOffsetType: IntegerPTy
         current_offset +=
             OutputOffsetType::from_usize((stop - start).as_()).vortex_expect("offset conversion");
         new_offsets.append_value(current_offset);
-        new_validity.set(idx);
     }
 
     let elements_to_take = elements_to_take.finish();
     let new_offsets = new_offsets.finish();
     let new_elements = take(array.elements(), elements_to_take.as_ref())?;
 
-    let new_validity = Validity::from(new_validity.freeze());
-    // data are indexes are nullable, so the final result is also nullable.
-
-    Ok(ListArray::try_new(new_elements, new_offsets, new_validity)?.to_array())
+    Ok(ListArray::try_new(
+        new_elements,
+        new_offsets,
+        array.validity().clone().take(indices_array.as_ref())?,
+    )?
+    .to_array())
 }
 
 #[cfg(test)]
@@ -460,4 +441,27 @@ mod test {
         assert!(result_view.is_invalid(1));
         assert!(result_view.is_valid(2));
     }
+
+    /// Regression test for validity length mismatch bug.
+    ///
+    /// When source array has `Validity::Array(...)` and indices are non-nullable,
+    /// the result validity must have length equal to indices.len(), not source.len().
+    #[test]
+    fn test_take_validity_length_mismatch_regression() {
+        // Source array with explicit validity array (length 2).
+        let list = ListArray::try_new(
+            buffer![1i32, 2, 3, 4].into_array(),
+            buffer![0, 2, 4].into_array(),
+            Validity::Array(BoolArray::from_iter(vec![true, true]).to_array()),
+        )
+        .unwrap()
+        .to_array();
+
+        // Take more indices than source length (4 vs 2) with non-nullable indices.
+        let idx = buffer![0u32, 1, 0, 1].into_array();
+
+        // This should not panic - result should have length 4.
+        let result = take(&list, &idx).unwrap();
+        assert_eq!(result.len(), 4);
+    }
 }