wip

danking · danking · commit ca17b759df3a · 2025-01-30T14:41:43.000-05:00
diff --git a/bench-vortex/benches/clickbench.rs b/bench-vortex/benches/clickbench.rs
@@ -0,0 +1,70 @@
+#![feature(exit_status_error)]
+
+use std::path::PathBuf;
+use std::process::Command;
+
+use bench_vortex::clickbench::{clickbench_queries, HITS_SCHEMA};
+use bench_vortex::{clickbench, execute_query, get_session_with_cache, idempotent, IdempotentPath};
+use criterion::{criterion_group, criterion_main, Criterion};
+use tokio::runtime::Builder;
+
+/// Criterion benchmark that runs every ClickBench query against the `hits` dataset.
+///
+/// Steps, in order:
+/// 1. Build a multi-threaded Tokio runtime used for both setup and the timed iterations.
+/// 2. For each of the 100 `hits_{idx}.parquet` partitions, shell out to the `duckdb` CLI to
+///    read the remote partition and rewrite its time/date columns (wrapped in `idempotent`).
+/// 3. Call `clickbench::register_vortex_files` with the name `"hits"` and `HITS_SCHEMA`.
+/// 4. Add one benchmark function per query, named `q-NN`, to the `clickbench` group.
+///
+/// # Panics
+///
+/// Every fallible step is `unwrap`ped, so a failure to build the runtime, run `duckdb`,
+/// register the files, or execute a query aborts the benchmark with a panic.
+fn benchmark(c: &mut Criterion) {
+    let runtime = Builder::new_multi_thread().enable_all().build().unwrap();
+    // NOTE(review): `to_data_path` is a project helper; presumably it resolves "clickbench"
+    // to a directory under the benchmark data root — confirm against its definition.
+    let basepath = "clickbench".to_data_path();
+
+    // The clickbench-provided file is missing some higher-level type info, so we reprocess it
+    // to add that info, see https://github.com/ClickHouse/ClickBench/issues/7.
+    for idx in 0..100 {
+        let output_path = basepath.join(format!("hits_{idx}.parquet"));
+        // The closure parameter shadows the outer `output_path`; the SQL below writes to the
+        // path that `idempotent` hands to the closure.
+        // NOTE(review): presumably `idempotent` skips the closure when the output already
+        // exists — confirm against its definition.
+        idempotent(&output_path, |output_path| {
+            eprintln!("Fixing parquet file {idx}");
+            // DuckDB is given its home directory explicitly via `SET home_directory`; a fixed
+            // fallback path is used when `HOME` cannot be read from the environment.
+            let home = std::env::var("HOME").unwrap_or_else(|_| "/home/ci-runner".to_string());
+            // The SQL replaces the three `*EventTime` columns with `epoch_ms(col * 1000)` and
+            // `EventDate` with 1970-01-01 plus that many days, then copies the result to a
+            // local parquet file.
+            let command = format!(
+                "
+                SET home_directory='{home}';
+                INSTALL HTTPFS;
+                COPY (SELECT * REPLACE
+                    (epoch_ms(EventTime * 1000) AS EventTime, \
+                    epoch_ms(ClientEventTime * 1000) AS ClientEventTime, \
+                    epoch_ms(LocalEventTime * 1000) AS LocalEventTime, \
+                        DATE '1970-01-01' + INTERVAL (EventDate) DAYS AS EventDate) \
+                FROM read_parquet('https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{idx}.parquet', binary_as_string=True)) TO '{}' (FORMAT 'parquet');",
+                output_path.to_str().unwrap()
+            );
+            // `exit_ok` (unstable; enabled by `#![feature(exit_status_error)]` at the top of
+            // this file) turns a non-zero exit status into an error so `?` propagates it.
+            Command::new("duckdb")
+                .arg("-c")
+                .arg(command)
+                .status()?
+                .exit_ok()?;
+
+            anyhow::Ok(PathBuf::from(output_path))
+        })
+        .unwrap();
+    }
+
+    // `session_context` is kept for the benchmark loop; the clone is moved into the async
+    // block below.
+    let session_context = get_session_with_cache();
+    let context = session_context.clone();
+
+    // Registration is async, so it is driven to completion on the runtime before any
+    // benchmark function is added.
+    runtime.block_on(async move {
+        clickbench::register_vortex_files(context, "hits", basepath.as_path(), &HITS_SCHEMA)
+            .await
+            .unwrap();
+    });
+
+    let mut group = c.benchmark_group("clickbench");
+
+    // One benchmark function per query, labelled with the zero-padded query index.
+    for (idx, query) in clickbench_queries().into_iter() {
+        let context = session_context.clone();
+        group.bench_function(format!("q-{:02}", idx), |b| {
+            // `to_async` runs each timed iteration as a future on the shared runtime.
+            b.to_async(&runtime)
+                .iter(|| async { execute_query(&context, &query).await.unwrap() });
+        });
+    }
+}
+
+// Declare the `benches` group with `benchmark` as its only target, using a Criterion
+// configuration with a sample size of 10, and generate the benchmark binary's `main`.
+criterion_group!(
+    name = benches;
+    config = Criterion::default().sample_size(10);
+    targets = benchmark
+);
+criterion_main!(benches);
diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs
@@ -53,9 +53,10 @@ where
     let values_slice = values.as_slice::<T>();
 
     let exponents = T::find_best_exponents(values_slice);
-    let (encoded, exceptional_positions) = T::chunked_encode(values.as_slice::<T>(), exponents);
+    let (encoded, exceptional_positions) = T::encode_chunkwise(values.as_slice::<T>(), exponents);
 
     let encoded_array = PrimitiveArray::new(encoded, values.validity()).into_array();
+
     let validity = values.logical_validity()?;
     let n_valid = validity.true_count();
     let exceptional_positions = if n_valid == 0 {
diff --git a/encodings/alp/src/alp/mod.rs b/encodings/alp/src/alp/mod.rs
@@ -119,7 +119,7 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
     /// Unlike [Self::encode], this operation processes no more than [Self::ENCODE_CHUNK_SIZE]
     /// elements at once which can make better use of the L1 cache because [Self::encode] makes two
     /// passes over `values`: first to encode and second to extract the exceptional values.
-    fn chunked_encode(
+    fn encode_chunkwise(
         values: &[Self],
         exponents: Exponents,
     ) -> (Buffer<Self::ALPInt>, Buffer<u64>) {
@@ -209,79 +209,6 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
     }
 }
 
-#[allow(clippy::cast_possible_truncation)]
-fn _encode_chunk_unchecked<T: ALPFloat>(
-    chunk: &[T],
-    exp: Exponents,
-    encoded_output: &mut BufferMut<T::ALPInt>,
-    patch_indices: &mut BufferMut<u64>,
-    patch_values: &mut BufferMut<T>,
-    fill_value: &mut Option<T::ALPInt>,
-) {
-    let num_prev_encoded = encoded_output.len();
-    let num_prev_patches = patch_indices.len();
-    assert_eq!(patch_indices.len(), patch_values.len());
-    let has_filled = fill_value.is_some();
-
-    // encode the chunk, counting the number of patches
-    let mut chunk_patch_count = 0;
-    encoded_output.extend(chunk.iter().map(|v| {
-        let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
-        let decoded = T::decode_single(encoded, exp);
-        let neq = (decoded != *v) as usize;
-        chunk_patch_count += neq;
-        encoded
-    }));
-    let chunk_patch_count = chunk_patch_count; // immutable hereafter
-    assert_eq!(encoded_output.len(), num_prev_encoded + chunk.len());
-
-    if chunk_patch_count > 0 {
-        // we need to gather the patches for this chunk
-        // preallocate space for the patches (plus one because our loop may attempt to write one past the end)
-        patch_indices.reserve(chunk_patch_count + 1);
-        patch_values.reserve(chunk_patch_count + 1);
-
-        // record the patches in this chunk
-        let patch_indices_mut = patch_indices.spare_capacity_mut();
-        let patch_values_mut = patch_values.spare_capacity_mut();
-        let mut chunk_patch_index = 0;
-        for i in num_prev_encoded..encoded_output.len() {
-            let decoded = T::decode_single(encoded_output[i], exp);
-            // write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
-            patch_indices_mut[chunk_patch_index].write(i as u64);
-            patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
-            chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
-        }
-        assert_eq!(chunk_patch_index, chunk_patch_count);
-        unsafe {
-            patch_indices.set_len(num_prev_patches + chunk_patch_count);
-            patch_values.set_len(num_prev_patches + chunk_patch_count);
-        }
-    }
-
-    // find the first successfully encoded value (i.e., not patched)
-    // this is our fill value for missing values
-    if fill_value.is_none() && (num_prev_encoded + chunk_patch_count < encoded_output.len()) {
-        assert_eq!(num_prev_encoded, num_prev_patches);
-        for i in num_prev_encoded..encoded_output.len() {
-            if i >= patch_indices.len() || patch_indices[i] != i as u64 {
-                *fill_value = Some(encoded_output[i]);
-                break;
-            }
-        }
-    }
-
-    // replace the patched values in the encoded array with the fill value
-    // for better downstream compression
-    if let Some(fill_value) = fill_value {
-        // handle the edge case where the first N >= 1 chunks are all patches
-        let start_patch = if !has_filled { 0 } else { num_prev_patches };
-        for patch_idx in &patch_indices[start_patch..] {
-            encoded_output[*patch_idx as usize] = *fill_value;
-        }
-    }
-}
-
 impl ALPFloat for f32 {
     type ALPInt = i32;
     const FRACTIONAL_BITS: u8 = 23;
diff --git a/vortex-sampling-compressor/src/compressors/alp.rs b/vortex-sampling-compressor/src/compressors/alp.rs
@@ -1,11 +1,13 @@
 use vortex_alp::{alp_encode_components, ALPArray, ALPEncoding, ALPRDEncoding};
 use vortex_array::aliases::hash_set::HashSet;
 use vortex_array::array::PrimitiveArray;
+use vortex_array::compute::fill_null;
 use vortex_array::variants::PrimitiveArrayTrait;
 use vortex_array::{Array, Encoding, EncodingId, IntoArray, IntoArrayVariant};
 use vortex_dtype::PType;
 use vortex_error::VortexResult;
 use vortex_fastlanes::BitPackedEncoding;
+use vortex_scalar::Scalar;
 
 use super::alp_rd::ALPRDCompressor;
 use crate::compressors::{CompressedArray, CompressionTree, EncodingCompressor};
@@ -41,8 +43,9 @@ impl EncodingCompressor for ALPCompressor {
         like: Option<CompressionTree<'a>>,
         ctx: SamplingCompressor<'a>,
     ) -> VortexResult<CompressedArray<'a>> {
-        let (exponents, encoded, patches) =
-            alp_encode_components(&array.clone().into_primitive()?)?;
+        let nulls_zeroed =
+            fill_null(array, Scalar::from(0.0).cast(array.dtype())?)?.into_primitive()?;
+        let (exponents, encoded, patches) = alp_encode_components(&nulls_zeroed)?;
 
         let compressed_encoded = ctx
             .named("packed")