chore[cuda]: clean up kernel tests (#6139)

joseph-isaacs · web-flow · commit cb8b56ab9ccf · 2026-01-26T12:58:59.000Z
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/vortex-cuda/Cargo.toml b/vortex-cuda/Cargo.toml
@@ -48,6 +48,7 @@ tokio = { workspace = true, features = ["rt", "macros"] }
 vortex-array = { workspace = true, features = ["_test-harness"] }
 vortex-cuda = { path = ".", features = ["_test-harness"] }
 vortex-dtype = { workspace = true, features = ["cudarc"] }
+vortex-scalar = { workspace = true }
 
 [build-dependencies]
 
diff --git a/vortex-cuda/src/canonical.rs b/vortex-cuda/src/canonical.rs
@@ -15,14 +15,14 @@ use vortex_error::VortexResult;
 /// Move all canonical data from to_host from device.
 #[async_trait]
 pub trait CanonicalCudaExt {
-    async fn to_host(self) -> VortexResult<Self>
+    async fn into_host(self) -> VortexResult<Self>
     where
         Self: Sized;
 }
 
 #[async_trait]
 impl CanonicalCudaExt for Canonical {
-    async fn to_host(self) -> VortexResult<Self> {
+    async fn into_host(self) -> VortexResult<Self> {
         match self {
             n @ Canonical::Null(_) => Ok(n),
             Canonical::Bool(bool) => {
diff --git a/vortex-cuda/src/kernel/encodings/alp.rs b/vortex-cuda/src/kernel/encodings/alp.rs
@@ -120,16 +120,18 @@ mod tests {
     use vortex_alp::Exponents;
     use vortex_array::IntoArray;
     use vortex_array::arrays::PrimitiveArray;
+    use vortex_array::assert_arrays_eq;
     use vortex_array::validity::Validity::NonNullable;
     use vortex_buffer::Buffer;
     use vortex_error::VortexExpect;
     use vortex_session::VortexSession;
 
     use super::*;
+    use crate::CanonicalCudaExt;
     use crate::session::CudaSession;
 
     #[tokio::test]
-    async fn test_cuda_alp_decompression_f32() {
+    async fn test_cuda_alp_decompression_f32() -> VortexResult<()> {
         let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
             .vortex_expect("failed to create execution context");
 
@@ -144,34 +146,20 @@ mod tests {
             PrimitiveArray::new(Buffer::from(encoded_data.clone()), NonNullable).into_array(),
             exponents,
             None,
-        )
-        .vortex_expect("failed to create ALP array");
+        )?;
 
-        let result = ALPExecutor
+        let cpu_result = alp_array.to_canonical()?;
+
+        let gpu_result = ALPExecutor
             .execute(alp_array.to_array(), &mut cuda_ctx)
             .await
-            .vortex_expect("GPU decompression failed");
-
-        let result_buf =
-            Buffer::<f32>::from_byte_buffer(result.as_primitive().buffer_handle().to_host().await);
-
-        assert_eq!(result_buf.len(), encoded_data.len());
-
-        // Check decoded values
-        let expected: Vec<f32> = encoded_data.iter().map(|&v| v as f32 * 100.0).collect();
-        for (i, (&got, &want)) in result_buf
-            .as_slice()
-            .iter()
-            .zip(expected.iter())
-            .enumerate()
-        {
-            assert!(
-                (got - want).abs() < 1e-6,
-                "Mismatch at {}: got {}, want {}",
-                i,
-                got,
-                want
-            );
-        }
+            .vortex_expect("GPU decompression failed")
+            .into_host()
+            .await?
+            .into_array();
+
+        assert_arrays_eq!(cpu_result.into_array(), gpu_result);
+
+        Ok(())
     }
 }
diff --git a/vortex-cuda/src/kernel/encodings/for_.rs b/vortex-cuda/src/kernel/encodings/for_.rs
@@ -104,148 +104,52 @@ where
 #[cfg(test)]
 #[cfg(cuda_available)]
 mod tests {
+    use rstest::rstest;
     use vortex_array::IntoArray;
     use vortex_array::arrays::PrimitiveArray;
     use vortex_array::assert_arrays_eq;
     use vortex_array::validity::Validity::NonNullable;
     use vortex_buffer::Buffer;
+    use vortex_dtype::NativePType;
     use vortex_error::VortexExpect;
     use vortex_fastlanes::FoRArray;
+    use vortex_scalar::Scalar;
     use vortex_session::VortexSession;
 
     use super::*;
+    use crate::CanonicalCudaExt;
     use crate::session::CudaSession;
 
-    #[tokio::test]
-    async fn test_cuda_for_decompression_u8() {
-        let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
-            .vortex_expect("failed to create execution context");
-
-        #[allow(clippy::cast_possible_truncation)]
-        let input_data: Vec<u8> = (0..5000).map(|i| (i % 246) as u8).collect();
-
-        let for_array = FoRArray::try_new(
-            PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
-            10u8.into(),
-        )
-        .vortex_expect("failed to create FoR array");
-
-        // Decode on CPU
-        let cpu_result = for_array
-            .to_canonical()
-            .vortex_expect("CPU canonicalize failed");
-
-        // Decode on GPU
-        let gpu_result = FoRExecutor
-            .execute(for_array.to_array(), &mut cuda_ctx)
-            .await
-            .vortex_expect("GPU decompression failed");
-
-        // Copy GPU result back to host for comparison
-        let gpu_host = Buffer::<u8>::from_byte_buffer(
-            gpu_result.into_primitive().buffer_handle().to_host().await,
-        );
-        let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
-
-        assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
-    }
-
-    #[tokio::test]
-    async fn test_cuda_for_decompression_u16() {
-        let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
-            .vortex_expect("failed to create execution context");
-
-        let input_data: Vec<u16> = (0..5000).map(|i| (i % 5000) as u16).collect();
-
-        let for_array = FoRArray::try_new(
-            PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
-            1000u16.into(),
-        )
-        .vortex_expect("failed to create FoR array");
-
-        // Decode on CPU
-        let cpu_result = for_array
-            .to_canonical()
-            .vortex_expect("CPU canonicalize failed");
-
-        // Decode on GPU
-        let gpu_result = FoRExecutor
-            .execute(for_array.to_array(), &mut cuda_ctx)
-            .await
-            .vortex_expect("GPU decompression failed");
-
-        // Copy GPU result back to host for comparison
-        let gpu_host = Buffer::<u16>::from_byte_buffer(
-            gpu_result.into_primitive().buffer_handle().to_host().await,
-        );
-        let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
-
-        assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
-    }
-
-    #[tokio::test]
-    async fn test_cuda_for_decompression_u32() {
-        let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
-            .vortex_expect("failed to create execution context");
-
-        let input_data: Vec<u32> = (0..5000).map(|i| (i % 5000) as u32).collect();
-
-        let for_array = FoRArray::try_new(
+    fn make_for_array<T: NativePType + Into<Scalar>>(input_data: Vec<T>, reference: T) -> FoRArray {
+        FoRArray::try_new(
             PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
-            100000u32.into(),
+            reference.into(),
         )
-        .vortex_expect("failed to create FoR array");
-
-        // Decode on CPU
-        let cpu_result = for_array
-            .to_canonical()
-            .vortex_expect("CPU canonicalize failed");
-
-        // Decode on GPU
-        let gpu_result = FoRExecutor
-            .execute(for_array.to_array(), &mut cuda_ctx)
-            .await
-            .vortex_expect("GPU decompression failed");
-
-        // Copy GPU result back to host for comparison
-        let gpu_host = Buffer::<u32>::from_byte_buffer(
-            gpu_result.into_primitive().buffer_handle().to_host().await,
-        );
-        let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
-
-        assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
+        .unwrap()
     }
 
+    #[rstest]
+    #[case::u8(make_for_array((0..5000).map(|i| (i % 246) as u8).collect(), 10u8))]
+    #[case::u16(make_for_array((0..5000).map(|i| (i % 5000) as u16).collect(), 1000u16))]
+    #[case::u32(make_for_array((0..5000).map(|i| (i % 5000) as u32).collect(), 100000u32))]
+    #[case::u64(make_for_array((0..5000).map(|i| (i % 5000) as u64).collect(), 1000000u64))]
     #[tokio::test]
-    async fn test_cuda_for_decompression_u64() {
+    async fn test_cuda_for_decompression(#[case] for_array: FoRArray) -> VortexResult<()> {
         let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
             .vortex_expect("failed to create execution context");
 
-        let input_data: Vec<u64> = (0..5000).map(|i| (i % 5000) as u64).collect();
-
-        let for_array = FoRArray::try_new(
-            PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
-            1000000u64.into(),
-        )
-        .vortex_expect("failed to create FoR array");
-
-        // Decode on CPU
-        let cpu_result = for_array
-            .to_canonical()
-            .vortex_expect("CPU canonicalize failed");
+        let cpu_result = for_array.to_canonical()?;
 
-        // Decode on GPU
         let gpu_result = FoRExecutor
             .execute(for_array.to_array(), &mut cuda_ctx)
             .await
-            .vortex_expect("GPU decompression failed");
+            .vortex_expect("GPU decompression failed")
+            .into_host()
+            .await?
+            .into_array();
 
-        // Copy GPU result back to host for comparison
-        let gpu_host = Buffer::<u64>::from_byte_buffer(
-            gpu_result.into_primitive().buffer_handle().to_host().await,
-        );
-        let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
+        assert_arrays_eq!(cpu_result.into_array(), gpu_result);
 
-        assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
+        Ok(())
     }
 }
diff --git a/vortex-cuda/src/kernel/encodings/zigzag.rs b/vortex-cuda/src/kernel/encodings/zigzag.rs
@@ -118,10 +118,11 @@ mod tests {
     use vortex_zigzag::ZigZagArray;
 
     use super::*;
+    use crate::CanonicalCudaExt;
     use crate::session::CudaSession;
 
     #[tokio::test]
-    async fn test_cuda_zigzag_decompression_u32() {
+    async fn test_cuda_zigzag_decompression_u32() -> VortexResult<()> {
         let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
             .vortex_expect("failed to create execution context");
 
@@ -131,26 +132,20 @@ mod tests {
 
         let zigzag_array = ZigZagArray::try_new(
             PrimitiveArray::new(Buffer::from(encoded_data), NonNullable).into_array(),
-        )
-        .vortex_expect("failed to create ZigZag array");
+        )?;
 
-        // Decode on CPU
-        let cpu_result = zigzag_array
-            .to_canonical()
-            .vortex_expect("CPU canonicalize failed");
+        let cpu_result = zigzag_array.to_canonical()?;
 
-        // Decode on GPU
         let gpu_result = ZigZagExecutor
             .execute(zigzag_array.to_array(), &mut cuda_ctx)
             .await
-            .vortex_expect("GPU decompression failed");
+            .vortex_expect("GPU decompression failed")
+            .into_host()
+            .await?
+            .into_array();
 
-        // Copy GPU result back to host for comparison
-        let gpu_host = Buffer::<i32>::from_byte_buffer(
-            gpu_result.into_primitive().buffer_handle().to_host().await,
-        );
-        let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
+        assert_arrays_eq!(cpu_result.into_array(), gpu_result);
 
-        assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
+        Ok(())
     }
 }