Skip to content

Commit cb8b56a

Browse files
chore[cuda]: clean up kernel tests (#6139)
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent e6182e8 commit cb8b56a

File tree

6 files changed

+50
-161
lines changed

6 files changed

+50
-161
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-cuda/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ tokio = { workspace = true, features = ["rt", "macros"] }
4848
vortex-array = { workspace = true, features = ["_test-harness"] }
4949
vortex-cuda = { path = ".", features = ["_test-harness"] }
5050
vortex-dtype = { workspace = true, features = ["cudarc"] }
51+
vortex-scalar = { workspace = true }
5152

5253
[build-dependencies]
5354

vortex-cuda/src/canonical.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@ use vortex_error::VortexResult;
1515
/// Move all canonical data from device to host.
1616
#[async_trait]
1717
pub trait CanonicalCudaExt {
18-
async fn to_host(self) -> VortexResult<Self>
18+
async fn into_host(self) -> VortexResult<Self>
1919
where
2020
Self: Sized;
2121
}
2222

2323
#[async_trait]
2424
impl CanonicalCudaExt for Canonical {
25-
async fn to_host(self) -> VortexResult<Self> {
25+
async fn into_host(self) -> VortexResult<Self> {
2626
match self {
2727
n @ Canonical::Null(_) => Ok(n),
2828
Canonical::Bool(bool) => {

vortex-cuda/src/kernel/encodings/alp.rs

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -120,16 +120,18 @@ mod tests {
120120
use vortex_alp::Exponents;
121121
use vortex_array::IntoArray;
122122
use vortex_array::arrays::PrimitiveArray;
123+
use vortex_array::assert_arrays_eq;
123124
use vortex_array::validity::Validity::NonNullable;
124125
use vortex_buffer::Buffer;
125126
use vortex_error::VortexExpect;
126127
use vortex_session::VortexSession;
127128

128129
use super::*;
130+
use crate::CanonicalCudaExt;
129131
use crate::session::CudaSession;
130132

131133
#[tokio::test]
132-
async fn test_cuda_alp_decompression_f32() {
134+
async fn test_cuda_alp_decompression_f32() -> VortexResult<()> {
133135
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
134136
.vortex_expect("failed to create execution context");
135137

@@ -144,34 +146,20 @@ mod tests {
144146
PrimitiveArray::new(Buffer::from(encoded_data.clone()), NonNullable).into_array(),
145147
exponents,
146148
None,
147-
)
148-
.vortex_expect("failed to create ALP array");
149+
)?;
149150

150-
let result = ALPExecutor
151+
let cpu_result = alp_array.to_canonical()?;
152+
153+
let gpu_result = ALPExecutor
151154
.execute(alp_array.to_array(), &mut cuda_ctx)
152155
.await
153-
.vortex_expect("GPU decompression failed");
154-
155-
let result_buf =
156-
Buffer::<f32>::from_byte_buffer(result.as_primitive().buffer_handle().to_host().await);
157-
158-
assert_eq!(result_buf.len(), encoded_data.len());
159-
160-
// Check decoded values
161-
let expected: Vec<f32> = encoded_data.iter().map(|&v| v as f32 * 100.0).collect();
162-
for (i, (&got, &want)) in result_buf
163-
.as_slice()
164-
.iter()
165-
.zip(expected.iter())
166-
.enumerate()
167-
{
168-
assert!(
169-
(got - want).abs() < 1e-6,
170-
"Mismatch at {}: got {}, want {}",
171-
i,
172-
got,
173-
want
174-
);
175-
}
156+
.vortex_expect("GPU decompression failed")
157+
.into_host()
158+
.await?
159+
.into_array();
160+
161+
assert_arrays_eq!(cpu_result.into_array(), gpu_result);
162+
163+
Ok(())
176164
}
177165
}

vortex-cuda/src/kernel/encodings/for_.rs

Lines changed: 21 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -104,148 +104,52 @@ where
104104
#[cfg(test)]
105105
#[cfg(cuda_available)]
106106
mod tests {
107+
use rstest::rstest;
107108
use vortex_array::IntoArray;
108109
use vortex_array::arrays::PrimitiveArray;
109110
use vortex_array::assert_arrays_eq;
110111
use vortex_array::validity::Validity::NonNullable;
111112
use vortex_buffer::Buffer;
113+
use vortex_dtype::NativePType;
112114
use vortex_error::VortexExpect;
113115
use vortex_fastlanes::FoRArray;
116+
use vortex_scalar::Scalar;
114117
use vortex_session::VortexSession;
115118

116119
use super::*;
120+
use crate::CanonicalCudaExt;
117121
use crate::session::CudaSession;
118122

119-
#[tokio::test]
120-
async fn test_cuda_for_decompression_u8() {
121-
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
122-
.vortex_expect("failed to create execution context");
123-
124-
#[allow(clippy::cast_possible_truncation)]
125-
let input_data: Vec<u8> = (0..5000).map(|i| (i % 246) as u8).collect();
126-
127-
let for_array = FoRArray::try_new(
128-
PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
129-
10u8.into(),
130-
)
131-
.vortex_expect("failed to create FoR array");
132-
133-
// Decode on CPU
134-
let cpu_result = for_array
135-
.to_canonical()
136-
.vortex_expect("CPU canonicalize failed");
137-
138-
// Decode on GPU
139-
let gpu_result = FoRExecutor
140-
.execute(for_array.to_array(), &mut cuda_ctx)
141-
.await
142-
.vortex_expect("GPU decompression failed");
143-
144-
// Copy GPU result back to host for comparison
145-
let gpu_host = Buffer::<u8>::from_byte_buffer(
146-
gpu_result.into_primitive().buffer_handle().to_host().await,
147-
);
148-
let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
149-
150-
assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
151-
}
152-
153-
#[tokio::test]
154-
async fn test_cuda_for_decompression_u16() {
155-
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
156-
.vortex_expect("failed to create execution context");
157-
158-
let input_data: Vec<u16> = (0..5000).map(|i| (i % 5000) as u16).collect();
159-
160-
let for_array = FoRArray::try_new(
161-
PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
162-
1000u16.into(),
163-
)
164-
.vortex_expect("failed to create FoR array");
165-
166-
// Decode on CPU
167-
let cpu_result = for_array
168-
.to_canonical()
169-
.vortex_expect("CPU canonicalize failed");
170-
171-
// Decode on GPU
172-
let gpu_result = FoRExecutor
173-
.execute(for_array.to_array(), &mut cuda_ctx)
174-
.await
175-
.vortex_expect("GPU decompression failed");
176-
177-
// Copy GPU result back to host for comparison
178-
let gpu_host = Buffer::<u16>::from_byte_buffer(
179-
gpu_result.into_primitive().buffer_handle().to_host().await,
180-
);
181-
let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
182-
183-
assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
184-
}
185-
186-
#[tokio::test]
187-
async fn test_cuda_for_decompression_u32() {
188-
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
189-
.vortex_expect("failed to create execution context");
190-
191-
let input_data: Vec<u32> = (0..5000).map(|i| (i % 5000) as u32).collect();
192-
193-
let for_array = FoRArray::try_new(
123+
fn make_for_array<T: NativePType + Into<Scalar>>(input_data: Vec<T>, reference: T) -> FoRArray {
124+
FoRArray::try_new(
194125
PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
195-
100000u32.into(),
126+
reference.into(),
196127
)
197-
.vortex_expect("failed to create FoR array");
198-
199-
// Decode on CPU
200-
let cpu_result = for_array
201-
.to_canonical()
202-
.vortex_expect("CPU canonicalize failed");
203-
204-
// Decode on GPU
205-
let gpu_result = FoRExecutor
206-
.execute(for_array.to_array(), &mut cuda_ctx)
207-
.await
208-
.vortex_expect("GPU decompression failed");
209-
210-
// Copy GPU result back to host for comparison
211-
let gpu_host = Buffer::<u32>::from_byte_buffer(
212-
gpu_result.into_primitive().buffer_handle().to_host().await,
213-
);
214-
let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
215-
216-
assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
128+
.unwrap()
217129
}
218130

131+
#[rstest]
132+
#[case::u8(make_for_array((0..5000).map(|i| (i % 246) as u8).collect(), 10u8))]
133+
#[case::u16(make_for_array((0..5000).map(|i| (i % 5000) as u16).collect(), 1000u16))]
134+
#[case::u32(make_for_array((0..5000).map(|i| (i % 5000) as u32).collect(), 100000u32))]
135+
#[case::u64(make_for_array((0..5000).map(|i| (i % 5000) as u64).collect(), 1000000u64))]
219136
#[tokio::test]
220-
async fn test_cuda_for_decompression_u64() {
137+
async fn test_cuda_for_decompression(#[case] for_array: FoRArray) -> VortexResult<()> {
221138
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
222139
.vortex_expect("failed to create execution context");
223140

224-
let input_data: Vec<u64> = (0..5000).map(|i| (i % 5000) as u64).collect();
225-
226-
let for_array = FoRArray::try_new(
227-
PrimitiveArray::new(Buffer::from(input_data), NonNullable).into_array(),
228-
1000000u64.into(),
229-
)
230-
.vortex_expect("failed to create FoR array");
231-
232-
// Decode on CPU
233-
let cpu_result = for_array
234-
.to_canonical()
235-
.vortex_expect("CPU canonicalize failed");
141+
let cpu_result = for_array.to_canonical()?;
236142

237-
// Decode on GPU
238143
let gpu_result = FoRExecutor
239144
.execute(for_array.to_array(), &mut cuda_ctx)
240145
.await
241-
.vortex_expect("GPU decompression failed");
146+
.vortex_expect("GPU decompression failed")
147+
.into_host()
148+
.await?
149+
.into_array();
242150

243-
// Copy GPU result back to host for comparison
244-
let gpu_host = Buffer::<u64>::from_byte_buffer(
245-
gpu_result.into_primitive().buffer_handle().to_host().await,
246-
);
247-
let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
151+
assert_arrays_eq!(cpu_result.into_array(), gpu_result);
248152

249-
assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
153+
Ok(())
250154
}
251155
}

vortex-cuda/src/kernel/encodings/zigzag.rs

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,11 @@ mod tests {
118118
use vortex_zigzag::ZigZagArray;
119119

120120
use super::*;
121+
use crate::CanonicalCudaExt;
121122
use crate::session::CudaSession;
122123

123124
#[tokio::test]
124-
async fn test_cuda_zigzag_decompression_u32() {
125+
async fn test_cuda_zigzag_decompression_u32() -> VortexResult<()> {
125126
let mut cuda_ctx = CudaSession::create_execution_ctx(VortexSession::empty())
126127
.vortex_expect("failed to create execution context");
127128

@@ -131,26 +132,20 @@ mod tests {
131132

132133
let zigzag_array = ZigZagArray::try_new(
133134
PrimitiveArray::new(Buffer::from(encoded_data), NonNullable).into_array(),
134-
)
135-
.vortex_expect("failed to create ZigZag array");
135+
)?;
136136

137-
// Decode on CPU
138-
let cpu_result = zigzag_array
139-
.to_canonical()
140-
.vortex_expect("CPU canonicalize failed");
137+
let cpu_result = zigzag_array.to_canonical()?;
141138

142-
// Decode on GPU
143139
let gpu_result = ZigZagExecutor
144140
.execute(zigzag_array.to_array(), &mut cuda_ctx)
145141
.await
146-
.vortex_expect("GPU decompression failed");
142+
.vortex_expect("GPU decompression failed")
143+
.into_host()
144+
.await?
145+
.into_array();
147146

148-
// Copy GPU result back to host for comparison
149-
let gpu_host = Buffer::<i32>::from_byte_buffer(
150-
gpu_result.into_primitive().buffer_handle().to_host().await,
151-
);
152-
let gpu_array = PrimitiveArray::new(gpu_host, NonNullable);
147+
assert_arrays_eq!(cpu_result.into_array(), gpu_result);
153148

154-
assert_arrays_eq!(cpu_result.into_array(), gpu_array.into_array());
149+
Ok(())
155150
}
156151
}

0 commit comments

Comments
 (0)