22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
44#![ allow( clippy:: unwrap_used) ]
5- #![ cfg ( gpu_unstable ) ]
5+ #![ allow ( dead_code ) ]
66
77use std:: sync:: Arc ;
88use std:: time:: Duration ;
@@ -17,9 +17,6 @@ use vortex_buffer::BufferMut;
1717use vortex_dtype:: NativePType ;
1818use vortex_error:: VortexUnwrap ;
1919use vortex_fastlanes:: { BitPackedArray , FoRArray } ;
20- use vortex_gpu:: {
21- create_run_jit_kernel, cuda_bit_unpack_timed, cuda_for_bp_unpack_timed, cuda_for_unpack_timed,
22- } ;
2320
2421// Data sizes: 1GB, 2.5GB, 5GB, 10GB
2522// These are approximate sizes in bytes, accounting for bit-packing compression
@@ -91,6 +88,7 @@ fn make_alp_array(len: usize) -> ArrayRef {
9188 . into_array ( )
9289}
9390
91+ #[ cfg( gpu_unstable) ]
9492fn benchmark_gpu_decompress_kernel_only ( c : & mut Criterion ) {
9593 let mut group = c. benchmark_group ( "gpu_decompress_kernel_only" ) ;
9694
@@ -112,7 +110,8 @@ fn benchmark_gpu_decompress_kernel_only(c: &mut Criterion) {
112110 let mut total_time = Duration :: ZERO ;
113111 for _ in 0 ..iters {
114112 // This only measures kernel execution time, not memory transfers
115- let kernel_time_ns = cuda_bit_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
113+ let kernel_time_ns =
114+ vortex_gpu:: cuda_bit_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
116115 total_time += kernel_time_ns;
117116 }
118117 total_time
@@ -123,6 +122,7 @@ fn benchmark_gpu_decompress_kernel_only(c: &mut Criterion) {
123122 group. finish ( ) ;
124123}
125124
125+ #[ cfg( gpu_unstable) ]
126126fn benchmark_gpu_for_decompress_kernel_only ( c : & mut Criterion ) {
127127 let mut group = c. benchmark_group ( "gpu_for_decompress_kernel_only" ) ;
128128
@@ -143,7 +143,7 @@ fn benchmark_gpu_for_decompress_kernel_only(c: &mut Criterion) {
143143 for _ in 0 ..iters {
144144 // This only measures kernel execution time, not memory transfers
145145 let ( _result, kernel_time) =
146- cuda_for_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
146+ vortex_gpu :: cuda_for_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
147147 total_time += kernel_time;
148148 }
149149 total_time
@@ -154,6 +154,7 @@ fn benchmark_gpu_for_decompress_kernel_only(c: &mut Criterion) {
154154 group. finish ( ) ;
155155}
156156
157+ #[ cfg( gpu_unstable) ]
157158fn benchmark_gpu_for_bp_fused_decompress_kernel_only ( c : & mut Criterion ) {
158159 let mut group = c. benchmark_group ( "gpu_for_bp_fused_decompress_kernel_only" ) ;
159160
@@ -174,7 +175,7 @@ fn benchmark_gpu_for_bp_fused_decompress_kernel_only(c: &mut Criterion) {
174175 for _ in 0 ..iters {
175176 // This only measures kernel execution time, not memory transfers
176177 let ( _result, kernel_time) =
177- cuda_for_bp_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
178+ vortex_gpu :: cuda_for_bp_unpack_timed ( array, Arc :: clone ( & ctx) ) . unwrap ( ) ;
178179 total_time += kernel_time;
179180 }
180181 total_time
@@ -185,6 +186,7 @@ fn benchmark_gpu_for_bp_fused_decompress_kernel_only(c: &mut Criterion) {
185186 group. finish ( ) ;
186187}
187188
189+ #[ cfg( gpu_unstable) ]
188190fn benchmark_gpu_for_bp_jit_decompress_kernel_only ( c : & mut Criterion ) {
189191 let mut group = c. benchmark_group ( "benchmark_gpu_for_bp_jit_decompress_kernel_only" ) ;
190192
@@ -205,7 +207,8 @@ fn benchmark_gpu_for_bp_jit_decompress_kernel_only(c: &mut Criterion) {
205207 let mut total_time = Duration :: ZERO ;
206208 for _ in 0 ..iters {
207209 // This only measures kernel execution time, not memory transfers
208- let ( _result, kernel_time) = create_run_jit_kernel ( & ctx, array) . unwrap ( ) ;
210+ let ( _result, kernel_time) =
211+ vortex_gpu:: create_run_jit_kernel ( & ctx, array) . unwrap ( ) ;
209212 total_time += kernel_time. elapsed ( ) . unwrap ( ) ;
210213 }
211214 total_time
@@ -216,23 +219,7 @@ fn benchmark_gpu_for_bp_jit_decompress_kernel_only(c: &mut Criterion) {
216219 group. finish ( ) ;
217220}
218221
219- #[ allow( dead_code) ]
220- fn benchmark_cpu_canonicalize ( c : & mut Criterion ) {
221- let mut group = c. benchmark_group ( "cpu_canonicalize" ) ;
222-
223- for ( len, label) in DATA_SIZES {
224- let len = len. next_multiple_of ( 1024 ) ;
225- let array = make_bitpackable_array :: < u32 > ( len) ;
226-
227- group. throughput ( Throughput :: Bytes ( ( len * size_of :: < u32 > ( ) ) as u64 ) ) ;
228- group. bench_with_input ( BenchmarkId :: new ( "u32" , label) , & array, |b, array| {
229- b. iter ( || array. clone ( ) . into_array ( ) . to_canonical ( ) ) ;
230- } ) ;
231- }
232-
233- group. finish ( ) ;
234- }
235-
222+ #[ cfg( gpu_unstable) ]
236223criterion_group ! (
237224 benches,
238225 benchmark_gpu_decompress_kernel_only,
@@ -242,4 +229,8 @@ criterion_group!(
242229) ;
243230
244231// criterion_group!(benches, benchmark_gpu_for_bp_jit_decompress_kernel_only);
232+ #[ cfg( gpu_unstable) ]
245233criterion_main ! ( benches) ;
234+
235+ #[ cfg( not( gpu_unstable) ) ]
236+ fn main ( ) { }
0 commit comments