@@ -326,6 +326,58 @@ where
326326 InMemoryPageIterator :: new ( pages)
327327}
328328
329+ fn build_delta_encoded_incr_primitive_page_iterator < T > (
330+ column_desc : ColumnDescPtr ,
331+ null_density : f32 ,
332+ increment : usize ,
333+ stepped : bool ,
334+ ) -> impl PageIterator + Clone
335+ where
336+ T : parquet:: data_type:: DataType ,
337+ T :: T : SampleUniform + FromPrimitive ,
338+ {
339+ let max_def_level = column_desc. max_def_level ( ) ;
340+ let max_rep_level = column_desc. max_rep_level ( ) ;
341+ let rep_levels = vec ! [ 0 ; VALUES_PER_PAGE ] ;
342+ let mut rng = seedable_rng ( ) ;
343+ let mut pages: Vec < Vec < parquet:: column:: page:: Page > > = Vec :: new ( ) ;
344+ let mut running_val: usize = 1 ;
345+ for _i in 0 ..NUM_ROW_GROUPS {
346+ let mut column_chunk_pages = Vec :: new ( ) ;
347+ for _j in 0 ..PAGES_PER_GROUP {
348+ // generate page
349+ let mut values = Vec :: with_capacity ( VALUES_PER_PAGE ) ;
350+ let mut def_levels = Vec :: with_capacity ( VALUES_PER_PAGE ) ;
351+ for k in 0 ..VALUES_PER_PAGE {
352+ let def_level = if rng. random :: < f32 > ( ) < null_density {
353+ max_def_level - 1
354+ } else {
355+ max_def_level
356+ } ;
357+ if def_level == max_def_level {
358+ let value = FromPrimitive :: from_usize ( running_val) . unwrap ( ) ;
359+ running_val = if !stepped || k % 2 == 1 {
360+ running_val + increment
361+ } else {
362+ running_val
363+ } ;
364+ values. push ( value) ;
365+ }
366+ def_levels. push ( def_level) ;
367+ }
368+ let mut page_builder =
369+ DataPageBuilderImpl :: new ( column_desc. clone ( ) , values. len ( ) as u32 , true ) ;
370+ page_builder. add_rep_levels ( max_rep_level, & rep_levels) ;
371+ page_builder. add_def_levels ( max_def_level, & def_levels) ;
372+ page_builder. add_values :: < T > ( Encoding :: DELTA_BINARY_PACKED , & values) ;
373+ column_chunk_pages. push ( page_builder. consume ( ) ) ;
374+ }
375+ pages. push ( column_chunk_pages) ;
376+ }
377+
378+ InMemoryPageIterator :: new ( pages)
379+ }
380+
329381fn build_dictionary_encoded_primitive_page_iterator < T > (
330382 column_desc : ColumnDescPtr ,
331383 null_density : f32 ,
@@ -439,6 +491,52 @@ fn build_plain_encoded_byte_array_page_iterator_inner(
439491 InMemoryPageIterator :: new ( pages)
440492}
441493
494+ fn build_constant_prefix_byte_array_page_iterator (
495+ column_desc : ColumnDescPtr ,
496+ null_density : f32 ,
497+ encoding : Encoding ,
498+ const_string : bool ,
499+ ) -> impl PageIterator + Clone {
500+ let max_def_level = column_desc. max_def_level ( ) ;
501+ let max_rep_level = column_desc. max_rep_level ( ) ;
502+ let rep_levels = vec ! [ 0 ; VALUES_PER_PAGE ] ;
503+ let mut rng = seedable_rng ( ) ;
504+ let mut pages: Vec < Vec < parquet:: column:: page:: Page > > = Vec :: new ( ) ;
505+ for i in 0 ..NUM_ROW_GROUPS {
506+ let mut column_chunk_pages = Vec :: new ( ) ;
507+ for j in 0 ..PAGES_PER_GROUP {
508+ // generate page
509+ let mut values = Vec :: with_capacity ( VALUES_PER_PAGE ) ;
510+ let mut def_levels = Vec :: with_capacity ( VALUES_PER_PAGE ) ;
511+ for k in 0 ..VALUES_PER_PAGE {
512+ let def_level = if rng. random :: < f32 > ( ) < null_density {
513+ max_def_level - 1
514+ } else {
515+ max_def_level
516+ } ;
517+ if def_level == max_def_level {
518+ let string_value = if const_string {
519+ "01234567890123456789012345678901" . to_string ( )
520+ } else {
521+ format ! ( "01234567890123456789012345678901:{:x}{j}{i}" , ( k % 16 ) )
522+ } ;
523+ values. push ( parquet:: data_type:: ByteArray :: from ( string_value. as_str ( ) ) ) ;
524+ }
525+ def_levels. push ( def_level) ;
526+ }
527+ let mut page_builder =
528+ DataPageBuilderImpl :: new ( column_desc. clone ( ) , values. len ( ) as u32 , true ) ;
529+ page_builder. add_rep_levels ( max_rep_level, & rep_levels) ;
530+ page_builder. add_def_levels ( max_def_level, & def_levels) ;
531+ page_builder. add_values :: < ByteArrayType > ( encoding, & values) ;
532+ column_chunk_pages. push ( page_builder. consume ( ) ) ;
533+ }
534+ pages. push ( column_chunk_pages) ;
535+ }
536+
537+ InMemoryPageIterator :: new ( pages)
538+ }
539+
442540fn build_plain_encoded_byte_array_page_iterator (
443541 column_desc : ColumnDescPtr ,
444542 null_density : f32 ,
@@ -1094,6 +1192,99 @@ fn bench_primitive<T>(
10941192 assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
10951193 } ) ;
10961194
1195+ // binary packed same value
1196+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1197+ mandatory_column_desc. clone ( ) ,
1198+ 0.0 ,
1199+ 0 ,
1200+ false ,
1201+ ) ;
1202+ group. bench_function ( "binary packed single value" , |b| {
1203+ b. iter ( || {
1204+ let array_reader =
1205+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1206+ count = bench_array_reader ( array_reader) ;
1207+ } ) ;
1208+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1209+ } ) ;
1210+
1211+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1212+ mandatory_column_desc. clone ( ) ,
1213+ 0.0 ,
1214+ 0 ,
1215+ false ,
1216+ ) ;
1217+ group. bench_function ( "binary packed skip single value" , |b| {
1218+ b. iter ( || {
1219+ let array_reader =
1220+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1221+ count = bench_array_reader_skip ( array_reader) ;
1222+ } ) ;
1223+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1224+ } ) ;
1225+
1226+ // binary packed monotonically increasing
1227+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1228+ mandatory_column_desc. clone ( ) ,
1229+ 0.0 ,
1230+ 1 ,
1231+ false ,
1232+ ) ;
1233+ group. bench_function ( "binary packed increasing value" , |b| {
1234+ b. iter ( || {
1235+ let array_reader =
1236+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1237+ count = bench_array_reader ( array_reader) ;
1238+ } ) ;
1239+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1240+ } ) ;
1241+
1242+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1243+ mandatory_column_desc. clone ( ) ,
1244+ 0.0 ,
1245+ 1 ,
1246+ false ,
1247+ ) ;
1248+ group. bench_function ( "binary packed skip increasing value" , |b| {
1249+ b. iter ( || {
1250+ let array_reader =
1251+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1252+ count = bench_array_reader_skip ( array_reader) ;
1253+ } ) ;
1254+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1255+ } ) ;
1256+
1257+ // binary packed increasing stepped
1258+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1259+ mandatory_column_desc. clone ( ) ,
1260+ 0.0 ,
1261+ 1 ,
1262+ true ,
1263+ ) ;
1264+ group. bench_function ( "binary packed stepped increasing value" , |b| {
1265+ b. iter ( || {
1266+ let array_reader =
1267+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1268+ count = bench_array_reader ( array_reader) ;
1269+ } ) ;
1270+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1271+ } ) ;
1272+
1273+ let data = build_delta_encoded_incr_primitive_page_iterator :: < T > (
1274+ mandatory_column_desc. clone ( ) ,
1275+ 0.0 ,
1276+ 1 ,
1277+ true ,
1278+ ) ;
1279+ group. bench_function ( "binary packed skip stepped increasing value" , |b| {
1280+ b. iter ( || {
1281+ let array_reader =
1282+ create_primitive_array_reader ( data. clone ( ) , mandatory_column_desc. clone ( ) ) ;
1283+ count = bench_array_reader_skip ( array_reader) ;
1284+ } ) ;
1285+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1286+ } ) ;
1287+
10971288 // dictionary encoded, no NULLs
10981289 let data =
10991290 build_dictionary_encoded_primitive_page_iterator :: < T > ( mandatory_column_desc. clone ( ) , 0.0 ) ;
@@ -1594,6 +1785,66 @@ fn add_benches(c: &mut Criterion) {
15941785 assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
15951786 } ) ;
15961787
1788+ // delta byte array with constant prefix and suffix lengths
1789+ let delta_string_const_prefix_no_null_data = build_constant_prefix_byte_array_page_iterator (
1790+ mandatory_string_column_desc. clone ( ) ,
1791+ 0.0 ,
1792+ Encoding :: DELTA_BYTE_ARRAY ,
1793+ false ,
1794+ ) ;
1795+ group. bench_function (
1796+ "const prefix delta byte array encoded, mandatory, no NULLs" ,
1797+ |b| {
1798+ b. iter ( || {
1799+ let array_reader = create_byte_array_reader (
1800+ delta_string_const_prefix_no_null_data. clone ( ) ,
1801+ mandatory_string_column_desc. clone ( ) ,
1802+ ) ;
1803+ count = bench_array_reader ( array_reader) ;
1804+ } ) ;
1805+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1806+ } ,
1807+ ) ;
1808+
1809+ // delta byte array with constant prefix and no suffix
1810+ let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator (
1811+ mandatory_string_column_desc. clone ( ) ,
1812+ 0.0 ,
1813+ Encoding :: DELTA_BYTE_ARRAY ,
1814+ true ,
1815+ ) ;
1816+ group. bench_function ( "const delta byte array encoded, mandatory, no NULLs" , |b| {
1817+ b. iter ( || {
1818+ let array_reader = create_byte_array_reader (
1819+ delta_string_const_no_null_data. clone ( ) ,
1820+ mandatory_string_column_desc. clone ( ) ,
1821+ ) ;
1822+ count = bench_array_reader ( array_reader) ;
1823+ } ) ;
1824+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1825+ } ) ;
1826+
1827+ // delta length byte array with constant lengths
1828+ let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator (
1829+ mandatory_string_column_desc. clone ( ) ,
1830+ 0.0 ,
1831+ Encoding :: DELTA_LENGTH_BYTE_ARRAY ,
1832+ true ,
1833+ ) ;
1834+ group. bench_function (
1835+ "const delta length byte array encoded, mandatory, no NULLs" ,
1836+ |b| {
1837+ b. iter ( || {
1838+ let array_reader = create_byte_array_reader (
1839+ delta_string_const_no_null_data. clone ( ) ,
1840+ mandatory_string_column_desc. clone ( ) ,
1841+ ) ;
1842+ count = bench_array_reader ( array_reader) ;
1843+ } ) ;
1844+ assert_eq ! ( count, EXPECTED_VALUE_COUNT ) ;
1845+ } ,
1846+ ) ;
1847+
15971848 group. finish ( ) ;
15981849
15991850 // binary benchmarks
0 commit comments