1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow_array:: builder:: Int32Builder ;
18+ use arrow_array:: builder:: { Date32Builder , Decimal128Builder , Int32Builder } ;
1919use arrow_array:: { builder:: StringBuilder , RecordBatch } ;
2020use arrow_schema:: { DataType , Field , Schema } ;
21- use comet:: execution:: shuffle:: { write_ipc_compressed , CompressionCodec , ShuffleWriterExec } ;
21+ use comet:: execution:: shuffle:: { CompressionCodec , ShuffleBlockWriter , ShuffleWriterExec } ;
2222use criterion:: { criterion_group, criterion_main, Criterion } ;
2323use datafusion:: physical_plan:: metrics:: Time ;
2424use datafusion:: {
@@ -31,67 +31,56 @@ use std::sync::Arc;
3131use tokio:: runtime:: Runtime ;
3232
3333fn criterion_benchmark ( c : & mut Criterion ) {
34+ let batch = create_batch ( 8192 , true ) ;
3435 let mut group = c. benchmark_group ( "shuffle_writer" ) ;
35- group. bench_function ( "shuffle_writer: encode (no compression))" , |b| {
36- let batch = create_batch ( 8192 , true ) ;
37- let mut buffer = vec ! [ ] ;
38- let ipc_time = Time :: default ( ) ;
39- b. iter ( || {
40- buffer. clear ( ) ;
41- let mut cursor = Cursor :: new ( & mut buffer) ;
42- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: None , & ipc_time)
43- } ) ;
44- } ) ;
45- group. bench_function ( "shuffle_writer: encode and compress (snappy)" , |b| {
46- let batch = create_batch ( 8192 , true ) ;
47- let mut buffer = vec ! [ ] ;
48- let ipc_time = Time :: default ( ) ;
49- b. iter ( || {
50- buffer. clear ( ) ;
51- let mut cursor = Cursor :: new ( & mut buffer) ;
52- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Snappy , & ipc_time)
53- } ) ;
54- } ) ;
55- group. bench_function ( "shuffle_writer: encode and compress (lz4)" , |b| {
56- let batch = create_batch ( 8192 , true ) ;
57- let mut buffer = vec ! [ ] ;
58- let ipc_time = Time :: default ( ) ;
59- b. iter ( || {
60- buffer. clear ( ) ;
61- let mut cursor = Cursor :: new ( & mut buffer) ;
62- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Lz4Frame , & ipc_time)
63- } ) ;
64- } ) ;
65- group. bench_function ( "shuffle_writer: encode and compress (zstd level 1)" , |b| {
66- let batch = create_batch ( 8192 , true ) ;
67- let mut buffer = vec ! [ ] ;
68- let ipc_time = Time :: default ( ) ;
69- b. iter ( || {
70- buffer. clear ( ) ;
71- let mut cursor = Cursor :: new ( & mut buffer) ;
72- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Zstd ( 1 ) , & ipc_time)
73- } ) ;
74- } ) ;
75- group. bench_function ( "shuffle_writer: encode and compress (zstd level 6)" , |b| {
76- let batch = create_batch ( 8192 , true ) ;
77- let mut buffer = vec ! [ ] ;
78- let ipc_time = Time :: default ( ) ;
79- b. iter ( || {
80- buffer. clear ( ) ;
81- let mut cursor = Cursor :: new ( & mut buffer) ;
82- write_ipc_compressed ( & batch, & mut cursor, & CompressionCodec :: Zstd ( 6 ) , & ipc_time)
83- } ) ;
84- } ) ;
85- group. bench_function ( "shuffle_writer: end to end" , |b| {
86- let ctx = SessionContext :: new ( ) ;
87- let exec = create_shuffle_writer_exec ( CompressionCodec :: Zstd ( 1 ) ) ;
88- b. iter ( || {
89- let task_ctx = ctx. task_ctx ( ) ;
90- let stream = exec. execute ( 0 , task_ctx) . unwrap ( ) ;
91- let rt = Runtime :: new ( ) . unwrap ( ) ;
92- criterion:: black_box ( rt. block_on ( collect ( stream) ) . unwrap ( ) ) ;
93- } ) ;
94- } ) ;
36+ for compression_codec in & [
37+ CompressionCodec :: None ,
38+ CompressionCodec :: Lz4Frame ,
39+ CompressionCodec :: Snappy ,
40+ CompressionCodec :: Zstd ( 1 ) ,
41+ CompressionCodec :: Zstd ( 6 ) ,
42+ ] {
43+ for enable_fast_encoding in [ true , false ] {
44+ let name = format ! ( "shuffle_writer: write encoded (enable_fast_encoding={enable_fast_encoding}, compression={compression_codec:?})" ) ;
45+ group. bench_function ( name, |b| {
46+ let mut buffer = vec ! [ ] ;
47+ let ipc_time = Time :: default ( ) ;
48+ let w = ShuffleBlockWriter :: try_new (
49+ & batch. schema ( ) ,
50+ enable_fast_encoding,
51+ compression_codec. clone ( ) ,
52+ )
53+ . unwrap ( ) ;
54+ b. iter ( || {
55+ buffer. clear ( ) ;
56+ let mut cursor = Cursor :: new ( & mut buffer) ;
57+ w. write_batch ( & batch, & mut cursor, & ipc_time) . unwrap ( ) ;
58+ } ) ;
59+ } ) ;
60+ }
61+ }
62+
63+ for compression_codec in [
64+ CompressionCodec :: None ,
65+ CompressionCodec :: Lz4Frame ,
66+ CompressionCodec :: Snappy ,
67+ CompressionCodec :: Zstd ( 1 ) ,
68+ CompressionCodec :: Zstd ( 6 ) ,
69+ ] {
70+ group. bench_function (
71+ format ! ( "shuffle_writer: end to end (compression = {compression_codec:?}" ) ,
72+ |b| {
73+ let ctx = SessionContext :: new ( ) ;
74+ let exec = create_shuffle_writer_exec ( compression_codec. clone ( ) ) ;
75+ b. iter ( || {
76+ let task_ctx = ctx. task_ctx ( ) ;
77+ let stream = exec. execute ( 0 , task_ctx) . unwrap ( ) ;
78+ let rt = Runtime :: new ( ) . unwrap ( ) ;
79+ rt. block_on ( collect ( stream) ) . unwrap ( ) ;
80+ } ) ;
81+ } ,
82+ ) ;
83+ }
9584}
9685
9786fn create_shuffle_writer_exec ( compression_codec : CompressionCodec ) -> ShuffleWriterExec {
@@ -104,6 +93,7 @@ fn create_shuffle_writer_exec(compression_codec: CompressionCodec) -> ShuffleWri
10493 compression_codec,
10594 "/tmp/data.out" . to_string ( ) ,
10695 "/tmp/index.out" . to_string ( ) ,
96+ true ,
10797 )
10898 . unwrap ( )
10999}
@@ -121,11 +111,19 @@ fn create_batch(num_rows: usize, allow_nulls: bool) -> RecordBatch {
121111 let schema = Arc :: new ( Schema :: new ( vec ! [
122112 Field :: new( "c0" , DataType :: Int32 , true ) ,
123113 Field :: new( "c1" , DataType :: Utf8 , true ) ,
114+ Field :: new( "c2" , DataType :: Date32 , true ) ,
115+ Field :: new( "c3" , DataType :: Decimal128 ( 11 , 2 ) , true ) ,
124116 ] ) ) ;
125117 let mut a = Int32Builder :: new ( ) ;
126118 let mut b = StringBuilder :: new ( ) ;
119+ let mut c = Date32Builder :: new ( ) ;
120+ let mut d = Decimal128Builder :: new ( )
121+ . with_precision_and_scale ( 11 , 2 )
122+ . unwrap ( ) ;
127123 for i in 0 ..num_rows {
128124 a. append_value ( i as i32 ) ;
125+ c. append_value ( i as i32 ) ;
126+ d. append_value ( ( i * 1000000 ) as i128 ) ;
129127 if allow_nulls && i % 10 == 0 {
130128 b. append_null ( ) ;
131129 } else {
@@ -134,7 +132,13 @@ fn create_batch(num_rows: usize, allow_nulls: bool) -> RecordBatch {
134132 }
135133 let a = a. finish ( ) ;
136134 let b = b. finish ( ) ;
137- RecordBatch :: try_new ( schema. clone ( ) , vec ! [ Arc :: new( a) , Arc :: new( b) ] ) . unwrap ( )
135+ let c = c. finish ( ) ;
136+ let d = d. finish ( ) ;
137+ RecordBatch :: try_new (
138+ schema. clone ( ) ,
139+ vec ! [ Arc :: new( a) , Arc :: new( b) , Arc :: new( c) , Arc :: new( d) ] ,
140+ )
141+ . unwrap ( )
138142}
139143
140144fn config ( ) -> Criterion {
0 commit comments