1717
1818use chrono:: { DateTime , Utc } ;
1919use datafusion:: common:: utils:: get_available_parallelism;
20- use datafusion:: error:: DataFusionError ;
2120use datafusion:: { error:: Result , DATAFUSION_VERSION } ;
2221use serde:: { Deserialize , Deserializer , Serialize , Serializer } ;
23- use serde_json:: Value ;
24- use std:: error:: Error ;
2522use std:: {
26- collections:: HashMap ,
2723 path:: Path ,
2824 time:: { Duration , SystemTime } ,
2925} ;
@@ -69,6 +65,10 @@ pub struct RunContext {
6965 pub datafusion_version : String ,
7066 /// Number of CPU cores
7167 pub num_cpus : usize ,
68+ /// Number of workers involved in a distributed query
69+ pub workers : usize ,
70+ /// Number of physical threads used per worker
71+ pub threads : usize ,
7272 /// Start time
7373 #[ serde(
7474 serialize_with = "serialize_start_time" ,
@@ -79,18 +79,14 @@ pub struct RunContext {
7979 pub arguments : Vec < String > ,
8080}
8181
82- impl Default for RunContext {
83- fn default ( ) -> Self {
84- Self :: new ( )
85- }
86- }
87-
8882impl RunContext {
89- pub fn new ( ) -> Self {
83+ pub fn new ( workers : usize , threads : usize ) -> Self {
9084 Self {
9185 benchmark_version : env ! ( "CARGO_PKG_VERSION" ) . to_owned ( ) ,
9286 datafusion_version : DATAFUSION_VERSION . to_owned ( ) ,
9387 num_cpus : get_available_parallelism ( ) ,
88+ workers,
89+ threads,
9490 start_time : SystemTime :: now ( ) ,
9591 arguments : std:: env:: args ( ) . skip ( 1 ) . collect :: < Vec < String > > ( ) ,
9692 }
@@ -99,13 +95,14 @@ impl RunContext {
9995
10096/// A single iteration of a benchmark query
10197#[ derive( Debug , Serialize , Deserialize ) ]
102- struct QueryIter {
98+ pub struct QueryIter {
10399 #[ serde(
104100 serialize_with = "serialize_elapsed" ,
105101 deserialize_with = "deserialize_elapsed"
106102 ) ]
107- elapsed : Duration ,
108- row_count : usize ,
103+ pub elapsed : Duration ,
104+ pub row_count : usize ,
105+ pub n_tasks : usize ,
109106}
110107/// A single benchmark case
111108#[ derive( Debug , Serialize , Deserialize ) ]
@@ -119,29 +116,20 @@ pub struct BenchQuery {
119116 start_time : SystemTime ,
120117 success : bool ,
121118}
122- /// Internal representation of a single benchmark query iteration result.
123- pub struct QueryResult {
124- pub elapsed : Duration ,
125- pub row_count : usize ,
126- }
119+
127120/// collects benchmark run data and then serializes it at the end
121+ #[ derive( Debug , Serialize , Deserialize ) ]
128122pub struct BenchmarkRun {
129123 context : RunContext ,
130124 queries : Vec < BenchQuery > ,
131125 current_case : Option < usize > ,
132126}
133127
134- impl Default for BenchmarkRun {
135- fn default ( ) -> Self {
136- Self :: new ( )
137- }
138- }
139-
140128impl BenchmarkRun {
141129 // create new
142- pub fn new ( ) -> Self {
130+ pub fn new ( workers : usize , threads : usize ) -> Self {
143131 Self {
144- context : RunContext :: new ( ) ,
132+ context : RunContext :: new ( workers , threads ) ,
145133 queries : vec ! [ ] ,
146134 current_case : None ,
147135 }
@@ -161,11 +149,9 @@ impl BenchmarkRun {
161149 }
162150 }
163151 /// Write a new iteration to the current case
164- pub fn write_iter ( & mut self , elapsed : Duration , row_count : usize ) {
152+ pub fn write_iter ( & mut self , query_iter : QueryIter ) {
165153 if let Some ( idx) = self . current_case {
166- self . queries [ idx]
167- . iterations
168- . push ( QueryIter { elapsed, row_count } )
154+ self . queries [ idx] . iterations . push ( query_iter)
169155 } else {
170156 panic ! ( "no cases existed yet" ) ;
171157 }
@@ -195,10 +181,7 @@ impl BenchmarkRun {
195181
196182 /// Stringify data into formatted json
197183 pub fn to_json ( & self ) -> String {
198- let mut output = HashMap :: < & str , Value > :: new ( ) ;
199- output. insert ( "context" , serde_json:: to_value ( & self . context ) . unwrap ( ) ) ;
200- output. insert ( "queries" , serde_json:: to_value ( & self . queries ) . unwrap ( ) ) ;
201- serde_json:: to_string_pretty ( & output) . unwrap ( )
184+ serde_json:: to_string_pretty ( & self ) . unwrap ( )
202185 }
203186
204187 /// Write data as json into output path if it exists.
@@ -217,15 +200,14 @@ impl BenchmarkRun {
217200 return Ok ( ( ) ) ;
218201 } ;
219202
220- let mut prev_output: HashMap < & str , Value > =
221- serde_json:: from_slice ( & prev) . map_err ( external) ?;
222-
223- let prev_queries: Vec < BenchQuery > =
224- serde_json:: from_value ( prev_output. remove ( "queries" ) . unwrap ( ) ) . map_err ( external) ?;
203+ let Ok ( prev_output) = serde_json:: from_slice :: < Self > ( & prev) else {
204+ return Ok ( ( ) ) ;
205+ } ;
225206
226207 let mut header_printed = false ;
227208 for query in self . queries . iter ( ) {
228- let Some ( prev_query) = prev_queries. iter ( ) . find ( |v| v. query == query. query ) else {
209+ let Some ( prev_query) = prev_output. queries . iter ( ) . find ( |v| v. query == query. query )
210+ else {
229211 continue ;
230212 } ;
231213 if prev_query. iterations . is_empty ( ) {
@@ -248,10 +230,24 @@ impl BenchmarkRun {
248230 if !header_printed {
249231 header_printed = true ;
250232 let datetime: DateTime < Utc > = prev_query. start_time . into ( ) ;
251- println ! (
233+ let header = format ! (
252234 "==== Comparison with the previous benchmark from {} ====" ,
253235 datetime. format( "%Y-%m-%d %H:%M:%S UTC" )
254236 ) ;
237+ println ! ( "{header}" ) ;
238+ // Print machine information
239+ println ! ( "os: {}" , std:: env:: consts:: OS ) ;
240+ println ! ( "arch: {}" , std:: env:: consts:: ARCH ) ;
241+ println ! ( "cpu cores: {}" , get_available_parallelism( ) ) ;
242+ println ! (
243+ "threads: {} -> {}" ,
244+ prev_output. context. threads, self . context. threads
245+ ) ;
246+ println ! (
247+ "workers: {} -> {}" ,
248+ prev_output. context. workers, self . context. workers
249+ ) ;
250+ println ! ( "{}" , "=" . repeat( header. len( ) ) )
255251 }
256252 println ! (
257253 "{:>8}: prev={avg_prev:>4} ms, new={avg:>4} ms, diff={f:.2} {tag} {emoji}" ,
@@ -272,7 +268,3 @@ impl BenchQuery {
272268 / self . iterations . len ( ) as u128
273269 }
274270}
275-
276- fn external ( err : impl Error + Send + Sync + ' static ) -> DataFusionError {
277- DataFusionError :: External ( Box :: new ( err) )
278- }
0 commit comments