1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ use chrono:: { DateTime , Utc } ;
1819use datafusion:: common:: utils:: get_available_parallelism;
20+ use datafusion:: error:: DataFusionError ;
1921use datafusion:: { error:: Result , DATAFUSION_VERSION } ;
20- use serde:: { Serialize , Serializer } ;
22+ use serde:: { Deserialize , Deserializer , Serialize , Serializer } ;
2123use serde_json:: Value ;
24+ use std:: error:: Error ;
2225use std:: {
2326 collections:: HashMap ,
2427 path:: Path ,
@@ -36,14 +39,29 @@ where
3639 . as_secs ( ) ,
3740 )
3841}
42+ fn deserialize_start_time < ' de , D > ( des : D ) -> Result < SystemTime , D :: Error >
43+ where
44+ D : Deserializer < ' de > ,
45+ {
46+ let secs = u64:: deserialize ( des) ?;
47+ Ok ( SystemTime :: UNIX_EPOCH + Duration :: from_secs ( secs) )
48+ }
49+
3950fn serialize_elapsed < S > ( elapsed : & Duration , ser : S ) -> Result < S :: Ok , S :: Error >
4051where
4152 S : Serializer ,
4253{
4354 let ms = elapsed. as_secs_f64 ( ) * 1000.0 ;
4455 ser. serialize_f64 ( ms)
4556}
46- #[ derive( Debug , Serialize ) ]
57+ fn deserialize_elapsed < ' de , D > ( des : D ) -> Result < Duration , D :: Error >
58+ where
59+ D : Deserializer < ' de > ,
60+ {
61+ let ms = f64:: deserialize ( des) ?;
62+ Ok ( Duration :: from_secs_f64 ( ms / 1000.0 ) )
63+ }
64+ #[ derive( Debug , Serialize , Deserialize ) ]
4765pub struct RunContext {
4866 /// Benchmark crate version
4967 pub benchmark_version : String ,
@@ -52,7 +70,10 @@ pub struct RunContext {
5270 /// Number of CPU cores
5371 pub num_cpus : usize ,
5472 /// Start time
55- #[ serde( serialize_with = "serialize_start_time" ) ]
73+ #[ serde(
74+ serialize_with = "serialize_start_time" ,
75+ deserialize_with = "deserialize_start_time"
76+ ) ]
5677 pub start_time : SystemTime ,
5778 /// CLI arguments
5879 pub arguments : Vec < String > ,
@@ -77,18 +98,24 @@ impl RunContext {
7798}
7899
79100/// A single iteration of a benchmark query
80- #[ derive( Debug , Serialize ) ]
101+ #[ derive( Debug , Serialize , Deserialize ) ]
81102struct QueryIter {
82- #[ serde( serialize_with = "serialize_elapsed" ) ]
103+ #[ serde(
104+ serialize_with = "serialize_elapsed" ,
105+ deserialize_with = "deserialize_elapsed"
106+ ) ]
83107 elapsed : Duration ,
84108 row_count : usize ,
85109}
86110/// A single benchmark case
87- #[ derive( Debug , Serialize ) ]
111+ #[ derive( Debug , Serialize , Deserialize ) ]
88112pub struct BenchQuery {
89113 query : String ,
90114 iterations : Vec < QueryIter > ,
91- #[ serde( serialize_with = "serialize_start_time" ) ]
115+ #[ serde(
116+ serialize_with = "serialize_start_time" ,
117+ deserialize_with = "deserialize_start_time"
118+ ) ]
92119 start_time : SystemTime ,
93120 success : bool ,
94121}
@@ -181,4 +208,71 @@ impl BenchmarkRun {
181208 } ;
182209 Ok ( ( ) )
183210 }
211+
212+ pub fn maybe_compare_with_previous ( & self , maybe_path : Option < impl AsRef < Path > > ) -> Result < ( ) > {
213+ let Some ( path) = maybe_path else {
214+ return Ok ( ( ) ) ;
215+ } ;
216+ let Ok ( prev) = std:: fs:: read ( path) else {
217+ return Ok ( ( ) ) ;
218+ } ;
219+
220+ let mut prev_output: HashMap < & str , Value > =
221+ serde_json:: from_slice ( & prev) . map_err ( external) ?;
222+
223+ let prev_queries: Vec < BenchQuery > =
224+ serde_json:: from_value ( prev_output. remove ( "queries" ) . unwrap ( ) ) . map_err ( external) ?;
225+
226+ let mut header_printed = false ;
227+ for query in self . queries . iter ( ) {
228+ let Some ( prev_query) = prev_queries. iter ( ) . find ( |v| v. query == query. query ) else {
229+ continue ;
230+ } ;
231+ if prev_query. iterations . is_empty ( ) {
232+ continue ;
233+ }
234+ if query. iterations . is_empty ( ) {
235+ println ! ( "{}: Failed ❌" , query. query) ;
236+ continue ;
237+ }
238+
239+ let avg_prev = prev_query. avg ( ) ;
240+ let avg = query. avg ( ) ;
241+ let ( f, tag, emoji) = if avg < avg_prev {
242+ let f = avg_prev as f64 / avg as f64 ;
243+ ( f, "faster" , if f > 1.2 { "✅" } else { "✔" } )
244+ } else {
245+ let f = avg as f64 / avg_prev as f64 ;
246+ ( f, "slower" , if f > 1.2 { "❌" } else { "✖" } )
247+ } ;
248+ if !header_printed {
249+ header_printed = true ;
250+ let datetime: DateTime < Utc > = prev_query. start_time . into ( ) ;
251+ println ! (
252+ "==== Comparison with the previous benchmark from {} ====" ,
253+ datetime. format( "%Y-%m-%d %H:%M:%S UTC" )
254+ ) ;
255+ }
256+ println ! (
257+ "{:>8}: prev={avg_prev:>4} ms, new={avg:>4} ms, diff={f:.2} {tag} {emoji}" ,
258+ query. query
259+ ) ;
260+ }
261+
262+ Ok ( ( ) )
263+ }
264+ }
265+
266+ impl BenchQuery {
267+ fn avg ( & self ) -> u128 {
268+ self . iterations
269+ . iter ( )
270+ . map ( |v| v. elapsed . as_millis ( ) )
271+ . sum :: < u128 > ( )
272+ / self . iterations . len ( ) as u128
273+ }
274+ }
275+
276+ fn external ( err : impl Error + Send + Sync + ' static ) -> DataFusionError {
277+ DataFusionError :: External ( Box :: new ( err) )
184278}
0 commit comments