99from collections import defaultdict
1010from dataclasses import dataclass
1111import matplotlib .dates as mdates
12- import numpy as np
1312from benches .result import BenchmarkRun , Result
1413
1514@dataclass
@@ -24,220 +23,21 @@ class BenchmarkSeries:
2423 runs : list [BenchmarkRun ]
2524
2625@dataclass
27- class LatestResults :
28- benchmark_label : str
29- run_values : dict [str , float ]
30-
31- @classmethod
32- def from_dict (cls , label : str , values : dict [str , float ]) -> 'LatestResults' :
33- return cls (benchmark_label = label , run_values = values )
34-
35- def get_latest_results (benchmarks : list [BenchmarkSeries ]) -> dict [str , LatestResults ]:
36- latest_results : dict [str , LatestResults ] = {}
37- for benchmark in benchmarks :
38- run_values = {
39- run .name : max (run .results , key = lambda x : x .date ).value
40- for run in benchmark .runs
41- }
42- latest_results [benchmark .label ] = LatestResults .from_dict (benchmark .label , run_values )
43- return latest_results
44-
45- def prepare_normalized_data (latest_results : dict [str , LatestResults ],
46- benchmarks : list [BenchmarkSeries ],
47- group_benchmarks : list [str ],
48- non_baseline_runs : list [str ],
49- baseline_name : str ) -> list [list [float ]]:
50- normalized_data = []
51- benchmark_map = {b .label : b for b in benchmarks }
52-
53- for run_name in non_baseline_runs :
54- run_data : list [float ] = []
55- for benchmark_label in group_benchmarks :
56- benchmark_data = latest_results [benchmark_label ].run_values
57- if run_name not in benchmark_data or baseline_name not in benchmark_data :
58- run_data .append (None )
59- continue
60-
61- baseline_value = benchmark_data [baseline_name ]
62- current_value = benchmark_data [run_name ]
63-
64- normalized_value = ((baseline_value / current_value ) if benchmark_map [benchmark_label ].metadata .lower_is_better
65- else (current_value / baseline_value )) * 100
66- run_data .append (normalized_value )
67- normalized_data .append (run_data )
68- return normalized_data
69-
70- def format_benchmark_label (label : str ) -> list [str ]:
71- words = re .split (' |_' , label )
72- lines = []
73- current_line = []
74-
75- # max line length 30
76- for word in words :
77- if len (' ' .join (current_line + [word ])) > 30 :
78- lines .append (' ' .join (current_line ))
79- current_line = [word ]
80- else :
81- current_line .append (word )
82-
83- if current_line :
84- lines .append (' ' .join (current_line ))
85-
86- return lines
87-
88- def create_bar_plot (ax : plt .Axes ,
89- normalized_data : list [list [float ]],
90- group_benchmarks : list [str ],
91- non_baseline_runs : list [str ],
92- latest_results : dict [str , LatestResults ],
93- benchmarks : list [BenchmarkSeries ],
94- baseline_name : str ) -> float :
95- x = np .arange (len (group_benchmarks ))
96- width = 0.8 / len (non_baseline_runs )
97- max_height = 0
98- benchmark_map = {b .label : b for b in benchmarks }
99-
100- for i , (run_name , run_data ) in enumerate (zip (non_baseline_runs , normalized_data )):
101- offset = width * i - width * (len (non_baseline_runs ) - 1 ) / 2
102- positions = x + offset
103- valid_data = [v if v is not None else 0 for v in run_data ]
104- rects = ax .bar (positions , valid_data , width , label = run_name )
105-
106- for rect , value , benchmark_label in zip (rects , run_data , group_benchmarks ):
107- if value is not None :
108- height = rect .get_height ()
109- if height > max_height :
110- max_height = height
111-
112- ax .text (rect .get_x () + rect .get_width ()/ 2. , height + 2 ,
113- f'{ value :.1f} %' ,
114- ha = 'center' , va = 'bottom' )
115-
116- benchmark_data = latest_results [benchmark_label ].run_values
117- baseline_value = benchmark_data [baseline_name ]
118- current_value = benchmark_data [run_name ]
119- unit = benchmark_map [benchmark_label ].metadata .unit
120-
121- tooltip_labels = [
122- f"Run: { run_name } \n "
123- f"Value: { current_value :.2f} { unit } \n "
124- f"Normalized to ({ baseline_name } ): { baseline_value :.2f} { unit } \n "
125- f"Normalized: { value :.1f} %"
126- ]
127- tooltip = mpld3 .plugins .LineHTMLTooltip (rect , tooltip_labels , css = '.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}' )
128- mpld3 .plugins .connect (ax .figure , tooltip )
129-
130- return max_height
131-
132- def add_chart_elements (ax : plt .Axes ,
133- group_benchmarks : list [str ],
134- group_name : str ,
135- max_height : float ) -> None :
136- top_padding = max_height * 0.2
137- ax .set_ylim (0 , max_height + top_padding )
138- ax .set_ylabel ('Performance relative to baseline (%)' )
139- ax .set_title (f'Performance Comparison (Normalized to Baseline) - { group_name } Group' )
140- ax .set_xticks ([])
141-
142- for idx , label in enumerate (group_benchmarks ):
143- split_labels = format_benchmark_label (label )
144- for i , sublabel in enumerate (split_labels ):
145- y_pos = max_height + (top_padding * 0.5 ) + 2 - (i * top_padding * 0.15 )
146- ax .text (idx , y_pos , sublabel ,
147- ha = 'center' ,
148- style = 'italic' ,
149- color = '#666666' )
150-
151- ax .grid (True , axis = 'y' , alpha = 0.2 )
152- ax .legend (bbox_to_anchor = (1 , 1 ), loc = 'upper left' )
153-
154- def split_large_groups (benchmark_groups ):
155- miscellaneous = []
156- new_groups = defaultdict (list )
157-
158- split_happened = False
159- for group , labels in benchmark_groups .items ():
160- if len (labels ) == 1 :
161- miscellaneous .extend (labels )
162- elif len (labels ) > 5 :
163- split_happened = True
164- mid = len (labels ) // 2
165- new_groups [group ] = labels [:mid ]
166- new_groups [group + '_' ] = labels [mid :]
167- else :
168- new_groups [group ] = labels
169-
170- if miscellaneous :
171- new_groups ['Miscellaneous' ] = miscellaneous
172-
173- if split_happened :
174- return split_large_groups (new_groups )
175- else :
176- return new_groups
177-
178- def group_benchmark_labels (benchmark_labels ):
179- benchmark_groups = defaultdict (list )
180- for label in benchmark_labels :
181- group = re .match (r'^[^_\s]+' , label )[0 ]
182- benchmark_groups [group ].append (label )
183- return split_large_groups (benchmark_groups )
184-
185- def create_normalized_bar_chart (benchmarks : list [BenchmarkSeries ], baseline_name : str ) -> list [str ]:
186- latest_results = get_latest_results (benchmarks )
187-
188- run_names = sorted (list (set (
189- name for result in latest_results .values ()
190- for name in result .run_values .keys ()
191- )))
192-
193- if baseline_name not in run_names :
194- return []
195-
196- benchmark_labels = [b .label for b in benchmarks ]
197-
198- benchmark_groups = group_benchmark_labels (benchmark_labels )
199-
200- html_charts = []
201-
202- for group_name , group_benchmarks in benchmark_groups .items ():
203- plt .close ('all' )
204- non_baseline_runs = [n for n in run_names if n != baseline_name ]
205-
206- if len (non_baseline_runs ) == 0 :
207- continue
208-
209- normalized_data = prepare_normalized_data (
210- latest_results , benchmarks , group_benchmarks ,
211- non_baseline_runs , baseline_name
212- )
213-
214- fig , ax = plt .subplots (figsize = (10 , 6 ))
215- max_height = create_bar_plot (
216- ax , normalized_data , group_benchmarks , non_baseline_runs ,
217- latest_results , benchmarks , baseline_name
218- )
219- add_chart_elements (ax , group_benchmarks , group_name , max_height )
220-
221- plt .tight_layout ()
222- html_charts .append (mpld3 .fig_to_html (fig ))
223- plt .close (fig )
224-
225- return html_charts
26+ class BenchmarkTimeSeries :
27+ label : str
28+ html : str
22629
227- def create_time_series_chart (benchmarks : list [BenchmarkSeries ], github_repo : str ) -> str :
30+ def create_time_series_chart (benchmarks : list [BenchmarkSeries ], github_repo : str ) -> list [ BenchmarkTimeSeries ] :
22831 plt .close ('all' )
22932
23033 num_benchmarks = len (benchmarks )
23134 if num_benchmarks == 0 :
23235 return
23336
234- fig , axes = plt .subplots (num_benchmarks , 1 , figsize = (10 , max (4 * num_benchmarks , 30 )))
235-
236- if num_benchmarks == 1 :
237- axes = [axes ]
37+ html_charts = []
23838
239- for idx , benchmark in enumerate (benchmarks ):
240- ax = axes [ idx ]
39+ for _ , benchmark in enumerate (benchmarks ):
40+ fig , ax = plt . subplots ( figsize = ( 10 , 4 ))
24141
24242 for run in benchmark .runs :
24343 sorted_points = sorted (run .results , key = lambda x : x .date )
@@ -277,13 +77,12 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
27777 ax .grid (True , alpha = 0.2 )
27878 ax .legend (bbox_to_anchor = (1 , 1 ), loc = 'upper left' )
27979 ax .xaxis .set_major_formatter (mdates .ConciseDateFormatter ('%Y-%m-%d %H:%M:%S' ))
280- ax .xaxis .set_major_locator (mdates .AutoDateLocator ())
28180
282- plt .tight_layout ()
283- html = mpld3 .fig_to_html (fig )
81+ plt .tight_layout ()
82+ html_charts .append (BenchmarkTimeSeries (html = mpld3 .fig_to_html (fig ), label = benchmark .label ))
83+ plt .close (fig )
28484
285- plt .close (fig )
286- return html
85+ return html_charts
28786
28887def process_benchmark_data (benchmark_runs : list [BenchmarkRun ], compare_names : list [str ]) -> list [BenchmarkSeries ]:
28988 benchmark_metadata : dict [str , BenchmarkMetadata ] = {}
@@ -319,12 +118,10 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li
319118 return benchmark_series
320119
321120def generate_html (benchmark_runs : list [BenchmarkRun ], github_repo : str , compare_names : list [str ]) -> str :
322- baseline_name = compare_names [0 ]
323121 benchmarks = process_benchmark_data (benchmark_runs , compare_names )
324122
325- comparison_html_charts = create_normalized_bar_chart (benchmarks , baseline_name )
326- timeseries_html = create_time_series_chart (benchmarks , github_repo )
327- comparison_charts_html = '\n ' .join (f'<div class="chart"><div>{ chart } </div></div>' for chart in comparison_html_charts )
123+ timeseries = create_time_series_chart (benchmarks , github_repo )
124+ timeseries_charts_html = '\n ' .join (f'<div class="chart" data-label="{ ts .label } "><div>{ ts .html } </div></div>' for ts in timeseries )
328125
329126 html_template = f"""
330127 <!DOCTYPE html>
@@ -375,18 +172,44 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
375172 margin-bottom: 16px;
376173 }}
377174 }}
175+ .filter-container {{
176+ text-align: center;
177+ margin-bottom: 24px;
178+ }}
179+ .filter-container input {{
180+ padding: 8px;
181+ font-size: 16px;
182+ border: 1px solid #ccc;
183+ border-radius: 4px;
184+ width: 400px;
185+ max-width: 100%;
186+ }}
378187 </style>
188+ <script>
189+ function filterCharts() {{
190+ const regexInput = document.getElementById('bench-filter').value;
191+ const regex = new RegExp(regexInput, 'i');
192+ const charts = document.querySelectorAll('.chart');
193+ charts.forEach(chart => {{
194+ const label = chart.getAttribute('data-label');
195+ if (regex.test(label)) {{
196+ chart.style.display = '';
197+ }} else {{
198+ chart.style.display = 'none';
199+ }}
200+ }});
201+ }}
202+ </script>
379203 </head>
380204 <body>
381205 <div class="container">
382206 <h1>Benchmark Results</h1>
383- <h2>Latest Results Comparison</h2>
384- <div class="chart">
385- { comparison_charts_html }
207+ <div class="filter-container">
208+ <input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
386209 </div>
387210 <h2>Historical Results</h2>
388- <div class="chart ">
389- { timeseries_html }
211+ <div class="charts ">
212+ { timeseries_charts_html }
390213 </div>
391214 </div>
392215 </body>
0 commit comments