 import argparse
 import json
-from typing import Tuple, List, Any
-import requests
+import logging
+import os
 from dataclasses import dataclass
 from datetime import datetime
+from typing import Any, Dict, List, Optional, Tuple
+
 import pandas as pd
-import logging
-import os
+import requests
 
 logging.basicConfig(level=logging.INFO)
 
     ["mv3", "mps", "iphone_15", "ios_18.0"],
 ]
 
+
+# Abbreviations used to generate a short name for each benchmark result table.
+# This keeps sheet names within Excel's 31-character limit when generating the Excel file.
 ABBREVIATIONS = {
     "samsung": "smg",
     "galaxy": "gx",
     "xnnpackq8": "xnnq8",
 }
 
-
 def abbreviate(s):
     for full, abbr in ABBREVIATIONS.items():
         s = s.replace(full, abbr)
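Illustrative sketch, not part of the diff: given the mappings visible in this hunk, and assuming none of the entries elided from it also match the string, abbreviate() shortens a table name like so:

sample = "samsung_galaxy_s22_xnnpackq8"   # hypothetical table-name fragment
print(abbreviate(sample))                 # -> "smg_gx_s22_xnnq8"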
@@ -138,29 +141,74 @@ def argparser():
 
 class ExecutorchBenchmarkFetcher:
     """
-    Fetch benchmark data from HUD
+    Fetch and process benchmark data from HUD API for ExecutorchBenchmark.
+
     Usage:
         fetcher = ExecutorchBenchmarkFetcher()
-        fetcher.get_data(start_time, end_time)
-
-        fetcher.toDataFrame() -> return a list of dataframes, one for private devices, one for public devices
-        fetcher.toExcelSheet(output_dir=".") -> write to excel files, one for private devices, one for public devices
+        fetcher.run(start_time, end_time, private_device_matching_list, public_device_matching_list)
+        # Convert results to DataFrames
+        private_dfs, public_dfs = fetcher.to_df()
+        # Export results to Excel files
+        fetcher.to_excel(output_dir=".")
     """
 
-    def __init__(self, env="prod", disable_logging=False):
+    def __init__(
+        self,
+        env="prod",
+        disable_logging=False,
+        group_table_fields=None,
+        group_row_fields=None,
+    ):
+        """
+        Initialize the ExecutorchBenchmarkFetcher.
+
+        Args:
+            env: Environment to use ("local" or "prod")
+            disable_logging: Whether to suppress log output
+            group_table_fields: Custom fields to group tables by (defaults to device, backend, arch, model)
+            group_row_fields: Custom fields to group rows by (defaults to workflow_id, job_id, granularity_bucket)
+        """
         self.env = env
         self.base_url = self._get_base_url()
-        self.query_group_table_by_fields = ["device", "backend", "arch", "model"]
-        self.query_group_row_by_fields = ["workflow_id", "job_id", "granularity_bucket"]
+        self.query_group_table_by_fields = (
+            group_table_fields
+            if group_table_fields
+            else ["device", "backend", "arch", "model"]
+        )
+        self.query_group_row_by_fields = (
+            group_row_fields
+            if group_row_fields
+            else ["workflow_id", "job_id", "granularity_bucket"]
+        )
         self.data = None
         self.disable_logging = disable_logging
         self.results_private = []
         self.results_public = []
 
     def run(
-        self, start_time, end_time, privateDeviceMatchings, publicDeviceMatchings
-    ) -> Tuple[List[Any], List[Any]]:
-
+        self,
+        start_time: str,
+        end_time: str,
+        privateDeviceMatchings: List[List[str]],
+        publicDeviceMatchings: List[List[str]],
+    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+        """
+        Execute the benchmark data fetching and processing workflow.
+
+        This method orchestrates the entire process:
+        1. Fetches raw data from the HUD API for the specified time range
+        2. Processes and normalizes the data
+        3. Filters results based on device matching criteria for both private and public devices
+
+        Args:
+            start_time: ISO8601 formatted start time (YYYY-MM-DDTHH:MM:SS)
+            end_time: ISO8601 formatted end time (YYYY-MM-DDTHH:MM:SS)
+            privateDeviceMatchings: List of keyword lists for matching private devices
+            publicDeviceMatchings: List of keyword lists for matching public devices
+
+        Returns:
+            Tuple containing (private_device_results, public_device_results)
+        """
         self.data = self._fetch_data(start_time, end_time)
         if not self.disable_logging:
             self.print_all_names()
@@ -177,7 +225,17 @@ def run(
         )
         return (self.results_private, self.results_public)
 
-    def toDataFrame(self):
+    def to_df(self) -> Tuple[Any, Any]:
+        """
+        Convert benchmark results to pandas DataFrames.
+
+        Transforms the raw benchmark results into DataFrames for easier analysis
+        and manipulation.
+
+        Returns:
+            Tuple containing (private_device_dataframes, public_device_dataframes)
+            Each item is a list of dictionaries with 'groupInfo' and 'df' keys
+        """
         private_dfs = [
             {"groupInfo": item["groupInfo"], "df": pd.DataFrame(item["rows"])}
             for item in self.results_private
@@ -188,7 +246,19 @@ def toDataFrame(self):
         ]
         return (private_dfs, public_dfs)
 
-    def toExcelSheet(self, output_dir="."):
+    def to_excel(self, output_dir: str = ".") -> None:
+        """
+        Export benchmark results to Excel files.
+
+        Creates two Excel files:
+        - res_private.xlsx: Results for private devices
+        - res_public.xlsx: Results for public devices
+
+        Each file contains multiple sheets, one per benchmark configuration.
+
+        Args:
+            output_dir: Directory to save Excel files
+        """
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
             logging.info(f"Created output directory: {output_dir}")
@@ -199,7 +269,19 @@ def toExcelSheet(self, output_dir="."):
         self._write_multi_sheet_excel(self.results_private, private_path)
         self._write_multi_sheet_excel(self.results_public, public_path)
 
-    def _write_multi_sheet_excel(self, data_list, output_path):
+    def _write_multi_sheet_excel(
+        self, data_list: List[Dict[str, Any]], output_path: str
+    ) -> None:
+        """
+        Write multiple benchmark results to sheets in an Excel file.
+
+        Creates an Excel file with multiple sheets, one for each benchmark configuration.
+        Handles sheet name length limitations and truncates names if necessary.
+
+        Args:
+            data_list: List of benchmark result dictionaries
+            output_path: Path to save the Excel file
+        """
         logging.info(
             f"\n========= Generate excel file with multiple sheets for {output_path} =========\n"
         )
@@ -219,21 +301,45 @@ def _write_multi_sheet_excel(self, data_list, output_path):
             df = pd.DataFrame(rows)
             df.to_excel(writer, sheet_name=sheet_name or "Sheet", index=False)
 
-    def _fetch_data(self, start_time, end_time):
+    def _fetch_data(
+        self, start_time: str, end_time: str
+    ) -> Optional[List[Dict[str, Any]]]:
+        """
+        Fetch and process benchmark data for the specified time range.
+
+        Args:
+            start_time: ISO8601 formatted start time
+            end_time: ISO8601 formatted end time
+
+        Returns:
+            Processed benchmark data or None if fetch failed
+        """
         data = self._fetch_execu_torch_data(start_time, end_time)
         if data is None:
             return None
         self.data = self._process(data)
         return self.data
 
-    def _get_base_url(self):
+    def _get_base_url(self) -> str:
+        """
+        Get the base URL for API requests based on environment.
+
+        Returns:
+            Base URL string for the configured environment
+        """
         base_urls = {
             "local": "http://localhost:3000",
             "prod": "https://hud.pytorch.org",
         }
         return base_urls[self.env]
 
-    def print_all_names(self):
+    def print_all_names(self) -> None:
+        """
+        Print all benchmark table names found in the data.
+
+        Separates results by device type (public/private) and displays counts.
+        This is useful for debugging and understanding what data is available.
+        """
         if not self.data:
             return
         logging.info("peeking table result:")
@@ -263,7 +369,22 @@ def print_all_names(self):
         for name in private_ones:
             logging.info(name)
 
-    def _process(self, data):
+    def _process(self, data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """
+        Process raw benchmark data.
+
+        This method:
+        1. Normalizes string values in groupInfo
+        2. Creates table_name from group info components
+        3. Determines aws_type (public/private) based on device name
+        4. Sorts results by table_name
+
+        Args:
+            data: Raw benchmark data from API
+
+        Returns:
+            Processed benchmark data
+        """
         for item in data:
             group = item.get("groupInfo", {})
             item["groupInfo"] = {
@@ -275,11 +396,15 @@ def _process(self, data):
                 f"{group['model']}|{group['backend']}|{group['device']}|{group['arch']}"
             )
             name = self.normalize_string(name)
+            # Add full name joined by the group key fields
             item["table_name"] = name
+
+            # Mark aws_type: private or public
             if group.get("device", "").find("private") != -1:
                 item["groupInfo"]["aws_type"] = "private"
             else:
                 item["groupInfo"]["aws_type"] = "public"
+        # Sort by table name
         data.sort(key=lambda x: x["table_name"])
         logging.info(f"fetched {len(data)} table views")
         return data
@@ -364,7 +489,7 @@ def normalize_string(self, s, replace="_"):
     )
 
     if args.outputType == "df":
-        private, public = fetcher.toDataFrame()
+        private, public = fetcher.to_df()
        logging.info(
             f"=====================Printing private device benchmark results in dataframe====================="
         )
@@ -383,7 +508,7 @@ def normalize_string(self, s, replace="_"):
         logging.info(
             f"Writing benchmark results to excel file: {args.outputDir}/res_private.xlsx"
         )
-        fetcher.toExcelSheet(args.outputDir)
+        fetcher.to_excel(args.outputDir)
     else:
         logging.info(
             f"======================Printing private device benchmark results in json format======================"
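For reference, a minimal usage sketch of the renamed API surface in this diff, mirroring the Usage block in the class docstring; the time range and private keyword list are illustrative placeholders, and the public matching list reuses the keyword list shown near the top of the file:

fetcher = ExecutorchBenchmarkFetcher(env="prod")
fetcher.run(
    "2025-06-01T00:00:00",                        # illustrative start_time (YYYY-MM-DDTHH:MM:SS)
    "2025-06-07T00:00:00",                        # illustrative end_time
    [["samsung", "galaxy", "private"]],           # privateDeviceMatchings (illustrative keywords)
    [["mv3", "mps", "iphone_15", "ios_18.0"]],    # publicDeviceMatchings (from the defaults above)
)
private_dfs, public_dfs = fetcher.to_df()         # lists of {"groupInfo", "df"} dicts per device type
fetcher.to_excel(output_dir=".")                  # writes res_private.xlsx and res_public.xlsx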