@@ -41,34 +41,32 @@ def extract_speedup_data_from_subdirs(benchmark_path: str) -> dict:
4141 # but os.walk is also robust for nested directories if needed in the future.
4242 for root , _ , files in os .walk (current_dir_path ):
4343 for file in files :
44- if file .endswith (".json" ):
45- json_file = os .path .join (root , file )
46- try :
47- with open (json_file , "r" ) as f :
48- data = json .load (f )
49- performance = data .get ("performance" , {})
50- if not performance :
51- continue
52-
53- speedup_data = performance .get ("speedup" )
54- if isinstance (speedup_data , dict ):
55- # Prioritize 'e2e' speedup, fallback to 'gpu'
56- if "e2e" in speedup_data :
57- data_by_subdir [subdir_name ].append (
58- speedup_data ["e2e" ]
59- )
60- elif "gpu" in speedup_data :
61- data_by_subdir [subdir_name ].append (
62- speedup_data ["gpu" ]
63- )
64- elif isinstance (speedup_data , (float , int )):
65- data_by_subdir [subdir_name ].append (speedup_data )
66-
67- except (json .JSONDecodeError , KeyError ) as e :
68- print (
69- f"Warning: Failed to read or parse file -> { json_file } , Error: { e } "
70- )
71- continue
44+ if not file .endswith (".json" ):
45+ continue
46+
47+ json_file = os .path .join (root , file )
48+ try :
49+ with open (json_file , "r" ) as f :
50+ data = json .load (f )
51+ performance = data .get ("performance" , {})
52+ if not performance :
53+ continue
54+
55+ speedup_data = performance .get ("speedup" )
56+ if isinstance (speedup_data , dict ):
57+ # Prioritize 'e2e' speedup, fallback to 'gpu'
58+ if "e2e" in speedup_data :
59+ data_by_subdir [subdir_name ].append (speedup_data ["e2e" ])
60+ elif "gpu" in speedup_data :
61+ data_by_subdir [subdir_name ].append (speedup_data ["gpu" ])
62+ elif isinstance (speedup_data , (float , int )):
63+ data_by_subdir [subdir_name ].append (speedup_data )
64+
65+ except (json .JSONDecodeError , KeyError ) as e :
66+ print (
67+ f"Warning: Failed to read or parse file -> { json_file } , Error: { e } "
68+ )
69+ continue
7270
7371 return data_by_subdir
7472
@@ -85,6 +83,72 @@ def load_json_file(filepath: str) -> dict:
8583 return {}
8684
8785
def detect_sample_error_code(log_text: str) -> str:
    """
    Classify a single sample's outcome from its log text.

    Used for bug-subgraph detection: scans the log (a whole string or a
    pre-split list of lines) for a ``[Result] status: ...`` marker and, when
    the status is ``failed``, inspects the following line's
    ``[Fail due to ....]`` reason to pick a specific error code.

    Args:
        log_text: Log content — a single string or a list of lines.
            NOTE(review): the annotation says ``str`` but list input is
            explicitly handled below — confirm the intended public type.

    Returns:
        One of:
            - "correct":      sample executed successfully
            - "eager_fail":   eager model execution failed
            - "compile_fail": compiled model compilation failed
            - "runtime_fail": runtime error during execution
            - "unknown":      no status marker found
    """
    lines = log_text.split("\n") if isinstance(log_text, str) else log_text

    # Markers emitted by the benchmark runner.
    status_pattern = re.compile(r"\[Result\] status: (.+)")
    reason_pattern = re.compile(r"\[Fail due to (.+)\.\]")

    # Keyword → error code, checked in order against the failure reason.
    keyword_to_code = (("eager", "eager_fail"), ("compiled", "compile_fail"))

    for idx, text in enumerate(lines):
        status_match = status_pattern.search(text)
        if status_match is None:
            continue

        status = status_match.group(1).strip()
        if status == "success":
            return "correct"
        if status != "failed":
            # Unrecognized status token — keep scanning for a decisive one.
            continue

        # A "failed" status is decisive; the reason (if any) is on the next line.
        next_idx = idx + 1
        if next_idx >= len(lines):
            return "runtime_fail"

        reason_match = reason_pattern.search(lines[next_idx])
        if reason_match is None:
            return "runtime_fail"

        reason = reason_match.group(1).lower()
        for keyword, code in keyword_to_code:
            if keyword in reason:
                return code
        return "runtime_fail"

    return "unknown"
150+
151+
88152def parse_logs_to_data (log_file : str ) -> list :
89153 """
90154 Parse a structured log file generated by the benchmark script and
@@ -189,32 +253,39 @@ def parse_logs_to_data(log_file: str) -> list:
189253 data ["correctness" ][key .strip ()] = values
190254 continue
191255
192- # Look for the status, and if it's "failed", look ahead to the next line.
193- result_status_match = patterns ["result_status" ].search (line )
194- if result_status_match :
195- status = result_status_match .group (1 ).strip ()
196- data ["result" ]["status" ] = status
197- if status == "failed" and (i + 1 ) < len (lines ):
198- error_reason_match = patterns ["failure" ].search (lines [i + 1 ])
199- if error_reason_match :
200- reason = error_reason_match .group (1 ).lower ()
201- if "eager" in reason :
202- data ["performance" ]["failure" ] = "eager"
203- data ["result" ]["status" ] = "eager_fail"
204- elif "compiled" in reason :
205- data ["performance" ]["failure" ] = "compiled"
206- data ["result" ]["status" ] = "compile_fail"
207- else :
208- data ["performance" ]["failure" ] = "other"
209- data ["result" ]["status" ] = "runtime_fail"
210- continue
211-
256+ # Check for speedup
212257 speedup_match = patterns ["speedup" ].search (line )
213258 if speedup_match :
214259 key , value_str = speedup_match .groups ()
215260 data ["performance" ]["speedup" ][key .strip ()] = float (value_str )
216261 continue
217262
263+ # Look for the status, and if it's "failed", look ahead to the next line.
264+ result_status_match = patterns ["result_status" ].search (line )
265+ if not result_status_match :
266+ continue
267+
268+ status = result_status_match .group (1 ).strip ()
269+ data ["result" ]["status" ] = status
270+ if status != "failed" or (i + 1 ) >= len (lines ):
271+ continue
272+
273+ error_reason_match = patterns ["failure" ].search (lines [i + 1 ])
274+ if not error_reason_match :
275+ continue
276+
277+ reason = error_reason_match .group (1 ).lower ()
278+ if "eager" in reason :
279+ data ["performance" ]["failure" ] = "eager"
280+ data ["result" ]["status" ] = "eager_fail"
281+ elif "compiled" in reason :
282+ data ["performance" ]["failure" ] = "compiled"
283+ data ["result" ]["status" ] = "compile_fail"
284+ else :
285+ data ["performance" ]["failure" ] = "other"
286+ data ["result" ]["status" ] = "runtime_fail"
287+ continue
288+
218289 # After parsing all lines, process the results
219290 if not all_runs_data :
220291 print ("No processable log entries found in the file." )
@@ -223,30 +294,24 @@ def parse_logs_to_data(log_file: str) -> list:
223294 samples = []
224295 for run_key , data in all_runs_data .items ():
225296 try :
297+ speedup_dict = data ["performance" ].get ("speedup" , {})
298+
226299 # Build result field with status and speedup (for compatibility with log2json output format)
227- if data ["result" ]["status" ] == "success" :
300+ if data ["result" ]["status" ] == "success" and speedup_dict :
228301 speedup_data = {}
229- if "e2e" in data ["performance" ]["speedup" ]:
230- e2e_value = data ["performance" ]["speedup" ]["e2e" ]
231- speedup_data ["e2e" ] = {"mean" : e2e_value }
232- if "gpu" in data ["performance" ]["speedup" ]:
233- gpu_value = data ["performance" ]["speedup" ]["gpu" ]
234- speedup_data ["gpu" ] = {"mean" : gpu_value }
302+ for key in ["e2e" , "gpu" ]:
303+ if key in speedup_dict :
304+ speedup_data [key ] = {"mean" : speedup_dict [key ]}
235305 if speedup_data :
236306 data ["result" ]["speedup" ] = speedup_data
237307
238- # Ensure performance.speedup.e2e is a direct value (not nested dict)
308+ # Ensure performance.speedup.e2e/gpu are direct values (not nested dict)
239309 # This is required by calculate_s_scores which uses performance_data.get("speedup", {}).get("e2e")
240- if "speedup" in data ["performance" ]:
241- speedup_dict = data ["performance" ]["speedup" ]
242- if "e2e" in speedup_dict :
243- e2e_val = speedup_dict ["e2e" ]
244- if isinstance (e2e_val , dict ) and "mean" in e2e_val :
245- speedup_dict ["e2e" ] = e2e_val ["mean" ]
246- if "gpu" in speedup_dict :
247- gpu_val = speedup_dict ["gpu" ]
248- if isinstance (gpu_val , dict ) and "mean" in gpu_val :
249- speedup_dict ["gpu" ] = gpu_val ["mean" ]
310+ for key in ["e2e" , "gpu" ]:
311+ if key in speedup_dict :
312+ val = speedup_dict [key ]
313+ if isinstance (val , dict ) and "mean" in val :
314+ speedup_dict [key ] = val ["mean" ]
250315
251316 samples .append (data )
252317
@@ -261,53 +326,31 @@ def parse_logs_to_data(log_file: str) -> list:
261326 return samples
262327
263328
264- def load_one_folder (folder_path : str ) -> list :
265- """
266- Traverse all .json files in a *single* folder and load all raw data.
267- Returns a list of raw data dictionaries.
268- """
269- if not os .path .isdir (folder_path ):
270- return []
271-
272- folder_name = os .path .basename (folder_path )
273- samples = []
274- print (f" - Loading JSON files from folder: { folder_path } " )
275-
276- for filename in os .listdir (folder_path ):
277- if filename .endswith (".json" ):
278- filepath = os .path .join (folder_path , filename )
279- data = load_json_file (filepath )
280- if data :
281- samples .append (data )
282- return samples
283-
284-
285329def scan_all_folders (benchmark_path : str ) -> dict :
286330 """
287- Unified entry point that supports both log files and JSON directories:
288- - If benchmark_path is a log file → parse it directly and return data as a single curve.
289- - If benchmark_path is a directory with .json files directly under it → treat them as a single curve.
290- - Otherwise, fallback to the old logic where subdirectories represent curves .
291- Returns dict[folder_name ] -> list_of_samples
331+ Unified entry point that supports log files and directories:
332+ - If benchmark_path is a log file (.log or .txt) → parse it directly and return data as a single curve.
333+ - If benchmark_path is a directory → scan for .log and .txt files in the directory,
334+ each log file becomes a curve .
335+ Returns dict[curve_name ] -> list_of_samples
292336 """
293- # Check if the path is a log file
337+ # Handle single log file
294338 if os .path .isfile (benchmark_path ):
295339 print (f"Detected log file: '{ benchmark_path } '" )
296340 samples = parse_logs_to_data (benchmark_path )
297- if samples :
298- # Use the log file name (without extension) as the curve name
299- folder_name = (
300- os .path .splitext (os .path .basename (benchmark_path ))[0 ] or "benchmark"
301- )
302- print (
303- f" - Parsed log file → 1 curve '{ folder_name } ' "
304- f"with { len (samples )} samples."
305- )
306- return {folder_name : samples }
307- else :
341+ if not samples :
308342 print (f" - No valid data found in log file." )
309343 return {}
310344
345+ folder_name = (
346+ os .path .splitext (os .path .basename (benchmark_path ))[0 ] or "benchmark"
347+ )
348+ print (
349+ f" - Parsed log file → 1 curve '{ folder_name } ' "
350+ f"with { len (samples )} samples."
351+ )
352+ return {folder_name : samples }
353+
311354 # Check if it's a directory
312355 if not os .path .isdir (benchmark_path ):
313356 print (
@@ -317,27 +360,38 @@ def scan_all_folders(benchmark_path: str) -> dict:
317360
318361 print (f"Scanning '{ benchmark_path } ' ..." )
319362
320- # Try flat structure, directly read JSON
321- flat_samples = load_one_folder ( benchmark_path )
322- if flat_samples : # ≥1 JSON loaded successfully
323- folder_name = os . path . basename ( benchmark_path ) or "benchmark"
324- print (
325- f" - Detected flat structure → 1 curve ' { folder_name } ' "
326- f"with { len ( flat_samples ) } samples."
327- )
328- return { folder_name : flat_samples }
363+ # Find .log and .txt files in the directory
364+ log_files = sorted (
365+ [
366+ f
367+ for f in os . listdir ( benchmark_path )
368+ if os . path . isfile ( os . path . join ( benchmark_path , f ))
369+ and f . endswith (( ".log" , ".txt" ))
370+ ]
371+ )
329372
330- # Fall back to subdirectories as curves logic
373+ if not log_files :
374+ print (" - No log files (.log or .txt) found in directory." )
375+ return {}
376+
377+ # Process log files, each becomes a curve
331378 all_results = {}
332- print (" - No JSON files found at top level → scanning sub-folders." )
333- for entry in os .listdir (benchmark_path ):
334- folder_full_path = os .path .join (benchmark_path , entry )
335- if os .path .isdir (folder_full_path ):
336- samples = load_one_folder (folder_full_path )
337- if samples :
338- all_results [entry ] = samples
339- print (f" - Folder '{ entry } ' loaded { len (samples )} samples." )
340- print (f"Total folders loaded: { len (all_results )} " )
379+ print (f" - Found { len (log_files )} log file(s) → each becomes a curve." )
380+ for log_file in log_files :
381+ log_file_path = os .path .join (benchmark_path , log_file )
382+ samples = parse_logs_to_data (log_file_path )
383+ if not samples :
384+ continue
385+
386+ curve_name = os .path .splitext (log_file )[0 ] or "benchmark"
387+ all_results [curve_name ] = samples
388+ print (f" - Curve '{ curve_name } ': { len (samples )} samples." )
389+
390+ if not all_results :
391+ print (" - No valid data found in any log file." )
392+ return {}
393+
394+ print (f"Total curves loaded: { len (all_results )} " )
341395 return all_results
342396
343397
@@ -526,7 +580,6 @@ def print_stat_info(
526580 rectified_speedups .append (regularized_speedup )
527581
528582 # ES(t) calculation: based on state change
529- rec_speedup_fake_degrad = 0
530583 if t_key < 1 :
531584 if fail_type is not None or speedup is None :
532585 rec_speedup_fake_degrad = fpdb
0 commit comments