|
1 | 1 | import glob |
2 | 2 | import os |
3 | 3 | import subprocess |
4 | | - |
5 | 4 | import pandas as pd |
6 | 5 |
|
7 | | - |
PATTERN = "benchmarking_*.py"
# The collated output is CSV data, so it carries a `.csv` suffix. `merge_csvs`
# below explicitly skips this file when it globs for partial results, so the
# repeated merges never re-ingest their own output.
FINAL_CSV_FILENAME = "collated_results.csv"
GITHUB_SHA = os.getenv("GITHUB_SHA", None)


class SubprocessCallException(Exception):
    """Raised by `run_command` when the child process exits with a non-zero status."""


def run_command(command: list[str], return_stdout=False):
    """
    Run `command` with `subprocess.check_output`, folding stderr into stdout.

    Args:
        command: The argument vector to execute (no shell is involved).
        return_stdout: When true, return the captured output decoded as UTF-8.

    Returns:
        The decoded output if `return_stdout` is true, otherwise `None`.

    Raises:
        SubprocessCallException: If the command exits with a non-zero status. The message contains the captured
            output. Errors raised before the process starts (e.g. a missing executable) are not wrapped.
    """
    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        captured = e.output
        if isinstance(captured, bytes):
            # Diagnostic text only: never let undecodable bytes mask the real failure.
            captured = captured.decode("utf-8", errors="replace")
        raise SubprocessCallException(f"Command `{' '.join(command)}` failed with:\n{captured}") from e

    if not return_stdout:
        return None
    if isinstance(output, bytes):
        return output.decode("utf-8")
    return output


def merge_csvs(final_csv: str = "collated_results.csv"):
    """
    Concatenate every partial `*.csv` in the current directory into `final_csv`.

    The collated file itself is excluded from the inputs, so calling this function repeatedly (as `run_scripts` does
    after every benchmark) does not duplicate rows. Empty or unparsable CSVs are skipped. If `GITHUB_SHA` is set, a
    `github_sha` column holding it is added to the merged frame.

    Args:
        final_csv: Path of the collated CSV to (over)write.

    Returns:
        None. When there is nothing to merge, no file is written.
    """
    final_path = os.path.abspath(final_csv)
    # Sorted so the row order of the collated file is deterministic across runs.
    partial_csvs = sorted(f for f in glob.glob("*.csv") if os.path.abspath(f) != final_path)
    if not partial_csvs:
        print("No result CSVs found to merge.")
        return

    frames = []
    for csv_path in partial_csvs:
        try:
            frames.append(pd.read_csv(csv_path))
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            # A zero-byte or corrupted partial (e.g. from a crashed benchmark) must not abort the merge.
            continue

    if not frames:
        print("All result CSVs were empty or invalid; nothing to merge.")
        return

    final_df = pd.concat(frames, ignore_index=True)
    if GITHUB_SHA is not None:
        final_df["github_sha"] = GITHUB_SHA
    final_df.to_csv(final_csv, index=False)
    # Report the number of files actually merged, not the number found.
    print(f"Merged {len(frames)} partial CSVs → {final_csv}.")
| 50 | + |
| 51 | + |
def run_scripts():
    """
    Run every `benchmarking_*.py` script in the current directory and collate the results.

    Scripts are run one at a time in sorted order; `benchmarking_utils.py` is skipped. Before each run the script's
    stale partial CSV is deleted. After each run, whether it succeeded or failed, the partial CSVs are merged into
    `FINAL_CSV_FILENAME`, so results gathered so far survive a later crash. A failing script is reported and does not
    stop the remaining scripts.
    """
    import sys  # local import: only needed here, to launch children with the current interpreter

    python_files = sorted(glob.glob(PATTERN))
    python_files = [f for f in python_files if f != "benchmarking_utils.py"]

    # Use the interpreter running this file so children share its environment;
    # fall back to PATH lookup if the interpreter path is unavailable.
    interpreter = sys.executable or "python"

    for file in python_files:
        # example: benchmarking_foo.py -> foo. Only the known prefix is stripped, so
        # benchmarking_foo_bar.py -> foo_bar rather than just its last segment.
        # NOTE(review): assumes each script writes `<name>.csv`; confirm against the benchmark scripts.
        script_name = os.path.splitext(os.path.basename(file))[0].removeprefix("benchmarking_")
        print(f"\n****** Running file: {file} ******")

        # Drop any partial CSV left by a previous run so a failed run cannot leak stale rows into the merge.
        partial_csv = f"{script_name}.csv"
        try:
            os.remove(partial_csv)
        except FileNotFoundError:
            pass

        command = [interpreter, file]
        try:
            run_command(command)
            print(f"→ {file} finished normally.")
        except SubprocessCallException as e:
            print(f"Error running {file}:\n{e}")
        finally:
            print(f"→ Merging partial CSVs after {file} …")
            merge_csvs(final_csv=FINAL_CSV_FILENAME)

    print(f"\nAll scripts attempted. Final collated CSV: {FINAL_CSV_FILENAME}")
0 commit comments