Skip to content

Commit 8f54710

Browse files
authored
[CI][Benchmarks] Archive cutoff date (#19514)
Archived runs older than 3 times the specified days are not included in the dashboard, i.e., when archiving data older than 7 days, runs older than 21 days are not included. This change prevents the archived data file used in the dashboard from growing without limit.
1 parent 7e16ea8 commit 8f54710

File tree

4 files changed

+50
-10
lines changed

4 files changed

+50
-10
lines changed

devops/scripts/benchmarks/CONTRIB.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,9 @@ The suite is structured around three main components: Suites, Benchmarks, and Re
4242
* **Fields (set by Benchmark):**
4343
* `label`: Unique identifier for this *specific result type* within the benchmark instance (e.g., "Submit In Order Time"). Ideally contains `benchmark.name()`.
4444
* `value`: The measured numerical result (float).
45-
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
4645
* `command`: The command list used to run the benchmark (`list[str]`).
4746
* `env`: Environment variables used (`dict[str, str]`).
48-
* `stdout`: Full standard output of the benchmark run (string).
49-
* `passed`: Boolean indicating if verification passed (default: `True`).
47+
* `unit`: The unit of the value (string, e.g., "μs", "GB/s", "token/s").
5048
* `stddev`: Standard deviation, if calculated by the benchmark itself (float, default: 0.0).
5149
* `git_url`, `git_hash`: Git info for the benchmark's source code (string).
5250
* **Fields (set by Framework):**

devops/scripts/benchmarks/history.py

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
class BenchmarkHistory:
2121
runs = []
22+
TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"
2223

2324
def __init__(self, dir):
2425
self.dir = dir
@@ -31,7 +32,12 @@ def load_result(self, file_path: Path) -> BenchmarkRun:
3132
else:
3233
return None
3334

34-
def load(self, n: int):
35+
def load(self):
36+
"""
37+
Load benchmark runs from the results directory.
38+
This method loads only files newer than the archive cutoff dates,
39+
sorts them by timestamp, and stores the results in self.runs.
40+
"""
3541
results_dir = Path(self.dir) / "results"
3642
if not results_dir.exists() or not results_dir.is_dir():
3743
log.warning(
@@ -42,7 +48,7 @@ def load(self, n: int):
4248
# Get all JSON files in the results directory
4349
benchmark_files = list(results_dir.glob("*.json"))
4450

45-
# Extract timestamp and sort files by it
51+
# Extract timestamp
4652
def extract_timestamp(file_path: Path) -> str:
4753
try:
4854
# Assumes results are stored as <name>_YYYYMMDD_HHMMSS.json
@@ -51,11 +57,45 @@ def extract_timestamp(file_path: Path) -> str:
5157
except IndexError:
5258
return ""
5359

60+
baseline_drop_after = options.archive_baseline_days * 3
61+
pr_drop_after = options.archive_pr_days * 3
62+
baseline_cutoff_date = datetime.now(timezone.utc) - timedelta(
63+
days=baseline_drop_after
64+
)
65+
log.debug(f"Baseline cutoff date: {baseline_cutoff_date}")
66+
pr_cutoff_date = datetime.now(timezone.utc) - timedelta(days=pr_drop_after)
67+
log.debug(f"PR cutoff date: {pr_cutoff_date}")
68+
69+
# Filter out files older than three times the configured archive days
70+
def is_file_too_old(file_path: Path) -> bool:
71+
try:
72+
if file_path.stem.startswith("Baseline_"):
73+
cutoff_date = baseline_cutoff_date
74+
else:
75+
cutoff_date = pr_cutoff_date
76+
77+
timestamp_str = extract_timestamp(file_path)
78+
if not timestamp_str:
79+
return False
80+
81+
file_timestamp = datetime.strptime(timestamp_str, self.TIMESTAMP_FORMAT)
82+
# Add timezone info for proper comparison
83+
file_timestamp = file_timestamp.replace(tzinfo=timezone.utc)
84+
return file_timestamp < cutoff_date
85+
except Exception as e:
86+
log.warning(f"Error processing timestamp for {file_path.name}: {e}")
87+
return False
88+
89+
benchmark_files = [
90+
file for file in benchmark_files if not is_file_too_old(file)
91+
]
92+
93+
# Sort files by timestamp
5494
benchmark_files.sort(key=extract_timestamp, reverse=True)
5595

56-
# Load the first n benchmark files
96+
# Load benchmark files
5797
benchmark_runs = []
58-
for file_path in benchmark_files[:n]:
98+
for file_path in benchmark_files:
5999
benchmark_run = self.load_result(file_path)
60100
if benchmark_run:
61101
benchmark_runs.append(benchmark_run)
@@ -163,7 +203,7 @@ def save(self, save_name, results: list[Result], to_file=True):
163203

164204
# Use formatted timestamp for the filename
165205
timestamp = (
166-
datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
206+
datetime.now(tz=timezone.utc).strftime(self.TIMESTAMP_FORMAT)
167207
if options.timestamp_override is None
168208
else options.timestamp_override
169209
)

devops/scripts/benchmarks/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
293293
# limit how many files we load.
294294
# should this be configurable?
295295
log.info(f"Loading benchmark history from {results_dir}...")
296-
history.load(1000)
296+
history.load()
297297
log.info(f"Loaded {len(history.runs)} benchmark runs.")
298298

299299
if compare_names:

devops/scripts/benchmarks/options.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,9 @@ class Options:
9090
git_commit_override: str = None
9191
# Archiving settings
9292
# Archived runs are stored separately from the main dataset but are still accessible
93-
# via the HTML UI when "Include archived runs" is enabled
93+
# via the HTML UI when "Include archived runs" is enabled.
94+
# Archived runs older than 3 times the specified days are not included in the dashboard,
95+
# i.e., when archiving data older than 7 days, runs older than 21 days are not included.
9496
archive_baseline_days: int = 30 # Archive Baseline_* runs after 30 days
9597
archive_pr_days: int = 7 # Archive other (PR/dev) runs after 7 days
9698

0 commit comments

Comments
 (0)