Skip to content

Commit 4fa6031

Browse files
authored
Merge branch 'master' into codex/create-crontab-script-for-daily-database-backup
2 parents fca3f5a + 645dd85 commit 4fa6031

File tree

4 files changed

+462
-1
lines changed

4 files changed

+462
-1
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ python src/run.py --help
3131

3232
for a description of all available options.
3333

34+
Running:
35+
36+
```bash
37+
python src/run.py plot
38+
```
39+
40+
will generate the performance and version stream plots using the records stored
41+
in MongoDB.
42+
3443
## MongoDB backup script
3544

3645
`scripts/backup_mongodb.sh` dumps a MongoDB database to a Dropbox-synced
@@ -42,4 +51,3 @@ Make it executable before scheduling it with `cron`:
4251
```bash
4352
chmod +x scripts/backup_mongodb.sh
4453
```
45-

src/db.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
"""Database helpers used by the CLI and plotting utilities."""
3+
4+
from __future__ import annotations
5+
6+
import datetime
7+
from typing import Tuple
8+
9+
import pandas as pd
10+
from pymongo import MongoClient
11+
12+
13+
def load_event(event_name: str, unique: bool = True) -> pd.DataFrame:
    """Load one event collection from MongoDB into a DataFrame.

    Parameters
    ----------
    event_name
        Name of the collection to read from the ``fmriprep_stats`` database.
    unique
        When true (the default), keep only the first record found for each
        ``run_uuid``.

    Returns
    -------
    pandas.DataFrame
        Every record of the collection, with ``dateCreated`` parsed as
        datetimes and an extra ``date_minus_time`` column holding the same
        calendar day at midnight.

    Raises
    ------
    RuntimeError
        If the collection holds no records.
    """
    # Use the client as a context manager so its connections are released as
    # soon as the cursor has been fully consumed by ``list``.
    with MongoClient() as client:
        records = list(client.fmriprep_stats[event_name].find())

    data = pd.DataFrame(records)
    if data.empty:
        raise RuntimeError(f"No records of event '{event_name}'")

    data["dateCreated"] = pd.to_datetime(data["dateCreated"])
    # Rebuild each timestamp from its year/month/day only, which drops the
    # time-of-day component so records can be grouped per calendar day.
    data["date_minus_time"] = data["dateCreated"].apply(
        lambda ts: datetime.datetime(year=ts.year, month=ts.month, day=ts.day)
    )
    if unique:
        data = data.drop_duplicates(subset=["run_uuid"])
    return data
27+
28+
29+
def _collapse_versions(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with ``environment_version`` normalized."""
    frame = frame.copy()
    versions = frame["environment_version"].fillna("older")

    # Releases that are lumped together under the "older" label.
    legacy = (
        (versions == "v0.0.1")
        | versions.str.startswith("20.0")
        | versions.str.startswith("20.1")
    )

    # Keep only the first two dot-separated components of each value.  Working
    # per value (instead of prefix-matching against a list of known series)
    # keeps "1.10.2" from being folded into "1.1"; values without a dot, such
    # as "older", pass through unchanged.
    collapsed = versions.str.split(".").str[:2].str.join(".")

    frame["environment_version"] = collapsed.mask(legacy, "older")
    return frame


def massage_versions(
    started: pd.DataFrame, success: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Normalize version strings as done in the analysis notebook.

    Missing versions, ``v0.0.1`` and anything starting with ``20.0`` or
    ``20.1`` are relabelled ``"older"``; every other value is truncated to
    its first two dot-separated components (e.g. ``23.1.4`` -> ``23.1``).

    Both tables are normalized independently, so a version that only shows up
    in *success* is collapsed as well.

    Parameters
    ----------
    started
        Records of the "started" event; must have an ``environment_version``
        column.
    success
        Records of the "success" event; must have an ``environment_version``
        column.

    Returns
    -------
    tuple of pandas.DataFrame
        Normalized copies of *started* and *success*, in that order.  The
        input frames are left untouched.
    """
    return _collapse_versions(started), _collapse_versions(success)

src/run.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828

2929
import click
3030
from api import parallel_fetch, ISSUES, DEFAULT_MAX_ERRORS
31+
from db import load_event, massage_versions
32+
from viz import plot_performance, plot_version_stream
3133

3234
DEFAULT_DAYS_WINDOW = 90
3335
DEFAULT_CHUNK_DAYS = 1
@@ -149,6 +151,25 @@ def get(event, start_date, end_date, days, chunk_days, jobs, max_errors, cached_
149151
click.echo(f"{datetime.now(timezone.utc):%Y-%m-%d %H:%M:%S} [Finished]")
150152

151153

154+
@cli.command()
@click.option(
    "-o",
    "--output-dir",
    type=click.Path(file_okay=False, dir_okay=True, writable=True),
    default=".",
    help="Directory where the plots are written",
)
@click.option("--drop-cutoff", default=None, help="Ignore versions older than this")
def plot(output_dir, drop_cutoff):
    """Generate plots using records stored in MongoDB."""
    # NOTE: the docstring above is shown by click as the command's help text.
    # click.Path only validates paths that already exist, so make sure the
    # target directory is there before anything tries to write into it.
    os.makedirs(output_dir, exist_ok=True)

    # Both output files share a YYYYMMDD prefix taken from the local date.
    today = f"{datetime.now():%Y%m%d}"
    out_perf = os.path.join(output_dir, f"{today}_weekly.png")
    out_ver = os.path.join(output_dir, f"{today}_versionstream.png")

    # load_event is called with its default arguments for both event types.
    unique_started = load_event("started")
    unique_success = load_event("success")

    plot_performance(
        unique_started, unique_success, drop_cutoff=drop_cutoff, out_file=out_perf
    )

    # The version-stream plot is fed the output of massage_versions rather
    # than the raw records.
    started_v, success_v = massage_versions(unique_started, unique_success)
    plot_version_stream(
        started_v, success_v, drop_cutoff=drop_cutoff, out_file=out_ver
    )
    click.echo(f"Saved plots to {output_dir}")
171+
172+
152173
if __name__ == "__main__":
    # Install entry-point: run the command-line interface when this file is
    # executed as a script.
    cli()

0 commit comments

Comments
 (0)