Skip to content

Commit 6bd3ba7

Browse files
authored
Merge branch 'master' into codex/create-crontab-script-for-daily-database-backup
2 parents a339386 + 3de151f commit 6bd3ba7

File tree

5 files changed

+463
-3
lines changed

5 files changed

+463
-3
lines changed

README.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ python src/run.py --help
3131

3232
for a description of all available options.
3333

34+
Running:
35+
36+
```bash
37+
python src/run.py plot
38+
```
39+
40+
will generate the performance and version stream plots using the records stored
41+
in MongoDB.
42+
3443
## MongoDB backup script
3544

3645
`scripts/backup_mongodb.sh` dumps a MongoDB database to a Dropbox-synced
@@ -42,7 +51,6 @@ Make it executable before scheduling it with `cron`:
4251
```bash
4352
chmod +x scripts/backup_mongodb.sh
4453
```
45-
4654
Store `DBNAME` (and optional credentials) in environment variables rather than
4755
editing the script. You may create a file named `~/.mongodb_backup_env` with
4856
content like:
@@ -54,4 +62,3 @@ export DBNAME="fmriprep_stats"
5462
```
5563

5664
The backup script will source this file if present.
57-

scripts/backup_mongodb.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ fi
1515
: "${DBNAME:?Set DBNAME, e.g., export DBNAME=your_db}"
1616

1717
DATE=$(date +%Y-%m-%d)
18-
BACKUP_DIR="$HOME/Dropbox/backups"
18+
BACKUP_DIR="$HOME/Dropbox/fmriprep_stats"
1919
BACKUP_PATH="$BACKUP_DIR/db_backup_${DATE}"
2020

2121
mkdir -p "$BACKUP_DIR"

src/db.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
"""Database helpers used by the CLI and plotting utilities."""
3+
4+
from __future__ import annotations
5+
6+
import datetime
7+
from typing import Tuple
8+
9+
import pandas as pd
10+
from pymongo import MongoClient
11+
12+
13+
def load_event(event_name: str, unique: bool = True) -> pd.DataFrame:
    """Load one event collection from MongoDB.

    Parameters
    ----------
    event_name
        Name of the collection to read from the ``fmriprep_stats`` database.
    unique
        When true, keep only the first record of each ``run_uuid``.

    Returns
    -------
    pandas.DataFrame
        One row per record, with ``dateCreated`` parsed by
        ``pandas.to_datetime`` and an extra ``date_minus_time`` column
        holding the same timestamp truncated to midnight.

    Raises
    ------
    RuntimeError
        If the collection holds no records.

    """
    # Use the client as a context manager so its connections are released
    # as soon as the cursor has been fully consumed.
    with MongoClient() as client:
        records = list(client.fmriprep_stats[event_name].find())

    if not records:
        raise RuntimeError(f"No records of event '{event_name}'")

    data = pd.DataFrame(records)
    data["dateCreated"] = pd.to_datetime(data["dateCreated"])
    # Rebuild each timestamp from its calendar fields only (drops the time of day).
    data["date_minus_time"] = data["dateCreated"].apply(
        lambda stamp: datetime.datetime(year=stamp.year, month=stamp.month, day=stamp.day)
    )
    if unique:
        data = data.drop_duplicates(subset=["run_uuid"])
    return data
27+
28+
29+
def _normalize_versions(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with ``environment_version`` normalized."""
    # ``fillna`` returns a new frame, so the caller's data is never modified.
    frame = frame.fillna(value={"environment_version": "older"})
    versions = frame["environment_version"]

    # Releases lumped together under the "older" label.
    legacy = versions.eq("v0.0.1")
    for prefix in ("20.0", "20.1"):
        # ``na=False`` keeps the mask boolean if a cell is not a string.
        legacy |= versions.str.startswith(prefix, na=False)
    versions = versions.mask(legacy, "older")

    # Collapse every remaining dotted version to "major.minor". Deriving the
    # label from the value itself (rather than prefix-matching) keeps, e.g.,
    # "1.10.2" from being folded into "1.1".
    dotted = versions.str.contains(".", regex=False, na=False)
    major_minor = versions.str.split(".").str[:2].str.join(".")
    frame["environment_version"] = versions.where(~dotted, major_minor)
    return frame


def massage_versions(
    started: pd.DataFrame, success: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Normalize version strings as done in the analysis notebook.

    Missing versions, ``v0.0.1`` and anything starting with ``20.0`` or
    ``20.1`` are relabeled ``"older"``; every other dotted version is
    shortened to its ``major.minor`` prefix. Both tables are normalized
    independently, so a release present only in *success* is shortened too.

    Parameters
    ----------
    started
        Records of the "started" event; needs an ``environment_version`` column.
    success
        Records of the "success" event; needs an ``environment_version`` column.

    Returns
    -------
    tuple of pandas.DataFrame
        Normalized copies of *started* and *success*, in that order. The
        input frames are left untouched.

    """
    return _normalize_versions(started), _normalize_versions(success)

src/run.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828

2929
import click
3030
from api import parallel_fetch, ISSUES, DEFAULT_MAX_ERRORS
31+
from db import load_event, massage_versions
32+
from viz import plot_performance, plot_version_stream
3133

3234
DEFAULT_DAYS_WINDOW = 90
3335
DEFAULT_CHUNK_DAYS = 1
@@ -149,6 +151,26 @@ def get(event, start_date, end_date, days, chunk_days, jobs, max_errors, cached_
149151
click.echo(f"{datetime.now(timezone.utc):%Y-%m-%d %H:%M:%S} [Finished]")
150152

151153

154+
@cli.command()
@click.option(
    "-o",
    "--output-dir",
    type=click.Path(file_okay=False, dir_okay=True, writable=True),
    default=".",
    help="Directory where the plots are written (created if missing)",
)
@click.option("--drop-cutoff", default=None, help="Ignore versions older than this")
def plot(output_dir, drop_cutoff):
    """Generate plots using records stored in MongoDB."""
    # NOTE: the docstring above doubles as the ``--help`` text of the command,
    # so implementation notes live in comments instead.
    #
    # click.Path does not require the directory to exist, so create it here
    # rather than failing when the figures are saved.
    os.makedirs(output_dir, exist_ok=True)

    # Output names are prefixed with today's date (local time, YYYYMMDD).
    today = datetime.now().strftime("%Y%m%d")
    out_perf = os.path.join(output_dir, f"{today}_weekly.png")
    out_ver = os.path.join(output_dir, f"{today}_versionstream.png")

    # load_event() de-duplicates by run_uuid by default.
    unique_started = load_event("started")
    unique_success = load_event("success")

    plot_performance(unique_started, unique_success, drop_cutoff=drop_cutoff, out_file=out_perf)
    click.echo(f"Saved {out_perf}.")

    # The version stream plot works on normalized (major.minor) version labels.
    started_v, success_v = massage_versions(unique_started, unique_success)
    plot_version_stream(started_v, success_v, drop_cutoff=drop_cutoff, out_file=out_ver)
    click.echo(f"Saved {out_ver}")
172+
173+
152174
if __name__ == "__main__":
    # Install entry-point: hand control over to the click command group.
    cli()

0 commit comments

Comments
 (0)