Skip to content

Commit 69d1c94

Browse files
committed
refactor: move db helpers and fix version plots
1 parent f10348f commit 69d1c94

File tree

3 files changed

+63
-19
lines changed

3 files changed

+63
-19
lines changed

src/db.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
"""Database helpers used by the CLI and plotting utilities."""
3+
4+
from __future__ import annotations
5+
6+
import datetime
7+
from typing import Tuple
8+
9+
import pandas as pd
10+
from pymongo import MongoClient
11+
12+
13+
def load_event(event_name: str, unique: bool = True) -> pd.DataFrame:
    """Load one event collection from MongoDB.

    Parameters
    ----------
    event_name
        Name of the collection to read from the ``fmriprep_stats`` database.
    unique
        When true (the default), keep only the first record found for each
        ``run_uuid``.

    Returns
    -------
    pandas.DataFrame
        One row per record. ``dateCreated`` is parsed with
        ``pandas.to_datetime`` and a ``date_minus_time`` column is added
        holding the same calendar day with the time of day dropped.

    Raises
    ------
    RuntimeError
        If the collection contains no records.

    """
    # Drain the cursor while the client is open, then close the client
    # deterministically instead of leaking the connection on every call.
    with MongoClient() as client:
        records = list(client.fmriprep_stats[event_name].find())

    if not records:
        raise RuntimeError(f"No records of event '{event_name}'")

    data = pd.DataFrame(records)
    data["dateCreated"] = pd.to_datetime(data["dateCreated"])
    # Rebuild each timestamp from its year/month/day only, so all records of
    # the same day share one (midnight) value.
    data["date_minus_time"] = data["dateCreated"].apply(
        lambda stamp: datetime.datetime(
            year=stamp.year, month=stamp.month, day=stamp.day
        )
    )
    if unique:
        data = data.drop_duplicates(subset=["run_uuid"])
    return data
27+
28+
29+
def _normalize_version(value: object) -> object:
    """Map one raw ``environment_version`` value to its plotting label."""
    if not isinstance(value, str):
        # Leave unexpected non-string payloads untouched rather than crash.
        return value
    if value == "v0.0.1" or value.startswith("20.0") or value.startswith("20.1"):
        return "older"
    if "." in value:
        # Keep exactly the first two dot-separated components. Comparing whole
        # components (rather than string prefixes) keeps e.g. "1.10.2" in the
        # "1.10" series instead of folding it into "1.1".
        return ".".join(value.split(".")[:2])
    return value


def _normalize_frame(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with a normalized ``environment_version``."""
    # ``fillna`` already returns a new frame, so the caller's data is untouched.
    filled = frame.fillna(value={"environment_version": "older"})
    return filled.assign(
        environment_version=filled["environment_version"].map(_normalize_version)
    )


def massage_versions(
    started: pd.DataFrame, success: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Normalize version strings as done in the analysis notebook.

    The ``environment_version`` column of each frame is rewritten as follows:

    * missing values, ``"v0.0.1"`` and anything starting with ``"20.0"`` or
      ``"20.1"`` become ``"older"``;
    * every other dotted version is truncated to its first two components
      (``"21.0.2"`` becomes ``"21.0"``);
    * values without a dot are left as they are.

    Each frame is normalized independently, so a release series that only
    shows up in ``success`` is still collapsed.

    Parameters
    ----------
    started
        Records of the "started" event; must have an ``environment_version``
        column.
    success
        Records of the "success" event; must have an ``environment_version``
        column.

    Returns
    -------
    tuple of pandas.DataFrame
        New ``(started, success)`` frames; the inputs are not modified.

    """
    return _normalize_frame(started), _normalize_frame(success)

src/run.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828

2929
import click
3030
from api import parallel_fetch, ISSUES, DEFAULT_MAX_ERRORS
31-
from viz import load_event, plot_performance, plot_version_stream
31+
from db import load_event, massage_versions
32+
from viz import plot_performance, plot_version_stream
3233

3334
DEFAULT_DAYS_WINDOW = 90
3435
DEFAULT_CHUNK_DAYS = 1
@@ -163,7 +164,9 @@ def plot(output_dir, drop_cutoff):
163164
unique_success = load_event("success")
164165

165166
plot_performance(unique_started, unique_success, drop_cutoff=drop_cutoff, out_file=out_perf)
166-
plot_version_stream(unique_started, unique_success, drop_cutoff=drop_cutoff, out_file=out_ver)
167+
168+
started_v, success_v = massage_versions(unique_started, unique_success)
169+
plot_version_stream(started_v, success_v, drop_cutoff=drop_cutoff, out_file=out_ver)
167170
click.echo(f"Saved plots to {output_dir}")
168171

169172

src/viz.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import numpy as np
88
import pandas as pd
99
import matplotlib.pyplot as plt
10-
from pymongo import MongoClient
1110
from scipy.interpolate import RBFInterpolator
1211

1312

@@ -27,22 +26,6 @@ def _parse(vstr):
2726
_vparse = np.vectorize(_parse)
2827

2928

30-
def load_event(event_name: str, unique: bool = True) -> pd.DataFrame:
31-
"""Load one event collection from MongoDB."""
32-
db = MongoClient().fmriprep_stats
33-
data = pd.DataFrame(list(db[event_name].find()))
34-
if len(data) == 0:
35-
raise RuntimeError(f"No records of event '{event_name}'")
36-
37-
data["dateCreated"] = pd.to_datetime(data["dateCreated"])
38-
data["date_minus_time"] = data["dateCreated"].apply(
39-
lambda df: datetime.datetime(year=df.year, month=df.month, day=df.day)
40-
)
41-
if unique:
42-
data = data.drop_duplicates(subset=["run_uuid"])
43-
return data
44-
45-
4629
# -----------------------------------------------------------------------------
4730
# Plotting functions
4831
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)