Skip to content

Commit d865962

Browse files
committed
black formatting
1 parent da9573d commit d865962

File tree

2 files changed

+87
-29
lines changed

2 files changed

+87
-29
lines changed

autoprot/analysis/stat_test.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def ttest(
3737
logged=True,
3838
):
3939
# noinspection PyUnresolvedReferences
40-
"""
40+
r"""
4141
Perform one or two sample ttest.
4242
4343
Parameters
@@ -269,7 +269,13 @@ def cohen_d(df, group1, group2):
269269

270270

271271
def limma(
272-
df, reps, cond="", custom_design=None, coef=None, print_r=False, return_cols=False
272+
df: pd.DataFrame,
273+
reps: list[list[str]],
274+
cond: str = "",
275+
custom_design: str = None,
276+
coef: str = None,
277+
print_r: bool = False,
278+
return_cols: bool = False,
273279
):
274280
# sourcery skip: extract-method, inline-immediately-returned-variable
275281
# noinspection PyUnresolvedReferences
@@ -342,6 +348,10 @@ def limma(
342348
343349
"""
344350
# TODO: better handle coefficient extraction in R
351+
# check input dtypes
352+
if not isinstance(df, pd.DataFrame):
353+
raise TypeError("df must be a pandas DataFrame")
354+
345355
df = df.copy()
346356
d = os.getcwd()
347357

autoprot/visualization/basic.py

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,14 +1373,20 @@ def _prep_volcano_data(
13731373

13741374
if p_thresh is not None:
13751375
# (2) significant by score
1376+
# in contrast to typical applications, we now want to find values below the p_threshold
13761377
df, _, _, _, p_sig_idx = _prep_ratio_data(
1377-
df, p_colname, None, p_thresh, signficance_label="p-value"
1378+
df,
1379+
p_colname,
1380+
None,
1381+
p_thresh,
1382+
significance_label="p-value",
1383+
direction="inside",
13781384
)
13791385

13801386
if log_fc_thresh is not None:
13811387
# (3) significant above or below fc-thresh
13821388
df, _, _, _, logfc_sig_idx = _prep_ratio_data(
1383-
df, log_fc_colname, None, log_fc_thresh, signficance_label="log2FC"
1389+
df, log_fc_colname, None, log_fc_thresh, significance_label="log2FC"
13841390
)
13851391

13861392
if p_thresh is not None and log_fc_thresh is not None:
@@ -1398,7 +1404,7 @@ def volcano(
13981404
log_fc_colname: str,
13991405
p_colname: str = None,
14001406
score_colname: str = None,
1401-
p_thresh: float | None = 0.05,
1407+
p_thresh: float | None | tuple[float | None] = 0.05,
14021408
log_fc_thresh: float | None = np.log2(2),
14031409
pointsize_colname: str | float = None,
14041410
pointsize_scaler: float = 1,
@@ -1759,7 +1765,21 @@ def volcano(
17591765
ax, ratio_thresh_x=log_fc_thresh, ratio_thresh_y=None
17601766
)
17611767
if p_thresh is not None:
1762-
_ratio_plot_style_axes(ax, ratio_thresh_x=None, ratio_thresh_y=p_thresh)
1768+
# convert p value to score
1769+
if isinstance(p_thresh, (int, float)):
1770+
score = -np.log10(p_thresh)
1771+
elif isinstance(p_thresh, (list, tuple)):
1772+
score = tuple(
1773+
(
1774+
-np.log10(x) if isinstance(x, (float, int)) else None
1775+
for x in p_thresh
1776+
)
1777+
)
1778+
else:
1779+
raise ValueError(
1780+
f"[volcano] Cannot convert p_thresh {p_thresh} to score"
1781+
)
1782+
_ratio_plot_style_axes(ax, ratio_thresh_x=None, ratio_thresh_y=score)
17631783

17641784
if ret_fig:
17651785
return fig
@@ -1935,7 +1955,8 @@ def _prep_ratio_data(
19351955
col_name1: str,
19361956
col_name2: str | None,
19371957
ratio_thresh: tuple[float | None, float | None] | float | None,
1938-
signficance_label: str = "ratio_thresh",
1958+
significance_label: str = "ratio_thresh",
1959+
direction: Literal["inside", "outside"] = "outside",
19391960
) -> tuple[pd.DataFrame, str, str, pd.Index, pd.Index]:
19401961
"""
19411962
Prepare ratio data for analysis.
@@ -1959,8 +1980,11 @@ def _prep_ratio_data(
19591980
lower and upper bounds for the ratio threshold. If a single float is provided,
19601981
it is treated as both the lower and upper bound. If None is included in the tuple,
19611982
that bound is ignored.
1962-
signficance_label : str
1983+
significance_label : str
19631984
The label to assign to significant ratios in the 'SigCat' column.
1985+
direction : Literal['inside', 'outside']
1986+
Whether values inside the (lower, upper) boundaries or outside of them are considered significant.
1987+
If a single number is given as threshold, the boundaries are calculated as (-value, value).
19641988
19651989
Returns
19661990
-------
@@ -1983,7 +2007,7 @@ def _prep_ratio_data(
19832007
pass
19842008
else:
19852009
raise ValueError(
1986-
f"[prep_ratio_data] The ratio threshold must be a number or a tuple of axactly two numbers but was {type(ratio_thresh)}: {ratio_thresh}."
2010+
f"[prep_ratio_data] The ratio threshold must be a number or a tuple of exactly two numbers but was {type(ratio_thresh)}: {ratio_thresh}."
19872011
)
19882012

19892013
if not isinstance(col_name1, str):
@@ -2007,42 +2031,66 @@ def _prep_ratio_data(
20072031

20082032
df["SigCat"] = "not significant" # default value
20092033
if ratio_thresh is not None:
2010-
if not isinstance(ratio_thresh, tuple):
2034+
if isinstance(ratio_thresh, (int, float, None)):
20112035
ratio_thresh = (-ratio_thresh, ratio_thresh)
2036+
print(
2037+
f"[prep_ratio_data] Setting threshold for {significance_label} to {ratio_thresh}."
2038+
)
20122039

2013-
if ratio_thresh[1] is None:
2014-
sig_up = pd.Series([False] * len(df), index=df.index)
2040+
if ratio_thresh[0] is None:
2041+
sig_lower = pd.Series([False] * len(df), index=df.index)
20152042
else:
2016-
sig_up = df[col_name1] > ratio_thresh[1]
2043+
if direction == "inside":
2044+
sig_lower = df[col_name1] > ratio_thresh[0]
2045+
else:
2046+
sig_lower = df[col_name1] < ratio_thresh[0]
20172047

2018-
if ratio_thresh[0] is None:
2019-
sig_down = pd.Series([False] * len(df), index=df.index)
2048+
if ratio_thresh[1] is None:
2049+
sig_upper = pd.Series([False] * len(df), index=df.index)
20202050
else:
2021-
sig_down = df[col_name1] < ratio_thresh[0]
2051+
if direction == "inside":
2052+
sig_upper = df[col_name1] < ratio_thresh[1]
2053+
else:
2054+
sig_upper = df[col_name1] > ratio_thresh[1]
20222055

20232056
if col_name2 is None:
20242057
# significantly up or down
2025-
df.loc[sig_up | sig_down, "SigCat"] = signficance_label
2058+
if direction == "inside":
2059+
# when the value is inside the interval, both tests must be true
2060+
df.loc[sig_upper & sig_lower, "SigCat"] = significance_label
2061+
else:
2062+
df.loc[sig_upper | sig_lower, "SigCat"] = significance_label
20262063
else:
2027-
2028-
if ratio_thresh[1] is None:
2029-
sig_up_2 = pd.Series([False] * len(df), index=df.index)
2064+
if ratio_thresh[0] is None:
2065+
sig_lower2 = pd.Series([False] * len(df), index=df.index)
20302066
else:
2031-
sig_up_2 = df[col_name2] > ratio_thresh[1]
2067+
if direction == "inside":
2068+
sig_lower2 = df[col_name2] > ratio_thresh[0]
2069+
else:
2070+
sig_lower2 = df[col_name2] < ratio_thresh[0]
20322071

2033-
if ratio_thresh[0] is None:
2034-
sig_down_2 = pd.Series([False] * len(df), index=df.index)
2072+
if ratio_thresh[1] is None:
2073+
sig_upper2 = pd.Series([False] * len(df), index=df.index)
20352074
else:
2036-
sig_down_2 = df[col_name2] < ratio_thresh[0]
2075+
if direction == "inside":
2076+
sig_upper2 = df[col_name2] < ratio_thresh[1]
2077+
else:
2078+
sig_upper2 = df[col_name2] > ratio_thresh[1]
20372079

20382080
# significantly up or down in both
2039-
df.loc[
2040-
(sig_up & sig_up_2) | (sig_down & sig_down_2),
2041-
"SigCat",
2042-
] = signficance_label
2081+
if direction == "inside":
2082+
df.loc[
2083+
(sig_upper & sig_lower) & (sig_upper2 & sig_lower2),
2084+
"SigCat",
2085+
] = significance_label
2086+
else:
2087+
df.loc[
2088+
(sig_upper & sig_upper2) | (sig_lower & sig_lower2),
2089+
"SigCat",
2090+
] = significance_label
20432091

20442092
non_sig_idx = df[df["SigCat"] == "not significant"].index
2045-
sig_idx = df[df["SigCat"] == signficance_label].index
2093+
sig_idx = df[df["SigCat"] == significance_label].index
20462094

20472095
return df, col_name1, col_name2, non_sig_idx, sig_idx
20482096

0 commit comments

Comments
 (0)