Skip to content

Commit 00a48f2

Browse files
authored
🔧 limit scikit-learn version to <1.7 (#17)
* 🔧 limit scikit-learn version to <1.7 - parameter names for scoring changed * 🐛 add data to repo as upstream removed CSVs * 🎨 format and remove unused arguments * 🐛 fix url * 🐛 set correct filepath * 🎨 set link to main branch (needs merge to work)
1 parent 9e659b0 commit 00a48f2

File tree

7 files changed

+537
-10
lines changed

7 files changed

+537
-10
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Prostate Cancer Dataset
2+
3+
From [SurvSet](https://github.com/ErikinBC/SurvSet)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# %%
2+
# pip install SurvSet
3+
4+
# %%
5+
from pathlib import Path
6+
7+
from SurvSet.data import SurvLoader
8+
9+
loader = SurvLoader()
10+
# List of available datasets and meta-info
11+
loader.df_ds.head()
12+
13+
# %%
14+
df, ref = loader.load_dataset(ds_name='prostate').values()
15+
16+
# %%
17+
fname = "prostate.csv"
18+
19+
df.to_csv(fname, index=False)

docs/tutorial/data/prostate/prostate.csv

Lines changed: 503 additions & 0 deletions
Large diffs are not rendered by default.

docs/tutorial/explorative_analysis.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@
8282
"TARGET = \"event\"\n",
8383
"TIME_KM = \"time\"\n",
8484
"FOLDER = \"prostate\"\n",
85-
"CLINIC = \"https://raw.githubusercontent.com/ErikinBC/SurvSet/main/SurvSet/_datagen/output/prostate.csv\"\n",
85+
"CLINIC = \"https://raw.githubusercontent.com/RasmussenLab/njab/main/docs/tutorial/data/prostate/prostate.csv\"\n",
8686
"val_ids: str = \"\" # List of comma separated values or filepath\n",
8787
"#\n",
8888
"# list or string of csv, eg. \"var1,var2\"\n",

docs/tutorial/explorative_analysis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
TARGET = "event"
5454
TIME_KM = "time"
5555
FOLDER = "prostate"
56-
CLINIC = "https://raw.githubusercontent.com/ErikinBC/SurvSet/main/SurvSet/_datagen/output/prostate.csv"
56+
CLINIC = "https://raw.githubusercontent.com/RasmussenLab/njab/main/docs/tutorial/data/prostate/prostate.csv"
5757
val_ids: str = "" # List of comma separated values or filepath
5858
#
5959
# list or string of csv, eg. "var1,var2"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ dependencies = [
1717
"omegaconf",
1818
"numpy",
1919
"pandas",
20-
"scikit-learn>=1.4",
20+
"scikit-learn>=1.4,<1.7",
2121
"statsmodels",
2222
"matplotlib",
2323
"mrmr_selection",

src/njab/stats/ancova.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
"""Analysis of covariance using pingouin and statsmodels."""
22
from __future__ import annotations
3+
34
import numpy as np
45
import pandas as pd
56
import pingouin as pg
67
import statsmodels
78

89

9-
def ancova_pg(df_long: pd.DataFrame,
10-
feat_col: str,
11-
dv: str,
12-
between: str,
13-
covar: list[str] | str,
14-
fdr=0.05) -> pd.DataFrame:
10+
def ancova_pg(
11+
df_long: pd.DataFrame,
12+
feat_col: str,
13+
dv: str,
14+
between: str,
15+
covar: list[str] | str,
16+
) -> pd.DataFrame:
1517
""" Analysis of covariance (ANCOVA) using pg.ancova
1618
https://pingouin-stats.org/generated/pingouin.ancova.html
1719
@@ -117,7 +119,7 @@ def get_scores(self):
117119
covar=self.covar)
118120
return scores.set_index(feat_col)
119121

120-
def ancova(self, random_seed=123):
122+
def ancova(self):
121123
raise NotImplementedError
122124

123125

0 commit comments

Comments
 (0)