Skip to content

Commit 55036f5

Browse files
author
Henry
committed
🐛 remove missing values from list of p-values before FDR calc
Running the FDR calculation with any missing p-values yields NaN q-values for all entries. Missing values arise mostly from multicollinearity, which could be checked for in the ANCOVA.
1 parent 12f19c5 commit 55036f5

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/njab/stats/ancova.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Analysis of covariance using pingouin and statsmodels."""
22
from __future__ import annotations
3+
34
import numpy as np
45
import pandas as pd
5-
66
import pingouin as pg
77
import statsmodels
88

@@ -52,6 +52,7 @@ def ancova_pg(df_long: pd.DataFrame,
5252
# num_covar = len(covar)
5353

5454
for feat_name, data_feat in df_long.groupby(feat_col):
55+
# ? drop duplicated columns in the long data format?
5556
ancova = pg.ancova(data=data_feat, dv=dv, between=between, covar=covar)
5657
ancova[feat_col] = feat_name
5758
scores.append(ancova)
@@ -137,6 +138,8 @@ def ancova(self, random_seed=123):
137138

138139
scores = self.get_scores()
139140
scores = filter_residuals_from_scores(scores)
141+
# drop nan values (due to multicollinearity of features - i.e. duplicated features)
142+
scores = scores.dropna()
140143
scores = add_fdr_scores(scores, random_seed=random_seed)
141144
self.scores = scores
142145
return scores.set_index('Source', append=True)

0 commit comments

Comments
 (0)