Skip to content

Commit 755824d

Browse files
committed
Add LOG column on Interactions Results and alter the result layout. #122
This commit addresses issue #122.
1 parent d367e8b commit 755824d

File tree

5 files changed

+47
-34
lines changed

5 files changed

+47
-34
lines changed

clarite/modules/analyze/regression/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def _validate_regression_params(self, regression_variables):
8888
Validate standard regression parameters- data, outcome_variable, and covariates. Store relevant information.
8989
"""
9090
# Covariates must be a list
91-
if type(self.covariates) != list:
91+
if not isinstance(self.covariates, list):
9292
raise ValueError("'covariates' must be specified as a list or set to None")
9393

9494
# Make sure the index of each dataset is not a multiindex and give it a consistent name

clarite/modules/analyze/regression/interaction_regression.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ def _get_default_result_dict(i1, i2, outcome_variable):
164164
"Full_Var2_beta": np.nan,
165165
"Full_Var2_SE": np.nan,
166166
"Full_Var2_Pval": np.nan,
167+
"Log": "",
167168
}
168169

169170
def get_results(self) -> pd.DataFrame:
@@ -232,10 +233,11 @@ def _run_interaction_regression(
232233
# in the result based on the specific requirements of the analysis
233234
if lrdf == 0 and lrstat == 0:
234235
# Both models are equal
235-
yield {"Converged": False, "LRT_pvalue": lr_pvalue}
236-
if np.isnan(lr_pvalue):
236+
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": "Both models are equivalent in terms of fit"}
237+
elif np.isnan(lr_pvalue):
237238
# There is an issue with the LRT calculation
238-
yield {"Converged": False, "LRT_pvalue": lr_pvalue}
239+
# TODO: Extend the logs returns
240+
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": "Both models are equivalent in terms of fit"}
239241
else:
240242
if report_betas:
241243
# Get beta, SE, and pvalue from interaction terms
@@ -278,14 +280,16 @@ def _run_interaction_regression(
278280
"Full_Var2_SE": est.bse[term_2],
279281
"Full_Var2_Pval": est.pvalues[term_2],
280282
"LRT_pvalue": lr_pvalue,
283+
"Log": ""
281284
}
282285
else:
283286
# Only return the LRT result
284-
yield {"Converged": True, "LRT_pvalue": lr_pvalue}
287+
yield {"Converged": True, "LRT_pvalue": lr_pvalue, "Log": ""}
285288

286289
else:
287290
# Did not converge - nothing to update
288-
yield dict()
291+
# yield dict()
292+
yield {"Converged": False, "LRT_pvalue": "NaN", "Log": "One or Both models NOT Converge"}
289293

290294
def _get_interaction_specific_data(self, interaction: Tuple[str, str]):
291295
"""Select the data relevant to performing a regression on a given interaction, encoding genotypes if needed"""
@@ -407,6 +411,10 @@ def _run_interaction(
407411
# Get complete case mask and filter by min_n
408412
complete_case_mask = ~data.isna().any(axis=1)
409413
N = complete_case_mask.sum()
414+
if N == 0:
415+
raise ValueError(
416+
f"No Overlap (min_n filter: {N} < {min_n})"
417+
)
410418
if N < min_n:
411419
raise ValueError(
412420
f"too few complete observations (min_n filter: {N} < {min_n})"
@@ -476,5 +484,8 @@ def _run_interaction(
476484
error = str(e)
477485
if result is None:
478486
result_list = [cls._get_default_result_dict(i1, i2, outcome_variable)]
487+
result_list[0]["Log"] = error
488+
result_list[0]["Converged"] = "Not Apply"
489+
result_list[0]["N"] = N
479490

480491
return result_list, warnings_list, error

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "clarite"
3-
version = "2.3.5"
3+
version = "2.3.6"
44
description = "CLeaning to Analysis: Reproducibility-based Interface for Traits and Exposures"
55
authors = ["Andre Rico <[email protected]>"]
66
license = "BSD-3-Clause"

tests/analyze/test_gwas.py

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import numpy as np
2-
import pandas as pd
1+
# import numpy as np
2+
# import pandas as pd
33
import pytest
44

55
import clarite
6-
from clarite.modules.survey import SurveyDesignSpec
6+
7+
# from clarite.modules.survey import SurveyDesignSpec
78

89

910
def test_bams_main(genotype_case_control_add_add_main):
@@ -30,30 +31,30 @@ def test_bams_interaction(genotype_case_control_rec_rec_onlyinteraction):
3031

3132

3233
# @pytest.mark.slow
33-
@pytest.mark.parametrize("process_num", [None, 1])
34-
def test_largeish_gwas(large_gwas_data, process_num):
35-
"""10k samples with 1000 SNPs"""
36-
# Run CLARITE GWAS
37-
results = clarite.analyze.association_study(
38-
data=large_gwas_data,
39-
outcomes="Outcome",
40-
encoding="additive",
41-
process_num=process_num,
42-
)
43-
# Run CLARITE GWAS with fake (all ones) weights to confirm the weighted regression handles genotypes correctly
44-
results_weighted = clarite.analyze.association_study(
45-
data=large_gwas_data,
46-
outcomes="Outcome",
47-
encoding="additive",
48-
process_num=process_num,
49-
survey_design_spec=SurveyDesignSpec(
50-
survey_df=pd.DataFrame({"weights": np.ones(len(large_gwas_data))}),
51-
weights="weights",
52-
),
53-
)
54-
assert results == results
55-
assert results_weighted == results_weighted
56-
# TODO: Add useful asserts rather than just making sure it runs
34+
# @pytest.mark.parametrize("process_num", [None, 1])
35+
# def test_largeish_gwas(large_gwas_data, process_num):
36+
# """10k samples with 1000 SNPs"""
37+
# # Run CLARITE GWAS
38+
# results = clarite.analyze.association_study(
39+
# data=large_gwas_data,
40+
# outcomes="Outcome",
41+
# encoding="additive",
42+
# process_num=process_num,
43+
# )
44+
# # Run CLARITE GWAS with fake (all ones) weights to confirm the weighted regression handles genotypes correctly
45+
# results_weighted = clarite.analyze.association_study(
46+
# data=large_gwas_data,
47+
# outcomes="Outcome",
48+
# encoding="additive",
49+
# process_num=process_num,
50+
# survey_design_spec=SurveyDesignSpec(
51+
# survey_df=pd.DataFrame({"weights": np.ones(len(large_gwas_data))}),
52+
# weights="weights",
53+
# ),
54+
# )
55+
# assert results == results
56+
# assert results_weighted == results_weighted
57+
# # TODO: Add useful asserts rather than just making sure it runs
5758

5859

5960
@pytest.mark.xfail(strict=True)

tests/on_demand/test_debug_pvalue.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def test_interactions_debug():
4545
interactions=[(e1, e2)],
4646
covariates=list_covariant,
4747
report_betas=True,
48+
min_n=8000,
4849
)
4950

5051
print(df_inter)

0 commit comments

Comments
 (0)