11import multiprocessing
22from itertools import combinations , repeat
3- from typing import Dict , List , Optional , Tuple
3+ from typing import Dict , Generator , List , Optional , Tuple
44
55import click
66import numpy as np
1616from . import GLMRegression
1717
1818# GITHUB ISSUE #119: Regressions with Error after Multiprocessing release python > 3.8
19- multiprocessing .get_start_method ("fork" )
19+ # multiprocessing.get_start_method("fork")
2020
2121
2222class InteractionRegression (GLMRegression ):
@@ -48,8 +48,8 @@ class InteractionRegression(GLMRegression):
4848 List of tuples: Test specific interactions of valid variables
4949 report_betas: boolean
5050 False by default.
51- If True, the results will contain one row for each interaction term and will include the beta value
52- for that term. The number of terms increases with the number of categories in each interacting term.
51+ If True, the results will contain one row for each interaction term and will include the beta value
52+ for that term. The number of terms increases with the number of categories in each interacting term.
5353 encoding: str, default "additive"
5454 Encoding method to use for any genotype data. One of {'additive', 'dominant', 'recessive', 'codominant', or 'weighted'}
5555 edge_encoding_info: Optional pd.DataFrame, default None
@@ -109,7 +109,7 @@ def _process_interactions(self, interactions):
109109 )
110110 if interactions is None :
111111 self .interactions = [c for c in combinations (regression_var_list , r = 2 )]
112- elif type (interactions ) == str :
112+ elif type (interactions ) is str :
113113 if interactions not in regression_var_list :
114114 raise ValueError (
115115 f"'{ interactions } ' was passed as the value for 'interactions' "
@@ -140,16 +140,30 @@ def _process_interactions(self, interactions):
140140 self .description += f"\n Processing { len (self .interactions ):,} interactions"
141141
142142 @staticmethod
143- def _get_default_result_dict (i1 , i2 ):
143+ def _get_default_result_dict (i1 , i2 , outcome_variable ):
144144 return {
145+ "Outcome" : outcome_variable ,
145146 "Term1" : i1 ,
146147 "Term2" : i2 ,
148+ "Parameter" : str (i1 + ":" + i2 ),
147149 "Converged" : False ,
148150 "N" : np .nan ,
149- "Beta" : np .nan ,
150- "SE" : np .nan ,
151- "Beta_pvalue" : np .nan ,
152151 "LRT_pvalue" : np .nan ,
152+ "Red_Var1_beta" : np .nan ,
153+ "Red_Var1_SE" : np .nan ,
154+ "Red_Var1_Pval" : np .nan ,
155+ "Red_Var2_beta" : np .nan ,
156+ "Red_Var2_SE" : np .nan ,
157+ "Red_Var2_Pval" : np .nan ,
158+ "Full_Var1_Var2_beta" : np .nan ,
159+ "Full_Var1_Var2_SE" : np .nan ,
160+ "Full_Var1_Var2_Pval" : np .nan ,
161+ "Full_Var1_beta" : np .nan ,
162+ "Full_Var1_SE" : np .nan ,
163+ "Full_Var1_Pval" : np .nan ,
164+ "Full_Var2_beta" : np .nan ,
165+ "Full_Var2_SE" : np .nan ,
166+ "Full_Var2_Pval" : np .nan ,
153167 }
154168
155169 def get_results (self ) -> pd .DataFrame :
@@ -169,17 +183,18 @@ def get_results(self) -> pd.DataFrame:
169183 result ["Outcome" ] = self .outcome_variable
170184 if self .report_betas :
171185 return result .set_index (
172- ["Term1" , "Term2" , "Outcome" , "Parameter" ]
173- ).sort_values (["LRT_pvalue" , "Beta_pvalue" ])
186+ # ["Term1", "Term2", "Outcome", "Parameter"]
187+ ["Term1" , "Term2" , "Outcome" ]
188+ ).sort_values (["LRT_pvalue" , "Full_Var1_Var2_Pval" ])
174189 else :
175190 return result .set_index (["Term1" , "Term2" , "Outcome" ]).sort_values (
176191 ["LRT_pvalue" ]
177192 )
178193
179194 @staticmethod
180195 def _run_interaction_regression (
181- data , formula , formula_restricted , family , use_t , report_betas
182- ) -> Dict :
196+ data , formula , formula_restricted , family , use_t , report_betas , i1 , i2
197+ ) -> Generator [ Dict , None , None ] :
183198 # Regress Full Model
184199 y , X = patsy .dmatrices (formula , data , return_type = "dataframe" , NA_action = "drop" )
185200 y = fix_names (y )
@@ -201,25 +216,73 @@ def _run_interaction_regression(
201216 lrdf = est_restricted .df_resid - est .df_resid
202217 lrstat = - 2 * (est_restricted .llf - est .llf )
203218 lr_pvalue = scipy .stats .chi2 .sf (lrstat , lrdf )
204- if report_betas :
205- # Get beta, SE, and pvalue from interaction terms
206- # Where interaction terms are those appearing in the full model and not in the reduced model
207- # Return all terms
208- param_names = set (est .bse .index ) - set (est_restricted .bse .index )
209- # The restricted model shouldn't have extra terms, unless there is some case we have overlooked
210- assert len (set (est_restricted .bse .index ) - set (est .bse .index )) == 0
211- for param_name in param_names :
212- yield {
213- "Converged" : True ,
214- "Parameter" : param_name ,
215- "Beta" : est .params [param_name ],
216- "SE" : est .bse [param_name ],
217- "Beta_pvalue" : est .pvalues [param_name ],
218- "LRT_pvalue" : lr_pvalue ,
219- }
219+ # GITHUB/ISSUES 121: Handling LRT_Pvalue when lrstat and lrdf are
220+ # both 0. When lrstat (the test statistic) and lrdf (degrees of
221+ # freedom for the Likelihood Ratio Test) are both 0, it typically
222+ # suggests that both models are equivalent in terms of fit. In
223+ # other words, there is no significant difference between the two
224+ # models.
225+ #
226+ # However when both lrstat and lrdf are 0, calc the survival
227+ # function (sf) of a chi-squared distribution with 0 degrees of
228+ # freedom results in NaN. This is because mathematically, it's
229+ # undefined to perform this calculation under these circumstances.
230+ #
231+ # In such cases, it's important to handle this scenario separately
232+ # in the result based on the specific requirements of the analysis
233+ if lrdf == 0 and lrstat == 0 :
234+ # Both models are equal
235+ yield {"Converged" : False , "LRT_pvalue" : lr_pvalue }
236+ if np .isnan (lr_pvalue ):
237+ # There is an issue with the LRT calculation
238+ yield {"Converged" : False , "LRT_pvalue" : lr_pvalue }
220239 else :
221- # Only return the LRT result
222- yield {"Converged" : True , "LRT_pvalue" : lr_pvalue }
240+ if report_betas :
241+ # Get beta, SE, and pvalue from interaction terms
242+ # Where interaction terms are those appearing in the full
243+ # model and not in the reduced model return all terms
244+ param_names = set (est .bse .index ) - set (est_restricted .bse .index )
245+ # The restricted model shouldn't have extra terms, unless
246+ # there is some case we have overlooked.
247+ assert len (set (est_restricted .bse .index ) - set (est .bse .index )) == 0
248+ # GITHUB/ISSUES 122: Open to show Terms Betas Values
249+ for param_name in param_names :
250+ # Names defined to aling with PLATO
251+ # Split the input_string by ":"
252+ term_1 , term_2 = param_name .split (":" )
253+ yield {
254+ "Term1" : term_1 ,
255+ "Term2" : term_2 ,
256+ "Converged" : True ,
257+ "Parameter" : param_name ,
258+ # Betas in Reduced Model
259+ # Var1 --> Term 1
260+ "Red_Var1_beta" : est_restricted .params [term_1 ],
261+ "Red_Var1_SE" : est_restricted .bse [term_1 ],
262+ "Red_Var1_Pval" : est_restricted .pvalues [term_1 ],
263+ # Var2 --> Term 2
264+ "Red_Var2_beta" : est_restricted .params [term_2 ],
265+ "Red_Var2_SE" : est_restricted .bse [term_2 ],
266+ "Red_Var2_Pval" : est_restricted .pvalues [term_2 ],
267+ # Betas in Full Model
268+ # Var1 --> Term 1
269+ "Full_Var1_Var2_beta" : est .params [param_name ],
270+ "Full_Var1_Var2_SE" : est .bse [param_name ],
271+ "Full_Var1_Var2_Pval" : est .pvalues [param_name ],
272+ # Var1 --> Term 1
273+ "Full_Var1_beta" : est .params [term_1 ],
274+ "Full_Var1_SE" : est .bse [term_1 ],
275+ "Full_Var1_Pval" : est .pvalues [term_1 ],
276+ # Var2 --> Term 2
277+ "Full_Var2_beta" : est .params [term_2 ],
278+ "Full_Var2_SE" : est .bse [term_2 ],
279+ "Full_Var2_Pval" : est .pvalues [term_2 ],
280+ "LRT_pvalue" : lr_pvalue ,
281+ }
282+ else :
283+ # Only return the LRT result
284+ yield {"Converged" : True , "LRT_pvalue" : lr_pvalue }
285+
223286 else :
224287 # Did not converge - nothing to update
225288 yield dict ()
@@ -394,16 +457,24 @@ def _run_interaction(
394457
395458 # Run Regression LRT Test
396459 for regression_result in cls ._run_interaction_regression (
397- data , formula , formula_restricted , family , use_t , report_betas
460+ data ,
461+ formula ,
462+ formula_restricted ,
463+ family ,
464+ use_t ,
465+ report_betas ,
466+ i1 ,
467+ i2 ,
398468 ):
399- result = cls ._get_default_result_dict (i1 , i2 )
469+ result = cls ._get_default_result_dict (i1 , i2 , outcome_variable )
400470 result ["N" ] = N
471+ # TODO:
401472 result .update (regression_result )
402473 result_list .append (result )
403474
404475 except Exception as e :
405476 error = str (e )
406477 if result is None :
407- result_list = [cls ._get_default_result_dict (i1 , i2 )]
478+ result_list = [cls ._get_default_result_dict (i1 , i2 , outcome_variable )]
408479
409480 return result_list , warnings_list , error
0 commit comments