HallLab
diff --git a/‎clarite/modules/analyze/__init__.py‎
Lines changed: 11 additions & 14 deletions b/‎clarite/modules/analyze/__init__.py‎
Lines changed: 11 additions & 14 deletions
diff --git a/‎clarite/modules/analyze/association_study.py‎
Lines changed: 7 additions & 7 deletions b/‎clarite/modules/analyze/association_study.py‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎clarite/modules/analyze/interaction_study.py‎
Lines changed: 8 additions & 8 deletions b/‎clarite/modules/analyze/interaction_study.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎clarite/modules/analyze/regression/__init__.py‎
Lines changed: 10 additions & 10 deletions b/‎clarite/modules/analyze/regression/__init__.py‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎clarite/modules/analyze/regression/base.py‎
Lines changed: 7 additions & 6 deletions b/‎clarite/modules/analyze/regression/base.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎clarite/modules/analyze/regression/glm_regression.py‎
Lines changed: 8 additions & 8 deletions b/‎clarite/modules/analyze/regression/glm_regression.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎clarite/modules/analyze/regression/r_survey_regression.py‎
Lines changed: 15 additions & 16 deletions b/‎clarite/modules/analyze/regression/r_survey_regression.py‎
Lines changed: 15 additions & 16 deletions
diff --git a/‎clarite/modules/analyze/regression/weighted_glm_regression.py‎
Lines changed: 9 additions & 9 deletions b/‎clarite/modules/analyze/regression/weighted_glm_regression.py‎
Lines changed: 9 additions & 9 deletions
diff --git a/‎clarite/modules/describe.py‎
Lines changed: 7 additions & 10 deletions b/‎clarite/modules/describe.py‎
Lines changed: 7 additions & 10 deletions
diff --git a/‎clarite/modules/load.py‎
Lines changed: 2 additions & 5 deletions b/‎clarite/modules/load.py‎
Lines changed: 2 additions & 5 deletions
@@ -4,12 +4,9 @@
 
 Functions used for analyses such as EWAS
 
-  .. autosummary::
-     :toctree: modules/analyze
-
-     association_study
-     interaction_study
-     add_corrected_pvalues
+  .. autofunction:: association_study
+  .. autofunction:: interaction_study
+  .. autofunction:: add_corrected_pvalues
 
 """
 
@@ -20,11 +17,11 @@
 from . import regression
 
 __all__ = [
-    association_study,
-    ewas,
-    interaction_study,
-    add_corrected_pvalues,
-    regression,
+    "association_study",
+    "ewas",
+    "interaction_study",
+    "add_corrected_pvalues",
+    "regression",
 ]
 
 # Constants
@@ -42,6 +39,6 @@
 ]
 corrected_pvalue_columns = ["pvalue_bonferroni", "pvalue_fdr"]
 
-__all__.append(required_result_columns)
-__all__.append(result_columns)
-__all__.append(corrected_pvalue_columns)
+__all__.append("required_result_columns")
+__all__.append("result_columns")
+__all__.append("corrected_pvalue_columns")
@@ -19,7 +19,7 @@ def association_study(
     covariates: Optional[Union[str, List[str]]] = None,
     regression_kind: Optional[Union[str, Type[regression.Regression]]] = None,
     encoding: str = "additive",
-    weighted_encoding_info: Optional[pd.DataFrame] = None,
+    edge_encoding_info: Optional[pd.DataFrame] = None,
     **kwargs,
 ):
     """
@@ -45,8 +45,8 @@ def association_study(
         and 'weighted_glm' if it is.
     encoding: str, default "additive"
         Encoding method to use for any genotype data.  One of {'additive', 'dominant', 'recessive', 'codominant', or 'weighted'}
-    weighted_encoding_info: Optional pd.DataFrame, default None
-        If weighted encoding is used, this must be provided.  See Pandas-Genomics documentation on weighted encodings.
+    edge_encoding_info: Optional pd.DataFrame, default None
+        If edge encoding is used, this must be provided.  See Pandas-Genomics documentation on edge encodings.
     kwargs: Keyword arguments specific to the Regression being used
 
     Returns
@@ -73,13 +73,13 @@ def association_study(
             data = data.genomics.encode_recessive()
         elif encoding == "codominant":
             data = data.genomics.encode_codominant()
-        elif encoding == "weighted":
-            if weighted_encoding_info is None:
+        elif encoding == "edge":
+            if edge_encoding_info is None:
                 raise ValueError(
-                    "'weighted_encoding_info' must be provided when using weighted encoding"
+                    "'edge_encoding_info' must be provided when using edge encoding"
                 )
             else:
-                data = data.genomics.encode_weighted(weighted_encoding_info)
+                data = data.genomics.encode_edge(edge_encoding_info)
         else:
             raise ValueError(f"Genotypes provided with unknown 'encoding': {encoding}")
 
 
@@ -13,7 +13,7 @@ def interaction_study(
     interactions: Optional[Union[List[Tuple[str, str]], str]] = None,
     covariates: Optional[Union[str, List[str]]] = None,
     encoding: str = "additive",
-    weighted_encoding_info: Optional[pd.DataFrame] = None,
+    edge_encoding_info: Optional[pd.DataFrame] = None,
     report_betas: bool = False,
     min_n: int = 200,
 ):
@@ -39,9 +39,9 @@ def interaction_study(
     covariates: str, List[str], or None (default)
         The variable (str) or variables (List) to be used as covariates in each regression.
     encoding: str, default "additive""
-        Encoding method to use for any genotype data.  One of {'additive', 'dominant', 'recessive', 'codominant', or 'weighted'}
-    weighted_encoding_info: Optional pd.DataFrame, default None
-        If weighted encoding is used, this must be provided.  See Pandas-Genomics documentation on weighted encodings.
+        Encoding method to use for any genotype data.  One of {'additive', 'dominant', 'recessive', 'codominant', or 'edge'}
+    edge_encoding_info: Optional pd.DataFrame, default None
+        If edge encoding is used, this must be provided.  See Pandas-Genomics documentation on edge encoding.
     report_betas: boolean
         False by default.
           If True, the results will contain one row for each interaction term and will include the beta value,
@@ -75,13 +75,13 @@ def interaction_study(
             data = data.genomics.encode_recessive()
         elif encoding == "codominant":
             data = data.genomics.encode_codominant()
-        elif encoding == "weighted":
-            if weighted_encoding_info is None:
+        elif encoding == "edge":
+            if edge_encoding_info is None:
                 raise ValueError(
-                    "'weighted_encoding_info' must be provided when using weighted encoding"
+                    "'edge_encoding_info' must be provided when using edge encoding"
                 )
             else:
-                data = data.genomics.encode_weighted(weighted_encoding_info)
+                data = data.genomics.encode_edge(edge_encoding_info)
         else:
             raise ValueError(f"Genotypes provided with unknown 'encoding': {encoding}")
 
 
@@ -8,8 +8,8 @@
 .. autoclass:: Regression
 
 
-clarite.analyze.ewas
---------------------
+clarite.analyze.association_study
+---------------------------------
 
 The `regression_kind` parameter can be set to use one of three regression classes, or a custom subclass of `Regression`
 can be created.
@@ -21,8 +21,8 @@
 .. autoclass:: RSurveyRegression
 
 
-clarite.analyze.interactions
-----------------------------
+clarite.analyze.interaction_study
+---------------------------------
 
 .. autoclass:: InteractionRegression
 
@@ -43,10 +43,10 @@
 
 
 __all__ = [
-    GLMRegression,
-    RSurveyRegression,
-    WeightedGLMRegression,
-    Regression,
-    InteractionRegression,
-    builtin_regression_kinds,
+    "GLMRegression",
+    "RSurveyRegression",
+    "WeightedGLMRegression",
+    "Regression",
+    "InteractionRegression",
+    "builtin_regression_kinds",
 ]
@@ -12,8 +12,8 @@ class Regression(metaclass=ABCMeta):
     """
     Abstract Base Class for Regression objects used in EWAS.
 
-    Minimum Parameters
-    ------------------
+    Parameters
+    ----------
     data: pd.DataFrame
         Data used in the analysis
     outcome_variable: str
@@ -25,10 +25,11 @@ class Regression(metaclass=ABCMeta):
         Any variables in the DataFrames not listed as covariates are regressed.
         Use `None` or an empty list when no covariates are being used.
 
-    Abstract Methods
-    ----------------
-    run() -> None
-    get_results() -> pd.DataFrame
+    Notes
+    -----
+    These are the abstract methods:
+    * run() -> None
+    * get_results() -> pd.DataFrame
     """
 
     def __init__(
 
@@ -20,21 +20,21 @@ class GLMRegression(Regression):
     Statsmodels GLM Regression.
     This class handles running a regression for each variable of interest and collecting results.
 
-    Regression Methods
-    ------------------
+    Notes
+    -----
+    * The family used is either Gaussian (continuous outcomes) or binomial(logit) for binary outcomes.
+    * Covariate variables that are constant produce warnings and are ignored
+    * The dataset is subset to drop missing values, and the same dataset is used for both models in the LRT
+
+    *Regression Methods*
+
     Binary variables
         Treated as continuous features, with values of 0 and 1 (the larger value in the original data is encoded as 1).
     Categorical variables
         The results of a likelihood ratio test are used to calculate a pvalue.  No Beta or SE values are reported.
     Continuous variables
         A GLM is used to obtain Beta, SE, and pvalue results.
 
-    Notes
-    -----
-    * The family used is either Gaussian (continuous outcomes) or binomial(logit) for binary outcomes.
-    * Covariates variables that are constant produce warnings and are ignored
-    * The dataset is subset to drop missing values, and the same dataset is used for both models in the LRT
-
     Parameters
     ----------
     data:
 
@@ -1,7 +1,6 @@
 from pathlib import Path
 from typing import List, Optional
 
-import numpy as np
 import pandas as pd
 
 from clarite.internal.utilities import requires, _get_dtypes
@@ -12,33 +11,33 @@
 
 class RSurveyRegression(Regression):
     """
-      Run regressions by calling R from Python
-      When a SurveyDesignSpec is provided, the R *survey* library is used.
-      Results should match those run with either GLMRegression or WeightedGLMRegression.
+    Run regressions by calling R from Python
+    When a SurveyDesignSpec is provided, the R *survey* library is used.
+    Results should match those run with either GLMRegression or WeightedGLMRegression.
 
-      Parameters
-      ----------
-      data:
+    Parameters
+    ----------
+    data:
         The data to be analyzed, including the outcome, covariates, and any variables to be regressed.
-      outcome_variable:
+    outcome_variable:
         The variable to be used as the output (y) of the regression
-      covariates:
+    covariates:
         The variables to be used as covariates. Any variables in the DataFrames not listed as covariates are regressed.
-      survey_design_spec:
-          A SurveyDesignSpec object is used to create SurveyDesign objects for each regression.
-          Use None if unweighted regression is desired.
-      min-n:
+    survey_design_spec:
+        A SurveyDesignSpec object is used to create SurveyDesign objects for each regression.
+        Use None if unweighted regression is desired.
+    min_n:
         Minimum number of complete-case observations (no NA values for outcome, covariates, variable, or weight)
         Defaults to 200
     report_betas: boolean
-      False by default.
+        False by default.
         If True, the results will contain one row for each categorical value (other than the reference category) and
         will include the beta value, standard error (SE), and beta pvalue for that specific category. The number of
         terms increases with the number of categories.
     standardize_data: boolean
         False by default.
-          If True, numeric data will be standardized using z-scores before regression.
-          This will affect the beta values and standard error, but not the pvalues.
+        If True, numeric data will be standardized using z-scores before regression.
+        This will affect the beta values and standard error, but not the pvalues.
     """
 
     def __init__(
 
@@ -21,15 +21,6 @@ class WeightedGLMRegression(GLMRegression):
     The statistical adjustments (primarily the covariance calculation) are designed to match results when running with
     the R `survey` library.
 
-    Regression Methods
-    ------------------
-    Binary variables
-        Treated as continuous features, with values of 0 and 1 (the larger value in the original data is encoded as 1).
-    Categorical variables
-        The results of a likelihood ratio test are used to calculate a pvalue.  No Beta or SE values are reported.
-    Continuous variables
-        A GLM is used to obtain Beta, SE, and pvalue results.
-
     Notes
     -----
     * The family used is Gaussian for continuous outcomes or binomial(logit) for binary outcomes.
@@ -40,6 +31,15 @@ class WeightedGLMRegression(GLMRegression):
     * Categorical variables run with a survey design will not report Diff_AIC as it may not be possible to calculate
       it accurately
 
+    *Regression Methods*
+
+    Binary variables
+        Treated as continuous features, with values of 0 and 1 (the larger value in the original data is encoded as 1).
+    Categorical variables
+        The results of a likelihood ratio test are used to calculate a pvalue.  No Beta or SE values are reported.
+    Continuous variables
+        A GLM is used to obtain Beta, SE, and pvalue results.
+
     Parameters
     ----------
     data:
 
@@ -4,15 +4,12 @@
 
 Functions that are used to gather information about some data
 
-  .. autosummary::
-     :toctree: modules/describe
-
-     correlations
-     freq_table
-     get_types
-     percent_na
-     skewness
-     summarize
+     .. autofunction:: correlations
+     .. autofunction:: freq_table
+     .. autofunction:: get_types
+     .. autofunction:: percent_na
+     .. autofunction:: skewness
+     .. autofunction:: summarize
 
 """
 
@@ -215,7 +212,7 @@ def skewness(data: pd.DataFrame, dropna: bool = False):
     result: pd.DataFrame
         DataFrame listing three values for each continuous variable and NA for others: skew, zscore, and pvalue
         The test null hypothesis is that the skewness of the samples population is the same as the corresponding
-         normal distribution.  The pvalue is the two-sided pvalue for the hypothesis test
+        normal distribution.  The pvalue is the two-sided pvalue for the hypothesis test
 
     Examples
     --------
 
@@ -4,11 +4,8 @@
 
 Load data from different formats or sources
 
-  .. autosummary::
-     :toctree: modules/load
-
-     from_tsv
-     from_csv
+     .. autofunction:: from_tsv
+     .. autofunction:: from_csv
 """
 
 from typing import Optional, Union