update docstrings

juanitorduz · juanitorduz · commit 1ddf7f8deaf6 · 2025-11-12T11:58:19.000+01:00
diff --git a/causalpy/data/datasets.py b/causalpy/data/datasets.py
@@ -49,9 +49,22 @@ def _get_data_home() -> pathlib.Path:
 
 
 def load_data(dataset: str | None = None) -> pd.DataFrame:
-    """Loads the requested dataset and returns a pandas DataFrame.
+    """Load the requested dataset and return a pandas DataFrame.
 
-    :param dataset: The desired dataset to load
+    Parameters
+    ----------
+    dataset : str, optional
+        The desired dataset to load. If None, raises ValueError.
+
+    Returns
+    -------
+    pd.DataFrame
+        The loaded dataset as a pandas DataFrame.
+
+    Raises
+    ------
+    ValueError
+        If ``dataset`` is None or is not one of the available datasets.
     """
 
     if dataset in DATASETS:
diff --git a/causalpy/experiments/base.py b/causalpy/experiments/base.py
@@ -62,7 +62,14 @@ def idata(self) -> az.InferenceData:
         return self.model.idata
 
     def print_coefficients(self, round_to: int | None = None) -> None:
-        """Ask the model to print its coefficients."""
+        """Ask the model to print its coefficients.
+
+        Parameters
+        ----------
+        round_to : int, optional
+            Number of significant figures to round to. Defaults to None,
+            in which case 2 significant figures are used.
+        """
         self.model.print_coefficients(self.labels, round_to)
 
     def plot(self, *args: Any, **kwargs: Any) -> tuple:
diff --git a/causalpy/experiments/diff_in_diff.py b/causalpy/experiments/diff_in_diff.py
@@ -49,20 +49,24 @@ class DifferenceInDifferences(BaseExperiment):
 
     .. note::
 
-        There is no pre/post intervention data distinction for DiD, we fit all the
-        data available.
-    :param data:
-        A pandas dataframe
-    :param formula:
-        A statistical model formula
-    :param time_variable_name:
-        Name of the data column for the time variable
-    :param group_variable_name:
-        Name of the data column for the group variable
-    :param post_treatment_variable_name:
-        Name of the data column indicating post-treatment period (default: "post_treatment")
-    :param model:
-        A PyMC model for difference in differences
+        There is no pre/post intervention data distinction for DiD, we fit
+        all the data available.
+
+    Parameters
+    ----------
+    data : pd.DataFrame
+        A pandas dataframe.
+    formula : str
+        A statistical model formula.
+    time_variable_name : str
+        Name of the data column for the time variable.
+    group_variable_name : str
+        Name of the data column for the group variable.
+    post_treatment_variable_name : str, optional
+        Name of the data column indicating post-treatment period.
+        Defaults to "post_treatment".
+    model : PyMCModel or RegressorMixin, optional
+        A PyMC model for difference in differences. Defaults to None.
 
     Example
     --------
diff --git a/causalpy/experiments/instrumental_variable.py b/causalpy/experiments/instrumental_variable.py
@@ -27,31 +27,30 @@
 
 
 class InstrumentalVariable(BaseExperiment):
-    """
-    A class to analyse instrumental variable style experiments.
-
-    :param instruments_data: A pandas dataframe of instruments
-                             for our treatment variable. Should contain
-                             instruments Z, and treatment t
-    :param data: A pandas dataframe of covariates for fitting
-                 the focal regression of interest. Should contain covariates X
-                 including treatment t and outcome y
-    :param instruments_formula: A statistical model formula for
-                                the instrumental stage regression
-                                e.g. t ~ 1 + z1 + z2 + z3
-    :param formula: A statistical model formula for the \n
-                    focal regression e.g. y ~ 1 + t + x1 + x2 + x3
-    :param model: A PyMC model
-    :param priors: An optional dictionary of priors for the
-                   mus and sigmas of both regressions. If priors are not
-                   specified we will substitute MLE estimates for the beta
-                   coefficients. Greater control can be achieved
-                   by specifying the priors directly e.g. priors = {
-                                    "mus": [0, 0],
-                                    "sigmas": [1, 1],
-                                    "eta": 2,
-                                    "lkj_sd": 2,
-                                    }
+    """A class to analyse instrumental variable style experiments.
+
+    Parameters
+    ----------
+    instruments_data : pd.DataFrame
+        A pandas dataframe of instruments for our treatment variable.
+        Should contain instruments Z, and treatment t.
+    data : pd.DataFrame
+        A pandas dataframe of covariates for fitting the focal regression
+        of interest. Should contain covariates X including treatment t and
+        outcome y.
+    instruments_formula : str
+        A statistical model formula for the instrumental stage regression,
+        e.g. ``t ~ 1 + z1 + z2 + z3``.
+    formula : str
+        A statistical model formula for the focal regression,
+        e.g. ``y ~ 1 + t + x1 + x2 + x3``.
+    model : BaseExperiment, optional
+        A PyMC model. Defaults to None.
+    priors : dict, optional
+        Dictionary of priors for the mus and sigmas of both regressions.
+        If priors are not specified we will substitute MLE estimates for
+        the beta coefficients. Example: ``priors = {"mus": [0, 0],
+        "sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``.
 
     Example
     --------
diff --git a/causalpy/experiments/inverse_propensity_weighting.py b/causalpy/experiments/inverse_propensity_weighting.py
@@ -31,22 +31,23 @@
 
 
 class InversePropensityWeighting(BaseExperiment):
-    """
-    A class to analyse inverse propensity weighting experiments.
+    """A class to analyse inverse propensity weighting experiments.
 
-    :param data:
-        A pandas dataframe
-    :param formula:
-        A statistical model formula for the propensity model
-    :param outcome_variable
-        A string denoting the outcome variable in datq to be reweighted
-    :param weighting_scheme:
-        A string denoting which weighting scheme to use among: 'raw', 'robust',
-        'doubly robust' or 'overlap'. See Aronow and Miller "Foundations
-        of Agnostic Statistics" for discussion and computation of these
-        weighting schemes.
-    :param model:
-        A PyMC model
+    Parameters
+    ----------
+    data : pd.DataFrame
+        A pandas dataframe.
+    formula : str
+        A statistical model formula for the propensity model.
+    outcome_variable : str
+        A string denoting the outcome variable in data to be reweighted.
+    weighting_scheme : str
+        A string denoting which weighting scheme to use among: 'raw',
+        'robust', 'doubly robust' or 'overlap'. See Aronow and Miller
+        "Foundations of Agnostic Statistics" for discussion and computation
+        of these weighting schemes.
+    model : BaseExperiment, optional
+        A PyMC model. Defaults to None.
 
     Example
     --------
diff --git a/causalpy/plot_utils.py b/causalpy/plot_utils.py
@@ -35,21 +35,28 @@ def plot_xY(
     hdi_prob: float = 0.94,
     label: str | None = None,
 ) -> Tuple[Line2D, PolyCollection]:
-    """
-    Utility function to plot HDI intervals.
-
-    :param x:
-        Pandas datetime index or numpy array of x-axis values
-    :param y:
-        Xarray data array of y-axis data
-    :param ax:
-        Matplotlib ax object
-    :param plot_hdi_kwargs:
-        Dictionary of keyword arguments passed to ax.plot()
-    :param hdi_prob:
-        The size of the HDI, default is 0.94
-    :param label:
-        The plot label
+    """Plot HDI intervals.
+
+    Parameters
+    ----------
+    x : pd.DatetimeIndex, np.ndarray, pd.Index, pd.Series, or ExtensionArray
+        X-axis values, e.g. a pandas datetime index or a numpy array.
+    Y : xr.DataArray
+        Xarray data array of y-axis data.
+    ax : plt.Axes
+        Matplotlib axes object.
+    plot_hdi_kwargs : dict, optional
+        Dictionary of keyword arguments passed to ax.plot().
+    hdi_prob : float, optional
+        The size of the HDI. Default is 0.94.
+    label : str, optional
+        The plot label.
+
+    Returns
+    -------
+    tuple
+        Tuple of (Line2D, PolyCollection) handles for the plot line and
+        HDI patch.
     """
 
     if plot_hdi_kwargs is None:
@@ -86,13 +93,20 @@ def get_hdi_to_df(
     x: xr.DataArray,
     hdi_prob: float = 0.94,
 ) -> pd.DataFrame:
-    """
-    Utility function to calculate and recover HDI intervals.
+    """Calculate and recover HDI intervals.
+
+    Parameters
+    ----------
+    x : xr.DataArray
+        Xarray data array.
+    hdi_prob : float, optional
+        The size of the HDI. Default is 0.94.
 
-    :param x:
-        Xarray data array
-    :param hdi_prob:
-        The size of the HDI, default is 0.94
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame containing the HDI intervals with 'lower' and 'higher'
+        columns.
     """
     hdi_result = az.hdi(x, hdi_prob=hdi_prob)
 
diff --git a/causalpy/pymc_models.py b/causalpy/pymc_models.py
@@ -173,8 +173,15 @@ def __init__(
         priors: dict[str, Any] | None = None,
     ) -> None:
         """
-        :param sample_kwargs: A dictionary of kwargs that get unpacked and passed to the
-            :func:`pymc.sample` function. Defaults to an empty dictionary.
+        Parameters
+        ----------
+        sample_kwargs : dict, optional
+            Dictionary of kwargs that get unpacked and passed to the
+            :func:`pymc.sample` function. Defaults to an empty dictionary
+            if None.
+        priors : dict, optional
+            Dictionary of priors for the model. Defaults to None, in which
+            case default priors are used.
         """
         super().__init__()
         self.idata = None
@@ -224,8 +231,23 @@ def _data_setter(self, X: xr.DataArray) -> None:
     def fit(
         self, X: xr.DataArray, y: xr.DataArray, coords: Dict[str, Any] | None = None
     ) -> az.InferenceData:
-        """Draw samples from posterior, prior predictive, and posterior predictive
-        distributions, placing them in the model's idata attribute.
+        """Draw samples from posterior, prior predictive, and posterior
+        predictive distributions, placing them in the model's idata attribute.
+
+        Parameters
+        ----------
+        X : xr.DataArray
+            Input features as an xarray DataArray.
+        y : xr.DataArray
+            Target variable as an xarray DataArray.
+        coords : dict, optional
+            Dictionary with coordinate names for named dimensions.
+            Defaults to None.
+
+        Returns
+        -------
+        az.InferenceData
+            InferenceData object containing the samples.
         """
 
         # Ensure random_seed is used in sample_prior_predictive() and
@@ -356,6 +378,16 @@ def calculate_cumulative_impact(self, impact: xr.DataArray) -> xr.DataArray:
     def print_coefficients(
         self, labels: list[str], round_to: int | None = None
     ) -> None:
+        """Print the model coefficients with their labels.
+
+        Parameters
+        ----------
+        labels : list of str
+            List of strings representing the coefficient names.
+        round_to : int, optional
+            Number of significant figures to round to. Defaults to None,
+            in which case 2 significant figures are used.
+        """
         if self.idata is None:
             raise RuntimeError("Model has not been fit")
 
@@ -627,19 +659,27 @@ def build_model(  # type: ignore
         coords: Dict[str, Any],
         priors: Dict[str, Any],
     ) -> None:
-        """Specify model with treatment regression and focal regression data and priors
-
-        :param X: A pandas dataframe used to predict our outcome y
-        :param Z: A pandas dataframe used to predict our treatment variable t
-        :param y: An array of values representing our focal outcome y
-        :param t: An array of values representing the treatment t of
-                  which we're interested in estimating the causal impact
-        :param coords: A dictionary with the coordinate names for our
-                       instruments and covariates
-        :param priors: An optional dictionary of priors for the mus and
-                      sigmas of both regressions
-                      :code:`priors = {"mus": [0, 0], "sigmas": [1, 1],
-                      "eta": 2, "lkj_sd": 2}`
+        """Specify model with treatment regression and focal regression
+        data and priors.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Array used to predict our outcome y.
+        Z : np.ndarray
+            Array used to predict our treatment variable t.
+        y : np.ndarray
+            Array of values representing our focal outcome y.
+        t : np.ndarray
+            Array representing the treatment t of which we're interested
+            in estimating the causal impact.
+        coords : dict
+            Dictionary with the coordinate names for our instruments and
+            covariates.
+        priors : dict
+            Dictionary of priors for the mus and sigmas of both
+            regressions. Example: ``priors = {"mus": [0, 0],
+            "sigmas": [1, 1], "eta": 2, "lkj_sd": 2}``.
         """
 
         # --- Priors ---
@@ -725,13 +765,33 @@ def fit(  # type: ignore
         priors: Dict[str, Any],
         ppc_sampler: str | None = None,
     ) -> az.InferenceData:
-        """Draw samples from posterior distribution and potentially
-        from the prior and posterior predictive distributions. The
-        fit call can take values for the
-        ppc_sampler = ['jax', 'pymc', None]
-        We default to None, so the user can determine if they wish
-        to spend time sampling the posterior predictive distribution
-        independently.
+        """Draw samples from posterior distribution and potentially from
+        the prior and posterior predictive distributions.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Array used to predict our outcome y.
+        Z : np.ndarray
+            Array used to predict our treatment variable t.
+        y : np.ndarray
+            Array of values representing our focal outcome y.
+        t : np.ndarray
+            Array representing the treatment variable.
+        coords : dict
+            Dictionary with coordinate names for named dimensions.
+        priors : dict
+            Dictionary of priors for the model.
+        ppc_sampler : str, optional
+            Sampler for posterior predictive distribution. Can be 'jax',
+            'pymc', or None. Defaults to None, so the user can determine
+            if they wish to spend time sampling the posterior predictive
+            distribution independently.
+
+        Returns
+        -------
+        az.InferenceData
+            InferenceData object containing the samples.
         """
 
         # Ensure random_seed is used in sample_prior_predictive() and
diff --git a/causalpy/skl_models.py b/causalpy/skl_models.py
diff --git a/causalpy/utils.py b/causalpy/utils.py
diff --git a/docs/source/_static/interrogate_badge.svg b/docs/source/_static/interrogate_badge.svg