diff --git a/docs/source/api/index.md b/docs/source/api/index.md index c95c6a9..2b9adab 100644 --- a/docs/source/api/index.md +++ b/docs/source/api/index.md @@ -49,6 +49,7 @@ you should jump to {ref}`array_stats_api` and read forward. arviz_stats.ci_in_rope arviz_stats.ecdf arviz_stats.eti + arviz_stats.iqr arviz_stats.hdi arviz_stats.histogram arviz_stats.kde @@ -57,13 +58,16 @@ you should jump to {ref}`array_stats_api` and read forward. arviz_stats.loo_metrics arviz_stats.loo_r2 arviz_stats.loo_score + arviz_stats.mad arviz_stats.mean arviz_stats.median arviz_stats.metrics arviz_stats.mode arviz_stats.qds arviz_stats.residual_r2 + arviz_stats.std arviz_stats.summary + arviz_stats.var arviz_stats.wasserstein ``` diff --git a/src/arviz_stats/__init__.py b/src/arviz_stats/__init__.py index 47e1e7f..7e524a0 100644 --- a/src/arviz_stats/__init__.py +++ b/src/arviz_stats/__init__.py @@ -26,7 +26,7 @@ from arviz_stats.psense import psense, psense_summary from arviz_stats.metrics import bayesian_r2, kl_divergence, metrics, residual_r2, wasserstein from arviz_stats.sampling_diagnostics import bfmi, ess, mcse, rhat, rhat_nested, diagnose - from arviz_stats.summary import summary, ci_in_rope, mean, median, mode + from arviz_stats.summary import summary, ci_in_rope, mean, median, mode, std, var, iqr, mad from arviz_stats.manipulation import thin, weight_predictions from arviz_stats.bayes_factor import bayes_factor from arviz_stats.visualization import ecdf, eti, hdi, histogram, kde, qds diff --git a/src/arviz_stats/accessors.py b/src/arviz_stats/accessors.py index e440b74..c0aff79 100644 --- a/src/arviz_stats/accessors.py +++ b/src/arviz_stats/accessors.py @@ -322,6 +322,22 @@ def mode(self, dim=None, **kwargs): """Compute mode for all variables in the dataset.""" return self._apply("mode", dim=dim, **kwargs) + def std(self, dim=None, **kwargs): + """Compute standard deviation for all variables in the dataset.""" + return self._apply("std", dim=dim, **kwargs) + + def var(self, dim=None, **kwargs): + """Compute variance for all variables in the dataset.""" + return self._apply("var", dim=dim, **kwargs) + + def mad(self, dim=None, **kwargs): + """Compute median absolute deviation for all variables in the dataset.""" + return self._apply("mad", dim=dim, **kwargs) + + def iqr(self, dim=None, **kwargs): + """Compute interquantile range for all variables in the dataset.""" + return self._apply("iqr", dim=dim, **kwargs) + def srs_estimator(self, n_data_points, **kwargs): """Compute simple random sampling estimate for subsampled LOO.""" return self._apply( diff --git a/src/arviz_stats/base/array.py b/src/arviz_stats/base/array.py index 4081a0b..e932394 100644 --- a/src/arviz_stats/base/array.py +++ b/src/arviz_stats/base/array.py @@ -773,7 +773,7 @@ def mean(self, ary, round_to=None, skipna=False, axis=None): skipna : bool, default False Whether to ignore NaN values when computing the mean. axis : int, sequence of int or None, default -1 - Axis or axes along which to compute the mode. + Axis or axes along which to compute the mean. Returns ------- @@ -804,7 +804,7 @@ def median(self, ary, round_to=None, skipna=False, axis=None): skipna : bool, default False Whether to ignore NaN values when computing the median. axis : int, sequence of int or None, default -1 - Axis or axes along which to compute the mode. + Axis or axes along which to compute the median. Returns ------- @@ -852,6 +852,132 @@ def mode(self, ary, round_to=None, skipna=False, axis=None): ) return mode_ufunc(ary, round_to=round_to, skipna=skipna) + def std(self, ary, round_to=None, skipna=False, axis=None): + """Compute standard deviation of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + Whether to ignore NaN values when computing the standard deviation. + axis : int, sequence of int or None, default -1 + Axis or axes along which to compute the standard deviation. + + Returns + ------- + std : array-like + Standard deviation of the input values along the specified axis. + """ + ary, axes = process_ary_axes(ary, axis) + std_ufunc = make_ufunc( + self._std, + n_output=1, + n_input=1, + n_dims=len(axes), + ravel=False, + ) + return std_ufunc(ary, round_to=round_to, skipna=skipna) + + def var(self, ary, round_to=None, skipna=False, axis=None): + """Compute variance of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + Whether to ignore NaN values when computing the variance. + axis : int, sequence of int or None, default -1 + Axis or axes along which to compute the variance. + + Returns + ------- + var : array-like + Variance of the input values along the specified axis. + """ + ary, axes = process_ary_axes(ary, axis) + var_ufunc = make_ufunc( + self._var, + n_output=1, + n_input=1, + n_dims=len(axes), + ravel=False, + ) + return var_ufunc(ary, round_to=round_to, skipna=skipna) + + def mad(self, ary, round_to=None, skipna=False, axis=None): + """Compute mean absolute deviation of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + Whether to ignore NaN values when computing the mean absolute deviation. + axis : int, sequence of int or None, default -1 + Axis or axes along which to compute the mean absolute deviation. + + Returns + ------- + mad : array-like + Mean absolute deviation of the input values along the specified axis. + """ + ary, axes = process_ary_axes(ary, axis) + mad_ufunc = make_ufunc( + self._mad, + n_output=1, + n_input=1, + n_dims=len(axes), + ravel=False, + ) + return mad_ufunc(ary, round_to=round_to, skipna=skipna) + + def iqr(self, ary, quantiles=(0.25, 0.75), round_to=None, skipna=False, axis=None): + """Compute interquantile range of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + quantiles : tuple of float, default (0.25, 0.75) + Quantiles to compute the interquartile range. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + Whether to ignore NaN values when computing the interquantile range. + axis : int, sequence of int or None, default -1 + Axis or axes along which to compute the interquantile range. + + Returns + ------- + iqr : array-like + Interquantile range of the input values along the specified axis. + """ + ary, axes = process_ary_axes(ary, axis) + iqr_ufunc = make_ufunc( + self._iqr, + n_output=1, + n_input=1, + n_dims=len(axes), + ravel=False, + ) + return iqr_ufunc(ary, quantiles=quantiles, round_to=round_to, skipna=skipna) + def loo( self, ary, diff --git a/src/arviz_stats/base/core.py b/src/arviz_stats/base/core.py index d3cd323..07f3925 100644 --- a/src/arviz_stats/base/core.py +++ b/src/arviz_stats/base/core.py @@ -490,3 +490,92 @@ def _mode(self, ary, round_to=None, skipna=False): # For discrete data, we use the most frequent value. vals, cnts = np.unique(ary, return_counts=True) return vals[cnts.argmax()] + + def _std(self, ary, round_to=None, skipna=False, axis=None): + """Compute standard deviation of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + If True, ignore NaN values. + axis : int, sequence of int or None, default None + Axis or axes along which to compute the standard deviation. + """ + if skipna: + ary = ary[~np.isnan(ary)] + return round_num(np.std(ary, axis=axis, ddof=1), round_to) + + def _var(self, ary, round_to=None, skipna=False, axis=None): + """Compute variance of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + If True, ignore NaN values. + axis : int, sequence of int or None, default None + Axis or axes along which to compute the variance. + """ + if skipna: + ary = ary[~np.isnan(ary)] + return round_num(np.var(ary, axis=axis, ddof=1), round_to) + + def _mad(self, ary, round_to=None, skipna=False, axis=None): + """Compute median absolute deviation of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + If True, ignore NaN values. + axis : int, sequence of int or None, default None + Axis or axes along which to compute the median absolute deviation. + """ + if skipna: + ary = ary[~np.isnan(ary)] + median = np.median(ary, axis=axis) + mad = np.median(np.abs(ary - median), axis=axis) + return round_num(mad, round_to) + + def _iqr(self, ary, quantiles=(0.25, 0.75), round_to=None, skipna=False, axis=None): + """Compute interquantile range of values along the specified axis. + + Parameters + ---------- + values : array-like + Input array. + quantiles : tuple of two floats, default (0.25, 0.75) + Quantiles to compute the interquantile range. Defaults to (0.25, 0.75), that is, + the interquartile range. + round_to : int or str, optional + If integer, number of decimal places to round the result. If string of the + form '2g' number of significant digits to round the result. Defaults to '2g'. + Use None to return raw numbers. + skipna : bool, default False + If True, ignore NaN values. + axis : int, sequence of int or None, default None + Axis or axes along which to compute the interquartile range. + """ + if skipna: + ary = ary[~np.isnan(ary)] + if ary.size == 0: + return np.nan + q1, q3 = np.quantile(ary, quantiles, axis=axis) + iqr = q3 - q1 + return round_num(iqr, round_to) diff --git a/src/arviz_stats/base/dataarray.py b/src/arviz_stats/base/dataarray.py index 03ea48f..cbb6b4e 100644 --- a/src/arviz_stats/base/dataarray.py +++ b/src/arviz_stats/base/dataarray.py @@ -978,6 +978,59 @@ def mode(self, da, round_to=None, skipna=False, dim=None): kwargs={"round_to": round_to, "skipna": skipna, "axis": np.arange(-len(dims), 0, 1)}, ) + def std(self, da, round_to=None, skipna=False, dim=None): + """Compute standard deviation on DataArray input.""" + dims = validate_dims(dim) + + return apply_ufunc( + self.array_class.std, + da, + input_core_dims=[dims], + output_core_dims=[[]], + kwargs={"round_to": round_to, "skipna": skipna, "axis": np.arange(-len(dims), 0, 1)}, + ) + + def var(self, da, round_to=None, skipna=False, dim=None): + """Compute variance on DataArray input.""" + dims = validate_dims(dim) + + return apply_ufunc( + self.array_class.var, + da, + input_core_dims=[dims], + output_core_dims=[[]], + kwargs={"round_to": round_to, "skipna": skipna, "axis": np.arange(-len(dims), 0, 1)}, + ) + + def mad(self, da, round_to=None, skipna=False, dim=None): + """Compute median absolute deviation on DataArray input.""" + dims = validate_dims(dim) + + return apply_ufunc( + self.array_class.mad, + da, + input_core_dims=[dims], + output_core_dims=[[]], + kwargs={"round_to": round_to, "skipna": skipna, "axis": np.arange(-len(dims), 0, 1)}, + ) + + def iqr(self, da, quantiles=(0.25, 0.75), round_to=None, skipna=False, dim=None): + """Compute interquartile range on DataArray input.""" + dims = validate_dims(dim) + + return apply_ufunc( + self.array_class.iqr, + da, + input_core_dims=[dims], + output_core_dims=[[]], + kwargs={ + "quantiles": quantiles, + "round_to": round_to, + "skipna": skipna, + "axis": np.arange(-len(dims), 0, 1), + }, + ) + def srs_estimator(self, da, n_data_points): """Compute simple random sampling estimate for subsampled LOO on DataArray. diff --git a/src/arviz_stats/summary.py b/src/arviz_stats/summary.py index f563336..d56312b 100644 --- a/src/arviz_stats/summary.py +++ b/src/arviz_stats/summary.py @@ -10,7 +10,7 @@ from arviz_stats.utils import _apply_multi_input_function from arviz_stats.validate import validate_dims -__all__ = ["summary", "ci_in_rope", "mean", "median", "mode"] +__all__ = ["summary", "ci_in_rope", "mean", "median", "mode", "std", "var", "iqr", "mad"] def summary( @@ -168,14 +168,14 @@ def summary( mean_value = dataset.azstats.mean(dim=sample_dims, skipna=skipna).expand_dims( summary=["mean"] ) - std = dataset.std(dim=sample_dims, skipna=skipna).expand_dims(summary=["sd"]) + std_value = dataset.std(dim=sample_dims, skipna=skipna).expand_dims(summary=["sd"]) ci = ( dataset.azstats.eti(prob=ci_prob, dim=sample_dims, skipna=skipna) .rename({"ci_bound": "summary"}) .assign_coords(summary=[f"{ci_kind}{ci_perc}_lb", f"{ci_kind}{ci_perc}_ub"]) ) - to_concat.extend((mean_value, std, ci)) + to_concat.extend((mean_value, std_value, ci)) if kind in ["diagnostics", "all"]: ess_bulk = dataset.azstats.ess(sample_dims=sample_dims, method="bulk").expand_dims( @@ -198,7 +198,7 @@ def summary( median_value = dataset.azstats.median(dim=sample_dims, skipna=skipna).expand_dims( summary=["median"] ) - mad = stats.median_abs_deviation( + mad_value = stats.median_abs_deviation( dataset, dims=("chain", "draw"), nan_policy="omit" if skipna else "propagate" ).expand_dims(summary=["mad"]) ci = ( @@ -207,7 +207,7 @@ def summary( .assign_coords(summary=[f"eti{ci_perc}_lb", f"eti{ci_perc}_ub"]) ) - to_concat.extend((median_value, mad, ci)) + to_concat.extend((median_value, mad_value, ci)) if kind in ["diagnostics_median", "all_median"]: ess_median = dataset.azstats.ess(sample_dims=sample_dims, method="median").expand_dims( @@ -526,7 +526,7 @@ def mean( Returns ------- ndarray, DataArray, Dataset, DataTree - Requested mode of the provided input. + Requested mean of the provided input. See Also -------- @@ -631,7 +631,7 @@ def median( Returns ------- ndarray, DataArray, Dataset, DataTree - Requested mode of the provided input. + Requested median of the provided input. See Also -------- @@ -793,3 +793,424 @@ def mode( skipna=skipna, **kwargs, ) + + +def std( + data, + dim=None, + group="posterior", + var_names=None, + filter_vars=None, + coords=None, + round_to=None, + skipna=False, + **kwargs, +): + r"""Compute the standard deviation. + + The standard deviation is a measure of the amount of variation or dispersion of a set of values. + + Parameters + ---------- + data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like + Input data. It will have different pre-processing applied to it depending on its type: + + - array-like: call array layer within ``arviz-stats``. + - xarray object: apply dimension aware function to all relevant subsets + - others: passed to :func:`arviz_base.convert_to_dataset` then treated as + :class:`xarray.Dataset`. This option is discouraged due to needing this conversion + which is completely automated and will be needed again in future executions or + similar functions. + + It is recommended to first perform the conversion manually and then call + ``arviz_stats.mode``. This allows controlling the conversion step and inspecting + its results. + dim : sequence of hashable, optional + Dimensions over which to compute the mode. Defaults to ``rcParams["data.sample_dims"]``. + group : hashable, default "posterior" + Group on which to compute the mode + var_names : str or list of str, optional + Names of the variables for which the mode should be computed. + filter_vars : {None, "like", "regex"}, default None + coords : dict, optional + Dictionary of dimension/index names to coordinate values defining a subset + of the data for which to perform the computation. + round_to: int or str or None, optional + If integer, number of decimal places to round the result. Integers can be negative. + If string of the form '2g' number of significant digits to round the result. + Defaults to rcParams["stats.round_to"] if None. Use the string "None" or "none" to + return raw numbers. + skipna: bool, default False + If True, ignore NaN values. + **kwargs : any, optional + Forwarded to the array or dataarray interface for mode. + + Returns + ------- + ndarray, DataArray, Dataset, DataTree + Requested standard deviation of the provided input. + + See Also + -------- + :func:`arviz_stats.var`, :func:`arviz_stats.mad` + + + Examples + -------- + Calculate the standard deviation of a Normal random variable: + + .. ipython:: + + In [1]: import arviz_stats as azs + ...: import numpy as np + ...: data = np.random.default_rng().normal(size=2000) + ...: azs.std(data) + + Calculate the standard deviations for specific variables: + + .. ipython:: + + In [1]: import arviz_base as azb + ...: dt = azb.load_arviz_data("centered_eight") + ...: azs.std(dt, var_names=["mu", "theta"]) + + Calculate the standard deviations excluding the school dimension: + + .. ipython:: + + In [1]: azs.std(dt, dim=["chain", "draw"]) + """ + if round_to is None: + round_to = rcParams["stats.round_to"] + + return _apply_multi_input_function( + "std", + data, + dim, + "dim", + group=group, + var_names=var_names, + filter_vars=filter_vars, + coords=coords, + round_to=round_to, + skipna=skipna, + **kwargs, + ) + + +def var( + data, + dim=None, + group="posterior", + var_names=None, + filter_vars=None, + coords=None, + round_to=None, + skipna=False, + **kwargs, +): + r"""Compute the variance. + + The variance is a measure of the dispersion of a set of values. It is calculated as the + average of the squared differences from the mean. + + + Parameters + ---------- + data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like + Input data. It will have different pre-processing applied to it depending on its type: + + - array-like: call array layer within ``arviz-stats``. + - xarray object: apply dimension aware function to all relevant subsets + - others: passed to :func:`arviz_base.convert_to_dataset` then treated as + :class:`xarray.Dataset`. This option is discouraged due to needing this conversion + which is completely automated and will be needed again in future executions or + similar functions. + + It is recommended to first perform the conversion manually and then call + ``arviz_stats.mode``. This allows controlling the conversion step and inspecting + its results. + dim : sequence of hashable, optional + Dimensions over which to compute the mode. Defaults to ``rcParams["data.sample_dims"]``. + group : hashable, default "posterior" + Group on which to compute the mode + var_names : str or list of str, optional + Names of the variables for which the mode should be computed. + filter_vars : {None, "like", "regex"}, default None + coords : dict, optional + Dictionary of dimension/index names to coordinate values defining a subset + of the data for which to perform the computation. + round_to: int or str or None, optional + If integer, number of decimal places to round the result. Integers can be negative. + If string of the form '2g' number of significant digits to round the result. + Defaults to rcParams["stats.round_to"] if None. Use the string "None" or "none" to + return raw numbers. + skipna: bool, default False + If True, ignore NaN values. + **kwargs : any, optional + Forwarded to the array or dataarray interface for mode. + + Returns + ------- + ndarray, DataArray, Dataset, DataTree + Requested variance of the provided input. + + See Also + -------- + :func:`arviz_stats.std`, :func:`arviz_stats.mad` + + + Examples + -------- + Calculate the variance of a Normal random variable: + + .. ipython:: + + In [1]: import arviz_stats as azs + ...: import numpy as np + ...: data = np.random.default_rng().normal(size=2000) + ...: azs.var(data) + + Calculate the variances for specific variables: + + .. ipython:: + + In [1]: import arviz_base as azb + ...: dt = azb.load_arviz_data("centered_eight") + ...: azs.var(dt, var_names=["mu", "theta"]) + + Calculate the variances excluding the school dimension: + + .. ipython:: + + In [1]: azs.var(dt, dim=["chain", "draw"]) + """ + if round_to is None: + round_to = rcParams["stats.round_to"] + + return _apply_multi_input_function( + "var", + data, + dim, + "dim", + group=group, + var_names=var_names, + filter_vars=filter_vars, + coords=coords, + round_to=round_to, + skipna=skipna, + **kwargs, + ) + + +def mad( + data, + dim=None, + group="posterior", + var_names=None, + filter_vars=None, + coords=None, + round_to=None, + skipna=False, + **kwargs, +): + r"""Compute the median absolute deviation (MAD). + + The MAD is a robust measure of the variability of a univariate sample of quantitative data. + It is the median of the absolute deviations from the median of the data. + + Parameters + ---------- + data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like + Input data. It will have different pre-processing applied to it depending on its type: + + - array-like: call array layer within ``arviz-stats``. + - xarray object: apply dimension aware function to all relevant subsets + - others: passed to :func:`arviz_base.convert_to_dataset` then treated as + :class:`xarray.Dataset`. This option is discouraged due to needing this conversion + which is completely automated and will be needed again in future executions or + similar functions. + + It is recommended to first perform the conversion manually and then call + ``arviz_stats.mode``. This allows controlling the conversion step and inspecting + its results. + dim : sequence of hashable, optional + Dimensions over which to compute the mode. Defaults to ``rcParams["data.sample_dims"]``. + group : hashable, default "posterior" + Group on which to compute the mode + var_names : str or list of str, optional + Names of the variables for which the mode should be computed. + filter_vars : {None, "like", "regex"}, default None + coords : dict, optional + Dictionary of dimension/index names to coordinate values defining a subset + of the data for which to perform the computation. + round_to: int or str or None, optional + If integer, number of decimal places to round the result. Integers can be negative. + If string of the form '2g' number of significant digits to round the result. + Defaults to rcParams["stats.round_to"] if None. Use the string "None" or "none" to + return raw numbers. + skipna: bool, default False + If True, ignore NaN values. + **kwargs : any, optional + Forwarded to the array or dataarray interface for mode. + + Returns + ------- + ndarray, DataArray, Dataset, DataTree + Requested mode of the provided input. + + See Also + -------- + :func:`arviz_stats.iqr`, :func:`arviz_stats.std` + + + Examples + -------- + Calculate the median absolute deviation of a Normal random variable: + + .. ipython:: + + In [1]: import arviz_stats as azs + ...: import numpy as np + ...: data = np.random.default_rng().normal(size=2000) + ...: azs.mad(data) + + Calculate the median absolute deviations for specific variables: + + .. ipython:: + + In [1]: import arviz_base as azb + ...: dt = azb.load_arviz_data("centered_eight") + ...: azs.mad(dt, var_names=["mu", "theta"]) + + Calculate the median absolute deviations excluding the school dimension: + + .. ipython:: + + In [1]: azs.mad(dt, dim=["chain", "draw"]) + """ + if round_to is None: + round_to = rcParams["stats.round_to"] + + return _apply_multi_input_function( + "mad", + data, + dim, + "dim", + group=group, + var_names=var_names, + filter_vars=filter_vars, + coords=coords, + round_to=round_to, + skipna=skipna, + **kwargs, + ) + + +def iqr( + data, + dim=None, + group="posterior", + var_names=None, + filter_vars=None, + coords=None, + quantiles=(0.25, 0.75), + round_to=None, + skipna=False, + **kwargs, +): + r"""Compute the interquantile range. + + The interquantile range (IQR) is a measure of statistical dispersion, being equal to the + difference between two quantiles. The most common choice for the quantiles it to use the + 0.25 and 0.75 quantiles. This is known as the interquartile range. + + Parameters + ---------- + data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like + Input data. It will have different pre-processing applied to it depending on its type: + + - array-like: call array layer within ``arviz-stats``. + - xarray object: apply dimension aware function to all relevant subsets + - others: passed to :func:`arviz_base.convert_to_dataset` then treated as + :class:`xarray.Dataset`. This option is discouraged due to needing this conversion + which is completely automated and will be needed again in future executions or + similar functions. + + It is recommended to first perform the conversion manually and then call + ``arviz_stats.mode``. This allows controlling the conversion step and inspecting + its results. + dim : sequence of hashable, optional + Dimensions over which to compute the mode. Defaults to ``rcParams["data.sample_dims"]``. + group : hashable, default "posterior" + Group on which to compute the mode + var_names : str or list of str, optional + Names of the variables for which the mode should be computed. + filter_vars : {None, "like", "regex"}, default None + coords : dict, optional + Dictionary of dimension/index names to coordinate values defining a subset + of the data for which to perform the computation. + quantiles : tuple of float, default (0.25, 0.75) + Quantiles to use for the interquantile range calculation. Must be between 0 and 1. + round_to: int or str or None, optional + If integer, number of decimal places to round the result. Integers can be negative. + If string of the form '2g' number of significant digits to round the result. + Defaults to rcParams["stats.round_to"] if None. Use the string "None" or "none" to + return raw numbers. + skipna: bool, default False + If True, ignore NaN values. + **kwargs : any, optional + Forwarded to the array or dataarray interface for mode. + + Returns + ------- + ndarray, DataArray, Dataset, DataTree + Requested mode of the provided input. + + See Also + -------- + :func:`arviz_stats.mean`, :func:`arviz_stats.mode` + + + Examples + -------- + Calculate the interquantile range of a Normal random variable: + + .. ipython:: + + In [1]: import arviz_stats as azs + ...: import numpy as np + ...: data = np.random.default_rng().normal(size=2000) + ...: azs.iqr(data) + + Calculate the interquantile ranges for specific variables: + + .. ipython:: + + In [1]: import arviz_base as azb + ...: dt = azb.load_arviz_data("centered_eight") + ...: azs.iqr(dt, var_names=["mu", "theta"]) + + Calculate the interquantile ranges excluding the school dimension: + + .. ipython:: + + In [1]: azs.iqr(dt, dim=["chain", "draw"]) + """ + if round_to is None: + round_to = rcParams["stats.round_to"] + + return _apply_multi_input_function( + "iqr", + data, + dim, + "dim", + group=group, + var_names=var_names, + filter_vars=filter_vars, + coords=coords, + quantiles=quantiles, + round_to=round_to, + skipna=skipna, + **kwargs, + ) diff --git a/tests/base/test_array.py b/tests/base/test_array.py index d13d77d..db82341 100644 --- a/tests/base/test_array.py +++ b/tests/base/test_array.py @@ -1,6 +1,6 @@ """Tests for array interface base functions.""" -# pylint: disable=redefined-outer-name, no-self-use, protected-access +# pylint: disable=redefined-outer-name, no-self-use, protected-access, too-many-lines import pytest from arviz_stats.base.array import ( @@ -631,6 +631,66 @@ def test_mode_axis(self, array_stats, axis): assert result.ndim < ary.ndim +class TestStd: + def test_std_basic(self, array_stats): + rng = np.random.default_rng(42) + x = rng.normal(5, 1, 1000) + result = array_stats.std(x) + assert_allclose(result, 1, rtol=0.1) + + @pytest.mark.parametrize("axis", [0, 1, -1]) + def test_std_axis(self, array_stats, axis): + rng = np.random.default_rng(42) + ary = rng.normal(size=(10, 20, 30)) + result = array_stats.std(ary, axis=axis) + assert result.ndim < ary.ndim + + +class TestVar: + def test_var_basic(self, array_stats): + rng = np.random.default_rng(42) + x = rng.normal(5, 2, 1000) + result = array_stats.var(x) + assert_allclose(result, 4, rtol=0.1) + + @pytest.mark.parametrize("axis", [0, 1, -1]) + def test_var_axis(self, array_stats, axis): + rng = np.random.default_rng(42) + ary = rng.normal(size=(10, 20, 30)) + result = array_stats.var(ary, axis=axis) + assert result.ndim < ary.ndim + + +class TestMAD: + def test_mad_basic(self, array_stats): + rng = np.random.default_rng(42) + x = rng.normal(5, 1, 1000) + result = array_stats.mad(x) + assert_allclose(result, 0.67, rtol=0.1) + + @pytest.mark.parametrize("axis", [0, 1, -1]) + def test_mad_axis(self, array_stats, axis): + rng = np.random.default_rng(42) + ary = rng.normal(size=(10, 20, 30)) + result = array_stats.mad(ary, axis=axis) + assert result.ndim < ary.ndim + + +class TestIQR: + def test_iqr_basic(self, array_stats): + rng = np.random.default_rng(42) + x = rng.normal(5, 1, 1000) + result = array_stats.iqr(x) + assert_allclose(result, 1.3, rtol=0.1) + + @pytest.mark.parametrize("axis", [0, 1, -1]) + def test_iqr_axis(self, array_stats, axis): + rng = np.random.default_rng(42) + ary = rng.normal(size=(10, 20, 30)) + result = array_stats.iqr(ary, axis=axis) + assert result.ndim < ary.ndim + + class TestMetrics: def test_residual_r2_basic(self, array_stats): rng = np.random.default_rng(42) diff --git a/tests/base/test_core.py b/tests/base/test_core.py index 8ced592..bcb65fa 100644 --- a/tests/base/test_core.py +++ b/tests/base/test_core.py @@ -290,3 +290,99 @@ def test_mode_with_nan(self, core): x = np.array([1.0, 2.0, 2.0, np.nan, 2.0, 3.0]) result = core._mode(x) assert not np.isnan(result) + + +class TestStd: + def test_std_continuous(self, core, rng): + x = rng.normal(5, 1, 1000) + result = core._std(x) + assert_allclose(result, 1.0, rtol=0.1) + + def test_std_single_value(self, core): + x = np.array([5.0]) + result = core._std(x) + assert np.isnan(result) + + def test_std_empty(self, core): + x = np.array([]) + result = core._std(x) + assert np.isnan(result) + + def test_std_with_nan(self, core): + x = np.array([1.0, 2.0, 2.0, np.nan, 2.0, 3.0]) + result = core._std(x) + assert np.isnan(result) + result = core._std(x, skipna=True) + assert not np.isnan(result) + + +class TestVar: + def test_var_continuous(self, core, rng): + x = rng.normal(5, 2, 1000) + result = core._var(x) + assert_allclose(result, 4.0, rtol=0.1) + + def test_var_single_value(self, core): + x = np.array([5.0]) + result = core._var(x) + assert np.isnan(result) + + def test_var_empty(self, core): + x = np.array([]) + result = core._var(x) + assert np.isnan(result) + + def test_var_with_nan(self, core): + x = np.array([1.0, 2.0, 2.0, np.nan, 2.0, 3.0]) + result = core._var(x) + assert np.isnan(result) + result = core._var(x, skipna=True) + assert not np.isnan(result) + + +class TestMAD: + def test_mad_continuous(self, core, rng): + x = rng.normal(5, 1, 1000) + result = core._mad(x) + assert_allclose(result, 0.67, rtol=0.1) + + def test_mad_single_value(self, core): + x = np.array([5.0]) + result = core._mad(x) + assert result == 0.0 + + def test_mad_empty(self, core): + x = np.array([]) + result = core._mad(x) + assert np.isnan(result) + + def test_mad_with_nan(self, core): + x = np.array([1.0, 2.0, 2.0, np.nan, 2.0, 3.0]) + result = core._mad(x) + assert np.isnan(result) + result = core._mad(x, skipna=True) + assert not np.isnan(result) + + +class TestIQR: + def test_iqr_continuous(self, core, rng): + x = rng.normal(5, 1, 1000) + result = core._iqr(x) + assert_allclose(result, 1.34, rtol=0.1) + + def test_iqr_single_value(self, core): + x = np.array([5.0]) + result = core._iqr(x) + assert result == 0.0 + + def test_iqr_empty(self, core): + x = np.array([]) + result = core._iqr(x) + assert np.isnan(result) + + def test_iqr_with_nan(self, core): + x = np.array([1.0, 2.0, 2.0, np.nan, 2.0, 3.0]) + result = core._iqr(x) + assert np.isnan(result) + result = core._iqr(x, skipna=True) + assert not np.isnan(result) diff --git a/tests/test_minimal.py b/tests/test_minimal.py index 03dd399..23e7f42 100644 --- a/tests/test_minimal.py +++ b/tests/test_minimal.py @@ -358,6 +358,46 @@ def test_mode_multimodal(data_discrete_multimodal): assert mode.shape == (2,) +def test_std(data_discrete): + std = array_stats.std(data_discrete, axis=(0, 1)) + assert std.shape == (3,) + + +def test_std_multimodal(data_multimodal): + std = array_stats.std(data_multimodal, axis=(0, 1)) + assert std.shape == (2,) + + +def test_var(data_discrete): + var = array_stats.var(data_discrete, axis=(0, 1)) + assert var.shape == (3,) + + +def test_var_multimodal(data_multimodal): + var = array_stats.var(data_multimodal, axis=(0, 1)) + assert var.shape == (2,) + + +def test_mad(data_discrete): + mad = array_stats.mad(data_discrete, axis=(0, 1)) + assert mad.shape == (3,) + + +def test_mad_multimodal(data_multimodal): + mad = array_stats.mad(data_multimodal, axis=(0, 1)) + assert mad.shape == (2,) + + +def test_iqr(data_discrete): + iqr = array_stats.iqr(data_discrete, axis=(0, 1)) + assert iqr.shape == (3,) + + +def test_iqr_multimodal(data_multimodal): + iqr = array_stats.iqr(data_multimodal, axis=(0, 1)) + assert iqr.shape == (2,) + + @pytest.mark.parametrize("factor", [2, 5]) def test_thin_factor_values(data_c0d1, factor): thinned = array_stats.thin(data_c0d1, factor=factor, chain_axis=0, draw_axis=1) diff --git a/tests/test_summary.py b/tests/test_summary.py index 053bf7f..1ab953e 100644 --- a/tests/test_summary.py +++ b/tests/test_summary.py @@ -177,6 +177,58 @@ def test_mode(datatree): assert result.shape == () +def test_std(datatree): + result = mode(datatree) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior["mu"]) + assert result.shape == () + result = mode(datatree.posterior["mu"].values) + assert result.shape == () + + +def test_var(datatree): + result = mode(datatree) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior["mu"]) + assert result.shape == () + result = mode(datatree.posterior["mu"].values) + assert result.shape == () + + +def test_mad(datatree): + result = mode(datatree) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior["mu"]) + assert result.shape == () + result = mode(datatree.posterior["mu"].values) + assert result.shape == () + + +def test_iqr(datatree): + result = mode(datatree) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior) + assert result["mu"].shape == () + assert result["theta"].shape == (7,) + result = mode(datatree.posterior["mu"]) + assert result.shape == () + result = mode(datatree.posterior["mu"].values) + assert result.shape == () + + @pytest.mark.parametrize("filter_vars", [None, "like", "regex"]) def test_summary_filter_vars(datatree, filter_vars): if filter_vars == "like":